test/pylib: mark cluster as dirty after a failed test

We don't expect the cluster to be functioning at all after a failed
test. The whole cluster might have crashed, for example. In these
situations the framework would report multiple errors (one for the
actual failure, another for a failed post-condition check because the
cluster was down) which would only obscure the report and make debugging
harder. It's also not safe in general to reuse the cluster in another
test - if the test previous failed, we should not assume that it's in a
valid state.

Therefore, mark the cluster as dirty after a failed test. This will let
us recycle the cluster based on the dirty flag and it will disable
post-condition check after a failed test (which is only done on
non-dirty clusters).

To implement this in topology tests, we use the
`pytest_runtest_makereport` hook which executes after a test finishes
but before fixtures finish. There we store a test-failed flag in a stash
provided by pytest, then access the flag in the `manager` fixture.
This commit is contained in:
Kamil Braun
2023-01-26 17:33:32 +01:00
parent 977375d13f
commit a9dbd89478
4 changed files with 40 additions and 11 deletions

View File

@@ -706,7 +706,7 @@ class CQLApprovalTest(Test):
self.is_before_test_ok = True
cluster.take_log_savepoint()
self.is_executed_ok = await run_test(self, options, env=self.env)
cluster.after_test(self.uname)
cluster.after_test(self.uname, self.is_executed_ok)
self.is_after_test_ok = True
if self.is_executed_ok is False:
@@ -860,7 +860,7 @@ class PythonTest(Test):
self.is_before_test_ok = True
cluster.take_log_savepoint()
status = await run_test(self, options)
cluster.after_test(self.uname)
cluster.after_test(self.uname, status)
self.is_after_test_ok = True
self.success = status
except Exception as e:

View File

@@ -86,10 +86,10 @@ class ManagerClient():
# await self._wait_for_cluster()
await self.driver_connect() # Connect driver to new cluster
async def after_test(self, test_case_name: str) -> None:
async def after_test(self, test_case_name: str, success: bool) -> None:
"""Tell harness this test finished"""
logger.debug("after_test for %s", test_case_name)
cluster_str = await self.client.get_text(f"/cluster/after-test")
logger.debug("after_test for %s (success: %s)", test_case_name, success)
cluster_str = await self.client.get_text(f"/cluster/after-test/{success}")
logger.info("Cluster after test %s: %s", test_case_name, cluster_str)
async def is_manager_up(self) -> bool:

View File

@@ -740,10 +740,14 @@ class ScyllaCluster:
for server in self.running.values():
server.write_log_marker(f"------ Starting test {name} ------\n")
def after_test(self, name) -> None:
"""If the cluster is not dirty, check that it's still alive and the test
def after_test(self, name: str, success: bool) -> None:
"""Mark the cluster as dirty after a failed test.
If the cluster is not dirty, check that it's still alive and the test
hasn't left any garbage."""
assert self.start_exception is None
if not success:
self.logger.debug(f"Test failed using cluster {self.name}, marking the cluster as dirty")
self.is_dirty = True
if self.is_dirty:
self.logger.info(f"The cluster {self.name} is dirty, not checking"
f" keyspace count post-condition")
@@ -939,7 +943,7 @@ class ScyllaClusterManager:
add_get('/cluster/host-ip/{server_id}', self._cluster_server_ip_addr)
add_get('/cluster/host-id/{server_id}', self._cluster_host_id)
add_get('/cluster/before-test/{test_case_name}', self._before_test_req)
add_get('/cluster/after-test', self._after_test)
add_get('/cluster/after-test/{success}', self._after_test)
add_get('/cluster/mark-dirty', self._mark_dirty)
add_get('/cluster/server/{server_id}/stop', self._cluster_server_stop)
add_get('/cluster/server/{server_id}/stop_gracefully', self._cluster_server_stop_gracefully)
@@ -991,9 +995,11 @@ class ScyllaClusterManager:
async def _after_test(self, _request) -> aiohttp.web.Response:
assert self.cluster is not None
assert self.current_test_case_full_name
self.logger.info("Finished test %s, cluster: %s", self.current_test_case_full_name, self.cluster)
success = _request.match_info["success"] == "True"
self.logger.info("Test %s %s, cluster: %s", self.current_test_case_full_name,
"SUCCEEDED" if success else "FAILED", self.cluster)
try:
self.cluster.after_test(self.current_test_case_full_name)
self.cluster.after_test(self.current_test_case_full_name, success)
finally:
self.current_test_case_full_name = ''
self.is_after_test_ok = True

View File

@@ -37,6 +37,26 @@ def pytest_addoption(parser):
parser.addoption('--ssl', action='store_true',
help='Connect to CQL via an encrypted TLSv1.2 connection')
# This is a constant used in `pytest_runtest_makereport` below to store a flag
# indicating test failure in a stash which can then be accessed from fixtures.
FAILED_KEY = pytest.StashKey[bool]()
@pytest.hookimpl(tryfirst=True, hookwrapper=True)
def pytest_runtest_makereport(item, call):
"""This is a post-test hook execucted by the pytest library.
Use it to access the test result and store a flag indicating failure
so we can later retrieve it in our fixtures like `manager`.
`item.stash` is the same stash as `request.node.stash` (in the `request`
fixture provided by pytest).
"""
outcome = yield
report = outcome.get_result()
item.stash[FAILED_KEY] = report.when == "call" and report.failed
# Change default pytest-asyncio event_loop fixture scope to session to
# allow async fixtures with scope larger than function. (e.g. manager fixture)
# See https://github.com/pytest-dev/pytest-asyncio/issues/68
@@ -159,7 +179,10 @@ async def manager(request, manager_internal):
test_case_name = request.node.name
await manager_internal.before_test(test_case_name)
yield manager_internal
await manager_internal.after_test(test_case_name)
# `request.node.stash` contains a flag stored in `pytest_runtest_makereport`
# that indicates test failure.
failed = request.node.stash[FAILED_KEY]
await manager_internal.after_test(test_case_name, not failed)
# "cql" fixture: set up client object for communicating with the CQL API.
# Since connection is managed by manager just return that object