Merge 'Fix test_tablet_missing_data_repair' from Asias He
This PR fixes test_tablet_missing_data_repair and enables the test again. If a node is not UP yet, the repair in the test will be a partial repair. A partial repair does not repair all the data, which causes the check of rows after repair to fail. The fix is to check that the nodes see each other as UP before running the repair. Closes scylladb/scylladb#16930 * github.com:scylladb/scylladb: test: Enable test_tablet_missing_data_repair again test: Wait for nodes to be up when repair test: Check repair status in ScyllaRESTAPIClient
This commit is contained in:
@@ -292,7 +292,8 @@ class ScyllaRESTAPIClient():
|
||||
"""Repair the given table and wait for it to complete"""
|
||||
sequence_number = await self.client.post_json(f"/storage_service/repair_async/{keyspace}", host=node_ip, params={"columnFamilies": table})
|
||||
status = await self.client.get_json(f"/storage_service/repair_status", host=node_ip, params={"id": str(sequence_number)})
|
||||
return status
|
||||
if status != 'SUCCESSFUL':
|
||||
raise Exception(f"Repair id {sequence_number} on node {node_ip} for table {keyspace}.{table} failed: status={status}")
|
||||
|
||||
class ScyllaMetrics:
|
||||
def __init__(self, lines: list[str]):
|
||||
|
||||
@@ -66,6 +66,16 @@ async def get_tablet_replica(manager: ManagerClient, server: ServerInfo, keyspac
|
||||
replicas = await get_tablet_replicas(manager, server, keyspace_name, table_name, token)
|
||||
return replicas[0]
|
||||
|
||||
async def repair_on_node(manager: ManagerClient, server: ServerInfo, servers: list[ServerInfo]):
    """Repair table ``test.test`` from ``server`` after checking cluster liveness.

    ``manager.servers_see_each_other(servers)`` is awaited first; afterwards the
    alive endpoints reported by ``server`` must match the addresses of
    ``servers`` exactly (order-insensitive), otherwise the assertion fails.
    NOTE(review): presumably a repair started while a node is not yet seen as
    UP would only be partial, which is why liveness is verified up front.

    Args:
        manager: client used for the liveness wait and the REST API calls.
        server: node on which the repair is issued.
        servers: all nodes that are expected to be alive.

    Raises:
        AssertionError: if the alive endpoints differ from the expected ones.
    """
    node = server.ip_addr
    await manager.servers_see_each_other(servers)

    # Both address lists are sorted so the comparison ignores ordering.
    live_nodes_wanted = sorted(s.ip_addr for s in servers)
    live_nodes = await manager.api.get_alive_endpoints(node)
    live_nodes.sort()
    assert live_nodes == live_nodes_wanted

    logger.info(f"Repair table on node {node} live_nodes={live_nodes} live_nodes_wanted={live_nodes_wanted}")
    await manager.api.repair(node, "test", "test")
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_tablet_metadata_propagates_with_schema_changes_in_snapshot_mode(manager: ManagerClient):
|
||||
@@ -396,8 +406,7 @@ async def test_tablet_repair(manager: ManagerClient):
|
||||
keys = range(256)
|
||||
await asyncio.gather(*[cql.run_async(f"INSERT INTO test.test (pk, c) VALUES ({k}, {k});") for k in keys])
|
||||
|
||||
logger.info("Repair table")
|
||||
await manager.api.repair(servers[0].ip_addr, "test", "test")
|
||||
await repair_on_node(manager, servers[0], servers)
|
||||
|
||||
async def check():
|
||||
logger.info("Checking table")
|
||||
@@ -410,7 +419,6 @@ async def test_tablet_repair(manager: ManagerClient):
|
||||
|
||||
await cql.run_async("DROP KEYSPACE test;")
|
||||
|
||||
@pytest.mark.skip(reason="failing a lot, see https://github.com/scylladb/scylladb/issues/16859")
|
||||
@pytest.mark.repair
|
||||
@pytest.mark.asyncio
|
||||
async def test_tablet_missing_data_repair(manager: ManagerClient):
|
||||
@@ -440,8 +448,8 @@ async def test_tablet_missing_data_repair(manager: ManagerClient):
|
||||
logger.info(f"Started server {idx}");
|
||||
|
||||
await wait_for_cql_and_get_hosts(cql, servers, time.time() + 60)
|
||||
logger.info("Repair table")
|
||||
await manager.api.repair(servers[0].ip_addr, "test", "test")
|
||||
|
||||
await repair_on_node(manager, servers[0], servers)
|
||||
|
||||
async def check():
|
||||
logger.info("Checking table")
|
||||
|
||||
Reference in New Issue
Block a user