From 92f8f2c2dbe5622eb58b35c97dc439f8e6e41c6c Mon Sep 17 00:00:00 2001
From: Asias He
Date: Mon, 13 Apr 2026 08:38:54 +0800
Subject: [PATCH] test: Stabilize tablet incremental repair error test

Use async tablet repair task flow to avoid a race where client timeout
returns while server-side repair continues after injections are
disabled.

Start repair with await_completion=false, assert it does not complete
within timeout under injection, abort/wait the task, then verify
sstables_repaired_at is unchanged.

Fixes SCYLLADB-1184

Closes scylladb/scylladb#29452

(cherry picked from commit 4137a4229c308a06caef31c79a3ffb0b6721704f)

Closes scylladb/scylladb#29500
---
 test/cluster/test_incremental_repair.py | 26 +++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/test/cluster/test_incremental_repair.py b/test/cluster/test_incremental_repair.py
index 1dede2ad88..273e6ea0c6 100644
--- a/test/cluster/test_incremental_repair.py
+++ b/test/cluster/test_incremental_repair.py
@@ -312,14 +312,28 @@ async def test_tablet_incremental_repair_error(manager: ManagerClient):
     token = -1
     map0 = await load_tablet_sstables_repaired_at(manager, cql, servers[0], hosts[0], table_id)
 
-    # Repair should not finish with error
+    # Repair should not finish while the injection is enabled. We abort the task
+    # before turning the injection off, otherwise it may continue in background
+    # and increase sstables_repaired_at.
     await inject_error_on(manager, "repair_tablet_fail_on_rpc_call", servers)
     try:
-        await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, incremental_mode='incremental', timeout=10)
-        assert False # Check the tablet repair is not supposed to finish
-    except TimeoutError:
-        logger.info("Repair timeout as expected")
-    await inject_error_off(manager, "repair_tablet_fail_on_rpc_call", servers)
+        repair_response = await manager.api.tablet_repair(
+            servers[0].ip_addr,
+            ks,
+            "test",
+            token,
+            await_completion=False,
+            incremental_mode='incremental',
+        )
+        task_id = repair_response['tablet_task_id']
+
+        with pytest.raises(asyncio.TimeoutError):
+            await asyncio.wait_for(manager.api.wait_task(servers[0].ip_addr, task_id), timeout=10)
+
+        await manager.api.abort_task(servers[0].ip_addr, task_id)
+        await manager.api.wait_task(servers[0].ip_addr, task_id)
+    finally:
+        await inject_error_off(manager, "repair_tablet_fail_on_rpc_call", servers)
 
     map1 = await load_tablet_sstables_repaired_at(manager, cql, servers[0], hosts[0], table_id)
 