Merge 'repair: handle no_such_keyspace in repair preparation phase' from Aleksandra Martyniuk
Currently, data sync repair handles most no_such_keyspace exceptions, but it omits the preparation phase, where the exception could be thrown during make_global_effective_replication_map. Skip the keyspace repair if no_such_keyspace is thrown during preparations. Fixes: #22073. Requires backport to 6.1 and 6.2, as they contain the bug. Closes scylladb/scylladb#22473. * github.com:scylladb/scylladb: test: add test to check if repair handles no_such_keyspace; repair: handle keyspace dropped
This commit is contained in:
@@ -657,6 +657,8 @@ future<> global_vnode_effective_replication_map::get_keyspace_erms(sharded<repli
|
||||
// all under the lock.
|
||||
auto lk = co_await db.get_shared_token_metadata().get_lock();
|
||||
auto erm = db.find_keyspace(keyspace_name).get_vnode_effective_replication_map();
|
||||
utils::get_local_injector().inject("get_keyspace_erms_throw_no_such_keyspace",
|
||||
[&keyspace_name] { throw data_dictionary::no_such_keyspace{keyspace_name}; });
|
||||
auto ring_version = erm->get_token_metadata().get_ring_version();
|
||||
_erms[0] = make_foreign(std::move(erm));
|
||||
co_await coroutine::parallel_for_each(std::views::iota(1u, smp::count), [this, &sharded_db, keyspace_name, ring_version] (unsigned shard) -> future<> {
|
||||
|
||||
@@ -1499,7 +1499,16 @@ future<> repair::data_sync_repair_task_impl::run() {
|
||||
auto& keyspace = _status.keyspace;
|
||||
auto& sharded_db = rs.get_db();
|
||||
auto& db = sharded_db.local();
|
||||
auto germs = make_lw_shared(co_await locator::make_global_effective_replication_map(sharded_db, keyspace));
|
||||
auto germs_fut = co_await coroutine::as_future(locator::make_global_effective_replication_map(sharded_db, keyspace));
|
||||
if (germs_fut.failed()) {
|
||||
auto ex = germs_fut.get_exception();
|
||||
if (try_catch<data_dictionary::no_such_keyspace>(ex)) {
|
||||
rlogger.warn("sync data: keyspace {} does not exist, skipping", keyspace);
|
||||
co_return;
|
||||
}
|
||||
co_await coroutine::return_exception_ptr(std::move(ex));
|
||||
}
|
||||
auto germs = make_lw_shared(germs_fut.get());
|
||||
|
||||
auto id = get_repair_uniq_id();
|
||||
|
||||
|
||||
@@ -259,3 +259,19 @@ async def test_repair_abort(manager):
|
||||
await manager.api.client.get_json(f"/task_manager/wait_task/{id}", host=servers[0].ip_addr)
|
||||
statuses = await manager.api.client.get_json(f"/task_manager/task_status_recursive/{id}", host=servers[0].ip_addr)
|
||||
assert all([status["state"] == "failed" for status in statuses])
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@skip_mode('release', 'error injections are not supported in release mode')
|
||||
async def test_keyspace_drop_during_data_sync_repair(manager):
|
||||
cfg = {
|
||||
'enable_tablets': False,
|
||||
'error_injections_at_startup': ['get_keyspace_erms_throw_no_such_keyspace']
|
||||
}
|
||||
await manager.server_add(config=cfg)
|
||||
|
||||
cql = manager.get_cql()
|
||||
|
||||
cql.execute("CREATE KEYSPACE ks WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 2}")
|
||||
cql.execute("CREATE TABLE ks.tbl (pk int, ck int, PRIMARY KEY (pk, ck)) WITH tombstone_gc = {'mode': 'repair'}")
|
||||
|
||||
await manager.server_add(config=cfg)
|
||||
|
||||
Reference in New Issue
Block a user