topology.tablets_migration: Add cleanup_target transition stage

The new stage will be used to revert a migration that fails at some
stages. The goal is to clean up the pending replica, which may have
already received some writes, by sending the cleanup RPC to it and
then jumping to the "revert_migration" stage introduced earlier.

If the pending node is dead (excluded), the cleanup RPC call is skipped.

While in this stage, coordinators use the old (previous) replica set
for both reads and writes.

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
This commit is contained in:
Pavel Emelyanov
2024-02-15 18:45:41 +03:00
parent ced5bf56eb
commit 72f3b1d5fe
5 changed files with 27 additions and 1 deletions

View File

@@ -38,6 +38,8 @@ write_replica_set_selector get_selector_for_writes(tablet_transition_stage stage
return write_replica_set_selector::next;
case tablet_transition_stage::cleanup:
return write_replica_set_selector::next;
case tablet_transition_stage::cleanup_target:
return write_replica_set_selector::previous;
case tablet_transition_stage::revert_migration:
return write_replica_set_selector::previous;
case tablet_transition_stage::end_migration:
@@ -61,6 +63,8 @@ read_replica_set_selector get_selector_for_reads(tablet_transition_stage stage)
return read_replica_set_selector::next;
case tablet_transition_stage::cleanup:
return read_replica_set_selector::next;
case tablet_transition_stage::cleanup_target:
return read_replica_set_selector::previous;
case tablet_transition_stage::revert_migration:
return read_replica_set_selector::previous;
case tablet_transition_stage::end_migration:
@@ -279,6 +283,7 @@ static const std::unordered_map<tablet_transition_stage, sstring> tablet_transit
{tablet_transition_stage::streaming, "streaming"},
{tablet_transition_stage::use_new, "use_new"},
{tablet_transition_stage::cleanup, "cleanup"},
{tablet_transition_stage::cleanup_target, "cleanup_target"},
{tablet_transition_stage::revert_migration, "revert_migration"},
{tablet_transition_stage::end_migration, "end_migration"},
};

View File

@@ -157,6 +157,7 @@ enum class tablet_transition_stage {
write_both_read_new,
use_new,
cleanup,
cleanup_target,
revert_migration,
end_migration,
};

View File

@@ -5512,8 +5512,12 @@ future<> storage_service::cleanup_tablet(locator::global_tablet_id tablet) {
if (leaving_replica.host != tm->get_my_id()) {
throw std::runtime_error(fmt::format("Tablet {} has leaving replica different than this one", tablet));
}
} else if (trinfo->stage == locator::tablet_transition_stage::cleanup_target) {
if (trinfo->pending_replica.host != tm->get_my_id()) {
throw std::runtime_error(fmt::format("Tablet {} has pending replica different than this one", tablet));
}
} else {
throw std::runtime_error(fmt::format("Tablet {} stage is not at cleanup", tablet));
throw std::runtime_error(fmt::format("Tablet {} stage is not at cleanup/cleanup_target", tablet));
}
auto shard_opt = tmap.get_shard(tablet.tablet, tm->get_my_id());

View File

@@ -425,6 +425,8 @@ private:
return false;
case tablet_transition_stage::cleanup:
return false;
case tablet_transition_stage::cleanup_target:
return false;
case tablet_transition_stage::revert_migration:
return false;
case tablet_transition_stage::end_migration:

View File

@@ -1047,6 +1047,20 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
transition_to(locator::tablet_transition_stage::end_migration);
}
break;
case locator::tablet_transition_stage::cleanup_target:
if (advance_in_background(gid, tablet_state.cleanup, "cleanup_target", [&] {
locator::tablet_replica dst = trinfo.pending_replica;
if (is_excluded(raft::server_id(dst.host.uuid()))) {
rtlogger.info("Tablet cleanup of {} on {} skipped because node is excluded and doesn't need to revert migration", gid, dst);
return make_ready_future<>();
}
rtlogger.info("Initiating tablet cleanup of {} on {} to revert migration", gid, dst);
return ser::storage_service_rpc_verbs::send_tablet_cleanup(&_messaging,
netw::msg_addr(id2ip(dst.host)), _as, raft::server_id(dst.host.uuid()), gid);
})) {
transition_to(locator::tablet_transition_stage::revert_migration);
}
break;
case locator::tablet_transition_stage::revert_migration:
// Need a separate stage and a barrier after cleanup RPC to cut off stale RPCs.
// See do_tablet_operation() doc.