topology.tablets_migration: Add cleanup_target transition stage
The new stage will be used to revert migration that fails at some stages. The goal is to cleanup the pending replica, which may already received some writes by doing the cleanup RPC to the pending replica, then jumping to "revert_migration" stage introduced earlier. If pending node is dead, the call to cleanup RPC is skipped. Coordinators use old replicas. Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
This commit is contained in:
@@ -38,6 +38,8 @@ write_replica_set_selector get_selector_for_writes(tablet_transition_stage stage
|
||||
return write_replica_set_selector::next;
|
||||
case tablet_transition_stage::cleanup:
|
||||
return write_replica_set_selector::next;
|
||||
case tablet_transition_stage::cleanup_target:
|
||||
return write_replica_set_selector::previous;
|
||||
case tablet_transition_stage::revert_migration:
|
||||
return write_replica_set_selector::previous;
|
||||
case tablet_transition_stage::end_migration:
|
||||
@@ -61,6 +63,8 @@ read_replica_set_selector get_selector_for_reads(tablet_transition_stage stage)
|
||||
return read_replica_set_selector::next;
|
||||
case tablet_transition_stage::cleanup:
|
||||
return read_replica_set_selector::next;
|
||||
case tablet_transition_stage::cleanup_target:
|
||||
return read_replica_set_selector::previous;
|
||||
case tablet_transition_stage::revert_migration:
|
||||
return read_replica_set_selector::previous;
|
||||
case tablet_transition_stage::end_migration:
|
||||
@@ -279,6 +283,7 @@ static const std::unordered_map<tablet_transition_stage, sstring> tablet_transit
|
||||
{tablet_transition_stage::streaming, "streaming"},
|
||||
{tablet_transition_stage::use_new, "use_new"},
|
||||
{tablet_transition_stage::cleanup, "cleanup"},
|
||||
{tablet_transition_stage::cleanup_target, "cleanup_target"},
|
||||
{tablet_transition_stage::revert_migration, "revert_migration"},
|
||||
{tablet_transition_stage::end_migration, "end_migration"},
|
||||
};
|
||||
|
||||
@@ -157,6 +157,7 @@ enum class tablet_transition_stage {
|
||||
write_both_read_new,
|
||||
use_new,
|
||||
cleanup,
|
||||
cleanup_target,
|
||||
revert_migration,
|
||||
end_migration,
|
||||
};
|
||||
|
||||
@@ -5512,8 +5512,12 @@ future<> storage_service::cleanup_tablet(locator::global_tablet_id tablet) {
|
||||
if (leaving_replica.host != tm->get_my_id()) {
|
||||
throw std::runtime_error(fmt::format("Tablet {} has leaving replica different than this one", tablet));
|
||||
}
|
||||
} else if (trinfo->stage == locator::tablet_transition_stage::cleanup_target) {
|
||||
if (trinfo->pending_replica.host != tm->get_my_id()) {
|
||||
throw std::runtime_error(fmt::format("Tablet {} has pending replica different than this one", tablet));
|
||||
}
|
||||
} else {
|
||||
throw std::runtime_error(fmt::format("Tablet {} stage is not at cleanup", tablet));
|
||||
throw std::runtime_error(fmt::format("Tablet {} stage is not at cleanup/cleanup_target", tablet));
|
||||
}
|
||||
|
||||
auto shard_opt = tmap.get_shard(tablet.tablet, tm->get_my_id());
|
||||
|
||||
@@ -425,6 +425,8 @@ private:
|
||||
return false;
|
||||
case tablet_transition_stage::cleanup:
|
||||
return false;
|
||||
case tablet_transition_stage::cleanup_target:
|
||||
return false;
|
||||
case tablet_transition_stage::revert_migration:
|
||||
return false;
|
||||
case tablet_transition_stage::end_migration:
|
||||
|
||||
@@ -1047,6 +1047,20 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
transition_to(locator::tablet_transition_stage::end_migration);
|
||||
}
|
||||
break;
|
||||
case locator::tablet_transition_stage::cleanup_target:
|
||||
if (advance_in_background(gid, tablet_state.cleanup, "cleanup_target", [&] {
|
||||
locator::tablet_replica dst = trinfo.pending_replica;
|
||||
if (is_excluded(raft::server_id(dst.host.uuid()))) {
|
||||
rtlogger.info("Tablet cleanup of {} on {} skipped because node is excluded and doesn't need to revert migration", gid, dst);
|
||||
return make_ready_future<>();
|
||||
}
|
||||
rtlogger.info("Initiating tablet cleanup of {} on {} to revert migration", gid, dst);
|
||||
return ser::storage_service_rpc_verbs::send_tablet_cleanup(&_messaging,
|
||||
netw::msg_addr(id2ip(dst.host)), _as, raft::server_id(dst.host.uuid()), gid);
|
||||
})) {
|
||||
transition_to(locator::tablet_transition_stage::revert_migration);
|
||||
}
|
||||
break;
|
||||
case locator::tablet_transition_stage::revert_migration:
|
||||
// Need a separate stage and a barrier after cleanup RPC to cut off stale RPCs.
|
||||
// See do_tablet_operation() doc.
|
||||
|
||||
Reference in New Issue
Block a user