diff --git a/locator/tablets.cc b/locator/tablets.cc index 94bee2628f..d944e27082 100644 --- a/locator/tablets.cc +++ b/locator/tablets.cc @@ -38,6 +38,8 @@ write_replica_set_selector get_selector_for_writes(tablet_transition_stage stage return write_replica_set_selector::next; case tablet_transition_stage::cleanup: return write_replica_set_selector::next; + case tablet_transition_stage::cleanup_target: + return write_replica_set_selector::previous; case tablet_transition_stage::revert_migration: return write_replica_set_selector::previous; case tablet_transition_stage::end_migration: @@ -61,6 +63,8 @@ read_replica_set_selector get_selector_for_reads(tablet_transition_stage stage) return read_replica_set_selector::next; case tablet_transition_stage::cleanup: return read_replica_set_selector::next; + case tablet_transition_stage::cleanup_target: + return read_replica_set_selector::previous; case tablet_transition_stage::revert_migration: return read_replica_set_selector::previous; case tablet_transition_stage::end_migration: @@ -279,6 +283,7 @@ static const std::unordered_map tablet_transit {tablet_transition_stage::streaming, "streaming"}, {tablet_transition_stage::use_new, "use_new"}, {tablet_transition_stage::cleanup, "cleanup"}, + {tablet_transition_stage::cleanup_target, "cleanup_target"}, {tablet_transition_stage::revert_migration, "revert_migration"}, {tablet_transition_stage::end_migration, "end_migration"}, }; diff --git a/locator/tablets.hh b/locator/tablets.hh index f2a6abdce1..85696ba9ab 100644 --- a/locator/tablets.hh +++ b/locator/tablets.hh @@ -157,6 +157,7 @@ enum class tablet_transition_stage { write_both_read_new, use_new, cleanup, + cleanup_target, revert_migration, end_migration, }; diff --git a/service/storage_service.cc b/service/storage_service.cc index a19744d5e1..d772ced8a6 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -5512,8 +5512,12 @@ future<> storage_service::cleanup_tablet(locator::global_tablet_id tablet) { if (leaving_replica.host != tm->get_my_id()) { throw std::runtime_error(fmt::format("Tablet {} has leaving replica different than this one", tablet)); } + } else if (trinfo->stage == locator::tablet_transition_stage::cleanup_target) { + if (trinfo->pending_replica.host != tm->get_my_id()) { + throw std::runtime_error(fmt::format("Tablet {} has pending replica different than this one", tablet)); + } } else { - throw std::runtime_error(fmt::format("Tablet {} stage is not at cleanup", tablet)); + throw std::runtime_error(fmt::format("Tablet {} stage is not at cleanup/cleanup_target", tablet)); } auto shard_opt = tmap.get_shard(tablet.tablet, tm->get_my_id()); diff --git a/service/tablet_allocator.cc b/service/tablet_allocator.cc index 300384d6a7..0a1d8de37e 100644 --- a/service/tablet_allocator.cc +++ b/service/tablet_allocator.cc @@ -425,6 +425,8 @@ private: return false; case tablet_transition_stage::cleanup: return false; + case tablet_transition_stage::cleanup_target: + return false; case tablet_transition_stage::revert_migration: return false; case tablet_transition_stage::end_migration: diff --git a/service/topology_coordinator.cc b/service/topology_coordinator.cc index a37964b0eb..e655b23265 100644 --- a/service/topology_coordinator.cc +++ b/service/topology_coordinator.cc @@ -1047,6 +1047,20 @@ class topology_coordinator : public endpoint_lifecycle_subscriber { transition_to(locator::tablet_transition_stage::end_migration); } break; + case locator::tablet_transition_stage::cleanup_target: + if (advance_in_background(gid, tablet_state.cleanup, "cleanup_target", [&] { + locator::tablet_replica dst = trinfo.pending_replica; + if (is_excluded(raft::server_id(dst.host.uuid()))) { + rtlogger.info("Tablet cleanup of {} on {} skipped because node is excluded and doesn't need to revert migration", gid, dst); + return make_ready_future<>(); + } + rtlogger.info("Initiating tablet cleanup of {} on {} to revert migration", gid, dst); + return ser::storage_service_rpc_verbs::send_tablet_cleanup(&_messaging, + netw::msg_addr(id2ip(dst.host)), _as, raft::server_id(dst.host.uuid()), gid); + })) { + transition_to(locator::tablet_transition_stage::revert_migration); + } + break; case locator::tablet_transition_stage::revert_migration: // Need a separate stage and a barrier after cleanup RPC to cut off stale RPCs. // See do_tablet_operation() doc.