storage_service: get_ignore_dead_nodes_for_replace: use tm.parse_host_id_and_endpoint

Allow specifying the dead node to ignore either as host_id
or ip address.

Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
This commit is contained in:
Benny Halevy
2022-10-25 09:18:24 +03:00
parent b74807cb8a
commit 40cd685371
3 changed files with 9 additions and 8 deletions

View File

@@ -790,7 +790,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
, load_ring_state(this, "load_ring_state", value_status::Used, true, "When set to true, load tokens and host_ids previously saved. Same as -Dcassandra.load_ring_state in cassandra.")
, replace_address(this, "replace_address", value_status::Used, "", "The listen_address or broadcast_address of the dead node to replace. Same as -Dcassandra.replace_address.")
, replace_address_first_boot(this, "replace_address_first_boot", value_status::Used, "", "Like replace_address option, but if the node has been bootstrapped successfully it will be ignored. Same as -Dcassandra.replace_address_first_boot.")
, ignore_dead_nodes_for_replace(this, "ignore_dead_nodes_for_replace", value_status::Used, "", "List dead nodes to ingore for replace operation. E.g., scylla --ignore-dead-nodes-for-replace 127.0.0.1,127.0.0.2")
, ignore_dead_nodes_for_replace(this, "ignore_dead_nodes_for_replace", value_status::Used, "", "List dead nodes to ingore for replace operation using a comman-separated list of either host IDs or ip addresses. E.g., scylla --ignore-dead-nodes-for-replace 8d5ed9f4-7764-4dbd-bad8-43fddce94b7c,125ed9f4-7777-1dbn-mac8-43fddce9123e")
, override_decommission(this, "override_decommission", value_status::Used, false, "Set true to force a decommissioned node to join the cluster")
, enable_repair_based_node_ops(this, "enable_repair_based_node_ops", liveness::LiveUpdate, value_status::Used, true, "Set true to use enable repair based node operations instead of streaming based")
, allowed_repair_based_node_ops(this, "allowed_repair_based_node_ops", liveness::LiveUpdate, value_status::Used, "replace", "A comma separated list of node operations which are allowed to enable repair based node operations. The operations can be bootstrap, replace, removenode, decommission and rebuild")

View File

@@ -618,7 +618,7 @@ future<> storage_service::mark_existing_views_as_built(sharded<db::system_distri
});
}
std::list<gms::inet_address> storage_service::get_ignore_dead_nodes_for_replace() {
std::list<gms::inet_address> storage_service::get_ignore_dead_nodes_for_replace(const token_metadata& tm) {
std::vector<sstring> ignore_nodes_strs;
std::list<gms::inet_address> ignore_nodes;
boost::split(ignore_nodes_strs, _db.local().get_config().ignore_dead_nodes_for_replace(), boost::is_any_of(","));
@@ -628,11 +628,11 @@ std::list<gms::inet_address> storage_service::get_ignore_dead_nodes_for_replace(
std::replace(n.begin(), n.end(), '\'', ' ');
boost::trim_all(n);
if (!n.empty()) {
auto node = gms::inet_address(n);
ignore_nodes.push_back(node);
auto ep_and_id = tm.parse_host_id_and_endpoint(n);
ignore_nodes.push_back(ep_and_id.endpoint);
}
} catch (...) {
throw std::runtime_error(format("Failed to parse --ignore-dead-nodes-for-replace parameter: ignore_nodes={}, node={}", ignore_nodes_strs, n));
throw std::runtime_error(format("Failed to parse --ignore-dead-nodes-for-replace parameter: ignore_nodes={}, node={}: {}", ignore_nodes_strs, n, std::current_exception()));
}
}
return ignore_nodes;
@@ -2204,7 +2204,8 @@ void storage_service::run_bootstrap_ops(std::unordered_set<token>& bootstrap_tok
void storage_service::run_replace_ops(std::unordered_set<token>& bootstrap_tokens) {
auto replace_address = get_replace_address().value();
auto uuid = utils::make_random_uuid();
std::list<gms::inet_address> ignore_nodes = get_ignore_dead_nodes_for_replace();
auto tmptr = get_token_metadata_ptr();
std::list<gms::inet_address> ignore_nodes = get_ignore_dead_nodes_for_replace(*tmptr);
// Step 1: Decide who needs to sync data for replace operation
std::list<gms::inet_address> sync_nodes;
for (const auto& x :_gossiper.get_endpoint_states()) {
@@ -2248,7 +2249,7 @@ void storage_service::run_replace_ops(std::unordered_set<token>& bootstrap_token
throw std::runtime_error(msg);
}
if (!nodes_down.empty()) {
auto msg = format("replace[{}]: Nodes={} needed for replace operation are down. It is highly recommended to fix the down nodes and try again. To proceed with best-effort mode which might cause data inconsistency, add --ignore-dead-nodes-for-replace <list_of_dead_nodes>. E.g., scylla --ignore-dead-nodes-for-replace 127.0.0.1,127.0.0.2", uuid, nodes_down);
auto msg = format("replace[{}]: Nodes={} needed for replace operation are down. It is highly recommended to fix the down nodes and try again. To proceed with best-effort mode which might cause data inconsistency, add --ignore-dead-nodes-for-replace <list_of_dead_nodes>. E.g., scylla --ignore-dead-nodes-for-replace 8d5ed9f4-7764-4dbd-bad8-43fddce94b7c,125ed9f4-7777-1dbn-mac8-43fddce9123e", uuid, nodes_down);
slogger.warn("{}", msg);
throw std::runtime_error(msg);
}

View File

@@ -298,7 +298,7 @@ private:
void run_replace_ops(std::unordered_set<token>& bootstrap_tokens);
void run_bootstrap_ops(std::unordered_set<token>& bootstrap_tokens);
std::list<gms::inet_address> get_ignore_dead_nodes_for_replace();
std::list<gms::inet_address> get_ignore_dead_nodes_for_replace(const locator::token_metadata& tm);
future<> wait_for_ring_to_settle(std::chrono::milliseconds delay);
public: