repair: Add ignore_nodes option

In some cases, the user may want to repair the cluster while ignoring a
node that is down, for example, to run repair before running the
removenode operation to remove a dead node.

Currently, repair skips the dead node and keeps running without it, but
reports the repair as partial and as failed. It is hard to tell whether
the repair failed only because the dead node was not present or because
of some other error.

In order to exclude the dead node, one can use the hosts option. But it
is hard to understand and use, because one needs to list all the "good"
hosts, including the node itself. It would be much simpler if one could
just specify the nodes to exclude explicitly.

In addition, we already support an ignore nodes option in other node
operations like removenode. This change makes the interface for
explicitly ignoring a node more consistent.

Refs: #7806

Closes #8233
This commit is contained in:
Asias He
2021-03-08 10:23:26 +08:00
committed by Tomasz Grabiec
parent 2a41ad0b57
commit 61ac8d03b9
4 changed files with 47 additions and 7 deletions

View File

@@ -970,6 +970,14 @@
"type":"string",
"paramType":"query"
},
{
"name":"ignore_nodes",
"description":"Which hosts are to ignore in this repair. Multiple hosts can be listed separated by commas.",
"required":false,
"allowMultiple":false,
"type":"string",
"paramType":"query"
},
{
"name":"trace",
"description":"If the value is the string 'true' with any capitalization, enable tracing of the repair.",

View File

@@ -161,7 +161,7 @@ void unset_rpc_controller(http_context& ctx, routes& r) {
void set_repair(http_context& ctx, routes& r, sharded<netw::messaging_service>& ms) {
ss::repair_async.set(r, [&ctx, &ms](std::unique_ptr<request> req) {
static std::vector<sstring> options = {"primaryRange", "parallelism", "incremental",
"jobThreads", "ranges", "columnFamilies", "dataCenters", "hosts", "trace",
"jobThreads", "ranges", "columnFamilies", "dataCenters", "hosts", "ignore_nodes", "trace",
"startToken", "endToken" };
std::unordered_map<sstring, sstring> options_map;
for (auto o : options) {

View File

@@ -189,7 +189,8 @@ void remove_item(Collection& c, T& item) {
static std::vector<gms::inet_address> get_neighbors(database& db,
const sstring& ksname, query::range<dht::token> range,
const std::vector<sstring>& data_centers,
const std::vector<sstring>& hosts) {
const std::vector<sstring>& hosts,
const std::unordered_set<gms::inet_address>& ignore_nodes) {
keyspace& ks = db.find_keyspace(ksname);
auto& rs = ks.get_replication_strategy();
@@ -271,6 +272,11 @@ static std::vector<gms::inet_address> get_neighbors(database& db,
"part of the supplied list of hosts to use during the "
"repair (%s).", me, others, hosts));
}
} else if (!ignore_nodes.empty()) {
auto it = std::remove_if(ret.begin(), ret.end(), [&ignore_nodes] (const gms::inet_address& node) {
return ignore_nodes.contains(node);
});
ret.erase(it, ret.end());
}
return ret;
@@ -811,6 +817,7 @@ repair_info::repair_info(seastar::sharded<database>& db_,
repair_uniq_id id_,
const std::vector<sstring>& data_centers_,
const std::vector<sstring>& hosts_,
const std::unordered_set<gms::inet_address>& ignore_nodes_,
streaming::stream_reason reason_,
std::optional<utils::UUID> ops_uuid)
: db(db_)
@@ -824,6 +831,7 @@ repair_info::repair_info(seastar::sharded<database>& db_,
, shard(this_shard_id())
, data_centers(data_centers_)
, hosts(hosts_)
, ignore_nodes(ignore_nodes_)
, reason(reason_)
, nr_ranges_total(ranges.size())
, _row_level_repair(db.local().features().cluster_supports_row_level_repair())
@@ -930,7 +938,7 @@ void repair_info::check_in_abort() {
repair_neighbors repair_info::get_repair_neighbors(const dht::token_range& range) {
return neighbors.empty() ?
repair_neighbors(get_neighbors(db.local(), keyspace, range, data_centers, hosts)) :
repair_neighbors(get_neighbors(db.local(), keyspace, range, data_centers, hosts, ignore_nodes)) :
neighbors[range];
}
@@ -1311,6 +1319,10 @@ struct repair_options {
// range repaired, only the relevant subset of the hosts (holding a
// replica of this range) is used.
std::vector<sstring> hosts;
// ignore_nodes specifies the list of nodes to ignore in this repair,
// e.g., when the user knows a node is down and wants to run repair
// without that specific node.
std::vector<sstring> ignore_nodes;
// data_centers is used to restrict the repair to the local data center.
// The node starting the repair must be in the data center; Issuing a
// repair to a data center other than the named one returns an error.
@@ -1321,6 +1333,7 @@ struct repair_options {
ranges_opt(ranges, options, RANGES_KEY);
list_opt(column_families, options, COLUMNFAMILIES_KEY);
list_opt(hosts, options, HOSTS_KEY);
list_opt(ignore_nodes, options, IGNORE_NODES_KEY);
list_opt(data_centers, options, DATACENTERS_KEY);
// We currently do not support incremental repair. We could probably
// ignore this option as it is just an optimization, but for now,
@@ -1366,6 +1379,7 @@ struct repair_options {
static constexpr const char* COLUMNFAMILIES_KEY = "columnFamilies";
static constexpr const char* DATACENTERS_KEY = "dataCenters";
static constexpr const char* HOSTS_KEY = "hosts";
static constexpr const char* IGNORE_NODES_KEY = "ignore_nodes";
static constexpr const char* TRACE_KEY = "trace";
static constexpr const char* START_TOKEN = "startToken";
static constexpr const char* END_TOKEN = "endToken";
@@ -1600,6 +1614,20 @@ static int do_repair_start(seastar::sharded<database>& db, seastar::sharded<netw
throw std::runtime_error("Cannot combine data centers and hosts options.");
}
if (!options.ignore_nodes.empty() && !options.hosts.empty()) {
throw std::runtime_error("Cannot combine ignore_nodes and hosts options.");
}
std::unordered_set<gms::inet_address> ignore_nodes;
for (const auto& n: options.ignore_nodes) {
try {
auto node = gms::inet_address(n);
ignore_nodes.insert(node);
} catch(...) {
throw std::runtime_error(format("Failed to parse node={} in ignore_nodes={} specified by user: {}",
n, options.ignore_nodes, std::current_exception()));
}
}
if (!options.start_token.empty() || !options.end_token.empty()) {
// Intersect the list of local ranges with the given token range,
// dropping ranges with no intersection.
@@ -1636,17 +1664,17 @@ static int do_repair_start(seastar::sharded<database>& db, seastar::sharded<netw
// Do it in the background.
(void)repair_tracker().run(id, [&db, &ms, id, keyspace = std::move(keyspace),
cfs = std::move(cfs), ranges = std::move(ranges), options = std::move(options)] () mutable {
cfs = std::move(cfs), ranges = std::move(ranges), options = std::move(options), ignore_nodes = std::move(ignore_nodes)] () mutable {
std::vector<future<>> repair_results;
repair_results.reserve(smp::count);
auto table_ids = get_table_ids(db.local(), keyspace, cfs);
for (auto shard : boost::irange(unsigned(0), smp::count)) {
auto f = db.invoke_on(shard, [&db, &ms, keyspace, table_ids, id, ranges,
data_centers = options.data_centers, hosts = options.hosts] (database& localdb) mutable {
data_centers = options.data_centers, hosts = options.hosts, ignore_nodes] (database& localdb) mutable {
_node_ops_metrics.repair_total_ranges_sum += ranges.size();
auto ri = make_lw_shared<repair_info>(db, ms,
std::move(keyspace), std::move(ranges), std::move(table_ids),
id, std::move(data_centers), std::move(hosts), streaming::stream_reason::repair, id.uuid);
id, std::move(data_centers), std::move(hosts), std::move(ignore_nodes), streaming::stream_reason::repair, id.uuid);
return repair_ranges(ri);
});
repair_results.push_back(std::move(f));
@@ -1737,9 +1765,10 @@ static future<> sync_data_using_repair(seastar::sharded<database>& db,
auto f = db.invoke_on(shard, [&db, &ms, keyspace, table_ids, id, ranges, neighbors, reason, ops_uuid] (database& localdb) mutable {
auto data_centers = std::vector<sstring>();
auto hosts = std::vector<sstring>();
auto ignore_nodes = std::unordered_set<gms::inet_address>();
auto ri = make_lw_shared<repair_info>(db, ms,
std::move(keyspace), std::move(ranges), std::move(table_ids),
id, std::move(data_centers), std::move(hosts), reason, ops_uuid);
id, std::move(data_centers), std::move(hosts), std::move(ignore_nodes), reason, ops_uuid);
ri->neighbors = std::move(neighbors);
return repair_ranges(ri);
});

View File

@@ -21,6 +21,7 @@
#pragma once
#include <unordered_set>
#include <unordered_map>
#include <exception>
#include <absl/container/btree_set.h>
@@ -227,6 +228,7 @@ public:
shard_id shard;
std::vector<sstring> data_centers;
std::vector<sstring> hosts;
std::unordered_set<gms::inet_address> ignore_nodes;
streaming::stream_reason reason;
std::unordered_map<dht::token_range, repair_neighbors> neighbors;
uint64_t nr_ranges_finished = 0;
@@ -264,6 +266,7 @@ public:
repair_uniq_id id_,
const std::vector<sstring>& data_centers_,
const std::vector<sstring>& hosts_,
const std::unordered_set<gms::inet_address>& ingore_nodes_,
streaming::stream_reason reason_,
std::optional<utils::UUID> ops_uuid);
future<> do_streaming();