storage_proxy: Make split_stats resilient to being called from different scheduling group
Fixes #11017 When doing writes, storage proxy creates types deriving from abstract_write_response_handler. These are created in the various scheduling groups executing the write inducing code. They pick up a group-local reference to the various metrics used by SP. Normally all code using (and esp. modifying) these metrics are executed in the same scheduling group. However, if gossip sees a node go down, it will notify listeners, which eventually calls get_ep_stat and register_metrics. This code (before this patch) uses _active_ scheduling group to eventually add metrics, using a local dict as guard against double regs. If, as described above, we're called in a different sched group than the original one however, this can cause double registrations. Fixed here by keeping a reference to creating scheduling group and using this, not active one, when/if creating new metrics. Closes #14294
This commit is contained in:
committed by
Botond Dénes
parent
643e69af89
commit
f18e967939
@@ -116,8 +116,12 @@ static const seastar::metrics::label op_type_label("op_type");
|
||||
static const seastar::metrics::label scheduling_group_label("scheduling_group_name");
|
||||
static const seastar::metrics::label rejected_by_coordinator_label("rejected_by_coordinator");
|
||||
|
||||
seastar::metrics::label_instance make_scheduling_group_label(const scheduling_group& sg) {
|
||||
return scheduling_group_label(sg.name());
|
||||
}
|
||||
|
||||
seastar::metrics::label_instance current_scheduling_group_label() {
|
||||
return scheduling_group_label(current_scheduling_group().name());
|
||||
return make_scheduling_group_label(current_scheduling_group());
|
||||
}
|
||||
|
||||
}
|
||||
@@ -2376,7 +2380,8 @@ storage_proxy_stats::split_stats::split_stats(const sstring& category, const sst
|
||||
, _long_description_prefix(long_description_prefix)
|
||||
, _category(category)
|
||||
, _op_type(op_type)
|
||||
, _auto_register_metrics(auto_register_metrics) { }
|
||||
, _auto_register_metrics(auto_register_metrics)
|
||||
, _sg(current_scheduling_group()) { }
|
||||
|
||||
storage_proxy_stats::write_stats::write_stats()
|
||||
: writes_attempts(COORDINATOR_STATS_CATEGORY, "total_write_attempts", "total number of write requests", "mutation_data")
|
||||
@@ -2693,7 +2698,7 @@ void storage_proxy_stats::split_stats::register_metrics_local() {
|
||||
auto new_metrics = sm::metric_groups();
|
||||
new_metrics.add_group(_category, {
|
||||
sm::make_counter(_short_description_prefix + sstring("_local_node"), [this] { return _local.val; },
|
||||
sm::description(_long_description_prefix + "on a local Node"), {storage_proxy_stats::current_scheduling_group_label(), op_type_label(_op_type)})
|
||||
sm::description(_long_description_prefix + "on a local Node"), {storage_proxy_stats::make_scheduling_group_label(_sg), op_type_label(_op_type)})
|
||||
});
|
||||
_metrics = std::exchange(new_metrics, {});
|
||||
}
|
||||
@@ -2706,7 +2711,7 @@ void storage_proxy_stats::split_stats::register_metrics_for(sstring dc, gms::ine
|
||||
if (auto [ignored, added] = _dc_stats.try_emplace(dc); added) {
|
||||
_metrics.add_group(_category, {
|
||||
sm::make_counter(_short_description_prefix + sstring("_remote_node"), [this, dc] { return _dc_stats[dc].val; },
|
||||
sm::description(seastar::format("{} when communicating with external Nodes in another DC", _long_description_prefix)), {storage_proxy_stats::current_scheduling_group_label(), datacenter_label(dc), op_type_label(_op_type)})
|
||||
sm::description(seastar::format("{} when communicating with external Nodes in another DC", _long_description_prefix)), {storage_proxy_stats::make_scheduling_group_label(_sg), datacenter_label(dc), op_type_label(_op_type)})
|
||||
.set_skip_when_empty()
|
||||
});
|
||||
}
|
||||
|
||||
@@ -44,6 +44,7 @@ private:
|
||||
// whether to register per-endpoint metrics automatically
|
||||
bool _auto_register_metrics;
|
||||
|
||||
scheduling_group _sg;
|
||||
public:
|
||||
/**
|
||||
* @param category a statistics category, e.g. "client" or "replica"
|
||||
|
||||
@@ -103,3 +103,33 @@ SEASTAR_TEST_CASE(test_get_restricted_ranges) {
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_split_stats) {
|
||||
auto ep1 = gms::inet_address("127.0.0.1");
|
||||
auto sg1 = create_scheduling_group("apa1", 100).get();
|
||||
auto sg2 = create_scheduling_group("apa2", 100).get();
|
||||
|
||||
std::optional<service::storage_proxy_stats::split_stats> stats1, stats2;
|
||||
|
||||
// pretending to be abstract_write_response_handler type.
|
||||
// created in various scheduling groups, in which they
|
||||
// instantiate group-local split_stats.
|
||||
with_scheduling_group(sg1, [&] {
|
||||
stats1.emplace("tuta", "nils", "en nils", "nilsa", true);
|
||||
}).get0();
|
||||
|
||||
with_scheduling_group(sg2, [&] {
|
||||
stats2.emplace("tuta", "nils", "en nils", "nilsa", true);
|
||||
}).get0();
|
||||
|
||||
// simulating the calling of storage_proxy::on_down, from gossip
|
||||
// on node dropping out. If inside a write operation, we'll pick up
|
||||
// write handlers and to "timeout_cb" on them, which in turn might
|
||||
// call get_ep_stat, which evenually calls register_metrics for
|
||||
// the DC written to.
|
||||
// Point being is that either the above should not happen, or
|
||||
// split_stats should be resilient to being called from different
|
||||
// scheduling group.
|
||||
stats1->register_metrics_for("DC1", ep1);
|
||||
stats2->register_metrics_for("DC1", ep1);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user