storage_proxy: metrics for per-partition rate limiting of reads

Adds a metric "read_rate_limited" which counts how many times a read
operation was rejected due to per-partition rate limiting. The metric
is registered twice under the same name, with a label that distinguishes
reads rejected by the coordinator from reads rejected by replicas.
This commit is contained in:
Piotr Dulikowski
2022-06-22 14:46:13 +02:00
parent 6e5d486970
commit 442901f14a
2 changed files with 18 additions and 0 deletions

View File

@@ -1672,6 +1672,14 @@ void storage_proxy_stats::stats::register_stats() {
sm::description("number read requests failed due to an \"unavailable\" error"),
{storage_proxy_stats::current_scheduling_group_label()}),
sm::make_total_operations("read_rate_limited", read_rate_limited_by_replicas._count,
sm::description("number of read requests which were rejected by replicas because rate limit for the partition was reached."),
{storage_proxy_stats::current_scheduling_group_label(), storage_proxy_stats::rejected_by_coordinator_label(false)}),
sm::make_total_operations("read_rate_limited", read_rate_limited_by_coordinator._count,
sm::description("number of read requests which were rejected directly on the coordinator because rate limit for the partition was reached."),
{storage_proxy_stats::current_scheduling_group_label(), storage_proxy_stats::rejected_by_coordinator_label(true)}),
sm::make_total_operations("range_timeouts", range_slice_timeouts._count,
sm::description("number of range read operations failed due to a timeout"),
{storage_proxy_stats::current_scheduling_group_label()}),
@@ -4251,6 +4259,14 @@ void storage_proxy::handle_read_error(std::variant<exceptions::coordinator_excep
get_stats().read_timeouts.mark();
}
return bo::success();
}), utils::result_catch<exceptions::rate_limit_exception>([&] (const auto& ex) {
slogger.debug("Read was rate limited");
if (ex.rejected_by_coordinator) {
get_stats().read_rate_limited_by_coordinator.mark();
} else {
get_stats().read_rate_limited_by_replicas.mark();
}
return bo::success();
}), utils::result_catch_dots([&] (auto&& handle) {
slogger.debug("Error during read query {}", handle.as_inner());
return bo::success();

View File

@@ -127,6 +127,8 @@ struct stats : public write_stats {
seastar::metrics::metric_groups _metrics;
utils::timed_rate_moving_average read_timeouts;
utils::timed_rate_moving_average read_unavailables;
utils::timed_rate_moving_average read_rate_limited_by_replicas;
utils::timed_rate_moving_average read_rate_limited_by_coordinator;
utils::timed_rate_moving_average range_slice_timeouts;
utils::timed_rate_moving_average range_slice_unavailables;