database: Reduce the number of per-table metrics
This patch reduces the number of metrics that is reported per table, when the per-table flag is on. When possible, it moves from time_estimated_histogram and timed_rate_moving_average_and_histogram to use the unified timer. Instead of a histogram per shard, it will now report a summary per shard and a histogram per node. Counters, histograms, and summaries will not be reported if they were never used. The API was updated accordingly so it would not break. Signed-off-by: Amnon Heiman <amnon@scylladb.com>
This commit is contained in:
@@ -79,14 +79,14 @@ future<json::json_return_type> get_cf_stats(http_context& ctx,
|
||||
}
|
||||
|
||||
static future<json::json_return_type> get_cf_stats_count(http_context& ctx, const sstring& name,
|
||||
utils::timed_rate_moving_average_and_histogram replica::column_family_stats::*f) {
|
||||
utils::timed_rate_moving_average_summary_and_histogram replica::column_family_stats::*f) {
|
||||
return map_reduce_cf(ctx, name, int64_t(0), [f](const replica::column_family& cf) {
|
||||
return (cf.get_stats().*f).hist.count;
|
||||
}, std::plus<int64_t>());
|
||||
}
|
||||
|
||||
static future<json::json_return_type> get_cf_stats_sum(http_context& ctx, const sstring& name,
|
||||
utils::timed_rate_moving_average_and_histogram replica::column_family_stats::*f) {
|
||||
utils::timed_rate_moving_average_summary_and_histogram replica::column_family_stats::*f) {
|
||||
auto uuid = get_uuid(name, ctx.db.local());
|
||||
return ctx.db.map_reduce0([uuid, f](replica::database& db) {
|
||||
// Histograms information is sample of the actual load
|
||||
@@ -102,7 +102,7 @@ static future<json::json_return_type> get_cf_stats_sum(http_context& ctx, const
|
||||
|
||||
|
||||
static future<json::json_return_type> get_cf_stats_count(http_context& ctx,
|
||||
utils::timed_rate_moving_average_and_histogram replica::column_family_stats::*f) {
|
||||
utils::timed_rate_moving_average_summary_and_histogram replica::column_family_stats::*f) {
|
||||
return map_reduce_cf(ctx, int64_t(0), [f](const replica::column_family& cf) {
|
||||
return (cf.get_stats().*f).hist.count;
|
||||
}, std::plus<int64_t>());
|
||||
@@ -120,7 +120,19 @@ static future<json::json_return_type> get_cf_histogram(http_context& ctx, const
|
||||
});
|
||||
}
|
||||
|
||||
static future<json::json_return_type> get_cf_histogram(http_context& ctx, utils::timed_rate_moving_average_and_histogram replica::column_family_stats::*f) {
|
||||
static future<json::json_return_type> get_cf_histogram(http_context& ctx, const sstring& name,
|
||||
utils::timed_rate_moving_average_summary_and_histogram replica::column_family_stats::*f) {
|
||||
utils::UUID uuid = get_uuid(name, ctx.db.local());
|
||||
return ctx.db.map_reduce0([f, uuid](const replica::database& p) {
|
||||
return (p.find_column_family(uuid).get_stats().*f).hist;},
|
||||
utils::ihistogram(),
|
||||
std::plus<utils::ihistogram>())
|
||||
.then([](const utils::ihistogram& val) {
|
||||
return make_ready_future<json::json_return_type>(to_json(val));
|
||||
});
|
||||
}
|
||||
|
||||
static future<json::json_return_type> get_cf_histogram(http_context& ctx, utils::timed_rate_moving_average_summary_and_histogram replica::column_family_stats::*f) {
|
||||
std::function<utils::ihistogram(const replica::database&)> fun = [f] (const replica::database& db) {
|
||||
utils::ihistogram res;
|
||||
for (auto i : db.get_column_families()) {
|
||||
@@ -136,7 +148,7 @@ static future<json::json_return_type> get_cf_histogram(http_context& ctx, utils:
|
||||
}
|
||||
|
||||
static future<json::json_return_type> get_cf_rate_and_histogram(http_context& ctx, const sstring& name,
|
||||
utils::timed_rate_moving_average_and_histogram replica::column_family_stats::*f) {
|
||||
utils::timed_rate_moving_average_summary_and_histogram replica::column_family_stats::*f) {
|
||||
utils::UUID uuid = get_uuid(name, ctx.db.local());
|
||||
return ctx.db.map_reduce0([f, uuid](const replica::database& p) {
|
||||
return (p.find_column_family(uuid).get_stats().*f).rate();},
|
||||
@@ -147,7 +159,7 @@ static future<json::json_return_type> get_cf_rate_and_histogram(http_context& c
|
||||
});
|
||||
}
|
||||
|
||||
static future<json::json_return_type> get_cf_rate_and_histogram(http_context& ctx, utils::timed_rate_moving_average_and_histogram replica::column_family_stats::*f) {
|
||||
static future<json::json_return_type> get_cf_rate_and_histogram(http_context& ctx, utils::timed_rate_moving_average_summary_and_histogram replica::column_family_stats::*f) {
|
||||
std::function<utils::rate_moving_average_and_histogram(const replica::database&)> fun = [f] (const replica::database& db) {
|
||||
utils::rate_moving_average_and_histogram res;
|
||||
for (auto i : db.get_column_families()) {
|
||||
@@ -803,19 +815,19 @@ void set_column_family(http_context& ctx, routes& r) {
|
||||
|
||||
cf::get_cas_prepare.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
return map_reduce_cf_time_histogram(ctx, req->param["name"], [](const replica::column_family& cf) {
|
||||
return cf.get_stats().estimated_cas_prepare;
|
||||
return cf.get_stats().cas_prepare.histogram();
|
||||
});
|
||||
});
|
||||
|
||||
cf::get_cas_propose.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
return map_reduce_cf_time_histogram(ctx, req->param["name"], [](const replica::column_family& cf) {
|
||||
return cf.get_stats().estimated_cas_accept;
|
||||
return cf.get_stats().cas_accept.histogram();
|
||||
});
|
||||
});
|
||||
|
||||
cf::get_cas_commit.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
return map_reduce_cf_time_histogram(ctx, req->param["name"], [](const replica::column_family& cf) {
|
||||
return cf.get_stats().estimated_cas_learn;
|
||||
return cf.get_stats().cas_learn.histogram();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -921,13 +933,13 @@ void set_column_family(http_context& ctx, routes& r) {
|
||||
|
||||
cf::get_read_latency_estimated_histogram.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
return map_reduce_cf_time_histogram(ctx, req->param["name"], [](const replica::column_family& cf) {
|
||||
return cf.get_stats().estimated_read;
|
||||
return cf.get_stats().reads.histogram();
|
||||
});
|
||||
});
|
||||
|
||||
cf::get_write_latency_estimated_histogram.set(r, [&ctx](std::unique_ptr<request> req) {
|
||||
return map_reduce_cf_time_histogram(ctx, req->param["name"], [](const replica::column_family& cf) {
|
||||
return cf.get_stats().estimated_write;
|
||||
return cf.get_stats().writes.histogram();
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -341,16 +341,11 @@ struct table_stats {
|
||||
int64_t memtable_range_tombstone_reads = 0;
|
||||
int64_t memtable_row_tombstone_reads = 0;
|
||||
mutation_application_stats memtable_app_stats;
|
||||
utils::timed_rate_moving_average_and_histogram reads{256};
|
||||
utils::timed_rate_moving_average_and_histogram writes{256};
|
||||
utils::timed_rate_moving_average_and_histogram cas_prepare{256};
|
||||
utils::timed_rate_moving_average_and_histogram cas_accept{256};
|
||||
utils::timed_rate_moving_average_and_histogram cas_learn{256};
|
||||
utils::time_estimated_histogram estimated_read;
|
||||
utils::time_estimated_histogram estimated_write;
|
||||
utils::time_estimated_histogram estimated_cas_prepare;
|
||||
utils::time_estimated_histogram estimated_cas_accept;
|
||||
utils::time_estimated_histogram estimated_cas_learn;
|
||||
utils::timed_rate_moving_average_summary_and_histogram reads{256};
|
||||
utils::timed_rate_moving_average_summary_and_histogram writes{256};
|
||||
utils::timed_rate_moving_average_summary_and_histogram cas_prepare{256};
|
||||
utils::timed_rate_moving_average_summary_and_histogram cas_accept{256};
|
||||
utils::timed_rate_moving_average_summary_and_histogram cas_learn{256};
|
||||
utils::estimated_histogram estimated_sstable_per_read{35};
|
||||
utils::timed_rate_moving_average_and_histogram tombstone_scanned;
|
||||
utils::timed_rate_moving_average_and_histogram live_scanned;
|
||||
|
||||
@@ -778,14 +778,14 @@ void table::set_metrics() {
|
||||
namespace ms = seastar::metrics;
|
||||
if (_config.enable_metrics_reporting) {
|
||||
_metrics.add_group("column_family", {
|
||||
ms::make_counter("memtable_switch", ms::description("Number of times flush has resulted in the memtable being switched out"), _stats.memtable_switch_count)(cf)(ks),
|
||||
ms::make_counter("memtable_partition_writes", [this] () { return _stats.memtable_partition_insertions + _stats.memtable_partition_hits; }, ms::description("Number of write operations performed on partitions in memtables"))(cf)(ks),
|
||||
ms::make_counter("memtable_partition_hits", _stats.memtable_partition_hits, ms::description("Number of times a write operation was issued on an existing partition in memtables"))(cf)(ks),
|
||||
ms::make_counter("memtable_row_writes", _stats.memtable_app_stats.row_writes, ms::description("Number of row writes performed in memtables"))(cf)(ks),
|
||||
ms::make_counter("memtable_row_hits", _stats.memtable_app_stats.row_hits, ms::description("Number of rows overwritten by write operations in memtables"))(cf)(ks),
|
||||
ms::make_counter("memtable_rows_dropped_by_tombstones", _stats.memtable_app_stats.rows_dropped_by_tombstones, ms::description("Number of rows dropped in memtables by a tombstone write"))(cf)(ks),
|
||||
ms::make_counter("memtable_rows_compacted_with_tombstones", _stats.memtable_app_stats.rows_compacted_with_tombstones, ms::description("Number of rows scanned during write of a tombstone for the purpose of compaction in memtables"))(cf)(ks),
|
||||
ms::make_counter("memtable_range_tombstone_reads", _stats.memtable_range_tombstone_reads, ms::description("Number of range tombstones read from memtables"))(cf)(ks),
|
||||
ms::make_counter("memtable_switch", ms::description("Number of times flush has resulted in the memtable being switched out"), _stats.memtable_switch_count)(cf)(ks).set_skip_when_empty(),
|
||||
ms::make_counter("memtable_partition_writes", [this] () { return _stats.memtable_partition_insertions + _stats.memtable_partition_hits; }, ms::description("Number of write operations performed on partitions in memtables"))(cf)(ks).set_skip_when_empty(),
|
||||
ms::make_counter("memtable_partition_hits", _stats.memtable_partition_hits, ms::description("Number of times a write operation was issued on an existing partition in memtables"))(cf)(ks).set_skip_when_empty(),
|
||||
ms::make_counter("memtable_row_writes", _stats.memtable_app_stats.row_writes, ms::description("Number of row writes performed in memtables"))(cf)(ks).set_skip_when_empty(),
|
||||
ms::make_counter("memtable_row_hits", _stats.memtable_app_stats.row_hits, ms::description("Number of rows overwritten by write operations in memtables"))(cf)(ks).set_skip_when_empty().set_skip_when_empty(),
|
||||
ms::make_counter("memtable_rows_dropped_by_tombstones", _stats.memtable_app_stats.rows_dropped_by_tombstones, ms::description("Number of rows dropped in memtables by a tombstone write"))(cf)(ks).set_skip_when_empty(),
|
||||
ms::make_counter("memtable_rows_compacted_with_tombstones", _stats.memtable_app_stats.rows_compacted_with_tombstones, ms::description("Number of rows scanned during write of a tombstone for the purpose of compaction in memtables"))(cf)(ks).set_skip_when_empty(),
|
||||
ms::make_counter("memtable_range_tombstone_reads", _stats.memtable_range_tombstone_reads, ms::description("Number of range tombstones read from memtables"))(cf)(ks).set_skip_when_empty(),
|
||||
ms::make_counter("memtable_row_tombstone_reads", _stats.memtable_row_tombstone_reads, ms::description("Number of row tombstones read from memtables"))(cf)(ks),
|
||||
ms::make_gauge("pending_tasks", ms::description("Estimated number of tasks pending for this column family"), _stats.pending_flushes)(cf)(ks),
|
||||
ms::make_gauge("live_disk_space", ms::description("Live disk space used"), _stats.live_disk_space_used)(cf)(ks),
|
||||
@@ -800,10 +800,10 @@ void table::set_metrics() {
|
||||
// Metrics related to row locking
|
||||
auto add_row_lock_metrics = [this, ks, cf] (row_locker::single_lock_stats& stats, sstring stat_name) {
|
||||
_metrics.add_group("column_family", {
|
||||
ms::make_total_operations(format("row_lock_{}_acquisitions", stat_name), stats.lock_acquisitions, ms::description(format("Row lock acquisitions for {} lock", stat_name)))(cf)(ks),
|
||||
ms::make_total_operations(format("row_lock_{}_acquisitions", stat_name), stats.lock_acquisitions, ms::description(format("Row lock acquisitions for {} lock", stat_name)))(cf)(ks).set_skip_when_empty(),
|
||||
ms::make_queue_length(format("row_lock_{}_operations_currently_waiting_for_lock", stat_name), stats.operations_currently_waiting_for_lock, ms::description(format("Operations currently waiting for {} lock", stat_name)))(cf)(ks),
|
||||
ms::make_histogram(format("row_lock_{}_waiting_time", stat_name), ms::description(format("Histogram representing time that operations spent on waiting for {} lock", stat_name)),
|
||||
[&stats] {return to_metrics_histogram(stats.estimated_waiting_for_lock);})(cf)(ks)
|
||||
[&stats] {return to_metrics_histogram(stats.estimated_waiting_for_lock);})(cf)(ks).aggregate({seastar::metrics::shard_label}).set_skip_when_empty()
|
||||
});
|
||||
};
|
||||
add_row_lock_metrics(_row_locker_stats.exclusive_row, "exclusive_row");
|
||||
@@ -818,11 +818,17 @@ void table::set_metrics() {
|
||||
|
||||
if (!is_internal_keyspace(_schema->ks_name())) {
|
||||
_metrics.add_group("column_family", {
|
||||
ms::make_histogram("read_latency", ms::description("Read latency histogram"), [this] {return to_metrics_histogram(_stats.estimated_read);})(cf)(ks),
|
||||
ms::make_histogram("write_latency", ms::description("Write latency histogram"), [this] {return to_metrics_histogram(_stats.estimated_write);})(cf)(ks),
|
||||
ms::make_histogram("cas_prepare_latency", ms::description("CAS prepare round latency histogram"), [this] {return to_metrics_histogram(_stats.estimated_cas_prepare);})(cf)(ks),
|
||||
ms::make_histogram("cas_propose_latency", ms::description("CAS accept round latency histogram"), [this] {return to_metrics_histogram(_stats.estimated_cas_accept);})(cf)(ks),
|
||||
ms::make_histogram("cas_commit_latency", ms::description("CAS learn round latency histogram"), [this] {return to_metrics_histogram(_stats.estimated_cas_learn);})(cf)(ks),
|
||||
ms::make_summary("read_latency_summary", ms::description("Read latency summary"), [this] {return to_metrics_summary(_stats.reads.summary());})(cf)(ks).set_skip_when_empty(),
|
||||
ms::make_summary("write_latency_summary", ms::description("Write latency summary"), [this] {return to_metrics_summary(_stats.writes.summary());})(cf)(ks).set_skip_when_empty(),
|
||||
ms::make_summary("cas_prepare_latency_summary", ms::description("CAS prepare round latency summary"), [this] {return to_metrics_summary(_stats.cas_prepare.summary());})(cf)(ks).set_skip_when_empty(),
|
||||
ms::make_summary("cas_propose_latency_summary", ms::description("CAS accept round latency summary"), [this] {return to_metrics_summary(_stats.cas_accept.summary());})(cf)(ks).set_skip_when_empty(),
|
||||
ms::make_summary("cas_commit_latency_summary", ms::description("CAS learn round latency summary"), [this] {return to_metrics_summary(_stats.cas_learn.summary());})(cf)(ks).set_skip_when_empty(),
|
||||
|
||||
ms::make_histogram("read_latency", ms::description("Read latency histogram"), [this] {return to_metrics_histogram(_stats.reads.histogram());})(cf)(ks).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
|
||||
ms::make_histogram("write_latency", ms::description("Write latency histogram"), [this] {return to_metrics_histogram(_stats.writes.histogram());})(cf)(ks).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
|
||||
ms::make_histogram("cas_prepare_latency", ms::description("CAS prepare round latency histogram"), [this] {return to_metrics_histogram(_stats.cas_prepare.histogram());})(cf)(ks).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
|
||||
ms::make_histogram("cas_propose_latency", ms::description("CAS accept round latency histogram"), [this] {return to_metrics_histogram(_stats.cas_accept.histogram());})(cf)(ks).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
|
||||
ms::make_histogram("cas_commit_latency", ms::description("CAS learn round latency histogram"), [this] {return to_metrics_histogram(_stats.cas_learn.histogram());})(cf)(ks).aggregate({seastar::metrics::shard_label}).set_skip_when_empty(),
|
||||
ms::make_gauge("cache_hit_rate", ms::description("Cache hit rate"), [this] {return float(_global_cache_hit_rate);})(cf)(ks)
|
||||
});
|
||||
}
|
||||
@@ -1923,9 +1929,6 @@ void table::do_apply(db::rp_handle&& h, Args&&... args) {
|
||||
throw;
|
||||
}
|
||||
_stats.writes.mark(lc);
|
||||
if (lc.is_start()) {
|
||||
_stats.estimated_write.add(lc.latency());
|
||||
}
|
||||
}
|
||||
|
||||
future<> table::apply(const mutation& m, db::rp_handle&& h, db::timeout_clock::time_point timeout) {
|
||||
@@ -2037,9 +2040,6 @@ table::query(schema_ptr s,
|
||||
|
||||
auto finally = defer([&] () noexcept {
|
||||
_stats.reads.mark(lc);
|
||||
if (lc.is_start()) {
|
||||
_stats.estimated_read.add(lc.latency());
|
||||
}
|
||||
_async_gate.leave();
|
||||
});
|
||||
|
||||
|
||||
@@ -118,7 +118,6 @@ future<prepare_response> paxos_state::prepare(storage_proxy& sp, tracing::trace_
|
||||
}).finally([&sp, schema, lc] () mutable {
|
||||
auto& stats = sp.get_db().local().find_column_family(schema).get_stats();
|
||||
stats.cas_prepare.mark(lc.stop().latency());
|
||||
stats.estimated_cas_prepare.add(lc.latency());
|
||||
});
|
||||
});
|
||||
}
|
||||
@@ -158,7 +157,6 @@ future<bool> paxos_state::accept(storage_proxy& sp, tracing::trace_state_ptr tr_
|
||||
}).finally([&sp, schema, lc] () mutable {
|
||||
auto& stats = sp.get_db().local().find_column_family(schema).get_stats();
|
||||
stats.cas_accept.mark(lc.stop().latency());
|
||||
stats.estimated_cas_accept.add(lc.latency());
|
||||
});
|
||||
});
|
||||
}
|
||||
@@ -225,7 +223,6 @@ future<> paxos_state::learn(storage_proxy& sp, schema_ptr schema, proposal decis
|
||||
}).finally([&sp, schema, lc] () mutable {
|
||||
auto& stats = sp.get_db().local().find_column_family(schema).get_stats();
|
||||
stats.cas_learn.mark(lc.stop().latency());
|
||||
stats.estimated_cas_learn.add(lc.latency());
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user