From 3b8fd11fa3566c26fb437bfd8415d42788f71c24 Mon Sep 17 00:00:00 2001 From: Piotr Sarna Date: Tue, 11 Aug 2020 09:48:12 +0200 Subject: [PATCH 1/6] database: remove unused semaphore A semaphore for limiting the number of loaded sstables is completely unused, so it can be removed. --- database.hh | 6 ------ 1 file changed, 6 deletions(-) diff --git a/database.hh b/database.hh index c8540ff5cd..e2db0cc7f4 100644 --- a/database.hh +++ b/database.hh @@ -1246,7 +1246,6 @@ private: size_t max_memory_streaming_concurrent_reads() { return _dbcfg.available_memory * 0.02; } static constexpr size_t max_count_system_concurrent_reads{10}; size_t max_memory_system_concurrent_reads() { return _dbcfg.available_memory * 0.02; }; - static constexpr size_t max_concurrent_sstable_loads() { return 3; } size_t max_memory_pending_view_updates() const { return _dbcfg.available_memory * 0.1; } struct db_stats { @@ -1283,8 +1282,6 @@ private: reader_concurrency_semaphore _compaction_concurrency_sem; reader_concurrency_semaphore _system_read_concurrency_sem; - named_semaphore _sstable_load_concurrency_sem{max_concurrent_sstable_loads(), named_semaphore_exception_factory{"sstable load concurrency"}}; - db::timeout_semaphore _view_update_concurrency_sem{max_memory_pending_view_updates()}; cache_tracker _row_cache_tracker; @@ -1557,9 +1554,6 @@ public: std::unordered_set get_initial_tokens(); std::optional get_replace_address(); bool is_replacing(); - named_semaphore& sstable_load_concurrency_sem() { - return _sstable_load_concurrency_sem; - } void register_connection_drop_notifier(netw::messaging_service& ms); db_stats& get_stats() { From 8b56b247375edb3ce679658dcc234881085fefa4 Mon Sep 17 00:00:00 2001 From: Piotr Sarna Date: Tue, 11 Aug 2020 09:51:27 +0200 Subject: [PATCH 2/6] table: add metrics for sstable deletion semaphore It's now possible to read the number of tasks waiting on the sstable deletion semaphore. --- table.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/table.cc b/table.cc index f01a495ef0..56f1623aec 100644 --- a/table.cc +++ b/table.cc @@ -962,7 +962,10 @@ void table::set_metrics() { ms::make_gauge("live_disk_space", ms::description("Live disk space used"), _stats.live_disk_space_used)(cf)(ks), ms::make_gauge("total_disk_space", ms::description("Total disk space used"), _stats.total_disk_space_used)(cf)(ks), ms::make_gauge("live_sstable", ms::description("Live sstable count"), _stats.live_sstable_count)(cf)(ks), - ms::make_gauge("pending_compaction", ms::description("Estimated number of compactions pending for this column family"), _stats.pending_compactions)(cf)(ks) + ms::make_gauge("pending_compaction", ms::description("Estimated number of compactions pending for this column family"), _stats.pending_compactions)(cf)(ks), + ms::make_gauge("pending_sstable_deletions", + ms::description("Number of tasks waiting to delete sstables from a table"), + [this] { return _sstable_deletion_sem.waiters(); })(cf)(ks) }); // Metrics related to row locking From 58a9fa7d2e091c7dbb6df05546d7549ba657f48c Mon Sep 17 00:00:00 2001 From: Piotr Sarna Date: Tue, 11 Aug 2020 09:55:52 +0200 Subject: [PATCH 3/6] hints: add drain queue length to metrics The number of tasks waiting for a drain is now tracked. --- db/hints/manager.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/db/hints/manager.cc b/db/hints/manager.cc index 434dad94b6..114f1b798b 100644 --- a/db/hints/manager.cc +++ b/db/hints/manager.cc @@ -87,6 +87,10 @@ void manager::register_metrics(const sstring& group_name) { sm::make_derive("corrupted_files", _stats.corrupted_files, sm::description("Number of hints files that were discarded during sending because the file was corrupted.")), + + sm::make_gauge("pending_drains", + sm::description("Number of tasks waiting in the queue for draining hints"), + [this] { return _drain_lock.waiters(); }) }); } From 180a1505fd103bb97f2b56e3f57079986b5ef775 Mon Sep 17 00:00:00 2001 From: Piotr Sarna Date: Tue, 11 Aug 2020 10:21:49 +0200 Subject: [PATCH 4/6] hints: track resource_manager sending queue length The number of tasks waiting for a hint to be sent is now tracked. --- db/hints/manager.cc | 6 +++++- db/hints/resource_manager.cc | 4 ++++ db/hints/resource_manager.hh | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/db/hints/manager.cc b/db/hints/manager.cc index 114f1b798b..d8e60ba82c 100644 --- a/db/hints/manager.cc +++ b/db/hints/manager.cc @@ -90,7 +90,11 @@ void manager::register_metrics(const sstring& group_name) { sm::make_gauge("pending_drains", sm::description("Number of tasks waiting in the queue for draining hints"), - [this] { return _drain_lock.waiters(); }) + [this] { return _drain_lock.waiters(); }), + + sm::make_gauge("pending_sends", + sm::description("Number of tasks waiting in the queue for sending a hint"), + [this] { return _resource_manager.sending_queue_length(); }) }); } diff --git a/db/hints/resource_manager.cc b/db/hints/resource_manager.cc index 73aa5f852b..a880eaf7d2 100644 --- a/db/hints/resource_manager.cc +++ b/db/hints/resource_manager.cc @@ -59,6 +59,10 @@ future> resource_manager::ge return get_units(_send_limiter, hint_memory_budget); } +size_t resource_manager::sending_queue_length() const { + return _send_limiter.waiters(); +} + const std::chrono::seconds space_watchdog::_watchdog_period = std::chrono::seconds(1); space_watchdog::space_watchdog(shard_managers_set& managers, per_device_limits_map& per_device_limits_map) diff --git a/db/hints/resource_manager.hh b/db/hints/resource_manager.hh index 3197f8965a..9e9d9d3aac 100644 --- a/db/hints/resource_manager.hh +++ b/db/hints/resource_manager.hh @@ -140,6 +140,7 @@ public: resource_manager& operator=(resource_manager&&) = delete; future> get_send_units_for(size_t buf_size); + size_t sending_queue_length() const; future<> start(shared_ptr proxy_ptr, shared_ptr gossiper_ptr, shared_ptr ss_ptr); void allow_replaying() noexcept; From e4d78b60ff984cc34fe8f6b60fcd053ab5349b34 Mon Sep 17 00:00:00 2001 From: Piotr Sarna Date: Tue, 11 Aug 2020 10:50:59 +0200 Subject: [PATCH 5/6] db, view: add view update generator metrics The view update generator completely lacked metrics, so a basic set of them is now exposed. --- db/view/view_update_generator.cc | 17 +++++++++++++++++ db/view/view_update_generator.hh | 6 +++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/db/view/view_update_generator.cc b/db/view/view_update_generator.cc index ee90304151..eea0d619bd 100644 --- a/db/view/view_update_generator.cc +++ b/db/view/view_update_generator.cc @@ -152,4 +152,21 @@ future<> view_update_generator::register_staging_sstable(sstables::shared_sstabl } } +void view_update_generator::setup_metrics() { + namespace sm = seastar::metrics; + + _metrics.add_group("view_update_generator", { + sm::make_gauge("pending_registrations", sm::description("Number of tasks waiting to register staging sstables"), + [this] { return _registration_sem.waiters(); }), + + sm::make_gauge("queued_batches_count", + sm::description("Number of sets of sstables queued for view update generation"), + [this] { return _sstables_with_tables.size(); }), + + sm::make_gauge("sstables_to_move_count", + sm::description("Number of sets of sstables which are already processed and wait to be moved from their staging directory"), + [this] { return _sstables_to_move.size(); }) + }); +} + } diff --git a/db/view/view_update_generator.hh b/db/view/view_update_generator.hh index 3b63d42eb6..d753aeceb0 100644 --- a/db/view/view_update_generator.hh +++ b/db/view/view_update_generator.hh @@ -48,8 +48,11 @@ private: }; std::unordered_map, std::vector> _sstables_with_tables; std::unordered_map, std::vector> _sstables_to_move; + metrics::metric_groups _metrics; public: - view_update_generator(database& db) : _db(db) { } + view_update_generator(database& db) : _db(db) { + setup_metrics(); + } future<> start(); future<> stop(); @@ -58,6 +61,7 @@ public: ssize_t available_register_units() const { return _registration_sem.available_units(); } private: bool should_throttle() const; + void setup_metrics(); }; } From 5086a5ca3217e26ca7e699841b8adcb262fa75a9 Mon Sep 17 00:00:00 2001 From: Piotr Sarna Date: Tue, 11 Aug 2020 11:28:33 +0200 Subject: [PATCH 6/6] view_builder: add metrics The view builder service lacked metrics, so a basic set of them is added. --- db/view/view.cc | 25 +++++++++++++++++++++++++ db/view/view_builder.hh | 8 ++++++++ 2 files changed, 33 insertions(+) diff --git a/db/view/view.cc b/db/view/view.cc index a33f5fc9ab..2b6af11ecb 100644 --- a/db/view/view.cc +++ b/db/view/view.cc @@ -1213,6 +1213,29 @@ view_builder::view_builder(database& db, db::system_distributed_keyspace& sys_di : _db(db) , _sys_dist_ks(sys_dist_ks) , _mnotifier(mn) { + setup_metrics(); +} + +void view_builder::setup_metrics() { + namespace sm = seastar::metrics; + + _metrics.add_group("view_builder", { + sm::make_gauge("pending_bookkeeping_ops", + sm::description("Number of tasks waiting to perform bookkeeping operations"), + [this] { return _sem.waiters(); }), + + sm::make_derive("steps_performed", + sm::description("Number of performed build steps."), + _stats.steps_performed), + + sm::make_derive("steps_failed", + sm::description("Number of failed build steps."), + _stats.steps_failed), + + sm::make_gauge("builds_in_progress", + sm::description("Number of currently active view builds."), + [this] { return _base_to_build_step.size(); }) + }); } future<> view_builder::start(service::migration_manager& mm) { @@ -1599,6 +1622,7 @@ future<> view_builder::do_build_step() { exponential_backoff_retry r(1s, 1min); while (!_base_to_build_step.empty() && !_as.abort_requested()) { auto units = get_units(_sem, 1).get0(); + ++_stats.steps_performed; try { execute(_current_step->second, exponential_backoff_retry(1s, 1min)); r.reset(); @@ -1606,6 +1630,7 @@ future<> view_builder::do_build_step() { return; } catch (...) { ++_current_step->second.base->cf_stats()->view_building_paused; + ++_stats.steps_failed; auto base = _current_step->second.base->schema(); vlogger.warn("Error executing build step for base {}.{}: {}", base->ks_name(), base->cf_name(), std::current_exception()); r.retry(_as).get(); diff --git a/db/view/view_builder.hh b/db/view/view_builder.hh index 449ccfb66f..0c5513efcf 100644 --- a/db/view/view_builder.hh +++ b/db/view/view_builder.hh @@ -115,6 +115,11 @@ class view_builder final : public service::migration_listener::only_view_notific std::optional next_token; }; + struct stats { + uint64_t steps_performed = 0; + uint64_t steps_failed = 0; + }; + /** * Keeps track of the build progress for all the views of a particular * base table. Each execution of the build step comprises a query of @@ -164,6 +169,8 @@ class view_builder final : public service::migration_listener::only_view_notific seastar::shared_promise<> _shards_finished_read_promise; // Used for testing. std::unordered_map, seastar::shared_promise<>, utils::tuple_hash> _build_notifiers; + stats _stats; + metrics::metric_groups _metrics; public: // The view builder processes the base table in steps of batch_size rows. @@ -206,6 +213,7 @@ private: future<> do_build_step(); void execute(build_step&, exponential_backoff_retry); future<> maybe_mark_view_as_built(view_ptr, dht::token); + void setup_metrics(); struct consumer; };