diff --git a/db/commitlog/commitlog.cc b/db/commitlog/commitlog.cc
index 9880e100bf..614b9294e3 100644
--- a/db/commitlog/commitlog.cc
+++ b/db/commitlog/commitlog.cc
@@ -342,6 +342,29 @@ public:
         // size allocated on disk - i.e. files created (new, reserve, recycled)
         uint64_t total_size_on_disk = 0;
         uint64_t requests_blocked_memory = 0;
+        // allocations currently waiting for a new segment to become available
+        uint64_t blocked_on_new_segment = 0;
+        // allocate_when_possible calls currently in flight
+        uint64_t active_allocations = 0;
+    };
+
+    // Scope guard: increments the referenced counter on construction and
+    // decrements it again on destruction.
+    class scope_increment_counter {
+        uint64_t& _dst;
+    public:
+        explicit scope_increment_counter(uint64_t& dst)
+            : _dst(dst)
+        {
+            ++_dst;
+        }
+        // Not copyable (and thereby not movable): a duplicated guard would
+        // decrement twice for one increment and wrap the unsigned counter.
+        scope_increment_counter(const scope_increment_counter&) = delete;
+        scope_increment_counter& operator=(const scope_increment_counter&) = delete;
+        ~scope_increment_counter() {
+            --_dst;
+        }
     };
 
     stats totals;
@@ -1258,6 +1281,8 @@ future db::commitlog::segment_manager::allocate_when_possible(T writer, db::t
         totals.requests_blocked_memory++;
     }
 
+    scope_increment_counter allocating(totals.active_allocations);
+
     auto permit = co_await std::move(fut);
     sseg_ptr s;
 
@@ -1493,6 +1518,12 @@ void db::commitlog::segment_manager::create_counters(const sstring& metrics_cate
 
         sm::make_gauge("memory_buffer_bytes", totals.buffer_list_bytes,
                 sm::description("Holds the total number of bytes in internal memory buffers.")),
+
+        sm::make_gauge("blocked_on_new_segment", totals.blocked_on_new_segment,
+                sm::description("Number of allocations blocked on acquiring new segment.")),
+
+        sm::make_gauge("active_allocations", totals.active_allocations,
+                sm::description("Current number of active allocations.")),
     });
 }
 
@@ -1739,6 +1770,8 @@ future db::commitlog::segment_manager:
         co_return _segments.back();
     }
 
+    scope_increment_counter blocked_on_new(totals.blocked_on_new_segment);
+
     // #9896 - we don't want to issue a new_segment call until
     // the old one has terminated with either result or exception.
     // Do all waiting through the shared_future
@@ -2790,6 +2823,14 @@ uint64_t db::commitlog::get_num_active_segments() const {
     return _segment_manager->get_num_active_segments();
 }
 
+uint64_t db::commitlog::get_num_blocked_on_new_segment() const {
+    return _segment_manager->totals.blocked_on_new_segment;
+}
+
+uint64_t db::commitlog::get_num_active_allocations() const {
+    return _segment_manager->totals.active_allocations;
+}
+
 future> db::commitlog::list_existing_descriptors() const {
     return list_existing_descriptors(active_config().commit_log_location);
 }
diff --git a/db/commitlog/commitlog.hh b/db/commitlog/commitlog.hh
index 8a22cb5762..9615bd846f 100644
--- a/db/commitlog/commitlog.hh
+++ b/db/commitlog/commitlog.hh
@@ -290,6 +290,11 @@ public:
     uint64_t get_flush_limit_exceeded_count() const;
     uint64_t get_num_segments_created() const;
     uint64_t get_num_segments_destroyed() const;
+    /** Current number of allocations blocked on acquiring a new segment. */
+    uint64_t get_num_blocked_on_new_segment() const;
+    /** Current number of active allocations. */
+    uint64_t get_num_active_allocations() const;
+
     /**
      * Get number of inactive (finished), segments lingering
      * due to still being dirty
diff --git a/test/boost/commitlog_test.cc b/test/boost/commitlog_test.cc
index 41786b5556..1f0416adf0 100644
--- a/test/boost/commitlog_test.cc
+++ b/test/boost/commitlog_test.cc
@@ -782,19 +782,27 @@ SEASTAR_TEST_CASE(test_commitlog_deadlock_in_recycle) {
         };
     });
 
+    // Sums of the commitlog gauges sampled in the flush handler once the disk
+    // limit is reached; the loop below keeps writing until both are non-zero.
+    uint64_t num_active_allocations = 0, num_blocked_on_new_segment = 0;
+
     // add a flush handler that delays releasing things until disk threshold is reached.
     auto r = log.add_flush_handler([&](cf_id_type, replay_position pos) {
         auto old = std::exchange(rps, rp_set{});
         queue.emplace_back(std::move(old));
-        if (log.disk_footprint() >= log.disk_limit() && !t.armed()) {
-            t.arm(5s);
+        if (log.disk_footprint() >= log.disk_limit()) {
+            num_active_allocations += log.get_num_active_allocations();
+            num_blocked_on_new_segment += log.get_num_blocked_on_new_segment();
+            if (!t.armed()) {
+                t.arm(5s);
+            }
         }
     });
 
     bool release = true;
 
     try {
-        while (n < 10) {
+        while (n < 10 || !num_active_allocations || !num_blocked_on_new_segment) {
             auto now = timeout_clock::now();
             rp_handle h = co_await with_timeout(now + 30s, log.add_mutation(uuid, size, db::commitlog::force_sync::no, [&](db::commitlog::output& dst) {
                 dst.fill('1', size);
@@ -810,6 +818,9 @@ SEASTAR_TEST_CASE(test_commitlog_deadlock_in_recycle) {
         co_await log.shutdown();
         co_await log.clear();
     }
+
+    BOOST_REQUIRE_GT(num_active_allocations, 0);
+    BOOST_REQUIRE_GT(num_blocked_on_new_segment, 0);
 }
 
 // Test for #8438 - ensure we can shut down (in orderly fashion)