// Patch summary (commentary only; the patch text below is unchanged).
//
// Purpose: expose the state of raft group0 as a gauge metric and test it.
//
// service/raft/raft_group0.hh
//   - Adds a `seastar::metrics::metric_groups _metrics` member and a
//     `register_metrics()` declaration.
//   - Adds `enum class status_for_monitoring : uint8_t` with the values
//     disabled = 0, normal = 1, aborted = 2, and a member of that type named
//     `_status_for_monitoring`.
//
// service/raft/raft_group0.cc
//   - The constructor initializes `_status_for_monitoring` to `normal` when
//     `_raft_gr.is_enabled()` is true and to `disabled` otherwise, and calls
//     `register_metrics()` right after `init_rpc_verbs()`.
//   - The `on_background_error` callback passed in `create_server_for_group0`
//     sets the status to `aborted` after calling `_raft_gr.abort_server(...)`.
//   - `register_metrics()` adds the group "raft_group0" with one gauge named
//     "status" whose callback returns the current `_status_for_monitoring`.
//
// test/boost/group0_test.cc
//   - Includes "seastar/core/metrics_api.hh" and reads the gauge through
//     `seastar::metrics::impl::get_value_map()`, using the key
//     "raft_group0_status" and the label set {{"shard", "0"}}.
//   - Requires the status to be 1 before the injected failure and 2 after each
//     of the three schema changes that are expected to throw
//     `raft::stopped_error`.
//   - The expected error text changes to include
//     "std::runtime_error (store_log_entries/test-failure)".
//
// test/lib/cql_test_env.cc / test/lib/cql_test_env.hh
//   - Forward-declares `service::raft_group_registry`, adds the pure virtual
//     accessor `get_raft_group_registry()`, and implements it in
//     `single_node_cql_env` by returning the new `_group0_registry` member,
//     which is filled from the extra constructor argument (`raft_gr` at the
//     construction site).
//
// NOTE(review): in this copy the line breaks inside each hunk are collapsed
//   to spaces, and text between angle brackets appears to be missing (e.g.
//   `static_cast(_status_for_monitoring)`, `sharded& _gossiper`,
//   `future>> fetch_rows`). Presumably extraction artifacts -- confirm against
//   the original commit; the patch will not apply in this form.
// NOTE(review): the status is assigned after `abort_server()` in the error
//   callback. Whether `abort_server()` can throw is not visible here; if it
//   can, the gauge would stay at 1 -- TODO confirm.
// NOTE(review): none of the hunks shown calls `get_raft_group_registry()`;
//   the test reads the metric through the global value map instead. Confirm
//   that the accessor is needed by this change.
diff --git a/service/raft/raft_group0.cc b/service/raft/raft_group0.cc index 1bf8f039d1..96300f5009 100644 --- a/service/raft/raft_group0.cc +++ b/service/raft/raft_group0.cc @@ -92,8 +92,10 @@ raft_group0::raft_group0(seastar::abort_source& abort_source, gms::feature_service& feat, raft_group0_client& client) : _abort_source(abort_source), _raft_gr(raft_gr), _ms(ms), _gossiper(gs), _qp(qp), _mm(mm), _feat(feat), _client(client) + , _status_for_monitoring(_raft_gr.is_enabled() ? status_for_monitoring::normal : status_for_monitoring::disabled) { init_rpc_verbs(); + register_metrics(); } void raft_group0::init_rpc_verbs() { @@ -182,6 +184,7 @@ raft_server_for_group raft_group0::create_server_for_group0(raft::group_id gid, .max_command_size = cl ? cl->max_record_size() / 2 : 0, .on_background_error = [gid, this](std::exception_ptr e) { _raft_gr.abort_server(gid, fmt::format("background error, {}", e)); + _status_for_monitoring = status_for_monitoring::aborted; } }); @@ -1435,6 +1438,14 @@ future<> raft_group0::do_upgrade_to_group0(group0_upgrade_state start_state) { upgrade_log.info("Schema synchronized."); } +void raft_group0::register_metrics() { + namespace sm = seastar::metrics; + _metrics.add_group("raft_group0", { + sm::make_gauge("status", [this] { return static_cast(_status_for_monitoring); }, + sm::description("status of the raft group, 0 - disabled, 1 - normal, 2 - aborted")) + }); +} + std::ostream& operator<<(std::ostream& os, group0_upgrade_state state) { switch (state) { case group0_upgrade_state::recovery: diff --git a/service/raft/raft_group0.hh b/service/raft/raft_group0.hh index 24cde7e62c..76ca6fa772 100644 --- a/service/raft/raft_group0.hh +++ b/service/raft/raft_group0.hh @@ -82,6 +82,17 @@ class raft_group0 { gms::feature::listener_registration _raft_support_listener; + seastar::metrics::metric_groups _metrics; + void register_metrics(); + + // Status of the raft group0 for monitoring. 
+ enum class status_for_monitoring : uint8_t { + // Raft is disabled. + disabled = 0, + normal = 1, + aborted = 2 + } _status_for_monitoring; + public: // Assumes that the provided services are fully started. raft_group0(seastar::abort_source& abort_source, diff --git a/test/boost/group0_test.cc b/test/boost/group0_test.cc index 8d461761a0..156da4e8bd 100644 --- a/test/boost/group0_test.cc +++ b/test/boost/group0_test.cc @@ -16,6 +16,7 @@ #include "utils/error_injection.hh" #include "transport/messages/result_message.hh" #include "service/migration_manager.hh" +#include "seastar/core/metrics_api.hh" static future>> fetch_rows(cql_test_env& e, std::string_view cql) { auto msg = co_await e.execute_cql(cql); @@ -36,6 +37,17 @@ SEASTAR_TEST_CASE(test_abort_server_on_background_error) { return do_with_cql_env([] (cql_test_env& e) -> future<> { utils::get_local_injector().enable("store_log_entries/test-failure", true); + auto get_metric_ui64 = [&](sstring name) { + const auto& value_map = seastar::metrics::impl::get_value_map(); + const auto& metric_family = value_map.at("raft_group0_" + name); + const auto& registered_metric = metric_family.at({{"shard", "0"}}); + return (*registered_metric)().ui(); + }; + + auto get_status = [&] { + return get_metric_ui64("status"); + }; + auto perform_schema_change = [&, has_ks = false] () mutable -> future<> { if (has_ks) { co_await e.execute_cql("drop keyspace new_ks"); @@ -46,11 +58,15 @@ SEASTAR_TEST_CASE(test_abort_server_on_background_error) { }; auto check_error = [](const raft::stopped_error& e) { - return e.what() == sstring("Raft instance is stopped, reason: \"background error, store_log_entries/test-failure\""); + return e.what() == sstring("Raft instance is stopped, reason: \"background error, std::runtime_error (store_log_entries/test-failure)\""); }; + BOOST_REQUIRE_EQUAL(get_status(), 1); BOOST_CHECK_EXCEPTION(co_await perform_schema_change(), raft::stopped_error, check_error); + BOOST_REQUIRE_EQUAL(get_status(), 2); 
BOOST_CHECK_EXCEPTION(co_await perform_schema_change(), raft::stopped_error, check_error); + BOOST_REQUIRE_EQUAL(get_status(), 2); BOOST_CHECK_EXCEPTION(co_await perform_schema_change(), raft::stopped_error, check_error); + BOOST_REQUIRE_EQUAL(get_status(), 2); }, raft_cql_test_config()); #endif } diff --git a/test/lib/cql_test_env.cc b/test/lib/cql_test_env.cc index 9d0b6fcfec..75625e960b 100644 --- a/test/lib/cql_test_env.cc +++ b/test/lib/cql_test_env.cc @@ -135,6 +135,7 @@ private: sharded& _batchlog_manager; sharded& _gossiper; service::raft_group0_client& _group0_client; + sharded& _group0_registry; private: struct core_local_state { @@ -186,7 +187,8 @@ public: sharded &sl_controller, sharded& batchlog_manager, sharded& gossiper, - service::raft_group0_client& client) + service::raft_group0_client& client, + sharded& group0_registry) : _db(db) , _qp(qp) , _auth_service(auth_service) @@ -198,6 +200,7 @@ public: , _batchlog_manager(batchlog_manager) , _gossiper(gossiper) , _group0_client(client) + , _group0_registry(group0_registry) { adjust_rlimit(); } @@ -417,6 +420,10 @@ public: return _group0_client; } + virtual sharded& get_raft_group_registry() override { + return _group0_registry; + } + virtual future<> refresh_client_state() override { return _core_local.invoke_on_all([] (core_local_state& state) { return state.client_state.maybe_update_per_service_level_params(); @@ -872,7 +879,7 @@ public: // The default user may already exist if this `cql_test_env` is starting with previously populated data. 
} - single_node_cql_env env(db, qp, auth_service, view_builder, view_update_generator, mm_notif, mm, std::ref(sl_controller), bm, gossiper, group0_client); + single_node_cql_env env(db, qp, auth_service, view_builder, view_update_generator, mm_notif, mm, std::ref(sl_controller), bm, gossiper, group0_client, raft_gr); env.start().get(); auto stop_env = defer([&env] { env.stop().get(); }); diff --git a/test/lib/cql_test_env.hh b/test/lib/cql_test_env.hh index debce81ad0..345b40be8d 100644 --- a/test/lib/cql_test_env.hh +++ b/test/lib/cql_test_env.hh @@ -51,6 +51,7 @@ namespace service { class client_state; class migration_manager; class raft_group0_client; +class raft_group_registry; } @@ -166,6 +167,8 @@ public: virtual service::raft_group0_client& get_raft_group0_client() = 0; + virtual sharded& get_raft_group_registry() = 0; + data_dictionary::database data_dictionary(); };