raft server, status metric
This commit is contained in:
@@ -92,8 +92,10 @@ raft_group0::raft_group0(seastar::abort_source& abort_source,
|
||||
gms::feature_service& feat,
|
||||
raft_group0_client& client)
|
||||
: _abort_source(abort_source), _raft_gr(raft_gr), _ms(ms), _gossiper(gs), _qp(qp), _mm(mm), _feat(feat), _client(client)
|
||||
, _status_for_monitoring(_raft_gr.is_enabled() ? status_for_monitoring::normal : status_for_monitoring::disabled)
|
||||
{
|
||||
init_rpc_verbs();
|
||||
register_metrics();
|
||||
}
|
||||
|
||||
void raft_group0::init_rpc_verbs() {
|
||||
@@ -182,6 +184,7 @@ raft_server_for_group raft_group0::create_server_for_group0(raft::group_id gid,
|
||||
.max_command_size = cl ? cl->max_record_size() / 2 : 0,
|
||||
.on_background_error = [gid, this](std::exception_ptr e) {
|
||||
_raft_gr.abort_server(gid, fmt::format("background error, {}", e));
|
||||
_status_for_monitoring = status_for_monitoring::aborted;
|
||||
}
|
||||
});
|
||||
|
||||
@@ -1435,6 +1438,14 @@ future<> raft_group0::do_upgrade_to_group0(group0_upgrade_state start_state) {
|
||||
upgrade_log.info("Schema synchronized.");
|
||||
}
|
||||
|
||||
// Registers the monitoring metrics of this raft_group0 instance.
//
// Adds a metric group named "raft_group0" to _metrics, containing a single
// gauge called "status". The gauge callback captures `this` and returns the
// current _status_for_monitoring converted to its underlying uint8_t, so the
// reported number follows the enumerator values listed in the description
// string below (0 - disabled, 1 - normal, 2 - aborted).
//
// NOTE(review): the callback holds a raw `this`; presumably the registration
// is dropped together with the _metrics member when the object is destroyed
// -- confirm against seastar::metrics::metric_groups.
void raft_group0::register_metrics() {
|
||||
namespace sm = seastar::metrics;
|
||||
_metrics.add_group("raft_group0", {
|
||||
// The value is read on every invocation of the callback, not cached at registration time.
sm::make_gauge("status", [this] { return static_cast<uint8_t>(_status_for_monitoring); },
|
||||
sm::description("status of the raft group, 0 - disabled, 1 - normal, 2 - aborted"))
|
||||
});
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, group0_upgrade_state state) {
|
||||
switch (state) {
|
||||
case group0_upgrade_state::recovery:
|
||||
|
||||
@@ -82,6 +82,17 @@ class raft_group0 {
|
||||
|
||||
gms::feature::listener_registration _raft_support_listener;
|
||||
|
||||
seastar::metrics::metric_groups _metrics;
|
||||
void register_metrics();
|
||||
|
||||
// Status of the raft group0 for monitoring.
|
||||
// The numeric values are what the "status" gauge registered in
// register_metrics() reports, so they must stay in sync with that gauge's
// description string.
enum class status_for_monitoring : uint8_t {
|
||||
// Raft is disabled.
|
||||
disabled = 0,
|
||||
// Initial state chosen by the constructor when the raft group registry
// reports that raft is enabled.
normal = 1,
|
||||
// Set by the group 0 server's on_background_error handler, right after it
// asks the registry to abort the server.
aborted = 2
|
||||
// Current status; initialized in the constructor's member initializer list.
} _status_for_monitoring;
|
||||
|
||||
public:
|
||||
// Assumes that the provided services are fully started.
|
||||
raft_group0(seastar::abort_source& abort_source,
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
#include "utils/error_injection.hh"
|
||||
#include "transport/messages/result_message.hh"
|
||||
#include "service/migration_manager.hh"
|
||||
#include "seastar/core/metrics_api.hh"
|
||||
|
||||
static future<utils::chunked_vector<std::vector<bytes_opt>>> fetch_rows(cql_test_env& e, std::string_view cql) {
|
||||
auto msg = co_await e.execute_cql(cql);
|
||||
@@ -36,6 +37,17 @@ SEASTAR_TEST_CASE(test_abort_server_on_background_error) {
|
||||
return do_with_cql_env([] (cql_test_env& e) -> future<> {
|
||||
utils::get_local_injector().enable("store_log_entries/test-failure", true);
|
||||
|
||||
// Test helper: samples the raft_group0 metric called `name` and returns its value.
//
// The lookup goes through seastar's metric value map: first the metric family
// keyed by the full name "raft_group0_" + name, then the instance whose label
// set is exactly {shard = "0"}. The registered metric is then invoked and the
// result is read through ui().
//
// NOTE(review): both lookups use at(), which presumably throws when the family
// or the shard-0 instance is missing -- confirm against the seastar map types.
// Presumably ui() yields the sample as an unsigned integer; verify in
// seastar/core/metrics_api.hh.
auto get_metric_ui64 = [&](sstring name) {
|
||||
const auto& value_map = seastar::metrics::impl::get_value_map();
|
||||
const auto& metric_family = value_map.at("raft_group0_" + name);
|
||||
const auto& registered_metric = metric_family.at({{"shard", "0"}});
|
||||
return (*registered_metric)().ui();
|
||||
};
|
||||
|
||||
// Test helper: returns the current value of the raft_group0 "status" metric.
// The assertions below compare it against 1 before the injected failure and
// against 2 after the server has been stopped by the background error.
auto get_status = [&] {
|
||||
return get_metric_ui64("status");
|
||||
};
|
||||
|
||||
auto perform_schema_change = [&, has_ks = false] () mutable -> future<> {
|
||||
if (has_ks) {
|
||||
co_await e.execute_cql("drop keyspace new_ks");
|
||||
@@ -46,11 +58,15 @@ SEASTAR_TEST_CASE(test_abort_server_on_background_error) {
|
||||
};
|
||||
|
||||
// Predicate for BOOST_CHECK_EXCEPTION: accepts a raft::stopped_error only when
// its what() text matches the expected "Raft instance is stopped" message that
// carries the injected store_log_entries/test-failure reason.
//
// NOTE(review): two return statements follow each other, so as written only
// the first comparison is evaluated and the second is unreachable. This looks
// like diff residue: the first line appears to be the pre-change expectation
// and the second (with the "std::runtime_error (...)" wrapper) the post-change
// one -- confirm against the actual test file before relying on either.
auto check_error = [](const raft::stopped_error& e) {
|
||||
return e.what() == sstring("Raft instance is stopped, reason: \"background error, store_log_entries/test-failure\"");
|
||||
return e.what() == sstring("Raft instance is stopped, reason: \"background error, std::runtime_error (store_log_entries/test-failure)\"");
|
||||
};
|
||||
BOOST_REQUIRE_EQUAL(get_status(), 1);
|
||||
BOOST_CHECK_EXCEPTION(co_await perform_schema_change(), raft::stopped_error, check_error);
|
||||
BOOST_REQUIRE_EQUAL(get_status(), 2);
|
||||
BOOST_CHECK_EXCEPTION(co_await perform_schema_change(), raft::stopped_error, check_error);
|
||||
BOOST_REQUIRE_EQUAL(get_status(), 2);
|
||||
BOOST_CHECK_EXCEPTION(co_await perform_schema_change(), raft::stopped_error, check_error);
|
||||
BOOST_REQUIRE_EQUAL(get_status(), 2);
|
||||
}, raft_cql_test_config());
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -135,6 +135,7 @@ private:
|
||||
sharded<db::batchlog_manager>& _batchlog_manager;
|
||||
sharded<gms::gossiper>& _gossiper;
|
||||
service::raft_group0_client& _group0_client;
|
||||
sharded<service::raft_group_registry>& _group0_registry;
|
||||
|
||||
private:
|
||||
struct core_local_state {
|
||||
@@ -186,7 +187,8 @@ public:
|
||||
sharded<qos::service_level_controller> &sl_controller,
|
||||
sharded<db::batchlog_manager>& batchlog_manager,
|
||||
sharded<gms::gossiper>& gossiper,
|
||||
service::raft_group0_client& client)
|
||||
service::raft_group0_client& client,
|
||||
sharded<service::raft_group_registry>& group0_registry)
|
||||
: _db(db)
|
||||
, _qp(qp)
|
||||
, _auth_service(auth_service)
|
||||
@@ -198,6 +200,7 @@ public:
|
||||
, _batchlog_manager(batchlog_manager)
|
||||
, _gossiper(gossiper)
|
||||
, _group0_client(client)
|
||||
, _group0_registry(group0_registry)
|
||||
{
|
||||
adjust_rlimit();
|
||||
}
|
||||
@@ -417,6 +420,10 @@ public:
|
||||
return _group0_client;
|
||||
}
|
||||
|
||||
// Returns the sharded raft_group_registry this environment was constructed
// with (stored by reference in _group0_registry). Overrides the pure virtual
// accessor of the same name declared in the test environment interface.
virtual sharded<service::raft_group_registry>& get_raft_group_registry() override {
|
||||
return _group0_registry;
|
||||
}
|
||||
|
||||
virtual future<> refresh_client_state() override {
|
||||
return _core_local.invoke_on_all([] (core_local_state& state) {
|
||||
return state.client_state.maybe_update_per_service_level_params();
|
||||
@@ -872,7 +879,7 @@ public:
|
||||
// The default user may already exist if this `cql_test_env` is starting with previously populated data.
|
||||
}
|
||||
|
||||
single_node_cql_env env(db, qp, auth_service, view_builder, view_update_generator, mm_notif, mm, std::ref(sl_controller), bm, gossiper, group0_client);
|
||||
single_node_cql_env env(db, qp, auth_service, view_builder, view_update_generator, mm_notif, mm, std::ref(sl_controller), bm, gossiper, group0_client, raft_gr);
|
||||
env.start().get();
|
||||
auto stop_env = defer([&env] { env.stop().get(); });
|
||||
|
||||
|
||||
@@ -51,6 +51,7 @@ namespace service {
|
||||
class client_state;
|
||||
class migration_manager;
|
||||
class raft_group0_client;
|
||||
class raft_group_registry;
|
||||
|
||||
}
|
||||
|
||||
@@ -166,6 +167,8 @@ public:
|
||||
|
||||
virtual service::raft_group0_client& get_raft_group0_client() = 0;
|
||||
|
||||
virtual sharded<service::raft_group_registry>& get_raft_group_registry() = 0;
|
||||
|
||||
data_dictionary::database data_dictionary();
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user