From c530756837f84527b039d1aac508aa89587adabc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Grzebieluch?= Date: Fri, 19 Jan 2024 16:41:38 +0100 Subject: [PATCH] storage_service: add MAINTENANCE option to mode enum join_cluster and start_maintenance_mode are incompatible. To make sure that only one is called when the node starts, add the MAINTENANCE option. start_maintenance_mode sets _operation_mode to MAINTENANCE. join_cluster sets _operation_mode to STARTING. set_mode will result in an internal error if: * it tries to set MAINTENANCE mode when the _operation_mode is other than NONE, i.e. start_maintenance_mode is called after join_cluster (or during the drain, which shouldn't happen either). * it tries to set STARTING mode when the mode is set to MAINTENANCE, i.e. join_cluster is called after start_maintenance_mode. --- api/storage_service.cc | 4 ++-- service/storage_service.cc | 15 +++++++++++++-- service/storage_service.hh | 3 ++- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/api/storage_service.cc b/api/storage_service.cc index 902380d285..628a65586d 100644 --- a/api/storage_service.cc +++ b/api/storage_service.cc @@ -1001,7 +1001,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded req) { return ss.local().get_operation_mode().then([&ss] (auto mode) { - bool is_initialized = mode >= service::storage_service::mode::STARTING; + bool is_initialized = mode >= service::storage_service::mode::STARTING && mode != service::storage_service::mode::MAINTENANCE; if (mode == service::storage_service::mode::NORMAL) { is_initialized = ss.local().gossiper().is_enabled(); } @@ -1015,7 +1015,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded req) { return ss.local().get_operation_mode().then([] (auto mode) { - return make_ready_future(mode >= service::storage_service::mode::JOINING); + return make_ready_future(mode >= service::storage_service::mode::JOINING && mode != 
service::storage_service::mode::MAINTENANCE); }); }); diff --git a/service/storage_service.cc b/service/storage_service.cc index 73aef33be4..a2ecbad1bf 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -175,7 +175,8 @@ enum class node_external_status { DECOMMISSIONED = 5, DRAINING = 6, DRAINED = 7, - MOVING = 8 //deprecated + MOVING = 8, //deprecated + MAINTENANCE = 9 }; static node_external_status map_operation_mode(storage_service::mode m) { @@ -190,6 +191,7 @@ static node_external_status map_operation_mode(storage_service::mode m) { case storage_service::mode::DRAINING: return node_external_status::DRAINING; case storage_service::mode::DRAINED: return node_external_status::DRAINED; case storage_service::mode::MOVING: return node_external_status::MOVING; + case storage_service::mode::MAINTENANCE: return node_external_status::MAINTENANCE; } return node_external_status::UNKNOWN; } @@ -203,7 +205,7 @@ void storage_service::register_metrics() { namespace sm = seastar::metrics; _metrics.add_group("node", { sm::make_gauge("operation_mode", sm::description("The operation mode of the current node. UNKNOWN = 0, STARTING = 1, JOINING = 2, NORMAL = 3, " - "LEAVING = 4, DECOMMISSIONED = 5, DRAINING = 6, DRAINED = 7, MOVING = 8"), [this] { + "LEAVING = 4, DECOMMISSIONED = 5, DRAINING = 6, DRAINED = 7, MOVING = 8, MAINTENANCE = 9"), [this] { return static_cast>(map_operation_mode(_operation_mode)); }), }); @@ -2797,6 +2799,15 @@ future> storage_service::effective_ownership( } void storage_service::set_mode(mode m) { + if (m == mode::MAINTENANCE && _operation_mode != mode::NONE) { + // Prevent from calling `start_maintenance_mode` after `join_cluster`. + on_fatal_internal_error(slogger, format("Node should enter maintenance mode only from mode::NONE (current mode: {})", _operation_mode)); + } + if (m == mode::STARTING && _operation_mode == mode::MAINTENANCE) { + // Prevent from calling `join_cluster` after `start_maintenance_mode`. 
+ on_fatal_internal_error(slogger, "Node in the maintenance mode cannot enter the starting mode"); + } + if (m != _operation_mode) { slogger.info("entering {} mode", m); _operation_mode = m; diff --git a/service/storage_service.hh b/service/storage_service.hh index e43ad4ad1a..e9b901982c 100644 --- a/service/storage_service.hh +++ b/service/storage_service.hh @@ -283,7 +283,7 @@ private: public: std::chrono::milliseconds get_ring_delay(); - enum class mode { NONE, STARTING, JOINING, BOOTSTRAP, NORMAL, LEAVING, DECOMMISSIONED, MOVING, DRAINING, DRAINED }; + enum class mode { NONE, STARTING, JOINING, BOOTSTRAP, NORMAL, LEAVING, DECOMMISSIONED, MOVING, DRAINING, DRAINED, MAINTENANCE }; private: mode _operation_mode = mode::NONE; /* Used for tracking drain progress */ @@ -837,6 +837,7 @@ struct fmt::formatter : fmt::formatter