diff --git a/service/qos/raft_service_level_distributed_data_accessor.cc b/service/qos/raft_service_level_distributed_data_accessor.cc index 58c976e411..4f8966c186 100644 --- a/service/qos/raft_service_level_distributed_data_accessor.cc +++ b/service/qos/raft_service_level_distributed_data_accessor.cc @@ -100,6 +100,10 @@ bool raft_service_level_distributed_data_accessor::is_v2() const { return true; } +bool raft_service_level_distributed_data_accessor::can_use_effective_service_level_cache() const { + return !auth::legacy_mode(_qp); +} + ::shared_ptr raft_service_level_distributed_data_accessor::upgrade_to_v2(cql3::query_processor& qp, service::raft_group0_client& group0_client) const { return nullptr; } diff --git a/service/qos/raft_service_level_distributed_data_accessor.hh b/service/qos/raft_service_level_distributed_data_accessor.hh index 8b30497a08..54fef401c4 100644 --- a/service/qos/raft_service_level_distributed_data_accessor.hh +++ b/service/qos/raft_service_level_distributed_data_accessor.hh @@ -39,6 +39,7 @@ public: virtual future<> commit_mutations(service::group0_batch&& mc, abort_source& as) const override; virtual bool is_v2() const override; + virtual bool can_use_effective_service_level_cache() const override; virtual ::shared_ptr upgrade_to_v2(cql3::query_processor& qp, service::raft_group0_client& group0_client) const override; }; diff --git a/service/qos/service_level_controller.cc b/service/qos/service_level_controller.cc index 91f7979ce7..39a08060df 100644 --- a/service/qos/service_level_controller.cc +++ b/service/qos/service_level_controller.cc @@ -309,6 +309,17 @@ future<> service_level_controller::update_effective_service_levels_cache() { // might be not initialized yet. co_return; } + if (!_sl_data_accessor || !_sl_data_accessor->can_use_effective_service_level_cache()) { + // Don't populate the effective service level cache until auth is migrated to raft. + // Otherwise, executing the code that follows would read roles data + // from system_auth tables; that would be bad because reading from + // those tables is prone to timeouts, and `update_effective_service_levels_cache` + // is called from the group0 context - a timeout like that would render + // group0 non-functional on the node until restart. + // + // See scylladb/scylladb#24963 for more details. + co_return; + } auto units = co_await get_units(_global_controller_db->notifications_serializer, 1); auto& role_manager = _auth_service.local().underlying_role_manager(); @@ -385,7 +396,7 @@ void service_level_controller::stop_legacy_update_from_distributed_data() { } future> service_level_controller::find_effective_service_level(const sstring& role_name) { - if (_sl_data_accessor->is_v2()) { + if (_sl_data_accessor->can_use_effective_service_level_cache()) { auto effective_sl_it = _effective_service_levels_db.find(role_name); co_return effective_sl_it != _effective_service_levels_db.end() ? std::optional(effective_sl_it->second) diff --git a/service/qos/service_level_controller.hh b/service/qos/service_level_controller.hh index f7fb8e3a37..13fabb6239 100644 --- a/service/qos/service_level_controller.hh +++ b/service/qos/service_level_controller.hh @@ -114,6 +114,9 @@ public: virtual future<> commit_mutations(service::group0_batch&& mc, abort_source& as) const = 0; virtual bool is_v2() const = 0; + // Returns whether effective service level cache can be populated and used. + // This is equivalent to checking whether auth + raft have been migrated to raft. + virtual bool can_use_effective_service_level_cache() const = 0; // Returns v2(raft) data accessor. If data accessor is already a raft one, returns nullptr. virtual ::shared_ptr upgrade_to_v2(cql3::query_processor& qp, service::raft_group0_client& group0_client) const = 0; }; diff --git a/service/qos/standard_service_level_distributed_data_accessor.cc b/service/qos/standard_service_level_distributed_data_accessor.cc index c7f7171561..33873b04de 100644 --- a/service/qos/standard_service_level_distributed_data_accessor.cc +++ b/service/qos/standard_service_level_distributed_data_accessor.cc @@ -37,6 +37,10 @@ bool standard_service_level_distributed_data_accessor::is_v2() const { return false; } +bool standard_service_level_distributed_data_accessor::can_use_effective_service_level_cache() const { + return false; +} + ::shared_ptr standard_service_level_distributed_data_accessor::upgrade_to_v2(cql3::query_processor& qp, service::raft_group0_client& group0_client) const { return ::static_pointer_cast( ::make_shared(qp, group0_client)); diff --git a/service/qos/standard_service_level_distributed_data_accessor.hh b/service/qos/standard_service_level_distributed_data_accessor.hh index c37faeabcf..91b1195008 100644 --- a/service/qos/standard_service_level_distributed_data_accessor.hh +++ b/service/qos/standard_service_level_distributed_data_accessor.hh @@ -31,6 +31,7 @@ public: virtual future<> commit_mutations(service::group0_batch&& mc, abort_source& as) const override { return make_ready_future(); } virtual bool is_v2() const override; + virtual bool can_use_effective_service_level_cache() const override; virtual ::shared_ptr upgrade_to_v2(cql3::query_processor& qp, service::raft_group0_client& group0_client) const override; }; } diff --git a/test/boost/service_level_controller_test.cc b/test/boost/service_level_controller_test.cc index e1ad9ac6cc..405893f157 100644 --- a/test/boost/service_level_controller_test.cc +++ b/test/boost/service_level_controller_test.cc @@ -166,6 +166,9 @@ SEASTAR_THREAD_TEST_CASE(too_many_service_levels) { virtual bool is_v2() const override { return true; } + virtual bool can_use_effective_service_level_cache() const override { + return true; + } virtual ::shared_ptr upgrade_to_v2(cql3::query_processor& qp, service::raft_group0_client& group0_client) const override { return make_shared(); }