raft_group0: simplify get_group0_upgrade_state function since no upgrade can happen any more

No need for locking any more so the function may just return a value and
be synchronous.
This commit is contained in:
Gleb Natapov
2026-02-03 18:32:23 +02:00
parent 0689fb5ab2
commit 7d7cbae763
4 changed files with 14 additions and 68 deletions

View File

@@ -61,10 +61,6 @@ migration_manager::migration_manager(migration_notifier& notifier, gms::feature_
, _sys_ks(sysks)
, _group0_barrier(this_shard_id() == 0 ?
std::function<future<>()>([this] () -> future<> {
if ((co_await _group0_client.get_group0_upgrade_state()).second == group0_upgrade_state::use_pre_raft_procedures) {
on_internal_error(mlogger, "Trying to pull schema over raft while in pre raft procedures");
}
// This will run raft barrier and will sync schema with the leader
co_await with_scheduling_group(_gossiper.get_scheduling_group(), [this] {
return start_group0_operation().discard_result();

View File

@@ -674,7 +674,7 @@ struct group0_members {
future<bool> raft_group0::use_raft() {
SCYLLA_ASSERT(this_shard_id() == 0);
if (((co_await _client.get_group0_upgrade_state()).second) == group0_upgrade_state::recovery) {
if (_client.get_group0_upgrade_state() == group0_upgrade_state::recovery) {
group0_log.warn("setup_group0: Raft RECOVERY mode, skipping group 0 setup.");
co_return false;
}
@@ -708,7 +708,7 @@ future<> raft_group0::setup_group0_if_exist(db::system_keyspace& sys_ks, service
// If we're not restarting in the middle of the Raft upgrade procedure, we must disable
// migration_manager schema pulls.
auto start_state = (co_await _client.get_group0_upgrade_state()).second;
auto start_state = _client.get_group0_upgrade_state();
if (start_state != group0_upgrade_state::use_post_raft_procedures) {
throw std::runtime_error("The cluster is not yet fully upgraded to use raft. This means that you try to upgrade"
" a node of a cluster that is not using Raft yet. This is no longer supported. Please first complete the upgrade of the cluster to use Raft");

View File

@@ -258,14 +258,12 @@ future<group0_guard> raft_group0_client::start_operation(seastar::abort_source&
throw exceptions::configuration_exception{"cannot start group0 operation in the maintenance mode"};
}
std::pair<rwlock::holder, group0_upgrade_state> upgrade_lock_and_state = co_await get_group0_upgrade_state();
auto [upgrade_lock_holder, upgrade_state] = std::move(upgrade_lock_and_state);
switch (upgrade_state) {
case group0_upgrade_state::synchronize:
// The version no longer support upgrade to group0, so the state should be unused
on_internal_error(logger, "unexpected group0 upgrade state 'synchronize' in start_operation");
[[fallthrough]];
case group0_upgrade_state::use_post_raft_procedures: {
group0_upgrade_state upgrade_state = get_group0_upgrade_state();
if (upgrade_state != group0_upgrade_state::use_post_raft_procedures) {
// The version no longer supports pre raft procedures
on_internal_error(logger, format("unexpected group0 upgrade state {} in start_operation", upgrade_state));
}
auto operation_holder = co_await get_units(_operation_mutex, 1, as);
co_await _raft_gr.group0_with_timeouts().read_barrier(&as, timeout);
@@ -283,27 +281,10 @@ future<group0_guard> raft_group0_client::start_operation(seastar::abort_source&
observed_group0_state_id,
new_group0_state_id,
// Not holding any lock in this case, but move the upgrade lock holder for consistent code
std::move(upgrade_lock_holder),
rwlock::holder{},
true
)
};
}
case group0_upgrade_state::recovery:
logger.warn("starting operation in RECOVERY mode (using old procedures)");
[[fallthrough]];
case group0_upgrade_state::use_pre_raft_procedures:
co_return group0_guard {
std::make_unique<group0_guard::impl>(
semaphore_units<>{},
semaphore_units<>{},
utils::UUID{},
generate_group0_state_id(utils::UUID{}),
std::move(upgrade_lock_holder),
false
)
};
}
}
template<typename Command>
@@ -392,21 +373,11 @@ future<> raft_group0_client::init() {
}
}
future<std::pair<rwlock::holder, group0_upgrade_state>> raft_group0_client::get_group0_upgrade_state() {
auto holder = co_await _upgrade_lock.hold_read_lock();
if (_upgrade_state == group0_upgrade_state::use_pre_raft_procedures) {
co_return std::pair{std::move(holder), _upgrade_state};
}
co_return std::pair{rwlock::holder{}, _upgrade_state};
group0_upgrade_state raft_group0_client::get_group0_upgrade_state() {
return _upgrade_state;
}
future<> raft_group0_client::set_group0_upgrade_state(group0_upgrade_state state) {
// We could explicitly handle abort here but we assume that if someone holds the lock,
// they will eventually finish (say, due to abort) and release it.
auto holder = co_await _upgrade_lock.hold_write_lock();
auto value = [] (group0_upgrade_state s) constexpr {
switch (s) {
case service::group0_upgrade_state::use_post_raft_procedures:
@@ -430,9 +401,6 @@ future<> raft_group0_client::set_group0_upgrade_state(group0_upgrade_state state
co_await _sys_ks.save_group0_upgrade_state(value(state));
_upgrade_state = state;
if (_upgrade_state == group0_upgrade_state::use_post_raft_procedures) {
_upgraded.broadcast();
}
}
future<semaphore_units<>> raft_group0_client::hold_read_apply_mutex(abort_source& as) {

View File

@@ -96,8 +96,6 @@ class raft_group0_client {
// `_upgrade_state` is a cached (perhaps outdated) version of the upgrade state stored on disk.
group0_upgrade_state _upgrade_state{group0_upgrade_state::recovery}; // loaded from disk in `init()`
seastar::rwlock _upgrade_lock;
seastar::condition_variable _upgraded;
std::unordered_map<utils::UUID, std::optional<service::broadcast_tables::query_result>> _results;
@@ -167,26 +165,10 @@ public:
// Returns the current group 0 upgrade state.
//
// The possible transitions are: `use_pre_raft_procedures` -> `synchronize` -> `use_post_raft_procedures`.
// Once entering a state, we cannot rollback (even after a restart - the state is persisted).
//
// An exception to these rules is manual recovery, represented by `recovery` state.
// It can be entered by the user manually modifying the system.local table through CQL
// and restarting the node. In this state the node will not join group 0 or start the group 0 Raft server,
// it will perform all operations as in `use_pre_raft_procedures`, and not attempt to perform the upgrade.
//
// If the returned state is `use_pre_raft_procedures`, the returned `rwlock::holder`
// prevents the state from being changed (`set_group0_upgrade_state` will wait).
//
// When performing an operation that assumes the state to be `use_pre_raft_procedures`
// (e.g. a schema change using the old method), keep the holder until your operation finishes.
//
// Note that we don't need to hold the lock during `synchronize` or `use_post_raft_procedures`:
// in `synchronize` group 0 operations are disabled, and `use_post_raft_procedures` is the final
// state so it won't change (unless through manual recovery - which should not be required
// in the final state anyway). Thus, when `synchronize` or `use_post_raft_procedures` is returned,
// the holder does not actually hold any lock.
future<std::pair<rwlock::holder, group0_upgrade_state>> get_group0_upgrade_state();
// Possible values now are: recovery (maintenance mode only) and use_post_raft_procedures
// Other value are no longer supported and if discovered during boot the boot will fail
group0_upgrade_state get_group0_upgrade_state();
// Ensures that nobody holds any `rwlock::holder`s returned from `get_group0_upgrade_state()`
// then changes the state to `s`.