raft_group0: simplify get_group0_upgrade_state function since no upgrade can happen any more
No need for locking any more so the function may just return a value and be synchronous.
This commit is contained in:
@@ -61,10 +61,6 @@ migration_manager::migration_manager(migration_notifier& notifier, gms::feature_
|
||||
, _sys_ks(sysks)
|
||||
, _group0_barrier(this_shard_id() == 0 ?
|
||||
std::function<future<>()>([this] () -> future<> {
|
||||
if ((co_await _group0_client.get_group0_upgrade_state()).second == group0_upgrade_state::use_pre_raft_procedures) {
|
||||
on_internal_error(mlogger, "Trying to pull schema over raft while in pre raft procedures");
|
||||
}
|
||||
|
||||
// This will run raft barrier and will sync schema with the leader
|
||||
co_await with_scheduling_group(_gossiper.get_scheduling_group(), [this] {
|
||||
return start_group0_operation().discard_result();
|
||||
|
||||
@@ -674,7 +674,7 @@ struct group0_members {
|
||||
future<bool> raft_group0::use_raft() {
|
||||
SCYLLA_ASSERT(this_shard_id() == 0);
|
||||
|
||||
if (((co_await _client.get_group0_upgrade_state()).second) == group0_upgrade_state::recovery) {
|
||||
if (_client.get_group0_upgrade_state() == group0_upgrade_state::recovery) {
|
||||
group0_log.warn("setup_group0: Raft RECOVERY mode, skipping group 0 setup.");
|
||||
co_return false;
|
||||
}
|
||||
@@ -708,7 +708,7 @@ future<> raft_group0::setup_group0_if_exist(db::system_keyspace& sys_ks, service
|
||||
|
||||
// If we're not restarting in the middle of the Raft upgrade procedure, we must disable
|
||||
// migration_manager schema pulls.
|
||||
auto start_state = (co_await _client.get_group0_upgrade_state()).second;
|
||||
auto start_state = _client.get_group0_upgrade_state();
|
||||
if (start_state != group0_upgrade_state::use_post_raft_procedures) {
|
||||
throw std::runtime_error("The cluster is not yet fully upgraded to use raft. This means that you try to upgrade"
|
||||
" a node of a cluster that is not using Raft yet. This is no longer supported. Please first complete the upgrade of the cluster to use Raft");
|
||||
|
||||
@@ -258,14 +258,12 @@ future<group0_guard> raft_group0_client::start_operation(seastar::abort_source&
|
||||
throw exceptions::configuration_exception{"cannot start group0 operation in the maintenance mode"};
|
||||
}
|
||||
|
||||
std::pair<rwlock::holder, group0_upgrade_state> upgrade_lock_and_state = co_await get_group0_upgrade_state();
|
||||
auto [upgrade_lock_holder, upgrade_state] = std::move(upgrade_lock_and_state);
|
||||
switch (upgrade_state) {
|
||||
case group0_upgrade_state::synchronize:
|
||||
// The version no longer support upgrade to group0, so the state should be unused
|
||||
on_internal_error(logger, "unexpected group0 upgrade state 'synchronize' in start_operation");
|
||||
[[fallthrough]];
|
||||
case group0_upgrade_state::use_post_raft_procedures: {
|
||||
group0_upgrade_state upgrade_state = get_group0_upgrade_state();
|
||||
if (upgrade_state != group0_upgrade_state::use_post_raft_procedures) {
|
||||
// The version no longer supports pre raft procedures
|
||||
on_internal_error(logger, format("unexpected group0 upgrade state {} in start_operation", upgrade_state));
|
||||
}
|
||||
|
||||
auto operation_holder = co_await get_units(_operation_mutex, 1, as);
|
||||
co_await _raft_gr.group0_with_timeouts().read_barrier(&as, timeout);
|
||||
|
||||
@@ -283,27 +281,10 @@ future<group0_guard> raft_group0_client::start_operation(seastar::abort_source&
|
||||
observed_group0_state_id,
|
||||
new_group0_state_id,
|
||||
// Not holding any lock in this case, but move the upgrade lock holder for consistent code
|
||||
std::move(upgrade_lock_holder),
|
||||
rwlock::holder{},
|
||||
true
|
||||
)
|
||||
};
|
||||
}
|
||||
|
||||
case group0_upgrade_state::recovery:
|
||||
logger.warn("starting operation in RECOVERY mode (using old procedures)");
|
||||
[[fallthrough]];
|
||||
case group0_upgrade_state::use_pre_raft_procedures:
|
||||
co_return group0_guard {
|
||||
std::make_unique<group0_guard::impl>(
|
||||
semaphore_units<>{},
|
||||
semaphore_units<>{},
|
||||
utils::UUID{},
|
||||
generate_group0_state_id(utils::UUID{}),
|
||||
std::move(upgrade_lock_holder),
|
||||
false
|
||||
)
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Command>
|
||||
@@ -392,21 +373,11 @@ future<> raft_group0_client::init() {
|
||||
}
|
||||
}
|
||||
|
||||
future<std::pair<rwlock::holder, group0_upgrade_state>> raft_group0_client::get_group0_upgrade_state() {
|
||||
auto holder = co_await _upgrade_lock.hold_read_lock();
|
||||
|
||||
if (_upgrade_state == group0_upgrade_state::use_pre_raft_procedures) {
|
||||
co_return std::pair{std::move(holder), _upgrade_state};
|
||||
}
|
||||
|
||||
co_return std::pair{rwlock::holder{}, _upgrade_state};
|
||||
group0_upgrade_state raft_group0_client::get_group0_upgrade_state() {
|
||||
return _upgrade_state;
|
||||
}
|
||||
|
||||
future<> raft_group0_client::set_group0_upgrade_state(group0_upgrade_state state) {
|
||||
// We could explicitly handle abort here but we assume that if someone holds the lock,
|
||||
// they will eventually finish (say, due to abort) and release it.
|
||||
auto holder = co_await _upgrade_lock.hold_write_lock();
|
||||
|
||||
auto value = [] (group0_upgrade_state s) constexpr {
|
||||
switch (s) {
|
||||
case service::group0_upgrade_state::use_post_raft_procedures:
|
||||
@@ -430,9 +401,6 @@ future<> raft_group0_client::set_group0_upgrade_state(group0_upgrade_state state
|
||||
|
||||
co_await _sys_ks.save_group0_upgrade_state(value(state));
|
||||
_upgrade_state = state;
|
||||
if (_upgrade_state == group0_upgrade_state::use_post_raft_procedures) {
|
||||
_upgraded.broadcast();
|
||||
}
|
||||
}
|
||||
|
||||
future<semaphore_units<>> raft_group0_client::hold_read_apply_mutex(abort_source& as) {
|
||||
|
||||
@@ -96,8 +96,6 @@ class raft_group0_client {
|
||||
|
||||
// `_upgrade_state` is a cached (perhaps outdated) version of the upgrade state stored on disk.
|
||||
group0_upgrade_state _upgrade_state{group0_upgrade_state::recovery}; // loaded from disk in `init()`
|
||||
seastar::rwlock _upgrade_lock;
|
||||
seastar::condition_variable _upgraded;
|
||||
|
||||
std::unordered_map<utils::UUID, std::optional<service::broadcast_tables::query_result>> _results;
|
||||
|
||||
@@ -167,26 +165,10 @@ public:
|
||||
|
||||
// Returns the current group 0 upgrade state.
|
||||
//
|
||||
// The possible transitions are: `use_pre_raft_procedures` -> `synchronize` -> `use_post_raft_procedures`.
|
||||
// Once entering a state, we cannot rollback (even after a restart - the state is persisted).
|
||||
//
|
||||
// An exception to these rules is manual recovery, represented by `recovery` state.
|
||||
// It can be entered by the user manually modifying the system.local table through CQL
|
||||
// and restarting the node. In this state the node will not join group 0 or start the group 0 Raft server,
|
||||
// it will perform all operations as in `use_pre_raft_procedures`, and not attempt to perform the upgrade.
|
||||
//
|
||||
// If the returned state is `use_pre_raft_procedures`, the returned `rwlock::holder`
|
||||
// prevents the state from being changed (`set_group0_upgrade_state` will wait).
|
||||
//
|
||||
// When performing an operation that assumes the state to be `use_pre_raft_procedures`
|
||||
// (e.g. a schema change using the old method), keep the holder until your operation finishes.
|
||||
//
|
||||
// Note that we don't need to hold the lock during `synchronize` or `use_post_raft_procedures`:
|
||||
// in `synchronize` group 0 operations are disabled, and `use_post_raft_procedures` is the final
|
||||
// state so it won't change (unless through manual recovery - which should not be required
|
||||
// in the final state anyway). Thus, when `synchronize` or `use_post_raft_procedures` is returned,
|
||||
// the holder does not actually hold any lock.
|
||||
future<std::pair<rwlock::holder, group0_upgrade_state>> get_group0_upgrade_state();
|
||||
// Possible values now are: recovery (maintenance mode only) and use_post_raft_procedures
|
||||
// Other value are no longer supported and if discovered during boot the boot will fail
|
||||
group0_upgrade_state get_group0_upgrade_state();
|
||||
|
||||
// Ensures that nobody holds any `rwlock::holder`s returned from `get_group0_upgrade_state()`
|
||||
// then changes the state to `s`.
|
||||
|
||||
Reference in New Issue
Block a user