From 571db3c983262b857b744af3b5cd724f93ce424c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patryk=20J=C4=99drzejczak?= Date: Thu, 14 Dec 2023 16:42:06 +0100 Subject: [PATCH 1/6] db: config: make force_schema_commit_log deprecated In scylladb/scylladb#16254, we made force_schema_commit_log unused. After this change, if someone passes this option as the command line argument, the boot fails. This behavior is undesired. We only want this option to be ignored. We can achieve this effect by making it deprecated. --- db/config.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/config.cc b/db/config.cc index ebaf3f9e9c..9a7a1e4e2e 100644 --- a/db/config.cc +++ b/db/config.cc @@ -1087,7 +1087,7 @@ db::config::config(std::shared_ptr exts) , restrict_future_timestamp(this, "restrict_future_timestamp",liveness::LiveUpdate, value_status::Used, true, "Controls whether to detect and forbid unreasonable USING TIMESTAMP, more than 3 days into the future.") , ignore_truncation_record(this, "unsafe_ignore_truncation_record", value_status::Used, false, "Ignore truncation record stored in system tables as if tables were never truncated.") - , force_schema_commit_log(this, "force_schema_commit_log", value_status::Unused, false, + , force_schema_commit_log(this, "force_schema_commit_log", value_status::Deprecated, false, "Use separate schema commit log unconditionally rater than after restart following discovery of cluster-wide support for it.") , task_ttl_seconds(this, "task_ttl_in_seconds", liveness::LiveUpdate, value_status::Used, 0, "Time for which information about finished task stays in memory.") , nodeops_watchdog_timeout_seconds(this, "nodeops_watchdog_timeout_seconds", liveness::LiveUpdate, value_status::Used, 120, "Time in seconds after which node operations abort when not hearing from the coordinator") From a54f9052fc8531418878b232e1b1969c03a510b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patryk=20J=C4=99drzejczak?= Date: Fri, 1 Dec 2023 12:35:29 +0100 
Subject: [PATCH 2/6] db: config: make override_decommission deprecated The override_decommission option is supported only when consistent_cluster_management is disabled. In the following commit, we make consistent_cluster_management mandatory, which makes override_decommission unusable. --- db/config.cc | 2 +- service/storage_service.cc | 13 ++++--------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/db/config.cc b/db/config.cc index 9a7a1e4e2e..0967982a05 100644 --- a/db/config.cc +++ b/db/config.cc @@ -942,7 +942,7 @@ db::config::config(std::shared_ptr exts) , replace_address(this, "replace_address", value_status::Used, "", "[[deprecated]] The listen_address or broadcast_address of the dead node to replace. Same as -Dcassandra.replace_address.") , replace_address_first_boot(this, "replace_address_first_boot", value_status::Used, "", "[[deprecated]] Like replace_address option, but if the node has been bootstrapped successfully it will be ignored. Same as -Dcassandra.replace_address_first_boot.") , ignore_dead_nodes_for_replace(this, "ignore_dead_nodes_for_replace", value_status::Used, "", "List dead nodes to ignore for replace operation using a comma-separated list of host IDs. 
E.g., scylla --ignore-dead-nodes-for-replace 8d5ed9f4-7764-4dbd-bad8-43fddce94b7c,125ed9f4-7777-1dbn-mac8-43fddce9123e") - , override_decommission(this, "override_decommission", value_status::Used, false, "Set true to force a decommissioned node to join the cluster (cannot be set if consistent-cluster-management is enabled") + , override_decommission(this, "override_decommission", value_status::Deprecated, false, "Set true to force a decommissioned node to join the cluster (cannot be set if consistent-cluster-management is enabled") , enable_repair_based_node_ops(this, "enable_repair_based_node_ops", liveness::LiveUpdate, value_status::Used, true, "Set true to use enable repair based node operations instead of streaming based") , allowed_repair_based_node_ops(this, "allowed_repair_based_node_ops", liveness::LiveUpdate, value_status::Used, "replace,removenode,rebuild,bootstrap,decommission", "A comma separated list of node operations which are allowed to enable repair based node operations. The operations can be bootstrap, replace, removenode, decommission and rebuild") , enable_compacting_data_for_streaming_and_repair(this, "enable_compacting_data_for_streaming_and_repair", liveness::LiveUpdate, value_status::Used, true, "Enable the compacting reader, which compacts the data for streaming and repair (load'n'stream included) before sending it to, or synchronizing it with peers. 
Can reduce the amount of data to be processed by removing dead data, but adds CPU overhead.") diff --git a/service/storage_service.cc b/service/storage_service.cc index 302270a0bb..951b14e3fa 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -3010,15 +3010,10 @@ future<> storage_service::join_token_ring(sharded cdc_gen_id; if (_sys_ks.local().was_decommissioned()) { - if (_db.local().get_config().override_decommission() && !_db.local().get_config().consistent_cluster_management()) { - slogger.warn("This node was decommissioned, but overriding by operator request."); - co_await _sys_ks.local().set_bootstrap_state(db::system_keyspace::bootstrap_state::COMPLETED); - } else { - auto msg = sstring("This node was decommissioned and will not rejoin the ring unless override_decommission=true has been set and consistent cluster management is not in use," - "or all existing data is removed and the node is bootstrapped again"); - slogger.error("{}", msg); - throw std::runtime_error(msg); - } + auto msg = sstring("This node was decommissioned and will not rejoin the ring unless " + "all existing data is removed and the node is bootstrapped again"); + slogger.error("{}", msg); + throw std::runtime_error(msg); } bool replacing_a_node_with_same_ip = false; From 7dd7ec8996d1ae2463a7cd707a30ad03f33c3f8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patryk=20J=C4=99drzejczak?= Date: Tue, 12 Dec 2023 15:08:37 +0100 Subject: [PATCH 3/6] test: boost: schema_change_test: replace disable_raft_schema_config In the following commits, we make consistent cluster management mandatory. This will make disable_raft_schema_config unusable, so we need to get rid of it. However, we don't want to remove tests that use it. The idea is to use the Raft RECOVERY mode instead of disabling consistent cluster management directly. 
--- test/boost/schema_change_test.cc | 12 ++++++------ test/lib/cql_test_env.cc | 4 ++++ test/lib/cql_test_env.hh | 1 + 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/test/boost/schema_change_test.cc b/test/boost/schema_change_test.cc index 7be0eaf208..c7147d8bef 100644 --- a/test/boost/schema_change_test.cc +++ b/test/boost/schema_change_test.cc @@ -31,9 +31,9 @@ #include "cdc/cdc_extension.hh" #include "utils/UUID_gen.hh" -static cql_test_config disable_raft_schema_config() { +static cql_test_config run_with_raft_recovery_config() { cql_test_config c; - c.db_config->consistent_cluster_management(false); + c.run_with_raft_recovery = true; return c; } @@ -158,10 +158,10 @@ SEASTAR_TEST_CASE(test_tombstones_are_ignored_in_version_calculation) { // a digest to be calculated when applying the schema change, and the digest // will be different than the first version sent. // - // Hence we use `disable_raft_schema_config()` in this test. + // Hence we use `run_with_raft_recovery_config()` in this test. BOOST_REQUIRE_EQUAL(new_node_version, old_node_version); }); - }, disable_raft_schema_config()); + }, run_with_raft_recovery_config()); } SEASTAR_TEST_CASE(test_concurrent_column_addition) { @@ -220,10 +220,10 @@ SEASTAR_TEST_CASE(test_concurrent_column_addition) { // This is fine with group 0 where all schema changes are linearized, so this scenario // of merging concurrent schema changes doesn't happen. // - // Hence we use `disable_raft_schema_config()` in this test. + // Hence we use `run_with_raft_recovery_config()` in this test. 
BOOST_REQUIRE(new_schema->version() != s2->version()); }); - }, disable_raft_schema_config()); + }, run_with_raft_recovery_config()); } SEASTAR_TEST_CASE(test_sort_type_in_update) { diff --git a/test/lib/cql_test_env.cc b/test/lib/cql_test_env.cc index edd09e2478..36ebf83da1 100644 --- a/test/lib/cql_test_env.cc +++ b/test/lib/cql_test_env.cc @@ -815,6 +815,10 @@ private: }).get(); } + if (cfg_in.run_with_raft_recovery) { + _sys_ks.local().save_group0_upgrade_state("RECOVERY").get(); + } + group0_client.init().get(); auto stop_system_keyspace = defer([this] { _sys_ks.invoke_on_all(&db::system_keyspace::shutdown).get(); diff --git a/test/lib/cql_test_env.hh b/test/lib/cql_test_env.hh index 20b3b7aaa0..edf1749b9a 100644 --- a/test/lib/cql_test_env.hh +++ b/test/lib/cql_test_env.hh @@ -96,6 +96,7 @@ public: locator::host_id host_id; gms::inet_address broadcast_address = gms::inet_address("localhost"); bool ms_listen = false; + bool run_with_raft_recovery = false; cql_test_config(); cql_test_config(const cql_test_config&); From 5ebfbf42bcff440012f6c2a37c27cf72f98db09e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patryk=20J=C4=99drzejczak?= Date: Fri, 1 Dec 2023 12:37:00 +0100 Subject: [PATCH 4/6] db: config: make consistent_cluster_management mandatory Code that executed only when consistent_cluster_management=false is removed. In particular, after this patch: - raft_group0 and raft_group_registry are always enabled, - raft_group0::status_for_monitoring::disabled becomes unused, - topology tests can only run with consistent_cluster_management. 
--- conf/scylla.yaml | 12 ------------ db/config.cc | 2 +- db/system_keyspace.cc | 4 ++-- gms/feature_service.cc | 4 ---- main.cc | 18 ++++-------------- service/raft/raft_group0.cc | 24 ++---------------------- service/raft/raft_group0.hh | 8 +------- service/raft/raft_group_registry.cc | 10 ++-------- service/raft/raft_group_registry.hh | 13 ++----------- service/storage_service.cc | 2 +- test/alternator/run | 6 ------ test/boost/cql_query_test.cc | 1 - test/boost/group0_test.cc | 6 +++--- test/boost/schema_change_test.cc | 2 +- test/boost/tablets_test.cc | 1 - test/broadcast_tables/suite.yaml | 1 - test/cql-pytest/run | 12 ------------ test/lib/cql_test_env.cc | 14 +++----------- test/lib/cql_test_env.hh | 3 --- test/perf/perf_simple_query.cc | 1 - test/pylib/scylla_cluster.py | 2 -- test/raft/raft_sys_table_storage_test.cc | 10 +++++----- test/topology/conftest.py | 24 ------------------------ test/topology/test_concurrent_schema.py | 2 +- 24 files changed, 28 insertions(+), 154 deletions(-) diff --git a/conf/scylla.yaml b/conf/scylla.yaml index 91987c5bdc..96ea702dd5 100644 --- a/conf/scylla.yaml +++ b/conf/scylla.yaml @@ -562,18 +562,6 @@ murmur3_partitioner_ignore_msb_bits: 12 # Time for which task manager task is kept in memory after it completes. task_ttl_in_seconds: 10 -# Use Raft to consistently manage schema information in the cluster. -# Refer to https://docs.scylladb.com/master/architecture/raft.html for more details. -# The 'Handling Failures' section is especially important. -# -# Once enabled in a cluster, this cannot be turned off. -# If you want to bootstrap a new cluster without Raft, make sure to set this to `false` -# before starting your nodes for the first time. -# -# A cluster not using Raft can be 'upgraded' to use Raft. Refer to the aforementioned -# documentation, section 'Enabling Raft in ScyllaDB 5.2 and further', for the procedure. 
-consistent_cluster_management: true - # In materialized views, restrictions are allowed only on the view's primary key columns. # In old versions Scylla mistakenly allowed IS NOT NULL restrictions on columns which were not part # of the view's primary key. These invalid restrictions were ignored. diff --git a/db/config.cc b/db/config.cc index 0967982a05..eeae4b5bb5 100644 --- a/db/config.cc +++ b/db/config.cc @@ -1096,7 +1096,7 @@ db::config::config(std::shared_ptr exts) "Keep SSTable index pages in the global cache after a SSTable read. Expected to improve performance for workloads with big partitions, but may degrade performance for workloads with small partitions. The amount of memory usable by index cache is limited with `index_cache_fraction`.") , index_cache_fraction(this, "index_cache_fraction", liveness::LiveUpdate, value_status::Used, 0.2, "The maximum fraction of cache memory permitted for use by index cache. Clamped to the [0.0; 1.0] range. Must be small enough to not deprive the row cache of memory, but should be big enough to fit a large fraction of the index. 
The default value 0.2 means that at least 80\% of cache memory is reserved for the row cache, while at most 20\% is usable by the index cache.") - , consistent_cluster_management(this, "consistent_cluster_management", value_status::Used, true, "Use RAFT for cluster management and DDL") + , consistent_cluster_management(this, "consistent_cluster_management", value_status::Deprecated, true, "Use RAFT for cluster management and DDL") , wasm_cache_memory_fraction(this, "wasm_cache_memory_fraction", value_status::Used, 0.01, "Maximum total size of all WASM instances stored in the cache as fraction of total shard memory") , wasm_cache_timeout_in_ms(this, "wasm_cache_timeout_in_ms", value_status::Used, 5000, "Time after which an instance is evicted from the cache") , wasm_cache_instance_size_limit(this, "wasm_cache_instance_size_limit", value_status::Used, 1024*1024, "Instances with size above this limit will not be stored in the cache") diff --git a/db/system_keyspace.cc b/db/system_keyspace.cc index d86238f016..f9b1bd3db3 100644 --- a/db/system_keyspace.cc +++ b/db/system_keyspace.cc @@ -1874,7 +1874,7 @@ std::vector system_keyspace::all_tables(const db::config& cfg) { v3::truncated(), v3::cdc_local(), }); - if (cfg.consistent_cluster_management()) { + r.insert(r.end(), {raft(), raft_snapshots(), raft_snapshot_config(), group0_history(), discovery()}); if (cfg.check_experimental(db::experimental_features_t::feature::CONSISTENT_TOPOLOGY_CHANGES)) { @@ -1888,7 +1888,7 @@ std::vector system_keyspace::all_tables(const db::config& cfg) { if (cfg.check_experimental(db::experimental_features_t::feature::TABLETS)) { r.insert(r.end(), {tablets()}); } - } + if (cfg.check_experimental(db::experimental_features_t::feature::KEYSPACE_STORAGE_OPTIONS)) { r.insert(r.end(), {sstables_registry()}); } diff --git a/gms/feature_service.cc b/gms/feature_service.cc index 88c12a8898..59f87e37bd 100644 --- a/gms/feature_service.cc +++ b/gms/feature_service.cc @@ -79,10 +79,6 @@ feature_config 
feature_config_from_db_config(const db::config& cfg, std::setconsistent_cluster_management() && + const bool raft_topology_change_enabled = cfg->check_experimental(db::experimental_features_t::feature::CONSISTENT_TOPOLOGY_CHANGES); gms::feature_config fcfg = gms::feature_config_from_db_config(*cfg); @@ -1334,8 +1334,8 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl fd.stop().get(); }); - raft_gr.start(cfg->consistent_cluster_management(), raft::server_id{host_id.id}, - std::ref(raft_address_map), std::ref(messaging), std::ref(gossiper), std::ref(fd)).get(); + raft_gr.start(raft::server_id{host_id.id}, std::ref(raft_address_map), + std::ref(messaging), std::ref(gossiper), std::ref(fd)).get(); // group0 client exists only on shard 0. // The client has to be created before `stop_raft` since during @@ -1425,19 +1425,9 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl sst_format_listener.stop().get(); }); - if (raft_gr.local().is_enabled()) { supervisor::notify("starting Raft Group Registry service"); raft_gr.invoke_on_all(&service::raft_group_registry::start).get(); - } else { - if (cfg->check_experimental(db::experimental_features_t::feature::BROADCAST_TABLES)) { - startlog.error("Bad configuration: RAFT feature has to be enabled if BROADCAST_TABLES is enabled"); - throw bad_configuration_error(); - } - if (cfg->check_experimental(db::experimental_features_t::feature::TABLETS)) { - startlog.error("Bad configuration: consistent_cluster_management feature has to be enabled if tablets feature is enabled"); - throw bad_configuration_error(); - } - } + group0_client.init().get(); // schema migration, if needed, is also done on shard 0 diff --git a/service/raft/raft_group0.cc b/service/raft/raft_group0.cc index ea11fd1eca..a2d2ed8c1b 100644 --- a/service/raft/raft_group0.cc +++ b/service/raft/raft_group0.cc @@ -138,7 +138,7 @@ raft_group0::raft_group0(seastar::abort_source& abort_source, 
db::system_keyspace& sys_ks, raft_group0_client& client) : _abort_source(abort_source), _raft_gr(raft_gr), _ms(ms), _gossiper(gs), _feat(feat), _sys_ks(sys_ks), _client(client) - , _status_for_monitoring(_raft_gr.is_enabled() ? status_for_monitoring::normal : status_for_monitoring::disabled) + , _status_for_monitoring(status_for_monitoring::normal) { register_metrics(); } @@ -591,14 +591,6 @@ static future synchronize_schema( future raft_group0::use_raft() { assert(this_shard_id() == 0); - if (!_raft_gr.is_enabled()) { - group0_log.info("setup_group0: local RAFT feature disabled, skipping group 0 setup."); - // Note: if the local feature was enabled by every node earlier, that would enable the cluster - // SUPPORTS_RAFT feature, and the node should then refuse to start during feature check - // (because if the local feature is disabled, then the cluster feature - enabled in the cluster - is 'unknown' to us). - co_return false; - } - if (((co_await _client.get_group0_upgrade_state()).second) == group0_upgrade_state::recovery) { group0_log.warn("setup_group0: Raft RECOVERY mode, skipping group 0 setup."); co_return false; @@ -772,9 +764,6 @@ future<> raft_group0::finish_setup_after_join(service::storage_service& ss, cql3 // (that's the only way to join as non-voter today). co_return; } - } else if (!_raft_gr.is_enabled()) { - group0_log.info("finish_setup_after_join: local RAFT feature disabled, skipping."); - co_return; } else { // We're either upgrading or in recovery mode. 
} @@ -867,11 +856,6 @@ future<> raft_group0::remove_from_group0(raft::server_id node) { future raft_group0::wait_for_raft() { assert(this_shard_id() == 0); - if (!_raft_gr.is_enabled()) { - group0_log.info("Local RAFT feature disabled."); - co_return false; - } - auto upgrade_state = (co_await _client.get_group0_upgrade_state()).second; if (upgrade_state == group0_upgrade_state::recovery) { group0_log.warn("In Raft RECOVERY mode."); @@ -1568,10 +1552,6 @@ static auto warn_if_upgrade_takes_too_long() { future<> raft_group0::upgrade_to_group0(service::storage_service& ss, cql3::query_processor& qp, service::migration_manager& mm, bool topology_change_enabled) { assert(this_shard_id() == 0); - // The SUPPORTS_RAFT cluster feature is enabled, so the local RAFT feature must also be enabled - // (otherwise we wouldn't 'know' the cluster feature). - assert(_raft_gr.is_enabled()); - auto start_state = (co_await _client.get_group0_upgrade_state()).second; switch (start_state) { case group0_upgrade_state::recovery: @@ -1714,7 +1694,7 @@ void raft_group0::register_metrics() { namespace sm = seastar::metrics; _metrics.add_group("raft_group0", { sm::make_gauge("status", [this] { return static_cast(_status_for_monitoring); }, - sm::description("status of the raft group, 0 - disabled, 1 - normal, 2 - aborted")) + sm::description("status of the raft group, 1 - normal, 2 - aborted")) }); } diff --git a/service/raft/raft_group0.hh b/service/raft/raft_group0.hh index 9d68730ab4..2887e7b9f5 100644 --- a/service/raft/raft_group0.hh +++ b/service/raft/raft_group0.hh @@ -114,8 +114,7 @@ class raft_group0 { // Status of the raft group0 for monitoring. enum class status_for_monitoring : uint8_t { - // Raft is disabled. - disabled = 0, + unused = 0, normal = 1, aborted = 2 } _status_for_monitoring; @@ -145,11 +144,6 @@ public: // Call after construction but before using the object. 
future<> start(); - // Return true if Raft is enabled (but not necessarily having - // an active group 0 - e.g. in case we haven't completed an - // upgrade of a heterogeneous cluster yet. - bool is_raft_enabled() const { return _raft_gr.is_enabled(); } - // Call before destroying the object. future<> abort(); diff --git a/service/raft/raft_group_registry.cc b/service/raft/raft_group_registry.cc index 3b7f4378f8..230615e072 100644 --- a/service/raft/raft_group_registry.cc +++ b/service/raft/raft_group_registry.cc @@ -159,12 +159,11 @@ public: // }}} gossiper_state_change_subscriber_proxy -raft_group_registry::raft_group_registry(bool is_enabled, +raft_group_registry::raft_group_registry( raft::server_id my_id, raft_address_map& address_map, netw::messaging_service& ms, gms::gossiper& gossiper, direct_failure_detector::failure_detector& fd) - : _is_enabled(is_enabled) - , _ms(ms) + : _ms(ms) , _gossiper(gossiper) , _gossiper_proxy(make_shared(address_map)) , _address_map{address_map} @@ -372,8 +371,6 @@ future<> raft_group_registry::stop_servers() noexcept { } seastar::future<> raft_group_registry::start() { - assert(_is_enabled); - _gossiper.register_(_gossiper_proxy); // Once a Raft server starts, it soon times out @@ -390,9 +387,6 @@ const raft::server_id& raft_group_registry::get_my_raft_id() { } seastar::future<> raft_group_registry::stop() { - if (!_is_enabled) { - co_return; - } co_await drain_on_shutdown(); co_await uninit_rpc_verbs(); _direct_fd_subscription.reset(); diff --git a/service/raft/raft_group_registry.hh b/service/raft/raft_group_registry.hh index 2103565554..0e4ecb831a 100644 --- a/service/raft/raft_group_registry.hh +++ b/service/raft/raft_group_registry.hh @@ -66,9 +66,6 @@ class gossiper_state_change_subscriber_proxy; // to the owning shard for a given raft group_id. 
class raft_group_registry : public seastar::peering_sharded_service { private: - // True if the feature is enabled - bool _is_enabled; - netw::messaging_service& _ms; gms::gossiper& _gossiper; // A proxy class representing subscription to on_change @@ -100,12 +97,10 @@ private: raft::server_id _my_id; public: - // `is_enabled` must be `true` iff the local RAFT feature is enabled. - raft_group_registry(bool is_enabled, raft::server_id my_id, raft_address_map&, - netw::messaging_service& ms, gms::gossiper& gs, direct_failure_detector::failure_detector& fd); + raft_group_registry(raft::server_id my_id, raft_address_map&, netw::messaging_service& ms, + gms::gossiper& gs, direct_failure_detector::failure_detector& fd); ~raft_group_registry(); - // If is_enabled(), // Called manually at start on every shard. seastar::future<> start(); // Called by sharded<>::stop() @@ -148,10 +143,6 @@ public: shared_ptr failure_detector(); raft_address_map& address_map() { return _address_map; } direct_failure_detector::failure_detector& direct_fd() { return _direct_fd; } - - // Is the RAFT local feature enabled? - // Note: do not confuse with the SUPPORTS_RAFT cluster feature. - bool is_enabled() const { return _is_enabled; } }; // Implementation of `direct_failure_detector::pinger` which uses DIRECT_FD_PING verb for pinging. 
diff --git a/service/storage_service.cc b/service/storage_service.cc index 951b14e3fa..7efb865026 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -171,7 +171,7 @@ storage_service::storage_service(abort_source& abort_source, } auto& cfg = _db.local().get_config(); - init_messaging_service(cfg.consistent_cluster_management() && cfg.check_experimental(db::experimental_features_t::feature::CONSISTENT_TOPOLOGY_CHANGES)); + init_messaging_service(cfg.check_experimental(db::experimental_features_t::feature::CONSISTENT_TOPOLOGY_CHANGES)); } enum class node_external_status { diff --git a/test/alternator/run b/test/alternator/run index 93996800da..e0d6f1ad17 100755 --- a/test/alternator/run +++ b/test/alternator/run @@ -23,13 +23,7 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) print('Scylla is: ' + run.find_scylla() + '.') -# If the "--raft" option is given, switch to the experimental Raft-based -# implementation of schema operations. When the experimental feature becomes -# the default, we can drop this option. 
extra_scylla_options = [] -if '--raft' in sys.argv: - sys.argv.remove('--raft') - extra_scylla_options += ['--consistent-cluster-management', 'true'] if "-h" in sys.argv or "--help" in sys.argv: run.run_pytest(sys.path[0], sys.argv) diff --git a/test/boost/cql_query_test.cc b/test/boost/cql_query_test.cc index 96ad2ac9db..77ce7ee78f 100644 --- a/test/boost/cql_query_test.cc +++ b/test/boost/cql_query_test.cc @@ -5715,7 +5715,6 @@ cql_test_config tablet_cql_test_config() { db::experimental_features_t::feature::TABLETS, db::experimental_features_t::feature::CONSISTENT_TOPOLOGY_CHANGES, }, db::config::config_source::CommandLine); - c.db_config->consistent_cluster_management(true); return c; } diff --git a/test/boost/group0_test.cc b/test/boost/group0_test.cc index 2cd4cce45c..71f8dabc1a 100644 --- a/test/boost/group0_test.cc +++ b/test/boost/group0_test.cc @@ -67,7 +67,7 @@ SEASTAR_TEST_CASE(test_abort_server_on_background_error) { BOOST_REQUIRE_EQUAL(get_status(), 2); BOOST_CHECK_EXCEPTION(co_await perform_schema_change(), raft::stopped_error, check_error); BOOST_REQUIRE_EQUAL(get_status(), 2); - }, raft_cql_test_config()); + }); #endif } @@ -152,7 +152,7 @@ SEASTAR_TEST_CASE(test_group0_history_clearing_old_entries) { // Therefore `timestamps2` should contain all in `timestamps1` minus those changes plus one (`last_ts`). BOOST_REQUIRE_EQUAL(timestamps2.size(), timestamps1.size() - older_by_sleep_dur + 1); - }, raft_cql_test_config()); + }); } SEASTAR_TEST_CASE(test_concurrent_group0_modifications) { @@ -234,5 +234,5 @@ SEASTAR_TEST_CASE(test_concurrent_group0_modifications) { // Each execution should have succeeded on first attempt because the mutex serialized them all. 
BOOST_REQUIRE_EQUAL(successes, N*M); - }, raft_cql_test_config()); + }); } diff --git a/test/boost/schema_change_test.cc b/test/boost/schema_change_test.cc index c7147d8bef..ebe08a6850 100644 --- a/test/boost/schema_change_test.cc +++ b/test/boost/schema_change_test.cc @@ -1116,7 +1116,7 @@ SEASTAR_TEST_CASE(test_schema_tables_use_null_sharder) { BOOST_REQUIRE_EQUAL(s->get_sharder().shard_count(), 1); } }).get(); - }, raft_cql_test_config()); + }); } SEASTAR_TEST_CASE(test_schema_make_reversed) { diff --git a/test/boost/tablets_test.cc b/test/boost/tablets_test.cc index 754b216849..6ff3557e0f 100644 --- a/test/boost/tablets_test.cc +++ b/test/boost/tablets_test.cc @@ -52,7 +52,6 @@ cql_test_config tablet_cql_test_config() { c.db_config->experimental_features({ db::experimental_features_t::feature::TABLETS, }, db::config::config_source::CommandLine); - c.db_config->consistent_cluster_management(true); return c; } diff --git a/test/broadcast_tables/suite.yaml b/test/broadcast_tables/suite.yaml index 822c3bfb71..afe171621d 100644 --- a/test/broadcast_tables/suite.yaml +++ b/test/broadcast_tables/suite.yaml @@ -1,5 +1,4 @@ type: Python extra_scylla_cmdline_options: - - "--consistent-cluster-management=true" - "--experimental-features=broadcast-tables" - "--enable-user-defined-functions=false" diff --git a/test/cql-pytest/run b/test/cql-pytest/run index 7e4391e510..5935937706 100755 --- a/test/cql-pytest/run +++ b/test/cql-pytest/run @@ -14,18 +14,6 @@ else: cmd = run.run_scylla_cmd check_cql = run.check_cql -# If the "--raft" option is given, switch to the experimental Raft-based -# implementation of schema operations. Some tests are expected to fail -# when not in raft mode, so they use the fails_without_consistent_cluster_management fixture -# that will cause them to xfail when raft isn't used. 
-if '--raft' in sys.argv: - sys.argv.remove('--raft') - def run_with_raft(pid, dir): - (c, e) = run_with_raft.orig_cmd(pid, dir) - return (c + ['--consistent-cluster-management', 'true'], e) - run_with_raft.orig_cmd = cmd - cmd = run_with_raft - if "-h" in sys.argv or "--help" in sys.argv: run.run_pytest(sys.path[0], sys.argv) exit(0) diff --git a/test/lib/cql_test_env.cc b/test/lib/cql_test_env.cc index 36ebf83da1..2a8288b66a 100644 --- a/test/lib/cql_test_env.cc +++ b/test/lib/cql_test_env.cc @@ -719,7 +719,7 @@ private: _fd.stop().get(); }); - _group0_registry.start(cfg->consistent_cluster_management(), + _group0_registry.start( raft::server_id{host_id.id}, std::ref(_raft_address_map), std::ref(_ms), std::ref(_gossiper), std::ref(_fd)).get(); @@ -809,11 +809,9 @@ private: }); }).get(); - if (_group0_registry.local().is_enabled()) { _group0_registry.invoke_on_all([] (service::raft_group_registry& raft_gr) { return raft_gr.start(); }).get(); - } if (cfg_in.run_with_raft_recovery) { _sys_ks.local().save_group0_upgrade_state("RECOVERY").get(); @@ -884,8 +882,8 @@ private: group0_service.abort().get(); }); - const bool raft_topology_change_enabled = group0_service.is_raft_enabled() - && cfg->check_experimental(db::experimental_features_t::feature::CONSISTENT_TOPOLOGY_CHANGES); + const bool raft_topology_change_enabled = + cfg->check_experimental(db::experimental_features_t::feature::CONSISTENT_TOPOLOGY_CHANGES); _ss.local().set_group0(group0_service, raft_topology_change_enabled); @@ -1017,9 +1015,3 @@ future<> do_with_cql_env_thread(std::function func, cql_tes reader_permit make_reader_permit(cql_test_env& env) { return env.local_db().get_reader_concurrency_semaphore().make_tracking_only_permit(nullptr, "test", db::no_timeout, {}); } - -cql_test_config raft_cql_test_config() { - cql_test_config c; - c.db_config->consistent_cluster_management(true); - return c; -} diff --git a/test/lib/cql_test_env.hh b/test/lib/cql_test_env.hh index edf1749b9a..120c6de868 100644 --- 
a/test/lib/cql_test_env.hh +++ b/test/lib/cql_test_env.hh @@ -185,6 +185,3 @@ future<> do_with_cql_env(std::function(cql_test_env&)> func, cql_test_c future<> do_with_cql_env_thread(std::function func, cql_test_config = {}, thread_attributes thread_attr = {}, std::optional = {}); reader_permit make_reader_permit(cql_test_env&); - -// CQL test config with raft experimental feature enabled -cql_test_config raft_cql_test_config(); diff --git a/test/perf/perf_simple_query.cc b/test/perf/perf_simple_query.cc index 980082da66..45bb578113 100644 --- a/test/perf/perf_simple_query.cc +++ b/test/perf/perf_simple_query.cc @@ -545,7 +545,6 @@ int scylla_simple_query_main(int argc, char** argv) { if (app.configuration().contains("tablets")) { cfg.db_config->experimental_features({db::experimental_features_t::feature::TABLETS}, db::config::config_source::CommandLine); - cfg.db_config->consistent_cluster_management(true); cfg.initial_tablets = app.configuration()["initial-tablets"].as(); } return do_with_cql_env_thread([&app] (auto&& env) { diff --git a/test/pylib/scylla_cluster.py b/test/pylib/scylla_cluster.py index a9be62ecc5..994865ae8c 100644 --- a/test/pylib/scylla_cluster.py +++ b/test/pylib/scylla_cluster.py @@ -79,8 +79,6 @@ def make_scylla_conf(workdir: pathlib.Path, host_addr: str, seed_addrs: List[str 'keyspace-storage-options', 'tablets'], - 'consistent_cluster_management': True, - 'skip_wait_for_gossip_to_settle': 0, 'ring_delay_ms': 0, 'num_tokens': 16, diff --git a/test/raft/raft_sys_table_storage_test.cc b/test/raft/raft_sys_table_storage_test.cc index 630d8a0425..170a8efcf0 100644 --- a/test/raft/raft_sys_table_storage_test.cc +++ b/test/raft/raft_sys_table_storage_test.cc @@ -87,7 +87,7 @@ SEASTAR_TEST_CASE(test_store_load_term_and_vote) { BOOST_CHECK_EQUAL(vote_term, persisted.first); BOOST_CHECK_EQUAL(vote_id, persisted.second); - }, raft_cql_test_config()); + }); } SEASTAR_TEST_CASE(test_store_load_snapshot) { @@ -117,7 +117,7 @@ 
SEASTAR_TEST_CASE(test_store_load_snapshot) { raft::snapshot_descriptor loaded_snp = co_await storage.load_snapshot_descriptor(); BOOST_CHECK(snp == loaded_snp); - }, raft_cql_test_config()); + }); } SEASTAR_TEST_CASE(test_store_load_log_entries) { @@ -133,7 +133,7 @@ SEASTAR_TEST_CASE(test_store_load_log_entries) { for (size_t i = 0, end = entries.size(); i != end; ++i) { BOOST_CHECK(*entries[i] == *loaded_entries[i]); } - }, raft_cql_test_config()); + }); } SEASTAR_TEST_CASE(test_truncate_log) { @@ -151,7 +151,7 @@ SEASTAR_TEST_CASE(test_truncate_log) { for (size_t i = 0, end = loaded_entries.size(); i != end; ++i) { BOOST_CHECK(*entries[i] == *loaded_entries[i]); } - }, raft_cql_test_config()); + }); } SEASTAR_TEST_CASE(test_store_snapshot_truncate_log_tail) { @@ -186,5 +186,5 @@ SEASTAR_TEST_CASE(test_store_snapshot_truncate_log_tail) { for (size_t i = 0, end = loaded_entries.size(); i != end; ++i) { BOOST_CHECK(*entries[i + 1] == *loaded_entries[i]); } - }, raft_cql_test_config()); + }); } diff --git a/test/topology/conftest.py b/test/topology/conftest.py index a4e9db13e3..1570e71387 100644 --- a/test/topology/conftest.py +++ b/test/topology/conftest.py @@ -184,30 +184,6 @@ async def manager(request, manager_internal): def cql(manager): yield manager.cql -# Consistent schema change feature is optionally enabled and -# some tests are expected to fail on Scylla without this -# option enabled, and pass with it enabled (and also pass on Cassandra). -# These tests should use the "fails_without_consistent_cluster_management" -# fixture. When consistent mode becomes the default, this fixture can be removed. -@pytest.fixture(scope="function") -def check_pre_consistent_cluster_management(manager): - # If not running on Scylla, return false. 
- names = [row.table_name for row in manager.cql.execute( - "SELECT * FROM system_schema.tables WHERE keyspace_name = 'system'")] - if not any('scylla' in name for name in names): - return False - # In Scylla, we check Raft mode by inspecting the configuration via CQL. - consistent = list(manager.cql.execute("SELECT value FROM system.config WHERE name = 'consistent_cluster_management'")) - return len(consistent) == 0 or consistent[0].value == "false" - - -@pytest.fixture(scope="function") -def fails_without_consistent_cluster_management(request, check_pre_consistent_cluster_management): - if check_pre_consistent_cluster_management: - request.node.add_marker(pytest.mark.xfail(reason="Test expected to fail without consistent cluster management " - "feature on")) - - # "random_tables" fixture: Creates and returns a temporary RandomTables object # used in tests to make schema changes. Tables are dropped after test finishes # unless the cluster is dirty or the test has failed. diff --git a/test/topology/test_concurrent_schema.py b/test/topology/test_concurrent_schema.py index 6d6cbb751d..b6e8dbc493 100644 --- a/test/topology/test_concurrent_schema.py +++ b/test/topology/test_concurrent_schema.py @@ -20,7 +20,7 @@ logger = logging.getLogger('schema-test') # - Creates 20+20 tables to alter and 20 tables to index # - In parallel run 20 * create table, and drop/add column and index of previous 2 tables @pytest.mark.asyncio -async def test_cassandra_issue_10250(random_tables, fails_without_consistent_cluster_management): +async def test_cassandra_issue_10250(random_tables): tables = random_tables # How many combinations of tables; original Cassandra issue repro had 20 RANGE = 1 From dced4bb924699dea1594a7a2f04c8272ab77fbac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patryk=20J=C4=99drzejczak?= Date: Mon, 4 Dec 2023 11:05:24 +0100 Subject: [PATCH 5/6] system_keyspace, main, cql_test_env: fix indentations Broken in the previous patch. 
--- db/system_keyspace.cc | 20 ++++++++++---------- main.cc | 4 ++-- test/lib/cql_test_env.cc | 6 +++--- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/db/system_keyspace.cc b/db/system_keyspace.cc index f9b1bd3db3..6c3976640e 100644 --- a/db/system_keyspace.cc +++ b/db/system_keyspace.cc @@ -1875,19 +1875,19 @@ std::vector system_keyspace::all_tables(const db::config& cfg) { v3::cdc_local(), }); - r.insert(r.end(), {raft(), raft_snapshots(), raft_snapshot_config(), group0_history(), discovery()}); + r.insert(r.end(), {raft(), raft_snapshots(), raft_snapshot_config(), group0_history(), discovery()}); - if (cfg.check_experimental(db::experimental_features_t::feature::CONSISTENT_TOPOLOGY_CHANGES)) { - r.insert(r.end(), {topology(), cdc_generations_v3()}); - } + if (cfg.check_experimental(db::experimental_features_t::feature::CONSISTENT_TOPOLOGY_CHANGES)) { + r.insert(r.end(), {topology(), cdc_generations_v3()}); + } - if (cfg.check_experimental(db::experimental_features_t::feature::BROADCAST_TABLES)) { - r.insert(r.end(), {broadcast_kv_store()}); - } + if (cfg.check_experimental(db::experimental_features_t::feature::BROADCAST_TABLES)) { + r.insert(r.end(), {broadcast_kv_store()}); + } - if (cfg.check_experimental(db::experimental_features_t::feature::TABLETS)) { - r.insert(r.end(), {tablets()}); - } + if (cfg.check_experimental(db::experimental_features_t::feature::TABLETS)) { + r.insert(r.end(), {tablets()}); + } if (cfg.check_experimental(db::experimental_features_t::feature::KEYSPACE_STORAGE_OPTIONS)) { r.insert(r.end(), {sstables_registry()}); diff --git a/main.cc b/main.cc index 4ec7f1b475..67331784e1 100644 --- a/main.cc +++ b/main.cc @@ -1425,8 +1425,8 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl sst_format_listener.stop().get(); }); - supervisor::notify("starting Raft Group Registry service"); - raft_gr.invoke_on_all(&service::raft_group_registry::start).get(); + supervisor::notify("starting Raft Group 
Registry service"); + raft_gr.invoke_on_all(&service::raft_group_registry::start).get(); group0_client.init().get(); diff --git a/test/lib/cql_test_env.cc b/test/lib/cql_test_env.cc index 2a8288b66a..8bbdd3e780 100644 --- a/test/lib/cql_test_env.cc +++ b/test/lib/cql_test_env.cc @@ -809,9 +809,9 @@ private: }); }).get(); - _group0_registry.invoke_on_all([] (service::raft_group_registry& raft_gr) { - return raft_gr.start(); - }).get(); + _group0_registry.invoke_on_all([] (service::raft_group_registry& raft_gr) { + return raft_gr.start(); + }).get(); if (cfg_in.run_with_raft_recovery) { _sys_ks.local().save_group0_upgrade_state("RECOVERY").get(); From f23f8628b78cc5ba7974649bfd83c7e3142177dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patryk=20J=C4=99drzejczak?= Date: Mon, 4 Dec 2023 14:46:36 +0100 Subject: [PATCH 6/6] docs: update after making consistent_cluster_management mandatory We remove Raft documentation irrelevant in 5.5. One of the changes is removing a part of the "Enabling Raft" section in raft.rst. Since Raft is mandatory in 5.5, the only way to enable it in this version is by performing a rolling upgrade from 5.4. We only need to have this case well-documented. In particular, we remove information that also appears in the upgrade guides like verifying schema synchronization. Similarly, we remove a sentence from the "Manual Recovery Procedure" section in handling-node-failures.rst because it mentions enabling Raft manually, which is impossible in 5.5. The rest of the changes are just removing information about checking or setting consistent_cluster_management, which has become unused. 
--- docs/architecture/raft.rst | 38 ++++--------------- .../cluster-management/_common/prereq.rst | 1 - .../add-dc-to-existing-dc.rst | 1 - .../add-node-to-cluster.rst | 2 - .../create-cluster-multidc.rst | 1 - .../cluster-management/create-cluster.rst | 1 - .../procedures/cluster-management/ec2-dc.rst | 1 - .../replace-dead-node-or-more.rst | 1 - .../cluster-management/replace-dead-node.rst | 2 - .../handling-node-failures.rst | 14 +------ 10 files changed, 8 insertions(+), 54 deletions(-) diff --git a/docs/architecture/raft.rst b/docs/architecture/raft.rst index 7c2d9d6399..72089e3f9d 100644 --- a/docs/architecture/raft.rst +++ b/docs/architecture/raft.rst @@ -35,50 +35,26 @@ of the DCs is down. Enabling Raft --------------- -.. note:: - In ScyllaDB 5.2 and ScyllaDB Enterprise 2023.1 Raft is Generally Available and can be safely used for consistent schema management. - It will get enabled by default when you upgrade your cluster to ScyllaDB 5.4 or 2024.1. - If needed, you can explicitly prevent it from getting enabled upon upgrade. - - .. only:: opensource - - See :doc:`the upgrade guide from 5.2 to 5.4 ` for details. - ScyllaDB Open Source 5.2 and later, and ScyllaDB Enterprise 2023.1 and later come equipped with a procedure that can setup Raft-based consistent cluster management in an existing cluster. We refer to this as the **Raft upgrade procedure** (do not confuse with the :doc:`ScyllaDB version upgrade procedure `). .. warning:: - Once enabled, Raft cannot be disabled on your cluster. The cluster nodes will fail to restart if you remove the Raft feature. + In ScyllaDB Open Source 5.5 and ScyllaDB Enterprise 2024.2 Raft is mandatory. -To enable Raft in an existing cluster, you need to enable the ``consistent_cluster_management`` option in the ``scylla.yaml`` file -for **each node** in the cluster: +When all the nodes in the cluster are upgraded to ScyllaDB Open Source 5.5 or ScyllaDB Enterprise 2024.2, the cluster will start the **Raft upgrade procedure**. 
-#. Ensure that the schema is synchronized in the cluster by executing :doc:`nodetool describecluster ` on each node and ensuring that the schema version is the same on all nodes. -#. Perform a :doc:`rolling restart `, updating the ``scylla.yaml`` file for **each node** in the cluster before restarting it to enable the ``consistent_cluster_management`` option: +.. only:: opensource - .. code-block:: yaml + See :doc:`the upgrade guide from 5.4 to 5.5 ` for details. - consistent_cluster_management: true - -When all the nodes in the cluster and updated and restarted, the cluster will start the **Raft upgrade procedure**. -**You must then verify** that the Raft upgrade procedure has finished successfully. Refer to the :ref:`next section `. - -Alternatively, you can enable the ``consistent_cluster_management`` option when you are: - -* Performing a rolling upgrade from version 5.1 to 5.2 or version 2022.x to 2023.1 by updating ``scylla.yaml`` before restarting each node. The Raft upgrade procedure will start as soon as the last node was upgraded and restarted. As above, this requires :ref:`verifying ` that the procedure successfully finishes. -* Creating a new cluster. This does not use the Raft upgrade procedure; instead, Raft is functioning in the cluster and managing schema right from the start. - -Until all nodes are restarted with ``consistent_cluster_management: true``, it is still possible to turn this option back off. Once enabled on every node, it must remain turned on (or the node will refuse to restart). +.. warning:: + Once enabled, Raft cannot be disabled on your cluster. .. _verify-raft-procedure: Verifying that the Raft upgrade procedure finished successfully ======================================================================== -The Raft upgrade procedure starts as soon as every node in the cluster restarts with ``consistent_cluster_management`` flag enabled in ``scylla.yaml``. - -.. TODO: update the above sentence once 5.3 and later are released. 
- -The procedure requires **full cluster availability** to correctly setup the Raft algorithm; after the setup finishes, Raft can proceed with only a majority of nodes, but this initial setup is an exception. +The Raft upgrade procedure requires **full cluster availability** to correctly set up the Raft algorithm; after the setup finishes, Raft can proceed with only a majority of nodes, but this initial setup is an exception. An unlucky event, such as a hardware failure, may cause one of your nodes to fail. If this happens before the Raft upgrade procedure finishes, the procedure will get stuck and your intervention will be required. To verify that the procedure finishes, look at the log of every Scylla node (using ``journalctl _COMM=scylla``). Search for the following patterns: diff --git a/docs/operating-scylla/procedures/cluster-management/_common/prereq.rst b/docs/operating-scylla/procedures/cluster-management/_common/prereq.rst index 516b2e1ec6..0eff6d4ac7 100644 --- a/docs/operating-scylla/procedures/cluster-management/_common/prereq.rst +++ b/docs/operating-scylla/procedures/cluster-management/_common/prereq.rst @@ -3,7 +3,6 @@ * endpoint_snitch - ``grep endpoint_snitch /etc/scylla/scylla.yaml`` * Scylla version - ``scylla --version`` * Authenticator - ``grep authenticator /etc/scylla/scylla.yaml`` -* consistent_cluster_management - ``grep consistent_cluster_management /etc/scylla/scylla.yaml`` .. Note:: diff --git a/docs/operating-scylla/procedures/cluster-management/add-dc-to-existing-dc.rst b/docs/operating-scylla/procedures/cluster-management/add-dc-to-existing-dc.rst index 964b595804..c559112977 100644 --- a/docs/operating-scylla/procedures/cluster-management/add-dc-to-existing-dc.rst +++ b/docs/operating-scylla/procedures/cluster-management/add-dc-to-existing-dc.rst @@ -119,7 +119,6 @@ Add New DC * **listen_address** - IP address that Scylla used to connect to the other Scylla nodes in the cluster. * **endpoint_snitch** - Set the selected snitch. 
* **rpc_address** - Address for client connections (Thrift, CQL). - * **consistent_cluster_management** - set to the same value as used by your existing nodes. The parameters ``seeds``, ``cluster_name`` and ``endpoint_snitch`` need to match the existing cluster. diff --git a/docs/operating-scylla/procedures/cluster-management/add-node-to-cluster.rst b/docs/operating-scylla/procedures/cluster-management/add-node-to-cluster.rst index dc055b3623..bab43f6d10 100644 --- a/docs/operating-scylla/procedures/cluster-management/add-node-to-cluster.rst +++ b/docs/operating-scylla/procedures/cluster-management/add-node-to-cluster.rst @@ -59,8 +59,6 @@ Procedure * **seeds** - Specifies the IP address of an existing node in the cluster. The new node will use this IP to connect to the cluster and learn the cluster topology and state. - * **consistent_cluster_management** - set to the same value as used by your existing nodes. - .. note:: In earlier versions of ScyllaDB, seed nodes assisted in gossip. Starting with Scylla Open Source 4.3 and Scylla Enterprise 2021.1, the seed concept in gossip has been removed. 
If you are using an earlier version of ScyllaDB, you need to configure the seeds parameter in the following way: diff --git a/docs/operating-scylla/procedures/cluster-management/create-cluster-multidc.rst b/docs/operating-scylla/procedures/cluster-management/create-cluster-multidc.rst index 2d5b8f492c..da34487704 100644 --- a/docs/operating-scylla/procedures/cluster-management/create-cluster-multidc.rst +++ b/docs/operating-scylla/procedures/cluster-management/create-cluster-multidc.rst @@ -70,7 +70,6 @@ the file can be found under ``/etc/scylla/`` - **listen_address** - IP address that the Scylla use to connect to other Scylla nodes in the cluster - **endpoint_snitch** - Set the selected snitch - **rpc_address** - Address for client connection (Thrift, CQLSH) -- **consistent_cluster_management** - ``true`` by default, can be set to ``false`` if you don't want to use Raft for consistent schema management in this cluster (will be mandatory in later versions). Check the :doc:`Raft in ScyllaDB document` to learn more. 3. In the ``cassandra-rackdc.properties`` file, edit the rack and data center information. The file can be found under ``/etc/scylla/``. diff --git a/docs/operating-scylla/procedures/cluster-management/create-cluster.rst b/docs/operating-scylla/procedures/cluster-management/create-cluster.rst index 5028df6e7c..1f82e69ed1 100644 --- a/docs/operating-scylla/procedures/cluster-management/create-cluster.rst +++ b/docs/operating-scylla/procedures/cluster-management/create-cluster.rst @@ -26,7 +26,6 @@ The file can be found under ``/etc/scylla/`` - **listen_address** - IP address that Scylla used to connect to other Scylla nodes in the cluster - **endpoint_snitch** - Set the selected snitch - **rpc_address** - Address for client connection (Thrift, CQL) -- **consistent_cluster_management** - ``true`` by default, can be set to ``false`` if you don't want to use Raft for consistent schema management in this cluster (will be mandatory in later versions). 
Check the :doc:`Raft in ScyllaDB document` to learn more. 3. This step needs to be done **only** if you are using the **GossipingPropertyFileSnitch**. If not, skip this step. In the ``cassandra-rackdc.properties`` file, edit the parameters listed below. diff --git a/docs/operating-scylla/procedures/cluster-management/ec2-dc.rst b/docs/operating-scylla/procedures/cluster-management/ec2-dc.rst index 7866b68725..f1a93a75e5 100644 --- a/docs/operating-scylla/procedures/cluster-management/ec2-dc.rst +++ b/docs/operating-scylla/procedures/cluster-management/ec2-dc.rst @@ -63,7 +63,6 @@ Perform the following steps for each node in the new cluster: * **rpc_address** - Address for client connection (Thrift, CQL). * **broadcast_address** - The IP address a node tells other nodes in the cluster to contact it by. * **broadcast_rpc_address** - Default: unset. The RPC address to broadcast to drivers and other Scylla nodes. It cannot be set to 0.0.0.0. If left blank, it will be set to the value of ``rpc_address``. If ``rpc_address`` is set to 0.0.0.0, ``broadcast_rpc_address`` must be explicitly configured. - * **consistent_cluster_management** - ``true`` by default, can be set to ``false`` if you don't want to use Raft for consistent schema management in this cluster (will be mandatory in later versions). Check the :doc:`Raft in ScyllaDB document` to learn more. #. After you have installed and configured Scylla and edited ``scylla.yaml`` file on all the nodes, start the node specified with the ``seeds`` parameter. Then start the rest of the nodes in your cluster, one at a time, using ``sudo systemctl start scylla-server``. 
diff --git a/docs/operating-scylla/procedures/cluster-management/replace-dead-node-or-more.rst b/docs/operating-scylla/procedures/cluster-management/replace-dead-node-or-more.rst index 7e1fd9e16c..622d7b3cc2 100644 --- a/docs/operating-scylla/procedures/cluster-management/replace-dead-node-or-more.rst +++ b/docs/operating-scylla/procedures/cluster-management/replace-dead-node-or-more.rst @@ -29,7 +29,6 @@ Login to one of the nodes in the cluster with (UN) status, collect the following * seeds - ``cat /etc/scylla/scylla.yaml | grep seeds:`` * endpoint_snitch - ``cat /etc/scylla/scylla.yaml | grep endpoint_snitch`` * Scylla version - ``scylla --version`` -* consistent_cluster_management - ``grep consistent_cluster_management /etc/scylla/scylla.yaml`` Procedure --------- diff --git a/docs/operating-scylla/procedures/cluster-management/replace-dead-node.rst b/docs/operating-scylla/procedures/cluster-management/replace-dead-node.rst index fa833057a8..16b29a5ce8 100644 --- a/docs/operating-scylla/procedures/cluster-management/replace-dead-node.rst +++ b/docs/operating-scylla/procedures/cluster-management/replace-dead-node.rst @@ -72,8 +72,6 @@ Procedure - **rpc_address** - Address for client connection (Thrift, CQL) - - **consistent_cluster_management** - set to the same value as used by your existing nodes. - #. Add the ``replace_node_first_boot`` parameter to the ``scylla.yaml`` config file on the new node. This line can be added to any place in the config file. After a successful node replacement, there is no need to remove it from the ``scylla.yaml`` file. (Note: The obsolete parameters "replace_address" and "replace_address_first_boot" are not supported and should not be used). The value of the ``replace_node_first_boot`` parameter should be the Host ID of the node to be replaced. 
For example (using the Host ID of the failed node from above): diff --git a/docs/troubleshooting/handling-node-failures.rst b/docs/troubleshooting/handling-node-failures.rst index 85bcd0187f..f3c853388e 100644 --- a/docs/troubleshooting/handling-node-failures.rst +++ b/docs/troubleshooting/handling-node-failures.rst @@ -1,16 +1,6 @@ Handling Node Failures ------------------------ -.. note:: - - This page applies to ScyllaDB clusters that use Raft to ensure consistency. - You can verify that Raft-based consistent management is enabled for your - cluster in the ``scylla.yaml`` file (enabled by default): - ``consistent_cluster_management: true`` - - .. REMOVE IN FUTURE VERSIONS - Remove the above note when Raft is mandatory - and default for both new and existing clusters. - ScyllaDB relies on the Raft consensus algorithm, which requires at least a quorum of nodes in a cluster to be available. If one or more nodes are down, but the quorum is live, reads, writes, and schema updates proceed unaffected. When the node that @@ -81,9 +71,7 @@ You can follow the manual recovery procedure when: * The majority of nodes (for example, 2 out of 3) failed and are irrecoverable. * :ref:`The Raft upgrade procedure ` got stuck because one - of the nodes failed in the middle of the procedure and is irrecoverable. This - may occur in existing clusters where Raft was manually enabled. - See :ref:`Enabling Raft ` for details. + of the nodes failed in the middle of the procedure and is irrecoverable. .. warning::