From d1c1b592362ce6d5f0e94828b390b042e65cf758 Mon Sep 17 00:00:00 2001 From: Tomasz Grabiec Date: Wed, 22 Nov 2023 18:22:39 +0100 Subject: [PATCH] storage_service, api: Add API to disable tablet balancing Load balancing needs to be disabled before making a series of manual migrations so that we don't fight with the load balancer. Also will be used in tests to ensure tablets stick to expected locations. --- api/api-doc/storage_service.json | 24 +++++++++ api/storage_service.cc | 17 ++++++ db/system_keyspace.cc | 7 +++ locator/tablets.hh | 5 ++ service/storage_service.cc | 46 ++++++++++++++++ service/storage_service.hh | 1 + service/tablet_allocator.cc | 2 +- service/topology_state_machine.hh | 3 ++ test/boost/tablets_test.cc | 90 +++++++++++++++++++++++++++++++ test/pylib/rest_client.py | 6 +++ 10 files changed, 200 insertions(+), 1 deletion(-) diff --git a/api/api-doc/storage_service.json b/api/api-doc/storage_service.json index b80b8911f3..46a3589791 100644 --- a/api/api-doc/storage_service.json +++ b/api/api-doc/storage_service.json @@ -2537,6 +2537,30 @@ } ] }, + { + "path":"/storage_service/tablets/balancing", + "operations":[ + { + "nickname":"tablet_balancing_enable", + "method":"POST", + "summary":"Moves a tablet replica", + "type":"void", + "produces":[ + "application/json" + ], + "parameters":[ + { + "name":"enabled", + "description":"When set to false, tablet load balancing is disabled", + "required":true, + "allowMultiple":false, + "type":"boolean", + "paramType":"query" + } + ] + } + ] + }, { "path":"/storage_service/metrics/total_hints", "operations":[ diff --git a/api/storage_service.cc b/api/storage_service.cc index 60c6afe000..0ef32b2b9d 100644 --- a/api/storage_service.cc +++ b/api/storage_service.cc @@ -74,6 +74,16 @@ locator::host_id validate_host_id(const sstring& param) { return hoep.id; } +bool validate_bool(const sstring& param) { + if (param == "true") { + return true; + } else if (param == "false") { + return false; + } else { + throw std::runtime_error("Parameter must be either 'true' or 'false'"); + } +} + static int64_t validate_int(const sstring& param) { return std::atoll(param.c_str()); @@ -1381,6 +1391,12 @@ void set_storage_service(http_context& ctx, routes& r, sharded req) -> future { + auto enabled = validate_bool(req->get_query_param("enabled")); + co_await ss.local().set_tablet_balancing_enabled(enabled); + co_return json_void(); + }); + sp::get_schema_versions.set(r, [&ss](std::unique_ptr req) { return ss.local().describe_schema_versions().then([] (auto result) { std::vector res; @@ -1479,6 +1495,7 @@ void unset_storage_service(http_context& ctx, routes& r) { ss::sstable_info.unset(r); ss::reload_raft_topology_state.unset(r); ss::move_tablet.unset(r); + ss::tablet_balancing_enable.unset(r); sp::get_schema_versions.unset(r); } diff --git a/db/system_keyspace.cc b/db/system_keyspace.cc index 9c5fb2f777..d5eba6f689 100644 --- a/db/system_keyspace.cc +++ b/db/system_keyspace.cc @@ -235,6 +235,7 @@ schema_ptr system_keyspace::topology() { .with_column("global_topology_request", utf8_type, column_kind::static_column) .with_column("enabled_features", set_type_impl::get_instance(utf8_type, true), column_kind::static_column) .with_column("session", uuid_type, column_kind::static_column) + .with_column("tablet_balancing_enabled", boolean_type, column_kind::static_column) .set_comment("Current state of topology change machine") .with_version(generate_schema_version(id)) .build(); @@ -2655,6 +2656,12 @@ future system_keyspace::load_topology_state() { if (some_row.has("session")) { ret.session = service::session_id(some_row.get_as("session")); } + + if (some_row.has("tablet_balancing_enabled")) { + ret.tablet_balancing_enabled = some_row.get_as("tablet_balancing_enabled"); + } else { + ret.tablet_balancing_enabled = true; + } } co_return ret; diff --git a/locator/tablets.hh b/locator/tablets.hh index 8a3dce75da..48fc6b73d6 100644 --- a/locator/tablets.hh +++ b/locator/tablets.hh @@ -343,12 +343,17 @@ public: using table_to_tablet_map = std::unordered_map; private: table_to_tablet_map _tablets; + + // When false, tablet load balancer will not try to rebalance tablets. + bool _balancing_enabled = true; public: + bool balancing_enabled() const { return _balancing_enabled; } const tablet_map& get_tablet_map(table_id id) const; const table_to_tablet_map& all_tables() const { return _tablets; } table_to_tablet_map& all_tables() { return _tablets; } size_t external_memory_usage() const; public: + void set_balancing_enabled(bool value) { _balancing_enabled = value; } void set_tablet_map(table_id, tablet_map); tablet_map& get_tablet_map(table_id id); future<> clear_gently(); diff --git a/service/storage_service.cc b/service/storage_service.cc index af72cf103e..a44db2af94 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -534,6 +534,7 @@ future<> storage_service::topology_state_load() { if (_db.local().get_config().check_experimental(db::experimental_features_t::feature::TABLETS)) { tmptr->set_tablets(co_await replica::read_tablet_metadata(_qp)); + tmptr->tablets().set_balancing_enabled(_topology_state_machine._topology.tablet_balancing_enabled); } })); @@ -660,6 +661,7 @@ public: topology_mutation_builder& set_version(topology::version_t); topology_mutation_builder& set_fence_version(topology::version_t); topology_mutation_builder& set_session(session_id); + topology_mutation_builder& set_tablet_balancing_enabled(bool); topology_mutation_builder& set_current_cdc_generation_id(const cdc::generation_id_v2&); topology_mutation_builder& set_new_cdc_generation_data_uuid(const utils::UUID& value); topology_mutation_builder& set_unpublished_cdc_generations(const std::vector& values); @@ -826,6 +828,11 @@ topology_mutation_builder& topology_mutation_builder::set_session(session_id val return *this; } +topology_mutation_builder& topology_mutation_builder::set_tablet_balancing_enabled(bool value) { + _m.set_static_cell("tablet_balancing_enabled", value, _ts); + return *this; +} + topology_mutation_builder& topology_mutation_builder::del_transition_state() { return del("transition_state"); } @@ -6148,6 +6155,7 @@ future<> storage_service::load_tablet_metadata() { } return mutate_token_metadata([this] (mutable_token_metadata_ptr tmptr) -> future<> { tmptr->set_tablets(co_await replica::read_tablet_metadata(_qp)); + tmptr->tablets().set_balancing_enabled(_topology_state_machine._topology.tablet_balancing_enabled); }, acquire_merge_lock::no); } @@ -6721,6 +6729,44 @@ future<> storage_service::move_tablet(table_id table, dht::token token, locator: }); } +future<> storage_service::set_tablet_balancing_enabled(bool enabled) { + auto holder = _async_gate.hold(); + + if (this_shard_id() != 0) { + // group0 is only set on shard 0. + co_return co_await container().invoke_on(0, [&] (auto& ss) { + return ss.set_tablet_balancing_enabled(enabled); + }); + } + + while (true) { + group0_guard guard = co_await _group0->client().start_operation(&_abort_source); + + while (_topology_state_machine._topology.is_busy()) { + slogger.debug("set_tablet_balancing_enabled(): topology is busy"); + release_guard(std::move(guard)); + co_await _topology_state_machine.event.wait(); + guard = co_await _group0->client().start_operation(&_abort_source); + } + + std::vector updates; + updates.push_back(canonical_mutation(topology_mutation_builder(guard.write_timestamp()) + .set_tablet_balancing_enabled(enabled) + .build())); + + sstring reason = format("Setting tablet balancing to {}", enabled); + slogger.info("raft topology: {}", reason); + topology_change change{std::move(updates)}; + group0_command g0_cmd = _group0->client().prepare_command(std::move(change), guard, reason); + try { + co_await _group0->client().add_entry(std::move(g0_cmd), std::move(guard)); + break; + } catch (group0_concurrent_modification&) { + slogger.debug("set_tablet_balancing_enabled(): concurrent modification"); + } + } +} + future storage_service::join_node_request_handler(join_node_request_params params) { join_node_request_result result; slogger.info("raft topology: received request to join from host_id: {}", params.host_id); diff --git a/service/storage_service.hh b/service/storage_service.hh index be7051d34f..c0709a03d1 100644 --- a/service/storage_service.hh +++ b/service/storage_service.hh @@ -775,6 +775,7 @@ public: public: future<> move_tablet(table_id, dht::token, locator::tablet_replica src, locator::tablet_replica dst); + future<> set_tablet_balancing_enabled(bool); private: // load topology state machine snapshot into memory diff --git a/service/tablet_allocator.cc b/service/tablet_allocator.cc index b63c5b8113..b29e825ede 100644 --- a/service/tablet_allocator.cc +++ b/service/tablet_allocator.cc @@ -417,7 +417,7 @@ public: } if (nodes_to_drain.empty()) { - if (!shuffle && max_load == min_load) { + if (!shuffle && (max_load == min_load || !_tm->tablets().balancing_enabled())) { // load is balanced. // TODO: Evaluate and fix intra-node balance. _stats.for_dc(dc).stop_balance++; diff --git a/service/topology_state_machine.hh b/service/topology_state_machine.hh index f85405c2ab..6eff870d4f 100644 --- a/service/topology_state_machine.hh +++ b/service/topology_state_machine.hh @@ -150,6 +150,9 @@ struct topology { // Session used to create topology_guard for operations like streaming. session_id session; + // When false, tablet load balancer will not try to rebalance tablets. + bool tablet_balancing_enabled = true; + // Find only nodes in non 'left' state const std::pair* find(raft::server_id id) const; // Return true if node exists in any state including 'left' one diff --git a/test/boost/tablets_test.cc b/test/boost/tablets_test.cc index 4486263249..56ceebe660 100644 --- a/test/boost/tablets_test.cc +++ b/test/boost/tablets_test.cc @@ -1289,6 +1289,96 @@ SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_two_empty_nodes) { }).get(); } +SEASTAR_THREAD_TEST_CASE(test_load_balancer_disabling) { + do_with_cql_env_thread([] (auto& e) { + inet_address ip1("192.168.0.1"); + inet_address ip2("192.168.0.2"); + + auto host1 = host_id(next_uuid()); + auto host2 = host_id(next_uuid()); + + auto table1 = table_id(next_uuid()); + + unsigned shard_count = 1; + + semaphore sem(1); + shared_token_metadata stm([&sem] () noexcept { return get_units(sem, 1); }, locator::token_metadata::config{ + locator::topology::config{ + .this_endpoint = ip1, + .local_dc_rack = locator::endpoint_dc_rack::default_location + } + }); + + // host1 is loaded and host2 is empty, resulting in an imbalance. + stm.mutate_token_metadata([&] (auto& tm) { + tm.update_host_id(host1, ip1); + tm.update_host_id(host2, ip2); + tm.update_topology(ip1, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); + tm.update_topology(ip2, locator::endpoint_dc_rack::default_location, std::nullopt, shard_count); + + tablet_map tmap(16); + for (auto tid : tmap.tablet_ids()) { + tmap.set_tablet(tid, tablet_info { + tablet_replica_set { + tablet_replica {host1, 0}, + } + }); + } + tablet_metadata tmeta; + tmeta.set_tablet_map(table1, std::move(tmap)); + tm.set_tablets(std::move(tmeta)); + return make_ready_future<>(); + }).get(); + + { + auto plan = e.get_tablet_allocator().local().balance_tablets(stm.get()).get0(); + BOOST_REQUIRE(!plan.empty()); + } + + // Disable load balancing + stm.mutate_token_metadata([&] (token_metadata& tm) { + tm.tablets().set_balancing_enabled(false); + return make_ready_future<>(); + }).get(); + + { + auto plan = e.get_tablet_allocator().local().balance_tablets(stm.get()).get0(); + BOOST_REQUIRE(plan.empty()); + } + + // Check that cloning preserves the setting + stm.mutate_token_metadata([&] (token_metadata& tm) { + return make_ready_future<>(); + }).get(); + + { + auto plan = e.get_tablet_allocator().local().balance_tablets(stm.get()).get0(); + BOOST_REQUIRE(plan.empty()); + } + + // Enable load balancing back + stm.mutate_token_metadata([&] (token_metadata& tm) { + tm.tablets().set_balancing_enabled(true); + return make_ready_future<>(); + }).get(); + + { + auto plan = e.get_tablet_allocator().local().balance_tablets(stm.get()).get0(); + BOOST_REQUIRE(!plan.empty()); + } + + // Check that cloning preserves the setting + stm.mutate_token_metadata([&] (token_metadata& tm) { + return make_ready_future<>(); + }).get(); + + { + auto plan = e.get_tablet_allocator().local().balance_tablets(stm.get()).get0(); + BOOST_REQUIRE(!plan.empty()); + } + }).get(); +} + SEASTAR_THREAD_TEST_CASE(test_load_balancing_with_random_load) { do_with_cql_env_thread([] (auto& e) { const int n_hosts = 6; diff --git a/test/pylib/rest_client.py b/test/pylib/rest_client.py index d2d982dd57..df3547cc69 100644 --- a/test/pylib/rest_client.py +++ b/test/pylib/rest_client.py @@ -220,6 +220,12 @@ class ScyllaRESTAPIClient(): "token": str(token) }) + async def enable_tablet_balancing(self, node_ip: str) -> None: + await self.client.post(f"/storage_service/tablets/balancing", host=node_ip, params={"enabled": "true"}) + + async def disable_tablet_balancing(self, node_ip: str) -> None: + await self.client.post(f"/storage_service/tablets/balancing", host=node_ip, params={"enabled": "false"}) + async def disable_injection(self, node_ip: str, injection: str) -> None: await self.client.delete(f"/v2/error_injection/injection/{injection}", host=node_ip)