tablets: Use scheduler's make_sizing_plan() to decide about tablet count of a new table

This makes decisions made by the scheduler consistent with decisions
made on table creation, with regard to tablet count.

We want to avoid over-allocation of tablets when table is created,
which would then be reduced by the scheduler's scaling logic. Not just
to avoid wasteful migrations post table creation, but to respect the
per-shard goal. To respect the per-shard goal, the algorithm will no
longer be as simple as looking at hints, and we want to share the
algorithm between the scheduler and initial tablet allocator. So
invoke the scheduler to get the tablet count when table is created.
This commit is contained in:
Tomasz Grabiec
2025-01-22 22:14:32 +01:00
parent dd68c1e526
commit 94b5165ac7
7 changed files with 34 additions and 14 deletions

View File

@@ -344,14 +344,12 @@ future<size_t> network_topology_strategy::calculate_min_tablet_count(schema_ptr
co_return tablet_count;
}
future<tablet_map> network_topology_strategy::allocate_tablets_for_new_table(schema_ptr s, token_metadata_ptr tm, uint64_t target_tablet_size, std::optional<unsigned> initial_scale) const {
size_t tablet_count = co_await calculate_min_tablet_count(s, tm, target_tablet_size, initial_scale);
future<tablet_map> network_topology_strategy::allocate_tablets_for_new_table(schema_ptr s, token_metadata_ptr tm, size_t tablet_count) const {
auto aligned_tablet_count = 1ul << log2ceil(tablet_count);
if (tablet_count != aligned_tablet_count) {
rslogger.info("Rounding up tablet count from {} to {} for table {}.{}", tablet_count, aligned_tablet_count, s->ks_name(), s->cf_name());
tablet_count = aligned_tablet_count;
}
co_return co_await reallocate_tablets(std::move(s), std::move(tm), tablet_map(tablet_count));
}

View File

@@ -47,7 +47,7 @@ public:
public: // tablet_aware_replication_strategy
virtual effective_replication_map_ptr make_replication_map(table_id, token_metadata_ptr) const override;
virtual future<size_t> calculate_min_tablet_count(schema_ptr s, token_metadata_ptr tm, uint64_t target_tablet_size, std::optional<unsigned> initial_scale) const override;
virtual future<tablet_map> allocate_tablets_for_new_table(schema_ptr, token_metadata_ptr, uint64_t target_tablet_size, std::optional<unsigned> initial_scale = std::nullopt) const override;
virtual future<tablet_map> allocate_tablets_for_new_table(schema_ptr, token_metadata_ptr, size_t tablet_count) const override;
virtual future<tablet_map> reallocate_tablets(schema_ptr, token_metadata_ptr, tablet_map cur_tablets) const override;
protected:
/**

View File

@@ -42,7 +42,7 @@ public:
/// Generates tablet_map for a new table.
/// Runs under group0 guard.
virtual future<tablet_map> allocate_tablets_for_new_table(schema_ptr, token_metadata_ptr, uint64_t target_tablet_size, std::optional<unsigned> initial_scale = std::nullopt) const = 0;
virtual future<tablet_map> allocate_tablets_for_new_table(schema_ptr, token_metadata_ptr, size_t tablet_count) const = 0;
/// Generates tablet_map for a new table or when increasing replication factor.
/// For a new table, cur_tablets is initialized with the tablet_count,

View File

@@ -1067,7 +1067,7 @@ public:
std::unordered_map<table_id, table_sizing> tables;
};
future<sizing_plan> make_sizing_plan() {
future<sizing_plan> make_sizing_plan(schema_ptr new_table = nullptr, const tablet_aware_replication_strategy* new_rs = nullptr) {
sizing_plan plan;
auto process_table = [&] (table_id table, schema_ptr s, const tablet_aware_replication_strategy* rs, size_t tablet_count) -> future<> {
@@ -1128,6 +1128,10 @@ public:
co_await process_table(table, s, rs, tmap->tablet_count());
}
if (new_table) {
co_await process_table(new_table->id(), new_table, new_rs, 0);
}
co_return std::move(plan);
}
@@ -2765,6 +2769,18 @@ class tablet_allocator_impl : public tablet_allocator::impl
load_balancer_stats_manager _load_balancer_stats;
bool _stopped = false;
bool _use_tablet_aware_balancing = true;
private:
load_balancer make_load_balancer(token_metadata_ptr tm,
locator::load_stats_ptr table_load_stats,
std::unordered_set<host_id> skiplist) {
load_balancer lb(_db, tm, std::move(table_load_stats), _load_balancer_stats,
_db.get_config().target_tablet_size_in_bytes(),
_db.get_config().tablets_per_shard_goal(),
std::move(skiplist));
lb.set_use_table_aware_balancing(_use_tablet_aware_balancing);
lb.set_initial_scale(_config.initial_tablets_scale);
return lb;
}
public:
tablet_allocator_impl(tablet_allocator::config cfg, service::migration_notifier& mn, replica::database& db)
: _config(std::move(cfg))
@@ -2789,9 +2805,7 @@ public:
}
future<migration_plan> balance_tablets(token_metadata_ptr tm, locator::load_stats_ptr table_load_stats, std::unordered_set<host_id> skiplist) {
load_balancer lb(_db, tm, std::move(table_load_stats), _load_balancer_stats, _db.get_config().target_tablet_size_in_bytes(), std::move(skiplist));
lb.set_use_table_aware_balancing(_use_tablet_aware_balancing);
lb.set_initial_scale(_config.initial_tablets_scale);
auto lb = make_load_balancer(tm, std::move(table_load_stats), std::move(skiplist));
co_return co_await lb.make_plan();
}
@@ -2805,7 +2819,15 @@ public:
if (auto&& tablet_rs = rs->maybe_as_tablet_aware()) {
auto tm = _db.get_shared_token_metadata().get();
lblogger.debug("Creating tablets for {}.{} id={}", s.ks_name(), s.cf_name(), s.id());
auto map = tablet_rs->allocate_tablets_for_new_table(s.shared_from_this(), tm, _db.get_config().target_tablet_size_in_bytes(), _config.initial_tablets_scale).get();
auto lb = make_load_balancer(tm, nullptr, {});
auto plan = lb.make_sizing_plan(s.shared_from_this(), tablet_rs).get();
auto& table_plan = plan.tables[s.id()];
if (table_plan.target_tablet_count_aligned != table_plan.target_tablet_count) {
lblogger.info("Rounding up tablet count from {} to {} for table {}.{}", table_plan.target_tablet_count,
table_plan.target_tablet_count_aligned, s.ks_name(), s.cf_name());
}
auto tablet_count = table_plan.target_tablet_count_aligned;
auto map = tablet_rs->allocate_tablets_for_new_table(s.shared_from_this(), tm, tablet_count).get();
muts.emplace_back(tablet_map_to_mutation(map, s.id(), s.ks_name(), s.cf_name(), ts, _db.features()).get());
}
}

View File

@@ -519,7 +519,7 @@ SEASTAR_THREAD_TEST_CASE(NetworkTopologyStrategy_tablets_test) {
"NetworkTopologyStrategy", params);
auto tab_awr_ptr = ars_ptr->maybe_as_tablet_aware();
BOOST_REQUIRE(tab_awr_ptr);
auto tmap = tab_awr_ptr->allocate_tablets_for_new_table(s, stm.get(), service::default_target_tablet_size).get();
auto tmap = tab_awr_ptr->allocate_tablets_for_new_table(s, stm.get(), tablet_count).get();
full_ring_check(tmap, ars_ptr, stm.get());
// Test reallocate_tablets after randomizing a different set of options
@@ -611,7 +611,7 @@ static void test_random_balancing(sharded<snitch_ptr>& snitch, gms::inet_address
auto nts_ptr = dynamic_cast<const network_topology_strategy*>(ars_ptr.get());
auto tab_awr_ptr = ars_ptr->maybe_as_tablet_aware();
BOOST_REQUIRE(tab_awr_ptr);
auto tmap = tab_awr_ptr->allocate_tablets_for_new_table(s, tmptr, 1).get();
auto tmap = tab_awr_ptr->allocate_tablets_for_new_table(s, tmptr, tablet_count).get();
full_ring_check(tmap, ars_ptr, stm.get());
check_tablets_balance(tmap, nts_ptr, topo);

View File

@@ -3096,7 +3096,7 @@ static void execute_tablet_for_new_rf_test(calculate_tablet_replicas_for_new_rf_
return make_ready_future<>();
}).get();
auto allocated_map = tablet_aware_ptr->allocate_tablets_for_new_table(s, stm.get(), 0).get();
auto allocated_map = tablet_aware_ptr->allocate_tablets_for_new_table(s, stm.get(), tablet_count).get();
BOOST_REQUIRE_EQUAL(allocated_map.tablet_count(), tablet_count);

View File

@@ -300,7 +300,7 @@ future<results> test_load_balancing_with_many_tables(params p, bool tablet_aware
opts[rack1.dc] = format("{}", rf);
network_topology_strategy tablet_rs(replication_strategy_params(opts, initial_tablets));
stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> {
auto map = co_await tablet_rs.allocate_tablets_for_new_table(s, stm.get(), service::default_target_tablet_size);
auto map = co_await tablet_rs.allocate_tablets_for_new_table(s, stm.get(), initial_tablets.value_or(1));
tm.tablets().set_tablet_map(s->id(), std::move(map));
}).get();
};