tablets: Use scheduler's make_sizing_plan() to decide about tablet count of a new table
This makes decisions made by the scheduler consistent with decisions made on table creation, with regard to tablet count. We want to avoid over-allocation of tablets when table is created, which would then be reduced by the scheduler's scaling logic. Not just to avoid wasteful migrations post table creation, but to respect the per-shard goal. To respect the per-shard goal, the algorithm will no longer be as simple as looking at hints, and we want to share the algorithm between the scheduler and initial tablet allocator. So invoke the scheduler to get the tablet count when table is created.
This commit is contained in:
@@ -344,14 +344,12 @@ future<size_t> network_topology_strategy::calculate_min_tablet_count(schema_ptr
|
||||
co_return tablet_count;
|
||||
}
|
||||
|
||||
future<tablet_map> network_topology_strategy::allocate_tablets_for_new_table(schema_ptr s, token_metadata_ptr tm, uint64_t target_tablet_size, std::optional<unsigned> initial_scale) const {
|
||||
size_t tablet_count = co_await calculate_min_tablet_count(s, tm, target_tablet_size, initial_scale);
|
||||
future<tablet_map> network_topology_strategy::allocate_tablets_for_new_table(schema_ptr s, token_metadata_ptr tm, size_t tablet_count) const {
|
||||
auto aligned_tablet_count = 1ul << log2ceil(tablet_count);
|
||||
if (tablet_count != aligned_tablet_count) {
|
||||
rslogger.info("Rounding up tablet count from {} to {} for table {}.{}", tablet_count, aligned_tablet_count, s->ks_name(), s->cf_name());
|
||||
tablet_count = aligned_tablet_count;
|
||||
}
|
||||
|
||||
co_return co_await reallocate_tablets(std::move(s), std::move(tm), tablet_map(tablet_count));
|
||||
}
|
||||
|
||||
|
||||
@@ -47,7 +47,7 @@ public:
|
||||
public: // tablet_aware_replication_strategy
|
||||
virtual effective_replication_map_ptr make_replication_map(table_id, token_metadata_ptr) const override;
|
||||
virtual future<size_t> calculate_min_tablet_count(schema_ptr s, token_metadata_ptr tm, uint64_t target_tablet_size, std::optional<unsigned> initial_scale) const override;
|
||||
virtual future<tablet_map> allocate_tablets_for_new_table(schema_ptr, token_metadata_ptr, uint64_t target_tablet_size, std::optional<unsigned> initial_scale = std::nullopt) const override;
|
||||
virtual future<tablet_map> allocate_tablets_for_new_table(schema_ptr, token_metadata_ptr, size_t tablet_count) const override;
|
||||
virtual future<tablet_map> reallocate_tablets(schema_ptr, token_metadata_ptr, tablet_map cur_tablets) const override;
|
||||
protected:
|
||||
/**
|
||||
|
||||
@@ -42,7 +42,7 @@ public:
|
||||
|
||||
/// Generates tablet_map for a new table.
|
||||
/// Runs under group0 guard.
|
||||
virtual future<tablet_map> allocate_tablets_for_new_table(schema_ptr, token_metadata_ptr, uint64_t target_tablet_size, std::optional<unsigned> initial_scale = std::nullopt) const = 0;
|
||||
virtual future<tablet_map> allocate_tablets_for_new_table(schema_ptr, token_metadata_ptr, size_t tablet_count) const = 0;
|
||||
|
||||
/// Generates tablet_map for a new table or when increasing replication factor.
|
||||
/// For a new table, cur_tablets is initialized with the tablet_count,
|
||||
|
||||
@@ -1067,7 +1067,7 @@ public:
|
||||
std::unordered_map<table_id, table_sizing> tables;
|
||||
};
|
||||
|
||||
future<sizing_plan> make_sizing_plan() {
|
||||
future<sizing_plan> make_sizing_plan(schema_ptr new_table = nullptr, const tablet_aware_replication_strategy* new_rs = nullptr) {
|
||||
sizing_plan plan;
|
||||
|
||||
auto process_table = [&] (table_id table, schema_ptr s, const tablet_aware_replication_strategy* rs, size_t tablet_count) -> future<> {
|
||||
@@ -1128,6 +1128,10 @@ public:
|
||||
co_await process_table(table, s, rs, tmap->tablet_count());
|
||||
}
|
||||
|
||||
if (new_table) {
|
||||
co_await process_table(new_table->id(), new_table, new_rs, 0);
|
||||
}
|
||||
|
||||
co_return std::move(plan);
|
||||
}
|
||||
|
||||
@@ -2765,6 +2769,18 @@ class tablet_allocator_impl : public tablet_allocator::impl
|
||||
load_balancer_stats_manager _load_balancer_stats;
|
||||
bool _stopped = false;
|
||||
bool _use_tablet_aware_balancing = true;
|
||||
private:
|
||||
load_balancer make_load_balancer(token_metadata_ptr tm,
|
||||
locator::load_stats_ptr table_load_stats,
|
||||
std::unordered_set<host_id> skiplist) {
|
||||
load_balancer lb(_db, tm, std::move(table_load_stats), _load_balancer_stats,
|
||||
_db.get_config().target_tablet_size_in_bytes(),
|
||||
_db.get_config().tablets_per_shard_goal(),
|
||||
std::move(skiplist));
|
||||
lb.set_use_table_aware_balancing(_use_tablet_aware_balancing);
|
||||
lb.set_initial_scale(_config.initial_tablets_scale);
|
||||
return lb;
|
||||
}
|
||||
public:
|
||||
tablet_allocator_impl(tablet_allocator::config cfg, service::migration_notifier& mn, replica::database& db)
|
||||
: _config(std::move(cfg))
|
||||
@@ -2789,9 +2805,7 @@ public:
|
||||
}
|
||||
|
||||
future<migration_plan> balance_tablets(token_metadata_ptr tm, locator::load_stats_ptr table_load_stats, std::unordered_set<host_id> skiplist) {
|
||||
load_balancer lb(_db, tm, std::move(table_load_stats), _load_balancer_stats, _db.get_config().target_tablet_size_in_bytes(), std::move(skiplist));
|
||||
lb.set_use_table_aware_balancing(_use_tablet_aware_balancing);
|
||||
lb.set_initial_scale(_config.initial_tablets_scale);
|
||||
auto lb = make_load_balancer(tm, std::move(table_load_stats), std::move(skiplist));
|
||||
co_return co_await lb.make_plan();
|
||||
}
|
||||
|
||||
@@ -2805,7 +2819,15 @@ public:
|
||||
if (auto&& tablet_rs = rs->maybe_as_tablet_aware()) {
|
||||
auto tm = _db.get_shared_token_metadata().get();
|
||||
lblogger.debug("Creating tablets for {}.{} id={}", s.ks_name(), s.cf_name(), s.id());
|
||||
auto map = tablet_rs->allocate_tablets_for_new_table(s.shared_from_this(), tm, _db.get_config().target_tablet_size_in_bytes(), _config.initial_tablets_scale).get();
|
||||
auto lb = make_load_balancer(tm, nullptr, {});
|
||||
auto plan = lb.make_sizing_plan(s.shared_from_this(), tablet_rs).get();
|
||||
auto& table_plan = plan.tables[s.id()];
|
||||
if (table_plan.target_tablet_count_aligned != table_plan.target_tablet_count) {
|
||||
lblogger.info("Rounding up tablet count from {} to {} for table {}.{}", table_plan.target_tablet_count,
|
||||
table_plan.target_tablet_count_aligned, s.ks_name(), s.cf_name());
|
||||
}
|
||||
auto tablet_count = table_plan.target_tablet_count_aligned;
|
||||
auto map = tablet_rs->allocate_tablets_for_new_table(s.shared_from_this(), tm, tablet_count).get();
|
||||
muts.emplace_back(tablet_map_to_mutation(map, s.id(), s.ks_name(), s.cf_name(), ts, _db.features()).get());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -519,7 +519,7 @@ SEASTAR_THREAD_TEST_CASE(NetworkTopologyStrategy_tablets_test) {
|
||||
"NetworkTopologyStrategy", params);
|
||||
auto tab_awr_ptr = ars_ptr->maybe_as_tablet_aware();
|
||||
BOOST_REQUIRE(tab_awr_ptr);
|
||||
auto tmap = tab_awr_ptr->allocate_tablets_for_new_table(s, stm.get(), service::default_target_tablet_size).get();
|
||||
auto tmap = tab_awr_ptr->allocate_tablets_for_new_table(s, stm.get(), tablet_count).get();
|
||||
full_ring_check(tmap, ars_ptr, stm.get());
|
||||
|
||||
// Test reallocate_tablets after randomizing a different set of options
|
||||
@@ -611,7 +611,7 @@ static void test_random_balancing(sharded<snitch_ptr>& snitch, gms::inet_address
|
||||
auto nts_ptr = dynamic_cast<const network_topology_strategy*>(ars_ptr.get());
|
||||
auto tab_awr_ptr = ars_ptr->maybe_as_tablet_aware();
|
||||
BOOST_REQUIRE(tab_awr_ptr);
|
||||
auto tmap = tab_awr_ptr->allocate_tablets_for_new_table(s, tmptr, 1).get();
|
||||
auto tmap = tab_awr_ptr->allocate_tablets_for_new_table(s, tmptr, tablet_count).get();
|
||||
full_ring_check(tmap, ars_ptr, stm.get());
|
||||
check_tablets_balance(tmap, nts_ptr, topo);
|
||||
|
||||
|
||||
@@ -3096,7 +3096,7 @@ static void execute_tablet_for_new_rf_test(calculate_tablet_replicas_for_new_rf_
|
||||
return make_ready_future<>();
|
||||
}).get();
|
||||
|
||||
auto allocated_map = tablet_aware_ptr->allocate_tablets_for_new_table(s, stm.get(), 0).get();
|
||||
auto allocated_map = tablet_aware_ptr->allocate_tablets_for_new_table(s, stm.get(), tablet_count).get();
|
||||
|
||||
BOOST_REQUIRE_EQUAL(allocated_map.tablet_count(), tablet_count);
|
||||
|
||||
|
||||
@@ -300,7 +300,7 @@ future<results> test_load_balancing_with_many_tables(params p, bool tablet_aware
|
||||
opts[rack1.dc] = format("{}", rf);
|
||||
network_topology_strategy tablet_rs(replication_strategy_params(opts, initial_tablets));
|
||||
stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> {
|
||||
auto map = co_await tablet_rs.allocate_tablets_for_new_table(s, stm.get(), service::default_target_tablet_size);
|
||||
auto map = co_await tablet_rs.allocate_tablets_for_new_table(s, stm.get(), initial_tablets.value_or(1));
|
||||
tm.tablets().set_tablet_map(s->id(), std::move(map));
|
||||
}).get();
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user