Files
scylla/locator/network_topology_strategy.hh
Tomasz Grabiec f1bda8d4c1 tablets: load_balancer: Scale down tablet count to respect per-shard tablet count goal
The limit is enforced by controlling average per-shard tablet replica
count in a given DC, which is controlled by per-table tablet
count. This is effective in respecting the limit on individual shards
as long as tablet replicas are distributed evenly between shards.

There is no attempt to move tablets around in order to enforce limits
on individual shards in case of imbalance between shards.

If the average per-shard tablet count exceeds the limit, all tables
which contribute to it (have replicas in the DC) are scaled down
by the same factor. Due to rounding up to the nearest power of 2,
we may overshoot the per-shard goal by at most a factor of 2.

If different DCs want different scale factors of a given table, the
lowest scale factor is chosen for a given table.

The limit is configurable. It's a global per-cluster config which
controls how many tablet replicas per shard in total we consider to be
still ok. It controls tablet allocator behavior when choosing the initial
tablet count. Even though it's a per-node config, we don't support
different limits per node. All nodes must have the same value of that
config. In that regard it's similar to other scheduler config items
like tablets_initial_scale_factor and target_tablet_size_in_bytes.
2025-02-19 16:29:07 +01:00

79 lines
3.1 KiB
C++

/*
*
* Modified by ScyllaDB
* Copyright (C) 2015-present ScyllaDB
*/
/*
* SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
*/
#pragma once
#include "locator/abstract_replication_strategy.hh"
#include "locator/tablet_replication_strategy.hh"
#include <optional>
#include <unordered_set>
namespace locator {
class load_sketch;
// Replication strategy that keeps a replication factor per datacenter
// (see _dc_rep_factor) and additionally implements the
// tablet_aware_replication_strategy interface.
class network_topology_strategy
        : public abstract_replication_strategy
        , public tablet_aware_replication_strategy {
public:
    network_topology_strategy(replication_strategy_params params);

    // Returns the stored overall replication factor (_rep_factor).
    // The token_metadata argument is not consulted.
    virtual size_t get_replication_factor(const token_metadata&) const override {
        return _rep_factor;
    }

    // Returns the replication factor recorded for `dc`, or 0 when `dc`
    // has no entry in _dc_rep_factor.
    size_t get_replication_factor(const sstring& dc) const override {
        if (const auto entry = _dc_rep_factor.find(dc); entry != _dc_rep_factor.end()) {
            return entry->second;
        }
        return 0;
    }

    // Read-only access to the stored list of datacenter names.
    const std::vector<sstring>& get_datacenters() const {
        return _datacenteres;
    }

    // This strategy always answers "yes".
    virtual bool allow_remove_node_being_replaced_from_natural_endpoints() const override {
        return true;
    }

    // The result must not be ignored by the caller ([[nodiscard]]).
    [[nodiscard]] sstring sanity_check_read_replicas(
            const effective_replication_map& erm,
            const host_id_vector_replica_set& read_replicas) const override;

public: // tablet_aware_replication_strategy
    virtual effective_replication_map_ptr make_replication_map(table_id, token_metadata_ptr) const override;

    virtual future<size_t> calculate_min_tablet_count(
            schema_ptr s,
            token_metadata_ptr tm,
            uint64_t target_tablet_size,
            std::optional<unsigned> initial_scale) const override;

    virtual future<tablet_map> allocate_tablets_for_new_table(
            schema_ptr, token_metadata_ptr, size_t tablet_count) const override;

    virtual future<tablet_map> reallocate_tablets(
            schema_ptr, token_metadata_ptr, tablet_map cur_tablets) const override;

protected:
    /**
     * Computes the endpoints in a single pass over the tokens, keeping
     * track of the progress made in each DC, rack, etc.
     */
    virtual future<host_id_set> calculate_natural_endpoints(
            const token& search_token,
            const token_metadata& tm) const override;

    virtual void validate_options(
            const gms::feature_service&,
            const locator::topology& topology) const override;

private:
    // Per-tablet overload: takes the current tablet map and a single
    // tablet id `tb`, and yields a replica set.
    future<tablet_replica_set> reallocate_tablets(
            schema_ptr,
            token_metadata_ptr,
            load_sketch&,
            const tablet_map& cur_tablets,
            tablet_id tb) const;

    future<tablet_replica_set> add_tablets_in_dc(
            schema_ptr,
            token_metadata_ptr,
            load_sketch&,
            tablet_id,
            std::map<sstring, std::unordered_set<locator::host_id>>& replicas_per_rack,
            const tablet_replica_set& cur_replicas,
            sstring dc,
            size_t dc_node_count,
            size_t dc_rf) const;

    tablet_replica_set drop_tablets_in_dc(
            schema_ptr,
            const locator::topology&,
            load_sketch&,
            tablet_id,
            const tablet_replica_set& cur_replicas,
            sstring dc,
            size_t dc_node_count,
            size_t dc_rf) const;

    // Datacenter name -> replication factor for that datacenter.
    std::unordered_map<sstring, size_t> _dc_rep_factor;
    // Datacenter names, as returned by get_datacenters().
    std::vector<sstring> _datacenteres;
    // Overall replication factor, as returned by get_replication_factor(const token_metadata&).
    size_t _rep_factor;
};
} // namespace locator