The limit is enforced by controlling the average per-shard tablet replica count in a given DC, which in turn is controlled by the per-table tablet count. This is effective in respecting the limit on individual shards as long as tablet replicas are distributed evenly between shards. There is no attempt to move tablets around in order to enforce limits on individual shards in case of imbalance between shards. If the average per-shard tablet count exceeds the limit, all tables which contribute to it (i.e. have replicas in the DC) are scaled down by the same factor. Due to rounding up to the nearest power of 2, we may overshoot the per-shard goal by at most a factor of 2. If different DCs want different scale factors for a given table, the lowest scale factor is chosen for that table. The limit is configurable. Conceptually it is a global, per-cluster setting which controls how many tablet replicas per shard in total we still consider acceptable. It controls tablet allocator behavior when choosing the initial tablet count. Even though it is technically a per-node config, we don't support different limits per node: all nodes must have the same value of that config. It is similar in that regard to other scheduler config items like tablets_initial_scale_factor and target_tablet_size_in_bytes.
61 lines
2.7 KiB
C++
61 lines
2.7 KiB
C++
/*
|
|
* Copyright (C) 2023-present ScyllaDB
|
|
*/
|
|
|
|
/*
|
|
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include "locator/abstract_replication_strategy.hh"
|
|
#include "locator/token_metadata.hh"
|
|
#include "locator/tablets.hh"
|
|
|
|
#include <seastar/core/sstring.hh>
|
|
|
|
namespace locator {
|
|
|
|
/// Trait class which allows replication strategies to work in a mode which
|
|
/// uses tablet-based replication.
|
|
///
|
|
/// Contains common logic, like parsing tablet options,
|
|
/// and creating effective_replication_map for a given table which works with
|
|
/// system's tablet_metadata.
|
|
class tablet_aware_replication_strategy : public per_table_replication_strategy {
|
|
private:
|
|
size_t _initial_tablets = 0;
|
|
db::tablet_options _tablet_options;
|
|
protected:
|
|
void validate_tablet_options(const abstract_replication_strategy&, const gms::feature_service&, const replication_strategy_config_options&) const;
|
|
void process_tablet_options(abstract_replication_strategy&, replication_strategy_config_options&, replication_strategy_params);
|
|
size_t get_initial_tablets() const { return _initial_tablets; }
|
|
effective_replication_map_ptr do_make_replication_map(table_id,
|
|
replication_strategy_ptr,
|
|
token_metadata_ptr,
|
|
size_t replication_factor) const;
|
|
|
|
public:
|
|
/// Calculate the minimum tablet_count for a table, given the target_tablet_size, the per-table hints,
|
|
/// the network topology, and the configured replication factors.
|
|
virtual future<size_t> calculate_min_tablet_count(schema_ptr s, token_metadata_ptr tm, uint64_t target_tablet_size, std::optional<unsigned> initial_scale) const = 0;
|
|
|
|
/// Generates tablet_map for a new table.
|
|
/// Runs under group0 guard.
|
|
virtual future<tablet_map> allocate_tablets_for_new_table(schema_ptr, token_metadata_ptr, size_t tablet_count) const = 0;
|
|
|
|
/// Generates tablet_map for a new table or when increasing replication factor.
|
|
/// For a new table, cur_tablets is initialized with the tablet_count,
|
|
/// otherwise, cur_tablets is a copy of the current tablet_map.
|
|
/// Runs under group0 guard.
|
|
virtual future<tablet_map> reallocate_tablets(schema_ptr, token_metadata_ptr, tablet_map cur_tablets) const = 0;
|
|
|
|
/// Returns replication factor in a given DC.
|
|
/// Note that individual tablets may lag behind desired replication factor in their
|
|
/// current replica list, as replication factor changes involve table rebuilding transitions
|
|
/// which are not instantaneous.
|
|
virtual size_t get_replication_factor(const sstring& dc) const = 0;
|
|
};
|
|
|
|
} // namespace locator
|