locator: Introduce tablet_metadata
token_metadata now stores tablet metadata with information about tablets in the system.
This commit is contained in:
@@ -931,6 +931,7 @@ scylla_core = (['message/messaging_service.cc',
|
||||
'query.cc',
|
||||
'query-result-set.cc',
|
||||
'locator/abstract_replication_strategy.cc',
|
||||
'locator/tablets.cc',
|
||||
'locator/azure_snitch.cc',
|
||||
'locator/simple_strategy.cc',
|
||||
'locator/local_strategy.cc',
|
||||
|
||||
10
dht/token.cc
10
dht/token.cc
@@ -282,4 +282,14 @@ compaction_group_of(unsigned most_significant_bits, const token& t) {
|
||||
__builtin_unreachable();
|
||||
}
|
||||
|
||||
// Returns the token produced by bias() for the highest 64-bit value whose top
// `most_significant_bits` bits equal `group`.
//
// The final group (all selected bits set) is handled separately: its upper
// bound is simply the maximum 64-bit value, so the shift expression is never
// evaluated for it.
token last_token_of_compaction_group(unsigned most_significant_bits, size_t group) {
    const size_t final_group = (1ul << most_significant_bits) - 1;
    if (group == final_group) {
        return bias(std::numeric_limits<uint64_t>::max());
    }
    // Start of the following group, placed in the top bits; one less than
    // that is the last value belonging to `group`.
    const uint64_t next_group_start = (uint64_t(group) + 1) << (64 - most_significant_bits);
    return bias(next_group_start - 1);
}
|
||||
|
||||
} // namespace dht
|
||||
|
||||
@@ -238,6 +238,7 @@ token first_token() {
|
||||
uint64_t unbias(const token& t);
|
||||
token bias(uint64_t n);
|
||||
size_t compaction_group_of(unsigned most_significant_bits, const token& t);
|
||||
token last_token_of_compaction_group(unsigned most_significant_bits, size_t group);
|
||||
|
||||
} // namespace dht
|
||||
|
||||
|
||||
118
locator/tablets.cc
Normal file
118
locator/tablets.cc
Normal file
@@ -0,0 +1,118 @@
|
||||
/*
|
||||
* Copyright (C) 2023-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
*/
|
||||
|
||||
#include "locator/tablet_replication_strategy.hh"
|
||||
#include "locator/tablets.hh"
|
||||
#include "types/types.hh"
|
||||
#include "types/tuple.hh"
|
||||
#include "types/set.hh"
|
||||
#include "utils/hash.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
#include "cql3/query_processor.hh"
|
||||
#include "cql3/untyped_result_set.hh"
|
||||
#include "replica/database.hh"
|
||||
#include "utils/stall_free.hh"
|
||||
|
||||
#include <seastar/core/coroutine.hh>
|
||||
#include <seastar/coroutine/maybe_yield.hh>
|
||||
|
||||
namespace locator {
|
||||
|
||||
seastar::logger tablet_logger("tablets");
|
||||
|
||||
// Returns the tablet map stored for table `id`.
// Throws std::runtime_error when no map is stored for that table.
const tablet_map& tablet_metadata::get_tablet_map(table_id id) const {
    const auto it = _tablets.find(id);
    if (it == _tablets.end()) {
        throw std::runtime_error(format("Tablet map not found for table {}", id));
    }
    return it->second;
}
|
||||
|
||||
// Mutable overload of the lookup. Delegates to the const overload so the
// lookup and its error reporting exist in one place only.
tablet_map& tablet_metadata::get_tablet_map(table_id id) {
    const tablet_metadata& self = *this;
    // Dropping constness is fine here: *this is known to be non-const.
    return const_cast<tablet_map&>(self.get_tablet_map(id));
}
|
||||
|
||||
// Stores `map` as the tablet map of table `id`, replacing any map that was
// already stored for that table.
void tablet_metadata::set_tablet_map(table_id id, tablet_map map) {
|
||||
_tablets.insert_or_assign(id, std::move(map));
|
||||
}
|
||||
|
||||
// Awaits clear_gently() on every stored tablet map, one table after another.
// The per-table entries themselves are not erased from _tablets.
future<> tablet_metadata::clear_gently() {
    for (auto& entry : _tablets) {
        tablet_map& per_table = entry.second;
        co_await per_table.clear_gently();
    }
}
|
||||
|
||||
// Constructs a map with `tablet_count` default-constructed tablets.
//
// `tablet_count` must be a non-zero power of two; any other value is reported
// through on_internal_error().
tablet_map::tablet_map(size_t tablet_count)
        : _log2_tablets(log2ceil(tablet_count)) {
    // Check the power-of-two property on the count itself instead of comparing
    // against `1ul << _log2_tablets`. For a zero count the shift amount is not
    // guaranteed to be below the word width, which would make that shift
    // undefined. For non-zero counts both checks agree.
    // NOTE(review): log2ceil(0) is still evaluated by the member initializer;
    // confirm it is well defined for zero.
    const bool power_of_two = tablet_count != 0 && (tablet_count & (tablet_count - 1)) == 0;
    if (!power_of_two) {
        on_internal_error(tablet_logger, format("Tablet count not a power of 2: {}", tablet_count));
    }
    _tablets.resize(tablet_count);
}
|
||||
|
||||
// Verifies that `id` indexes a tablet of this map.
// Throws std::logic_error when `id` is not smaller than tablet_count().
void tablet_map::check_tablet_id(tablet_id id) const {
    const size_t count = tablet_count();
    if (size_t(id) < count) {
        return;
    }
    throw std::logic_error(format("Invalid tablet id: {} >= {}", id, count));
}
|
||||
|
||||
// Returns the tablet_info stored for tablet `id`.
// Throws std::logic_error (via check_tablet_id) for an out-of-range id.
const tablet_info& tablet_map::get_tablet_info(tablet_id id) const {
    check_tablet_id(id);
    const size_t index = size_t(id);
    return _tablets[index];
}
|
||||
|
||||
// Maps token `t` to a tablet id: the id is the compaction group that
// dht::compaction_group_of() assigns to the token for _log2_tablets bits.
tablet_id tablet_map::get_tablet_id(token t) const {
    const size_t group = dht::compaction_group_of(_log2_tablets, t);
    return tablet_id(group);
}
|
||||
|
||||
// Returns the last token of tablet `id`, as computed by
// dht::last_token_of_compaction_group() for _log2_tablets bits.
// Throws std::logic_error (via check_tablet_id) for an out-of-range id.
dht::token tablet_map::get_last_token(tablet_id id) const {
    check_tablet_id(id);
    const size_t group = size_t(id);
    return dht::last_token_of_compaction_group(_log2_tablets, group);
}
|
||||
|
||||
// Returns the first token of tablet `id`.
//
// For the first tablet this is dht::first_token(); for any other tablet it is
// the token following the previous tablet's last token.
// Throws std::logic_error (via check_tablet_id) for an out-of-range id.
dht::token tablet_map::get_first_token(tablet_id id) const {
    // Validate `id` itself. Without this, id == tablet_count() would be
    // accepted, because get_last_token() below only validates the predecessor.
    check_tablet_id(id);
    if (id == first_tablet()) {
        return dht::first_token();
    }
    const tablet_id predecessor = tablet_id(size_t(id) - 1);
    return dht::next_token(get_last_token(predecessor));
}
|
||||
|
||||
// Builds the token range of tablet `id`: exclusive lower bound, inclusive
// upper bound.
//
// The upper bound is the tablet's own last token. The lower bound is
// dht::minimum_token() for the first tablet and the previous tablet's last
// token otherwise.
// Throws std::logic_error (via get_last_token) for an out-of-range id.
dht::token_range tablet_map::get_token_range(tablet_id id) const {
    dht::token upper = get_last_token(id);
    dht::token lower = (id == first_tablet())
            ? dht::minimum_token()
            : get_last_token(tablet_id(size_t(id) - 1));
    return dht::token_range::make({std::move(lower), false}, {std::move(upper), true});
}
|
||||
|
||||
// Overwrites the tablet_info stored for tablet `id` with `info`.
// Throws std::logic_error (via check_tablet_id) for an out-of-range id.
void tablet_map::set_tablet(tablet_id id, tablet_info info) {
    check_tablet_id(id);
    tablet_info& slot = _tablets[size_t(id)];
    slot = std::move(info);
}
|
||||
|
||||
// Records `info` as the transition of tablet `id`, replacing any transition
// already recorded for it.
// Throws std::logic_error (via check_tablet_id) for an out-of-range id.
void tablet_map::set_tablet_transition_info(tablet_id id, tablet_transition_info info) {
|
||||
check_tablet_id(id);
|
||||
_transitions.insert_or_assign(id, std::move(info));
|
||||
}
|
||||
|
||||
// Clears the per-tablet info container through utils::clear_gently().
// Only _tablets is cleared here; _transitions is not touched.
future<> tablet_map::clear_gently() {
|
||||
return utils::clear_gently(_tablets);
|
||||
}
|
||||
|
||||
// Returns a pointer to the transition recorded for tablet `id`, or nullptr
// when none is recorded.
// Note: `id` is not range-checked here; an id without a recorded transition
// simply yields nullptr.
const tablet_transition_info* tablet_map::get_tablet_transition_info(tablet_id id) const {
    if (const auto it = _transitions.find(id); it != _transitions.end()) {
        return &it->second;
    }
    return nullptr;
}
|
||||
|
||||
}
|
||||
227
locator/tablets.hh
Normal file
227
locator/tablets.hh
Normal file
@@ -0,0 +1,227 @@
|
||||
/*
|
||||
* Copyright (C) 2023-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "dht/token.hh"
|
||||
#include "utils/small_vector.hh"
|
||||
#include "locator/host_id.hh"
|
||||
#include "dht/i_partitioner_fwd.hh"
|
||||
#include "schema/schema_fwd.hh"
|
||||
#include "utils/chunked_vector.hh"
|
||||
#include "utils/hash.hh"
|
||||
|
||||
#include <boost/range/adaptor/transformed.hpp>
|
||||
#include <seastar/core/reactor.hh>
|
||||
#include <seastar/util/log.hh>
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace locator {
|
||||
|
||||
extern seastar::logger tablet_logger;
|
||||
|
||||
using token = dht::token;
|
||||
|
||||
// Identifies tablet within the scope of a single tablet_map,
|
||||
// which has a scope of (table_id, token metadata version).
|
||||
// Different tablets of different tables can have the same tablet_id.
|
||||
// Different tablets in subsequent token metadata version can have the same tablet_id.
|
||||
// When splitting a tablet, one of the new tablets (in the new token metadata version)
|
||||
// will have the same tablet_id as the old one.
|
||||
enum class tablet_id : size_t;
|
||||
|
||||
/// A (host, shard) pair. Element type of tablet_replica_set.
struct tablet_replica {
|
||||
// Host part of the pair.
host_id host;
|
||||
// Shard part of the pair.
shard_id shard;
|
||||
|
||||
// Member-wise equality over host and shard.
bool operator==(const tablet_replica&) const = default;
|
||||
};
|
||||
|
||||
std::ostream& operator<<(std::ostream&, const tablet_replica&);
|
||||
|
||||
using tablet_replica_set = utils::small_vector<tablet_replica, 3>;
|
||||
|
||||
/// Stores information about a single tablet.
|
||||
struct tablet_info {
|
||||
tablet_replica_set replicas;
|
||||
|
||||
std::optional<shard_id> get_shard(host_id host) const {
|
||||
for (auto&& r : replicas) {
|
||||
if (r.host == host) {
|
||||
return r.shard;
|
||||
}
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
bool operator==(const tablet_info&) const = default;
|
||||
};
|
||||
|
||||
/// Used for storing tablet state transition during topology changes.
|
||||
/// Describes transition of a single tablet.
|
||||
struct tablet_transition_info {
|
||||
// Replica set named `next`; presumably the set the tablet is moving to
// (NOTE(review): confirm with the code that fills this in).
tablet_replica_set next;
|
||||
// Stored alongside `next` so it need not be recomputed; per the trailing
// comment it corresponds to `next` minus the tablet's current replicas.
tablet_replica pending_replica; // Optimization (next - tablet_info::replicas)
|
||||
|
||||
// Member-wise equality.
bool operator==(const tablet_transition_info&) const = default;
|
||||
};
|
||||
|
||||
/// Stores information about tablets of a single table.
|
||||
///
|
||||
/// The map contains a constant number of tablets, tablet_count().
|
||||
/// Each tablet has an associated tablet_info, and an optional tablet_transition_info.
|
||||
/// Any given token is owned by exactly one tablet in this map.
|
||||
///
|
||||
/// A tablet map describes the whole ring, it cannot contain a partial mapping.
|
||||
/// This means that the following sequence is always valid:
|
||||
///
|
||||
/// tablet_map& tmap = ...;
|
||||
/// dht::token t = ...;
|
||||
/// tablet_id id = tmap.get_tablet_id(t);
|
||||
/// tablet_info& info = tmap.get_tablet_info(id);
|
||||
///
|
||||
/// A tablet_id obtained from an instance of tablet_map is valid for that instance only.
|
||||
class tablet_map {
|
||||
public:
|
||||
using tablet_container = utils::chunked_vector<tablet_info>;
|
||||
private:
|
||||
// The implementation assumes that _tablets.size() is a power of 2:
|
||||
//
|
||||
// _tablets.size() == 1 << _log2_tablets
|
||||
//
|
||||
tablet_container _tablets;
|
||||
size_t _log2_tablets; // log_2(_tablets.size())
|
||||
std::unordered_map<tablet_id, tablet_transition_info> _transitions;
|
||||
public:
|
||||
/// Constructs a tablet map.
|
||||
///
|
||||
/// \param tablet_count The desired tablets to allocate. Must be a power of two.
|
||||
explicit tablet_map(size_t tablet_count);
|
||||
|
||||
/// Returns tablet_id of a tablet which owns a given token.
|
||||
tablet_id get_tablet_id(token) const;
|
||||
|
||||
/// Returns tablet_info associated with a given tablet.
|
||||
/// The given id must belong to this instance.
|
||||
const tablet_info& get_tablet_info(tablet_id) const;
|
||||
|
||||
/// Returns a pointer to tablet_transition_info associated with a given tablet.
|
||||
/// If there is no transition for a given tablet, returns nullptr.
|
||||
/// \throws std::logic_error If the given id does not belong to this instance.
|
||||
const tablet_transition_info* get_tablet_transition_info(tablet_id) const;
|
||||
|
||||
/// Returns the largest token owned by a given tablet.
|
||||
/// \throws std::logic_error If the given id does not belong to this instance.
|
||||
dht::token get_last_token(tablet_id id) const;
|
||||
|
||||
/// Returns the smallest token owned by a given tablet.
|
||||
/// \throws std::logic_error If the given id does not belong to this instance.
|
||||
dht::token get_first_token(tablet_id id) const;
|
||||
|
||||
/// Returns token_range which contains all tokens owned by a given tablet and only such tokens.
|
||||
/// \throws std::logic_error If the given id does not belong to this instance.
|
||||
dht::token_range get_token_range(tablet_id id) const;
|
||||
|
||||
/// Returns the id of the first tablet.
|
||||
tablet_id first_tablet() const {
|
||||
return tablet_id(0);
|
||||
}
|
||||
|
||||
/// Returns the id of the last tablet.
|
||||
tablet_id last_tablet() const {
|
||||
return tablet_id(tablet_count() - 1);
|
||||
}
|
||||
|
||||
/// Returns the id of a tablet which follows a given tablet in the ring,
|
||||
/// or disengaged optional if the given tablet is the last one.
|
||||
std::optional<tablet_id> next_tablet(tablet_id t) const {
|
||||
if (t == last_tablet()) {
|
||||
return std::nullopt;
|
||||
}
|
||||
return tablet_id(size_t(t) + 1);
|
||||
}
|
||||
|
||||
const tablet_container& tablets() const {
|
||||
return _tablets;
|
||||
}
|
||||
|
||||
/// Returns an iterable range over tablet_id:s which includes all tablets in token ring order.
|
||||
auto tablet_ids() const {
|
||||
return boost::irange<size_t>(0, tablet_count()) | boost::adaptors::transformed([] (size_t i) {
|
||||
return tablet_id(i);
|
||||
});
|
||||
}
|
||||
|
||||
size_t tablet_count() const {
|
||||
return _tablets.size();
|
||||
}
|
||||
|
||||
/// Returns tablet_info associated with the tablet which owns a given token.
|
||||
const tablet_info& get_tablet_info(token t) const {
|
||||
return get_tablet_info(get_tablet_id(t));
|
||||
}
|
||||
|
||||
bool operator==(const tablet_map&) const = default;
|
||||
public:
|
||||
void set_tablet(tablet_id, tablet_info);
|
||||
void set_tablet_transition_info(tablet_id, tablet_transition_info);
|
||||
|
||||
// Destroys gently.
|
||||
// The tablet map is not usable after this call and should be destroyed.
|
||||
future<> clear_gently();
|
||||
private:
|
||||
void check_tablet_id(tablet_id) const;
|
||||
};
|
||||
|
||||
/// Holds information about all tablets in the cluster.
|
||||
///
|
||||
/// When this instance is obtained via token_metadata_ptr, it is immutable
|
||||
/// (represents a snapshot) and references obtained through this are guaranteed
|
||||
/// to remain valid as long as the containing token_metadata_ptr is held.
|
||||
///
|
||||
/// Copy constructor can be invoked across shards.
|
||||
class tablet_metadata {
|
||||
public:
|
||||
// FIXME: Make cheap to copy.
|
||||
// We want both immutability and cheap updates, so we should use
|
||||
// hierarchical data structure with shared pointers and copy-on-write.
|
||||
// Currently we have immutability but updates require full copy.
|
||||
//
|
||||
// Also, currently the copy constructor is invoked across shards, which precludes
|
||||
// using shared pointers. We should change that and use a foreign_ptr<> to
|
||||
// hold immutable tablet_metadata which lives on shard 0 only.
|
||||
// See storage_service::replicate_to_all_cores().
|
||||
using table_to_tablet_map = std::unordered_map<table_id, tablet_map>;
|
||||
private:
|
||||
table_to_tablet_map _tablets;
|
||||
public:
|
||||
const tablet_map& get_tablet_map(table_id id) const;
|
||||
const table_to_tablet_map& all_tables() const { return _tablets; }
|
||||
public:
|
||||
void set_tablet_map(table_id, tablet_map);
|
||||
tablet_map& get_tablet_map(table_id id);
|
||||
future<> clear_gently();
|
||||
public:
|
||||
bool operator==(const tablet_metadata&) const = default;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
namespace std {
|
||||
|
||||
template<>
|
||||
struct hash<locator::tablet_replica> {
|
||||
size_t operator()(const locator::tablet_replica& r) const {
|
||||
return utils::hash_combine(
|
||||
std::hash<locator::host_id>()(r.host),
|
||||
std::hash<shard_id>()(r.shard));
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
@@ -10,6 +10,7 @@
|
||||
#include <optional>
|
||||
#include "locator/snitch_base.hh"
|
||||
#include "locator/abstract_replication_strategy.hh"
|
||||
#include "locator/tablets.hh"
|
||||
#include "log.hh"
|
||||
#include "partition_range_compat.hh"
|
||||
#include <unordered_map>
|
||||
@@ -62,6 +63,8 @@ private:
|
||||
|
||||
std::vector<token> _sorted_tokens;
|
||||
|
||||
tablet_metadata _tablets;
|
||||
|
||||
topology _topology;
|
||||
|
||||
long _ring_version = 0;
|
||||
@@ -72,6 +75,13 @@ private:
|
||||
|
||||
void sort_tokens();
|
||||
|
||||
// Read-only access to the stored tablet metadata.
const tablet_metadata& tablets() const { return _tablets; }
|
||||
|
||||
// Replaces the stored tablet metadata with `tablets` (moved in) and then
// invalidates the cached rings.
void set_tablets(tablet_metadata&& tablets) {
|
||||
_tablets = std::move(tablets);
|
||||
invalidate_cached_rings();
|
||||
}
|
||||
|
||||
struct shallow_copy {};
|
||||
token_metadata_impl(shallow_copy, const token_metadata_impl& o) noexcept
|
||||
: _topology(topology::config{})
|
||||
@@ -368,6 +378,7 @@ future<token_metadata_impl> token_metadata_impl::clone_only_token_map(bool clone
|
||||
ret._sorted_tokens = _sorted_tokens;
|
||||
co_await coroutine::maybe_yield();
|
||||
}
|
||||
ret._tablets = _tablets;
|
||||
co_return ret;
|
||||
}
|
||||
|
||||
@@ -380,6 +391,7 @@ future<> token_metadata_impl::clear_gently() noexcept {
|
||||
co_await utils::clear_gently(_pending_ranges_interval_map);
|
||||
co_await utils::clear_gently(_sorted_tokens);
|
||||
co_await _topology.clear_gently();
|
||||
co_await _tablets.clear_gently();
|
||||
co_return;
|
||||
}
|
||||
|
||||
@@ -396,6 +408,14 @@ void token_metadata_impl::sort_tokens() {
|
||||
_sorted_tokens = std::move(sorted);
|
||||
}
|
||||
|
||||
// Returns the tablet metadata held by the implementation object (_impl).
const tablet_metadata& token_metadata::tablets() const {
|
||||
return _impl->tablets();
|
||||
}
|
||||
|
||||
// Hands `tm` over to the implementation object (_impl), which stores it.
void token_metadata::set_tablets(tablet_metadata tm) {
|
||||
_impl->set_tablets(std::move(tm));
|
||||
}
|
||||
|
||||
// Read-only access to the _sorted_tokens vector.
const std::vector<token>& token_metadata_impl::sorted_tokens() const {
|
||||
return _sorted_tokens;
|
||||
}
|
||||
|
||||
@@ -41,6 +41,7 @@ class abstract_replication_strategy;
|
||||
using token = dht::token;
|
||||
|
||||
class token_metadata;
|
||||
class tablet_metadata;
|
||||
|
||||
struct host_id_or_endpoint {
|
||||
host_id id;
|
||||
@@ -106,6 +107,8 @@ public:
|
||||
token_metadata& operator=(token_metadata&&) noexcept;
|
||||
~token_metadata();
|
||||
const std::vector<token>& sorted_tokens() const;
|
||||
const tablet_metadata& tablets() const;
|
||||
void set_tablets(tablet_metadata);
|
||||
// Update token->endpoint mappings for a given \c endpoint.
|
||||
// \c tokens are all the tokens that are now owned by \c endpoint.
|
||||
//
|
||||
|
||||
Reference in New Issue
Block a user