dht: document incremental partition_range and token_range sharders

Closes #6210
This commit is contained in:
Avi Kivity
2020-04-16 10:51:13 +03:00
committed by Nadav Har'El
parent aa4c359cff
commit 6f5ef5a5f5

View File

@@ -28,21 +28,54 @@
namespace dht {
// Utilities for sharding ring partition_range:s
// A ring_position range's data is divided into sub-ranges, where each sub-range's data
// is owned by a single shard. Note that multiple non-overlapping sub-ranges may map to a
// single shard, and some shards may not receive any sub-range.
//
// This module provides utilities for determining the sub-ranges to shard mapping. The utilities
// generate optimal mappings: each range that you get is the largest possible, so you
// get the minimum number of ranges possible. You can get many ranges, so operate on them
// one (or a few) at a time, rather than accumulating them.
// A mapping between a partition_range and a shard. All positions within `ring_range` are
// owned by `shard`.
//
// The classes that return ring_position_range_and_shard make `ring_range` as large as
// possible (maximizing the number of tokens), so the total number of such ranges is minimized.
// Successive ranges therefore always have a different `shard` than the previous return.
// (classes that return ring_position_range_and_shard_and_element can have the same `shard`
// in successive returns, if `element` is different).
struct ring_position_range_and_shard {
// The sub-range of the ring; every position within it is owned by `shard`.
dht::partition_range ring_range;
// The shard that owns all positions in `ring_range`.
unsigned shard;
};
// Splits a single `partition_range`, one step at a time, into sub-ranges that are each wholly
// owned by one shard.
class ring_position_range_sharder {
public:
    // Sets up the sharder over `rrp`, the range to subdivide. `sharder` is stored by reference
    // (not copied), so it has to outlive this object.
    ring_position_range_sharder(const sharder& sharder, nonwrapping_range<ring_position> rrp)
        : _sharder(sharder)
        , _range(std::move(rrp)) {
    }

    // Produces the next range-shard mapping, or std::nullopt once the input range has been
    // used up. The ranges handed out are contiguous and non-overlapping, and together they
    // cover the entire input range.
    std::optional<ring_position_range_and_shard> next(const schema& s);

private:
    const sharder& _sharder;
    // NOTE(review): presumably the part of the input not yet handed out by next() - confirm
    // against the out-of-line definition.
    dht::partition_range _range;
    // NOTE(review): presumably set by next() once the input is exhausted - confirm.
    bool _done = false;
};
// A mapping between a partition_range and a shard (like ring_position_range_and_shard) extended
// by having a reference to input range index. See ring_position_range_vector_sharder for use.
//
// The classes that return ring_position_range_and_shard_and_element make `ring_range` as large as
// possible (maximizing the number of tokens), so the total number of such ranges is minimized.
// Successive ranges for the same `element` therefore always have a different `shard` than the
// previous return; successive returns can have the same `shard` only if `element` is different.
struct ring_position_range_and_shard_and_element : ring_position_range_and_shard {
ring_position_range_and_shard_and_element(ring_position_range_and_shard&& rpras, unsigned element)
: ring_position_range_and_shard(std::move(rpras)), element(element) {
@@ -50,6 +83,21 @@ struct ring_position_range_and_shard_and_element : ring_position_range_and_shard
unsigned element;
};
// Incrementally divides several non-overlapping `partition_range`:s into sub-ranges wholly owned by
// a single shard.
//
// Similar to ring_position_range_sharder, but instead of stopping when the input range is exhausted,
// moves on to the next input range (input ranges are supplied in a vector).
//
// This has two use cases:
// 1. vnodes. A vnode cannot be described by a single range, since
// one vnode wraps around from the largest token back to the smallest token. Hence it must be
// described as a vector of two ranges, (largest_token, +inf) and (-inf, smallest_token].
// 2. sstable shard mappings. An sstable has metadata describing which ranges it owns, and this is
// used to see what shards these ranges map to (and therefore to see if the sstable is shared or
// not, and which shards share it).
class ring_position_range_vector_sharder {
using vec_type = dht::partition_range_vector;
vec_type _ranges;
@@ -63,11 +111,24 @@ private:
}
}
public:
// Initializes the `ring_position_range_vector_sharder` with the ranges to be processed.
// Input ranges should be non-overlapping (although nothing bad will happen if they do
// overlap).
ring_position_range_vector_sharder(const sharder& sharder, dht::partition_range_vector ranges);
// results are returned sorted by index within the vector first, then within each vector item
// Fetches the next range-shard mapping. When the input range is exhausted, std::nullopt is
// returned. Within an input range, results are contiguous and non-overlapping (but since input
// ranges usually are discontiguous, overall the results are not contiguous). Together, the results
// span the input ranges.
//
// The result is augmented with an `element` field which indicates the index from the input vector
// that the result belongs to.
//
// Results are returned sorted by index within the vector first, then within each vector item
std::optional<ring_position_range_and_shard_and_element> next(const schema& s);
};
// Incrementally divides a `token_range` into sub-ranges wholly owned by a single shard.
// Unlike ring_position_range_sharder, it only returns result for a shard number provided by the caller.
class selective_token_range_sharder {
const sharder& _sharder;
dht::token_range _range;
@@ -77,6 +138,7 @@ class selective_token_range_sharder {
dht::token _start_token;
std::optional<range_bound<dht::token>> _start_boundary;
public:
// Initializes the selective_token_range_sharder with a token range and shard_id of interest.
selective_token_range_sharder(const sharder& sharder, dht::token_range range, shard_id shard)
: _sharder(sharder)
, _range(std::move(range))
@@ -86,6 +148,9 @@ public:
, _start_boundary(_sharder.shard_of(_start_token) == shard ?
_range.start() : range_bound<dht::token>(_sharder.token_for_next_shard(_start_token, shard))) {
}
// Returns the next token_range that is both wholly contained within the input range and also
// wholly owned by the input shard_id. When the input range is exhausted, std::nullopt is returned.
// Note if the range does not intersect the shard at all, std::nullopt will be returned immediately.
std::optional<dht::token_range> next();
};