readers: move multishard reader & friends to reader/multishard.cc

Since the multishard reader family weighs more than 1K SLOC, it gets
its own .cc file.
This commit is contained in:
Botond Dénes
2022-03-25 14:53:42 +02:00
parent 3505ef8a49
commit d0ea895671
16 changed files with 1412 additions and 1320 deletions

View File

@@ -700,6 +700,7 @@ scylla_core = (['replica/database.cc',
'converting_mutation_partition_applier.cc',
'mutation_reader.cc',
'readers/combined.cc',
'readers/multishard.cc',
'readers/mutation_reader.cc',
'readers/mutation_readers.cc',
'mutation_query.cc',

View File

@@ -57,6 +57,7 @@
#include "utils/fb_utilities.hh"
#include "query-result-writer.hh"
#include "readers/from_fragments_v2.hh"
#include "readers/evictable.hh"
using namespace std::chrono_literals;

View File

@@ -13,6 +13,7 @@
#include "utils/error_injection.hh"
#include "db/view/view_updating_consumer.hh"
#include "sstables/sstables.hh"
#include "readers/evictable.hh"
static logging::logger vug_logger("view_update_generator");

View File

@@ -12,6 +12,7 @@
#include "replica/database.hh"
#include "db/config.hh"
#include "query-result-writer.hh"
#include "readers/multishard.hh"
#include <seastar/core/coroutine.hh>

File diff suppressed because it is too large Load Diff

View File

@@ -354,203 +354,6 @@ snapshot_source make_empty_snapshot_source();
using mutation_source_opt = optimized_optional<mutation_source>;
/// Make a foreign_reader.
///
/// foreign_reader is a local representative of a reader located on a remote
/// shard. Manages its lifecycle and takes care of seamlessly transferring
/// produced fragments. Fragments are *copied* between the shards, a
/// bufferful at a time.
/// To maximize throughput read-ahead is used. After each fill_buffer() or
/// fast_forward_to() a read-ahead (a fill_buffer() on the remote reader) is
/// issued. This read-ahead runs in the background and is brought back to
/// foreground on the next fill_buffer() or fast_forward_to() call.
/// If the reader resides on this shard (the shard where make_foreign_reader()
/// is called) there is no need to wrap it in foreign_reader, just return it as
/// is.
///
/// \param reader the (possibly remote) reader to wrap, owned via a foreign_ptr.
/// \param fwd_sm defaults to streamed_mutation::forwarding::no.
flat_mutation_reader_v2 make_foreign_reader(schema_ptr schema,
reader_permit permit,
foreign_ptr<std::unique_ptr<flat_mutation_reader_v2>> reader,
streamed_mutation::forwarding fwd_sm = streamed_mutation::forwarding::no);
/// Make an auto-paused evictable reader.
///
/// The reader is paused after each use, that is after each call to any of its
/// members that cause actual reading to be done (`fill_buffer()` and
/// `fast_forward_to()`). When paused, the reader is made evictable, that is,
/// it is registered with reader concurrency semaphore as an inactive read.
/// The reader is resumed automatically on the next use. If it was evicted, it
/// will be recreated at the position it left off reading. This is all
/// transparent to its user.
/// Parameters passed by reference (`pr`, `ps` and `pc`) have to be kept alive
/// while the reader is alive.
flat_mutation_reader_v2 make_auto_paused_evictable_reader_v2(
mutation_source ms,
schema_ptr schema,
reader_permit permit,
const dht::partition_range& pr,
const query::partition_slice& ps,
const io_priority_class& pc,
tracing::trace_state_ptr trace_state,
mutation_reader::forwarding fwd_mr);
class evictable_reader_v2;
/// Handle used to pause a manually-paused evictable reader.
///
/// Instances cannot be created directly: the constructor is private and only
/// make_manually_paused_evictable_reader_v2() is befriended, so a handle is
/// always obtained together with the reader it refers to.
class evictable_reader_handle_v2 {
public:
    /// Pause the reader this handle refers to.
    void pause();

private:
    // The factory is the sole producer of handles.
    friend std::pair<flat_mutation_reader_v2, evictable_reader_handle_v2> make_manually_paused_evictable_reader_v2(
            mutation_source,
            schema_ptr,
            reader_permit,
            const dht::partition_range&,
            const query::partition_slice&,
            const io_priority_class&,
            tracing::trace_state_ptr,
            mutation_reader::forwarding);

    explicit evictable_reader_handle_v2(evictable_reader_v2& r);

    // Raw pointer to the reader being controlled.
    // NOTE(review): presumably non-owning, the reader's lifetime is managed elsewhere - confirm.
    evictable_reader_v2* _r;
};
/// Make a manually-paused evictable reader.
///
/// The reader can be paused via the evictable reader handle when desired. The
/// intended usage is subsequent reads done in bursts, after which the reader is
/// not used for some time. When paused, the reader is made evictable, that is,
/// it is registered with reader concurrency semaphore as an inactive read.
/// The reader is resumed automatically on the next use. If it was evicted, it
/// will be recreated at the position it left off reading. This is all
/// transparent to its user.
/// Parameters passed by reference (`pr`, `ps` and `pc`) have to be kept alive
/// while the reader is alive.
///
/// \return a pair of the reader and the handle through which it can be paused.
std::pair<flat_mutation_reader_v2, evictable_reader_handle_v2> make_manually_paused_evictable_reader_v2(
mutation_source ms,
schema_ptr schema,
reader_permit permit,
const dht::partition_range& pr,
const query::partition_slice& ps,
const io_priority_class& pc,
tracing::trace_state_ptr trace_state,
mutation_reader::forwarding fwd_mr);
/// Reader lifecycle policy for the multishard combining reader.
///
/// This policy is expected to make sure any additional resource the readers
/// might need is kept alive for the lifetime of the readers, not that
/// of the multishard reader. This is a very important distinction. As
/// destructors cannot return futures, the multishard reader will be
/// destroyed before all its shard readers could stop properly. Hence it
/// is the duty of this policy to make sure all objects the shard readers
/// depend on stay alive until they are properly destroyed on their home
/// shards. Note that this also includes the passed in `range` and `slice`
/// parameters because although client code is required to keep them alive as
/// long as the top level reader lives, the shard readers might outlive the
/// multishard reader itself.
class reader_lifecycle_policy_v2 {
public:
    /// What is left of a shard reader when it is handed to destroy_reader():
    /// its inactive-read handle and the fragments it did not get to emit.
    struct stopped_reader {
        reader_concurrency_semaphore::inactive_read_handle handle;
        flat_mutation_reader_v2::tracked_buffer unconsumed_fragments;
    };

public:
    /// This is a polymorphic interface that is owned through pointers to the
    /// base class, so the destructor is virtual to keep destruction through
    /// such a pointer well-defined for every concrete policy.
    virtual ~reader_lifecycle_policy_v2() = default;

    /// Create an appropriate reader on the shard it is called on.
    ///
    /// Will be called when the multishard reader visits a shard for the
    /// first time or when a reader has to be recreated after having been
    /// evicted (while paused). This method should also enter gates, take locks
    /// or whatever is appropriate to make sure resources it is using on the
    /// remote shard stay alive, during the lifetime of the created reader.
    ///
    /// The \c permit parameter shall be obtained via `obtain_reader_permit()`
    virtual flat_mutation_reader_v2 create_reader(
            schema_ptr schema,
            reader_permit permit,
            const dht::partition_range& range,
            const query::partition_slice& slice,
            const io_priority_class& pc,
            tracing::trace_state_ptr trace_state,
            mutation_reader::forwarding fwd_mr) = 0;

    /// Updates the read-range of the shard reader.
    ///
    /// Gives the lifecycle-policy a chance to update its stored read-range (if
    /// it stores one). Called after any modification to the read range
    /// (typically after fast_forward_to()). The range is identical to the one
    /// the reader holds a reference to after the modification happened. When
    /// this method is called, it is safe to destroy the previous range
    /// instance.
    ///
    /// This method has to be called on the shard the reader lives on.
    virtual void update_read_range(lw_shared_ptr<const dht::partition_range> pr) = 0;

    /// Destroy the shard reader.
    ///
    /// Will be called when the multishard reader is being destroyed. It will be
    /// called for each of the shard readers.
    /// This method is expected to do a proper cleanup, that is, leave any gates,
    /// release any locks or whatever is appropriate for the shard reader.
    ///
    /// This method has to be called on the shard the reader lives on.
    /// This method will be called from a destructor so it cannot throw.
    virtual future<> destroy_reader(stopped_reader reader) noexcept = 0;

    /// Get the relevant semaphore for this read.
    ///
    /// The semaphore is used to register paused readers as inactive
    /// readers. The semaphore then can evict these readers when resources are
    /// in demand.
    /// The multishard reader will pause and resume readers via the `pause()`
    /// and `try_resume()` helper methods. Clients can resume any paused readers
    /// after the multishard reader is destroyed via the same helper methods.
    // NOTE(review): no `pause()`/`try_resume()` helpers are declared on this
    // class - confirm where they live and whether this paragraph is current.
    ///
    /// This method will be called on the shard where the relevant reader lives.
    virtual reader_concurrency_semaphore& semaphore() = 0;

    /// Obtain an admitted permit.
    ///
    /// The permit will be associated with the semaphore returned by
    /// `semaphore()`.
    ///
    /// This method will be called on the shard where the relevant reader lives.
    virtual future<reader_permit> obtain_reader_permit(schema_ptr schema, const char* const description, db::timeout_clock::time_point timeout) = 0;
};
/// Make a multishard_combining_reader.
///
/// multishard_combining_reader takes care of reading a range from all shards
/// that own a subrange in the range. Shard readers are created on-demand, when
/// the shard is visited for the first time.
///
/// The read starts with a concurrency of one, that is the reader reads from a
/// single shard at a time. The concurrency is exponentially increased (to a
/// maximum of the number of shards) when a reader's buffer is empty after
/// moving to the next shard. This condition is important as we only want to
/// increase concurrency for sparse tables that have little data and the reader
/// has to move between shards often. When concurrency is > 1, the reader
/// issues background read-aheads to the next shards so that by the time it
/// needs to move to them they have the data ready.
/// For dense tables (where we rarely cross shards) we rely on the
/// foreign_reader to issue sufficient read-aheads on its own to avoid blocking.
///
/// The readers' life-cycles are managed through the supplied lifecycle policy.
flat_mutation_reader_v2 make_multishard_combining_reader_v2(
shared_ptr<reader_lifecycle_policy_v2> lifecycle_policy,
schema_ptr schema,
reader_permit permit,
const dht::partition_range& pr,
const query::partition_slice& ps,
const io_priority_class& pc,
tracing::trace_state_ptr trace_state = nullptr,
mutation_reader::forwarding fwd_mr = mutation_reader::forwarding::no);
/// Variant of make_multishard_combining_reader_v2() that additionally takes an
/// explicit \c sharder; all other parameters and defaults are the same.
/// Judging by its name it is meant to be used by tests only.
// NOTE(review): how `sharder` is used is not visible from this declaration -
// confirm against the implementation.
flat_mutation_reader_v2 make_multishard_combining_reader_v2_for_tests(
const dht::sharder& sharder,
shared_ptr<reader_lifecycle_policy_v2> lifecycle_policy,
schema_ptr schema,
reader_permit permit,
const dht::partition_range& pr,
const query::partition_slice& ps,
const io_priority_class& pc,
tracing::trace_state_ptr trace_state = nullptr,
mutation_reader::forwarding fwd_mr = mutation_reader::forwarding::no);
class queue_reader;
/// Calls to different methods cannot overlap!

View File

@@ -10,6 +10,7 @@
#include "mutation_reader.hh"
#include "mutation_fragment_v2.hh"
#include "schema_registry.hh"
#include "readers/foreign.hh"
#include <vector>
#include <seastar/core/future-util.hh>
#include <seastar/core/queue.hh>

83
readers/evictable.hh Normal file
View File

@@ -0,0 +1,83 @@
/*
* Copyright (C) 2022-present ScyllaDB
*/
/*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
#pragma once
#include "dht/i_partitioner.hh"
#include "readers/flat_mutation_reader_fwd.hh"
#include "schema_fwd.hh"
#include "seastarx.hh"
namespace seastar {
class io_priority_class;
}
class reader_permit;
class mutation_source;
namespace tracing {
class trace_state_ptr;
}
/// Make an auto-paused evictable reader.
///
/// The reader is paused after each use, that is after each call to any of its
/// members that cause actual reading to be done (`fill_buffer()` and
/// `fast_forward_to()`). When paused, the reader is made evictable, that is,
/// it is registered with reader concurrency semaphore as an inactive read.
/// The reader is resumed automatically on the next use. If it was evicted, it
/// will be recreated at the position it left off reading. This is all
/// transparent to its user.
/// Parameters passed by reference (`pr`, `ps` and `pc`) have to be kept alive
/// while the reader is alive.
flat_mutation_reader_v2 make_auto_paused_evictable_reader_v2(
mutation_source ms,
schema_ptr schema,
reader_permit permit,
const dht::partition_range& pr,
const query::partition_slice& ps,
const io_priority_class& pc,
tracing::trace_state_ptr trace_state,
mutation_reader::forwarding fwd_mr);
class evictable_reader_v2;
/// Handle used to pause a manually-paused evictable reader.
///
/// Instances cannot be created directly: the constructor is private and only
/// make_manually_paused_evictable_reader_v2() is befriended, so a handle is
/// always obtained together with the reader it refers to.
class evictable_reader_handle_v2 {
public:
    /// Pause the reader this handle refers to.
    void pause();

private:
    // The factory is the sole producer of handles.
    friend std::pair<flat_mutation_reader_v2, evictable_reader_handle_v2> make_manually_paused_evictable_reader_v2(
            mutation_source,
            schema_ptr,
            reader_permit,
            const dht::partition_range&,
            const query::partition_slice&,
            const io_priority_class&,
            tracing::trace_state_ptr,
            mutation_reader::forwarding);

    explicit evictable_reader_handle_v2(evictable_reader_v2& r);

    // Raw pointer to the reader being controlled.
    // NOTE(review): presumably non-owning, the reader's lifetime is managed elsewhere - confirm.
    evictable_reader_v2* _r;
};
/// Make a manually-paused evictable reader.
///
/// The reader can be paused via the evictable reader handle when desired. The
/// intended usage is subsequent reads done in bursts, after which the reader is
/// not used for some time. When paused, the reader is made evictable, that is,
/// it is registered with reader concurrency semaphore as an inactive read.
/// The reader is resumed automatically on the next use. If it was evicted, it
/// will be recreated at the position it left off reading. This is all
/// transparent to its user.
/// Parameters passed by reference (`pr`, `ps` and `pc`) have to be kept alive
/// while the reader is alive.
///
/// \return a pair of the reader and the handle through which it can be paused.
std::pair<flat_mutation_reader_v2, evictable_reader_handle_v2> make_manually_paused_evictable_reader_v2(
mutation_source ms,
schema_ptr schema,
reader_permit permit,
const dht::partition_range& pr,
const query::partition_slice& ps,
const io_priority_class& pc,
tracing::trace_state_ptr trace_state,
mutation_reader::forwarding fwd_mr);

34
readers/foreign.hh Normal file
View File

@@ -0,0 +1,34 @@
/*
* Copyright (C) 2022-present ScyllaDB
*/
/*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
#pragma once
#include <seastar/core/sharded.hh>
#include "readers/flat_mutation_reader_fwd.hh"
#include "schema_fwd.hh"
class reader_permit;
/// Make a foreign_reader.
///
/// foreign_reader is a local representative of a reader located on a remote
/// shard. Manages its lifecycle and takes care of seamlessly transferring
/// produced fragments. Fragments are *copied* between the shards, a
/// bufferful at a time.
/// To maximize throughput read-ahead is used. After each fill_buffer() or
/// fast_forward_to() a read-ahead (a fill_buffer() on the remote reader) is
/// issued. This read-ahead runs in the background and is brought back to
/// foreground on the next fill_buffer() or fast_forward_to() call.
/// If the reader resides on this shard (the shard where make_foreign_reader()
/// is called) there is no need to wrap it in foreign_reader, just return it as
/// is.
///
/// \param reader the (possibly remote) reader to wrap, owned via a foreign_ptr.
/// \param fwd_sm defaults to streamed_mutation::forwarding::no.
flat_mutation_reader_v2 make_foreign_reader(schema_ptr schema,
reader_permit permit,
foreign_ptr<std::unique_ptr<flat_mutation_reader_v2>> reader,
streamed_mutation::forwarding fwd_sm = streamed_mutation::forwarding::no);

1144
readers/multishard.cc Normal file

File diff suppressed because it is too large Load Diff

140
readers/multishard.hh Normal file
View File

@@ -0,0 +1,140 @@
/*
* Copyright (C) 2022-present ScyllaDB
*/
/*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
#pragma once
#include "reader_concurrency_semaphore.hh"
#include "readers/flat_mutation_reader_fwd.hh"
#include "tracing/trace_state.hh"
#include "seastarx.hh"
namespace seastar {
class io_priority_class;
}
/// Reader lifecycle policy for the multishard combining reader.
///
/// This policy is expected to make sure any additional resource the readers
/// might need is kept alive for the lifetime of the readers, not that
/// of the multishard reader. This is a very important distinction. As
/// destructors cannot return futures, the multishard reader will be
/// destroyed before all its shard readers could stop properly. Hence it
/// is the duty of this policy to make sure all objects the shard readers
/// depend on stay alive until they are properly destroyed on their home
/// shards. Note that this also includes the passed in `range` and `slice`
/// parameters because although client code is required to keep them alive as
/// long as the top level reader lives, the shard readers might outlive the
/// multishard reader itself.
class reader_lifecycle_policy_v2 {
public:
    /// What is left of a shard reader when it is handed to destroy_reader():
    /// its inactive-read handle and the fragments it did not get to emit.
    struct stopped_reader {
        reader_concurrency_semaphore::inactive_read_handle handle;
        flat_mutation_reader_v2::tracked_buffer unconsumed_fragments;
    };

public:
    /// This is a polymorphic interface that is owned through pointers to the
    /// base class, so the destructor is virtual to keep destruction through
    /// such a pointer well-defined for every concrete policy.
    virtual ~reader_lifecycle_policy_v2() = default;

    /// Create an appropriate reader on the shard it is called on.
    ///
    /// Will be called when the multishard reader visits a shard for the
    /// first time or when a reader has to be recreated after having been
    /// evicted (while paused). This method should also enter gates, take locks
    /// or whatever is appropriate to make sure resources it is using on the
    /// remote shard stay alive, during the lifetime of the created reader.
    ///
    /// The \c permit parameter shall be obtained via `obtain_reader_permit()`
    virtual flat_mutation_reader_v2 create_reader(
            schema_ptr schema,
            reader_permit permit,
            const dht::partition_range& range,
            const query::partition_slice& slice,
            const io_priority_class& pc,
            tracing::trace_state_ptr trace_state,
            mutation_reader::forwarding fwd_mr) = 0;

    /// Updates the read-range of the shard reader.
    ///
    /// Gives the lifecycle-policy a chance to update its stored read-range (if
    /// it stores one). Called after any modification to the read range
    /// (typically after fast_forward_to()). The range is identical to the one
    /// the reader holds a reference to after the modification happened. When
    /// this method is called, it is safe to destroy the previous range
    /// instance.
    ///
    /// This method has to be called on the shard the reader lives on.
    virtual void update_read_range(lw_shared_ptr<const dht::partition_range> pr) = 0;

    /// Destroy the shard reader.
    ///
    /// Will be called when the multishard reader is being destroyed. It will be
    /// called for each of the shard readers.
    /// This method is expected to do a proper cleanup, that is, leave any gates,
    /// release any locks or whatever is appropriate for the shard reader.
    ///
    /// This method has to be called on the shard the reader lives on.
    /// This method will be called from a destructor so it cannot throw.
    virtual future<> destroy_reader(stopped_reader reader) noexcept = 0;

    /// Get the relevant semaphore for this read.
    ///
    /// The semaphore is used to register paused readers as inactive
    /// readers. The semaphore then can evict these readers when resources are
    /// in demand.
    /// The multishard reader will pause and resume readers via the `pause()`
    /// and `try_resume()` helper methods. Clients can resume any paused readers
    /// after the multishard reader is destroyed via the same helper methods.
    // NOTE(review): no `pause()`/`try_resume()` helpers are declared on this
    // class - confirm where they live and whether this paragraph is current.
    ///
    /// This method will be called on the shard where the relevant reader lives.
    virtual reader_concurrency_semaphore& semaphore() = 0;

    /// Obtain an admitted permit.
    ///
    /// The permit will be associated with the semaphore returned by
    /// `semaphore()`.
    ///
    /// This method will be called on the shard where the relevant reader lives.
    virtual future<reader_permit> obtain_reader_permit(schema_ptr schema, const char* const description, db::timeout_clock::time_point timeout) = 0;
};
/// Make a multishard_combining_reader.
///
/// multishard_combining_reader takes care of reading a range from all shards
/// that own a subrange in the range. Shard readers are created on-demand, when
/// the shard is visited for the first time.
///
/// The read starts with a concurrency of one, that is the reader reads from a
/// single shard at a time. The concurrency is exponentially increased (to a
/// maximum of the number of shards) when a reader's buffer is empty after
/// moving to the next shard. This condition is important as we only want to
/// increase concurrency for sparse tables that have little data and the reader
/// has to move between shards often. When concurrency is > 1, the reader
/// issues background read-aheads to the next shards so that by the time it
/// needs to move to them they have the data ready.
/// For dense tables (where we rarely cross shards) we rely on the
/// foreign_reader to issue sufficient read-aheads on its own to avoid blocking.
///
/// The readers' life-cycles are managed through the supplied lifecycle policy.
flat_mutation_reader_v2 make_multishard_combining_reader_v2(
shared_ptr<reader_lifecycle_policy_v2> lifecycle_policy,
schema_ptr schema,
reader_permit permit,
const dht::partition_range& pr,
const query::partition_slice& ps,
const io_priority_class& pc,
tracing::trace_state_ptr trace_state = nullptr,
mutation_reader::forwarding fwd_mr = mutation_reader::forwarding::no);
/// Variant of make_multishard_combining_reader_v2() that additionally takes an
/// explicit \c sharder; all other parameters and defaults are the same.
/// Judging by its name it is meant to be used by tests only.
// NOTE(review): how `sharder` is used is not visible from this declaration -
// confirm against the implementation.
flat_mutation_reader_v2 make_multishard_combining_reader_v2_for_tests(
const dht::sharder& sharder,
shared_ptr<reader_lifecycle_policy_v2> lifecycle_policy,
schema_ptr schema,
reader_permit permit,
const dht::partition_range& pr,
const query::partition_slice& ps,
const io_priority_class& pc,
tracing::trace_state_ptr trace_state = nullptr,
mutation_reader::forwarding fwd_mr = mutation_reader::forwarding::no);

View File

@@ -47,6 +47,7 @@
#include "cql3/untyped_result_set.hh"
#include "idl/partition_checksum.dist.hh"
#include "readers/empty.hh"
#include "readers/evictable.hh"
extern logging::logger rlogger;

View File

@@ -60,6 +60,7 @@
#include "replica/data_dictionary_impl.hh"
#include "readers/multi_range.hh"
#include "readers/multishard.hh"
using namespace std::chrono_literals;
using namespace db;

View File

@@ -57,6 +57,8 @@
#include "readers/empty_v2.hh"
#include "readers/next_partition_adaptor.hh"
#include "readers/combined.hh"
#include "readers/foreign.hh"
#include "readers/evictable.hh"
static schema_ptr make_schema() {
return schema_builder("ks", "cf")

View File

@@ -32,6 +32,7 @@
#include "utils/ranges.hh"
#include "readers/from_mutations_v2.hh"
#include "readers/evictable.hh"
using namespace std::literals::chrono_literals;

View File

@@ -8,7 +8,7 @@
#pragma once
#include "mutation_reader.hh"
#include "readers/multishard.hh"
#include <seastar/core/gate.hh>
class test_reader_lifecycle_policy