From 1d84a254c00b36dc2576e06ee288e28a13238195 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Sielu=C5=BCycki?= Date: Thu, 10 Mar 2022 12:07:32 +0100 Subject: [PATCH] flat_mutation_reader: Split readers by file and remove unnecessary includes. The flat_mutation_reader files were conflated and contained multiple readers, which were not strictly necessary. Splitting optimizes both iterative compilation times, as touching rarely used readers doesn't recompile large chunks of the codebase. Total compilation times are also improved, as the sizes of flat_mutation_reader.hh and flat_mutation_reader_v2.hh have been reduced and those files are included by many files in the codebase. With changes real 29m14.051s user 168m39.071s sys 5m13.443s Without changes real 30m36.203s user 175m43.354s sys 5m26.376s Closes #10194 --- cache_flat_mutation_reader.hh | 3 +- compaction/compaction_strategy.hh | 2 +- configure.py | 3 +- db/chained_delegating_reader.hh | 2 +- db/data_listeners.hh | 2 +- db/size_estimates_virtual_reader.cc | 1 + db/size_estimates_virtual_reader.hh | 3 +- db/view/build_progress_virtual_reader.hh | 2 +- db/view/view.cc | 1 + db/view/view.hh | 2 +- db/view/view_updating_consumer.hh | 1 + db/virtual_table.cc | 2 + dht/i_partitioner_fwd.hh | 36 + frozen_mutation.cc | 2 +- index/built_indexes_virtual_reader.hh | 2 +- message/messaging_service.cc | 2 +- mutation.cc | 2 +- mutation_fragment.hh | 27 +- mutation_fragment_fwd.hh | 20 + mutation_fragment_v2.hh | 3 - mutation_reader.cc | 4 +- mutation_reader.hh | 5 +- mutation_writer/feed_writers.hh | 2 +- mutation_writer/multishard_writer.hh | 2 +- partition_snapshot_reader.hh | 4 +- reader_concurrency_semaphore.cc | 1 + reader_concurrency_semaphore.hh | 2 +- readers/conversion.hh | 18 + readers/delegating.hh | 42 + readers/delegating_v2.hh | 67 + readers/empty.hh | 16 + readers/empty_v2.hh | 16 + .../flat_mutation_reader.hh | 174 +- readers/flat_mutation_reader_fwd.hh | 26 + .../flat_mutation_reader_v2.hh | 147 +- 
readers/forwardable.hh | 14 + readers/forwardable_v2.hh | 14 + readers/from_fragments.hh | 33 + readers/from_fragments_v2.hh | 31 + readers/from_mutations.hh | 51 + readers/from_mutations_v2.hh | 57 + readers/generating.hh | 25 + readers/generating_v2.hh | 21 + readers/multi_range.hh | 58 + readers/mutation_reader.cc | 403 ++++ .../mutation_readers.cc | 2100 +++++++---------- readers/nonforwardable.hh | 14 + readers/reversing.hh | 49 + readers/slice_mutations.hh | 20 + repair/row_level.cc | 1 + replica/database.cc | 1 + replica/memtable.cc | 2 + replica/table.cc | 4 + row_cache.cc | 3 + sstables/kl/reader.hh | 3 +- sstables/mx/reader.hh | 4 +- sstables/sstable_mutation_reader.hh | 2 +- sstables/sstable_set.cc | 2 + sstables/sstable_set.hh | 5 +- sstables/sstables.cc | 2 + streaming/stream_session.cc | 1 + streaming/stream_transfer_task.cc | 2 +- test/boost/flat_mutation_reader_test.cc | 13 +- test/boost/frozen_mutation_test.cc | 1 + test/boost/memtable_test.cc | 2 +- ...ombining_reader_as_mutation_source_test.cc | 1 + test/boost/mutation_fragment_test.cc | 1 + test/boost/mutation_query_test.cc | 1 + test/boost/mutation_reader_test.cc | 6 + test/boost/mutation_test.cc | 3 + test/boost/mutation_writer_test.cc | 6 +- test/boost/querier_cache_test.cc | 2 + .../reader_concurrency_semaphore_test.cc | 1 + test/boost/row_cache_test.cc | 7 +- test/boost/sstable_compaction_test.cc | 2 + test/boost/sstable_datafile_test.cc | 2 + test/boost/sstable_set_test.cc | 1 + test/boost/view_build_test.cc | 2 + test/lib/flat_mutation_reader_assertions.hh | 3 +- test/lib/mutation_source_test.cc | 2 +- test/lib/normalizing_reader.hh | 2 +- test/manual/enormous_table_scan_test.cc | 1 + test/perf/perf_mutation_readers.cc | 4 +- 83 files changed, 2063 insertions(+), 1566 deletions(-) create mode 100644 dht/i_partitioner_fwd.hh create mode 100644 mutation_fragment_fwd.hh create mode 100644 readers/conversion.hh create mode 100644 readers/delegating.hh create mode 100644 
readers/delegating_v2.hh create mode 100644 readers/empty.hh create mode 100644 readers/empty_v2.hh rename flat_mutation_reader.hh => readers/flat_mutation_reader.hh (82%) create mode 100644 readers/flat_mutation_reader_fwd.hh rename flat_mutation_reader_v2.hh => readers/flat_mutation_reader_v2.hh (82%) create mode 100644 readers/forwardable.hh create mode 100644 readers/forwardable_v2.hh create mode 100644 readers/from_fragments.hh create mode 100644 readers/from_fragments_v2.hh create mode 100644 readers/from_mutations.hh create mode 100644 readers/from_mutations_v2.hh create mode 100644 readers/generating.hh create mode 100644 readers/generating_v2.hh create mode 100644 readers/multi_range.hh create mode 100644 readers/mutation_reader.cc rename flat_mutation_reader.cc => readers/mutation_readers.cc (80%) create mode 100644 readers/nonforwardable.hh create mode 100644 readers/reversing.hh create mode 100644 readers/slice_mutations.hh diff --git a/cache_flat_mutation_reader.hh b/cache_flat_mutation_reader.hh index 9bfc377a83..c47dcc9bed 100644 --- a/cache_flat_mutation_reader.hh +++ b/cache_flat_mutation_reader.hh @@ -15,7 +15,8 @@ #include "query-request.hh" #include "partition_snapshot_row_cursor.hh" #include "read_context.hh" -#include "flat_mutation_reader.hh" +#include "readers/flat_mutation_reader.hh" +#include "readers/delegating.hh" #include "clustering_key_filter.hh" namespace cache { diff --git a/compaction/compaction_strategy.hh b/compaction/compaction_strategy.hh index 0d96991e13..d50d637c23 100644 --- a/compaction/compaction_strategy.hh +++ b/compaction/compaction_strategy.hh @@ -16,7 +16,7 @@ #include "sstables/shared_sstable.hh" #include "exceptions/exceptions.hh" #include "compaction_strategy_type.hh" -#include "flat_mutation_reader.hh" +#include "readers/flat_mutation_reader.hh" #include "table_state.hh" #include "strategy_control.hh" diff --git a/configure.py b/configure.py index 5274fcf813..270199d6eb 100755 --- a/configure.py +++ b/configure.py 
@@ -698,7 +698,8 @@ scylla_core = (['replica/database.cc', 'mutation_partition_serializer.cc', 'converting_mutation_partition_applier.cc', 'mutation_reader.cc', - 'flat_mutation_reader.cc', + 'readers/mutation_reader.cc', + 'readers/mutation_readers.cc', 'mutation_query.cc', 'keys.cc', 'counters.cc', diff --git a/db/chained_delegating_reader.hh b/db/chained_delegating_reader.hh index c2944243e0..489fb272a4 100644 --- a/db/chained_delegating_reader.hh +++ b/db/chained_delegating_reader.hh @@ -10,7 +10,7 @@ #include -#include "flat_mutation_reader_v2.hh" +#include "readers/flat_mutation_reader_v2.hh" // A reader which allows to insert a deferring operation before reading. // All calls will first wait for a future to resolve, then forward to a given underlying reader. diff --git a/db/data_listeners.hh b/db/data_listeners.hh index 721832e9c4..a1d0d0fff7 100755 --- a/db/data_listeners.hh +++ b/db/data_listeners.hh @@ -15,7 +15,7 @@ #include "utils/hash.hh" #include "schema_fwd.hh" -#include "flat_mutation_reader_v2.hh" +#include "readers/flat_mutation_reader_v2.hh" #include "mutation_reader.hh" #include "utils/top_k.hh" #include "schema_registry.hh" diff --git a/db/size_estimates_virtual_reader.cc b/db/size_estimates_virtual_reader.cc index 5409a652e6..968ceefa7e 100644 --- a/db/size_estimates_virtual_reader.cc +++ b/db/size_estimates_virtual_reader.cc @@ -25,6 +25,7 @@ #include "replica/database.hh" #include "db/size_estimates_virtual_reader.hh" +#include "readers/from_mutations.hh" namespace db { diff --git a/db/size_estimates_virtual_reader.hh b/db/size_estimates_virtual_reader.hh index 2d175270d4..dabdb89404 100644 --- a/db/size_estimates_virtual_reader.hh +++ b/db/size_estimates_virtual_reader.hh @@ -8,7 +8,8 @@ * SPDX-License-Identifier: AGPL-3.0-or-later */ -#include "flat_mutation_reader_v2.hh" +#include "readers/flat_mutation_reader_v2.hh" +#include "readers/flat_mutation_reader.hh" #include "db/system_keyspace.hh" namespace replica { diff --git 
a/db/view/build_progress_virtual_reader.hh b/db/view/build_progress_virtual_reader.hh index bcdc4b099b..f13c22836d 100644 --- a/db/view/build_progress_virtual_reader.hh +++ b/db/view/build_progress_virtual_reader.hh @@ -10,7 +10,7 @@ #include "db/system_keyspace.hh" #include "db/timeout_clock.hh" #include "dht/i_partitioner.hh" -#include "flat_mutation_reader_v2.hh" +#include "readers/flat_mutation_reader_v2.hh" #include "mutation_fragment.hh" #include "mutation_reader.hh" #include "query-request.hh" diff --git a/db/view/view.cc b/db/view/view.cc index d5b74d03d6..be4f7ab9c5 100644 --- a/db/view/view.cc +++ b/db/view/view.cc @@ -56,6 +56,7 @@ #include "utils/exponential_backoff_retry.hh" #include "utils/fb_utilities.hh" #include "query-result-writer.hh" +#include "readers/from_fragments.hh" using namespace std::chrono_literals; diff --git a/db/view/view.hh b/db/view/view.hh index 564db8a82c..c321a7dead 100644 --- a/db/view/view.hh +++ b/db/view/view.hh @@ -12,7 +12,7 @@ #include "gc_clock.hh" #include "query-request.hh" #include "schema_fwd.hh" -#include "flat_mutation_reader.hh" +#include "readers/flat_mutation_reader.hh" #include "frozen_mutation.hh" class frozen_mutation_and_schema; diff --git a/db/view/view_updating_consumer.hh b/db/view/view_updating_consumer.hh index b94eba867c..6a0a1b7b0d 100644 --- a/db/view/view_updating_consumer.hh +++ b/db/view/view_updating_consumer.hh @@ -19,6 +19,7 @@ #include "mutation_rebuilder.hh" class evictable_reader_handle; +class evictable_reader_handle_v2; namespace db::view { diff --git a/db/virtual_table.cc b/db/virtual_table.cc index 2fb02ebcd0..b4d81c9bb9 100644 --- a/db/virtual_table.cc +++ b/db/virtual_table.cc @@ -12,6 +12,8 @@ #include "db/virtual_table.hh" #include "db/chained_delegating_reader.hh" +#include "readers/reversing.hh" +#include "readers/forwardable.hh" namespace db { diff --git a/dht/i_partitioner_fwd.hh b/dht/i_partitioner_fwd.hh new file mode 100644 index 0000000000..e3ebee6be2 --- /dev/null +++ 
b/dht/i_partitioner_fwd.hh @@ -0,0 +1,36 @@ +/* + * Modified by ScyllaDB + * Copyright (C) 2022-present ScyllaDB + */ + +/* + * SPDX-License-Identifier: (AGPL-3.0-or-later and Apache-2.0) + */ + +#pragma once +#include +#include "range.hh" + +namespace sstables { + +class key_view; +class decorated_key_view; + +} + +namespace dht { + +class decorated_key; +class ring_position; +class token; + +using partition_range = nonwrapping_range; +using token_range = nonwrapping_range; + +using partition_range_vector = std::vector; +using token_range_vector = std::vector; + +class decorated_key; + +using decorated_key_opt = std::optional; +} diff --git a/frozen_mutation.cc b/frozen_mutation.cc index 6fddcafe9d..ef4754b421 100644 --- a/frozen_mutation.cc +++ b/frozen_mutation.cc @@ -26,7 +26,7 @@ #include "idl/uuid.dist.impl.hh" #include "idl/keys.dist.impl.hh" #include "idl/mutation.dist.impl.hh" -#include "flat_mutation_reader.hh" +#include "readers/flat_mutation_reader.hh" #include "converting_mutation_partition_applier.hh" #include "mutation_partition_view.hh" diff --git a/index/built_indexes_virtual_reader.hh b/index/built_indexes_virtual_reader.hh index 9767b9d248..a548b50d01 100644 --- a/index/built_indexes_virtual_reader.hh +++ b/index/built_indexes_virtual_reader.hh @@ -8,7 +8,7 @@ #include "replica/database.hh" #include "db/system_keyspace.hh" -#include "flat_mutation_reader_v2.hh" +#include "readers/flat_mutation_reader_v2.hh" #include "mutation_fragment_v2.hh" #include "mutation_reader.hh" #include "query-request.hh" diff --git a/message/messaging_service.cc b/message/messaging_service.cc index 3cab11a084..12d78edf6f 100644 --- a/message/messaging_service.cc +++ b/message/messaging_service.cc @@ -88,7 +88,7 @@ #include #include #include "frozen_mutation.hh" -#include "flat_mutation_reader.hh" +#include "readers/flat_mutation_reader.hh" #include "streaming/stream_manager.hh" #include "streaming/stream_mutation_fragments_cmd.hh" #include "locator/snitch_base.hh" diff 
--git a/mutation.cc b/mutation.cc index 3cca6a7a27..45a9b51393 100644 --- a/mutation.cc +++ b/mutation.cc @@ -8,7 +8,7 @@ #include "mutation.hh" #include "query-result-writer.hh" -#include "flat_mutation_reader.hh" +#include "readers/flat_mutation_reader.hh" #include "mutation_rebuilder.hh" mutation::data::data(dht::decorated_key&& key, schema_ptr&& schema) diff --git a/mutation_fragment.hh b/mutation_fragment.hh index a8f43aef4e..008d4ac68a 100644 --- a/mutation_fragment.hh +++ b/mutation_fragment.hh @@ -18,6 +18,7 @@ #include "db/timeout_clock.hh" #include "reader_permit.hh" +#include "mutation_fragment_fwd.hh" // mutation_fragments are the objects that streamed_mutation are going to // stream. They can represent: @@ -525,32 +526,6 @@ inline position_in_partition_view partition_end::position() const std::ostream& operator<<(std::ostream&, partition_region); std::ostream& operator<<(std::ostream&, mutation_fragment::kind); -using mutation_fragment_opt = optimized_optional; - -namespace streamed_mutation { - // Determines whether streamed_mutation is in forwarding mode or not. - // - // In forwarding mode the stream does not return all fragments right away, - // but only those belonging to the current clustering range. Initially - // current range only covers the static row. The stream can be forwarded - // (even before end-of- stream) to a later range with fast_forward_to(). - // Forwarding doesn't change initial restrictions of the stream, it can - // only be used to skip over data. - // - // Monotonicity of positions is preserved by forwarding. That is fragments - // emitted after forwarding will have greater positions than any fragments - // emitted before forwarding. - // - // For any range, all range tombstones relevant for that range which are - // present in the original stream will be emitted. Range tombstones - // emitted before forwarding which overlap with the new range are not - // necessarily re-emitted. 
- // - // When streamed_mutation is not in forwarding mode, fast_forward_to() - // cannot be used. - class forwarding_tag; - using forwarding = bool_class; -} // range_tombstone_stream is a helper object that simplifies producing a stream // of range tombstones and merging it with a stream of clustering rows. diff --git a/mutation_fragment_fwd.hh b/mutation_fragment_fwd.hh new file mode 100644 index 0000000000..421640b176 --- /dev/null +++ b/mutation_fragment_fwd.hh @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2016-present ScyllaDB + */ + +/* + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +#pragma once +#include +#include + +using namespace seastar; + +class mutation_fragment; +class mutation_fragment_v2; + +using mutation_fragment_opt = optimized_optional; +using mutation_fragment_v2_opt = optimized_optional; + diff --git a/mutation_fragment_v2.hh b/mutation_fragment_v2.hh index 9bd92bc011..fd9ab7b334 100644 --- a/mutation_fragment_v2.hh +++ b/mutation_fragment_v2.hh @@ -354,9 +354,6 @@ private: std::ostream& operator<<(std::ostream&, mutation_fragment_v2::kind); -using mutation_fragment_v2_opt = optimized_optional; - - // F gets a stream element as an argument and returns the new value which replaces that element // in the transformed stream. 
template diff --git a/mutation_reader.cc b/mutation_reader.cc index 8ead3f4cc6..07734f1a14 100644 --- a/mutation_reader.cc +++ b/mutation_reader.cc @@ -16,10 +16,12 @@ #include #include "mutation_reader.hh" -#include "flat_mutation_reader.hh" +#include "readers/flat_mutation_reader.hh" +#include "readers/empty.hh" #include "schema_registry.hh" #include "mutation_compactor.hh" #include "dht/sharder.hh" +#include "readers/empty_v2.hh" logging::logger mrlog("mutation_reader"); diff --git a/mutation_reader.hh b/mutation_reader.hh index a662d7b040..146a457363 100644 --- a/mutation_reader.hh +++ b/mutation_reader.hh @@ -14,9 +14,10 @@ #include #include #include "tracing/trace_state.hh" -#include "flat_mutation_reader.hh" -#include "flat_mutation_reader_v2.hh" +#include "readers/flat_mutation_reader.hh" +#include "readers/flat_mutation_reader_v2.hh" #include "reader_concurrency_semaphore.hh" +#include class reader_selector { protected: diff --git a/mutation_writer/feed_writers.hh b/mutation_writer/feed_writers.hh index beffbb2086..39c9c62cdf 100644 --- a/mutation_writer/feed_writers.hh +++ b/mutation_writer/feed_writers.hh @@ -8,7 +8,7 @@ #pragma once -#include "flat_mutation_reader.hh" +#include "readers/flat_mutation_reader.hh" #include "mutation_reader.hh" #include "seastar/core/coroutine.hh" diff --git a/mutation_writer/multishard_writer.hh b/mutation_writer/multishard_writer.hh index 0fa7563a5d..14f771cda2 100644 --- a/mutation_writer/multishard_writer.hh +++ b/mutation_writer/multishard_writer.hh @@ -9,7 +9,7 @@ #pragma once #include "schema_fwd.hh" -#include "flat_mutation_reader.hh" +#include "readers/flat_mutation_reader.hh" #include "dht/i_partitioner.hh" #include "utils/phased_barrier.hh" diff --git a/partition_snapshot_reader.hh b/partition_snapshot_reader.hh index 4a3ea26997..eb0f64abe9 100644 --- a/partition_snapshot_reader.hh +++ b/partition_snapshot_reader.hh @@ -9,8 +9,8 @@ #pragma once #include "partition_version.hh" -#include "flat_mutation_reader.hh" 
-#include "flat_mutation_reader_v2.hh" +#include "readers/flat_mutation_reader.hh" +#include "readers/flat_mutation_reader_v2.hh" #include "clustering_key_filter.hh" #include "query-request.hh" #include diff --git a/reader_concurrency_semaphore.cc b/reader_concurrency_semaphore.cc index 16c5c3ba08..c0da29b32c 100644 --- a/reader_concurrency_semaphore.cc +++ b/reader_concurrency_semaphore.cc @@ -8,6 +8,7 @@ #include #include +#include #include #include #include diff --git a/reader_concurrency_semaphore.hh b/reader_concurrency_semaphore.hh index 8cecb290f0..51872b8a79 100644 --- a/reader_concurrency_semaphore.hh +++ b/reader_concurrency_semaphore.hh @@ -14,7 +14,7 @@ #include #include #include "reader_permit.hh" -#include "flat_mutation_reader_v2.hh" +#include "readers/flat_mutation_reader_v2.hh" namespace bi = boost::intrusive; diff --git a/readers/conversion.hh b/readers/conversion.hh new file mode 100644 index 0000000000..0f60878899 --- /dev/null +++ b/readers/conversion.hh @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2022-present ScyllaDB + */ + +/* + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +#pragma once + +class flat_mutation_reader; +class flat_mutation_reader_v2; + +// Adapts a v2 reader to v1 reader +flat_mutation_reader downgrade_to_v1(flat_mutation_reader_v2); + +// Adapts a v1 reader to v2 reader +flat_mutation_reader_v2 upgrade_to_v2(flat_mutation_reader); diff --git a/readers/delegating.hh b/readers/delegating.hh new file mode 100644 index 0000000000..98a93ef88e --- /dev/null +++ b/readers/delegating.hh @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2022-present ScyllaDB + */ + +/* + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +#pragma once +#include "readers/flat_mutation_reader.hh" + +class flat_mutation_reader; + +flat_mutation_reader make_delegating_reader(flat_mutation_reader&); + +class delegating_reader : public flat_mutation_reader::impl { + flat_mutation_reader_opt _underlying_holder; + flat_mutation_reader* _underlying; +public: + // when 
passed an lvalue reference to the reader + // we don't own it and the caller is responsible + // for eventually closing the reader. + delegating_reader(flat_mutation_reader& r) + : impl(r.schema(), r.permit()) + , _underlying_holder() + , _underlying(&r) + { } + // when passed an rvalue reference to the reader + // we assume ownership of it and will close it + // in close(). + delegating_reader(flat_mutation_reader&& r) + : impl(r.schema(), r.permit()) + , _underlying_holder(std::move(r)) + , _underlying(&*_underlying_holder) + { } + + virtual future<> fill_buffer() override; + virtual future<> fast_forward_to(position_range pr) override; + virtual future<> next_partition() override; + virtual future<> fast_forward_to(const dht::partition_range& pr) override; + virtual future<> close() noexcept override; +}; diff --git a/readers/delegating_v2.hh b/readers/delegating_v2.hh new file mode 100644 index 0000000000..ebe93cc586 --- /dev/null +++ b/readers/delegating_v2.hh @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2022-present ScyllaDB + */ + +/* + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +#pragma once +#include "readers/flat_mutation_reader_v2.hh" + +class delegating_reader_v2 : public flat_mutation_reader_v2::impl { + flat_mutation_reader_v2_opt _underlying_holder; + flat_mutation_reader_v2* _underlying; +public: + // when passed an lvalue reference to the reader + // we don't own it and the caller is responsible + // for eventually closing the reader. + delegating_reader_v2(flat_mutation_reader_v2& r) + : impl(r.schema(), r.permit()) + , _underlying_holder() + , _underlying(&r) + { } + // when passed an rvalue reference to the reader + // we assume ownership of it and will close it + // in close(). 
+ delegating_reader_v2(flat_mutation_reader_v2&& r) + : impl(r.schema(), r.permit()) + , _underlying_holder(std::move(r)) + , _underlying(&*_underlying_holder) + { } + virtual future<> fill_buffer() override { + if (is_buffer_full()) { + return make_ready_future<>(); + } + return _underlying->fill_buffer().then([this] { + _end_of_stream = _underlying->is_end_of_stream(); + _underlying->move_buffer_content_to(*this); + }); + } + virtual future<> fast_forward_to(position_range pr) override { + _end_of_stream = false; + forward_buffer_to(pr.start()); + return _underlying->fast_forward_to(std::move(pr)); + } + virtual future<> next_partition() override { + clear_buffer_to_next_partition(); + auto maybe_next_partition = make_ready_future<>(); + if (is_buffer_empty()) { + maybe_next_partition = _underlying->next_partition(); + } + return maybe_next_partition.then([this] { + _end_of_stream = _underlying->is_end_of_stream() && _underlying->is_buffer_empty(); + }); + } + virtual future<> fast_forward_to(const dht::partition_range& pr) override { + _end_of_stream = false; + clear_buffer(); + return _underlying->fast_forward_to(pr); + } + virtual future<> close() noexcept override { + return _underlying_holder ? 
_underlying_holder->close() : make_ready_future<>(); + } +}; +flat_mutation_reader_v2 make_delegating_reader_v2(flat_mutation_reader_v2&); + + diff --git a/readers/empty.hh b/readers/empty.hh new file mode 100644 index 0000000000..8e301b569c --- /dev/null +++ b/readers/empty.hh @@ -0,0 +1,16 @@ +/* + * Copyright (C) 2022-present ScyllaDB + */ + +/* + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +#pragma once +#include "schema_fwd.hh" + +class flat_mutation_reader; +class reader_permit; + +flat_mutation_reader make_empty_flat_reader(schema_ptr s, reader_permit permit); + diff --git a/readers/empty_v2.hh b/readers/empty_v2.hh new file mode 100644 index 0000000000..4dd58e64e5 --- /dev/null +++ b/readers/empty_v2.hh @@ -0,0 +1,16 @@ +/* + * Copyright (C) 2022-present ScyllaDB + */ + +/* + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +#pragma once +#include "schema_fwd.hh" + +class flat_mutation_reader_v2; +class reader_permit; + +flat_mutation_reader_v2 make_empty_flat_reader_v2(schema_ptr s, reader_permit permit); + diff --git a/flat_mutation_reader.hh b/readers/flat_mutation_reader.hh similarity index 82% rename from flat_mutation_reader.hh rename to readers/flat_mutation_reader.hh index 9670a6b54b..ee0ecc8bb4 100644 --- a/flat_mutation_reader.hh +++ b/readers/flat_mutation_reader.hh @@ -14,22 +14,11 @@ #include "dht/i_partitioner.hh" #include "mutation_fragment.hh" -#include "tracing/trace_state.hh" #include "mutation.hh" #include "mutation_consumer_concepts.hh" -#include -#include #include "reader_permit.hh" - -#include - -using seastar::future; - -class mutation_source; -class position_in_partition; - -class flat_mutation_reader_v2; +#include "readers/flat_mutation_reader_fwd.hh" /// \brief Represents a stream of mutation fragments. 
/// @@ -405,9 +394,6 @@ private: friend flat_mutation_reader downgrade_to_v1(flat_mutation_reader_v2); friend flat_mutation_reader_v2 upgrade_to_v2(flat_mutation_reader); public: - // Documented in mutation_reader::forwarding. - class partition_range_forwarding_tag; - using partition_range_forwarding = bool_class; flat_mutation_reader(std::unique_ptr impl) noexcept : _impl(std::move(impl)) {} flat_mutation_reader(const flat_mutation_reader&) = delete; @@ -666,7 +652,33 @@ namespace mutation_reader { // from streamed_mutation::forwarding - the former is about skipping to // a different partition range, while the latter is about skipping // inside a large partition. - using forwarding = flat_mutation_reader::partition_range_forwarding; + class partition_range_forwarding_tag; + using forwarding = bool_class; +} + +namespace streamed_mutation { + // Determines whether streamed_mutation is in forwarding mode or not. + // + // In forwarding mode the stream does not return all fragments right away, + // but only those belonging to the current clustering range. Initially + // current range only covers the static row. The stream can be forwarded + // (even before end-of-stream) to a later range with fast_forward_to(). + // Forwarding doesn't change initial restrictions of the stream, it can + // only be used to skip over data. + // + // Monotonicity of positions is preserved by forwarding. That is fragments + // emitted after forwarding will have greater positions than any fragments + // emitted before forwarding. + // + // For any range, all range tombstones relevant for that range which are + // present in the original stream will be emitted. Range tombstones + // emitted before forwarding which overlap with the new range are not + // necessarily re-emitted. + // + // When streamed_mutation is not in forwarding mode, fast_forward_to() + // cannot be used. 
+ class forwarding_tag; + using forwarding = bool_class; } // Consumes mutation fragments until StopCondition is true. @@ -752,102 +764,6 @@ flat_mutation_reader transform(flat_mutation_reader r, T t) { return make_flat_mutation_reader(std::move(r), std::move(t)); } -class delegating_reader : public flat_mutation_reader::impl { - flat_mutation_reader_opt _underlying_holder; - flat_mutation_reader* _underlying; -public: - // when passed a lvalue reference to the reader - // we don't own it and the caller is responsible - // for evenetually closing the reader. - delegating_reader(flat_mutation_reader& r) - : impl(r.schema(), r.permit()) - , _underlying_holder() - , _underlying(&r) - { } - // when passed a rvalue reference to the reader - // we assume ownership of it and will close it - // in close(). - delegating_reader(flat_mutation_reader&& r) - : impl(r.schema(), r.permit()) - , _underlying_holder(std::move(r)) - , _underlying(&*_underlying_holder) - { } - virtual future<> fill_buffer() override { - if (is_buffer_full()) { - return make_ready_future<>(); - } - return _underlying->fill_buffer().then([this] { - _end_of_stream = _underlying->is_end_of_stream(); - _underlying->move_buffer_content_to(*this); - }); - } - virtual future<> fast_forward_to(position_range pr) override { - _end_of_stream = false; - forward_buffer_to(pr.start()); - return _underlying->fast_forward_to(std::move(pr)); - } - virtual future<> next_partition() override { - clear_buffer_to_next_partition(); - auto maybe_next_partition = make_ready_future<>(); - if (is_buffer_empty()) { - maybe_next_partition = _underlying->next_partition(); - } - return maybe_next_partition.then([this] { - _end_of_stream = _underlying->is_end_of_stream() && _underlying->is_buffer_empty(); - }); - } - virtual future<> fast_forward_to(const dht::partition_range& pr) override { - _end_of_stream = false; - clear_buffer(); - return _underlying->fast_forward_to(pr); - } - virtual future<> close() noexcept override { - 
return _underlying_holder ? _underlying_holder->close() : make_ready_future<>(); - } -}; -flat_mutation_reader make_delegating_reader(flat_mutation_reader&); - -flat_mutation_reader make_forwardable(flat_mutation_reader m); - -flat_mutation_reader make_nonforwardable(flat_mutation_reader, bool); - -flat_mutation_reader make_empty_flat_reader(schema_ptr s, reader_permit permit); - -// All mutations should have the same schema. -flat_mutation_reader make_flat_mutation_reader_from_mutations(schema_ptr schema, reader_permit permit, std::vector, - const dht::partition_range& pr = query::full_partition_range, streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no); - -// All mutations should have the same schema. -inline flat_mutation_reader make_flat_mutation_reader_from_mutations(schema_ptr schema, reader_permit permit, std::vector ms, streamed_mutation::forwarding fwd) { - return make_flat_mutation_reader_from_mutations(std::move(schema), std::move(permit), std::move(ms), query::full_partition_range, fwd); -} - -// All mutations should have the same schema. -flat_mutation_reader -make_flat_mutation_reader_from_mutations(schema_ptr schema, - reader_permit permit, - std::vector ms, - const query::partition_slice& slice, - streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no); - -// All mutations should have the same schema. 
-flat_mutation_reader -make_flat_mutation_reader_from_mutations(schema_ptr schema, - reader_permit permit, - std::vector ms, - const dht::partition_range& pr, - const query::partition_slice& slice, - streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no); - -flat_mutation_reader -make_flat_mutation_reader_from_fragments(schema_ptr, reader_permit, std::deque); - -flat_mutation_reader -make_flat_mutation_reader_from_fragments(schema_ptr, reader_permit, std::deque, const dht::partition_range& pr); - -flat_mutation_reader -make_flat_mutation_reader_from_fragments(schema_ptr, reader_permit, std::deque, const dht::partition_range& pr, const query::partition_slice& slice); - // Calls the consumer for each element of the reader's stream until end of stream // is reached or the consumer requests iteration to stop by returning stop_iteration::yes. // The consumer should accept mutation as the argument and return stop_iteration. @@ -868,40 +784,6 @@ future<> consume_partitions(flat_mutation_reader& reader, Consumer consumer) { }); } -flat_mutation_reader -make_generating_reader(schema_ptr s, reader_permit permit, std::function ()> get_next_fragment); - -/// A reader that emits partitions in native reverse order. -/// -/// 1. The reader's schema() method will return a reversed schema (see -/// \ref schema::make_reversed()). -/// 2. Static row is still emitted first. -/// 3. Range tombstones' bounds are reversed (see \ref range_tombstone::reverse()). -/// 4. Clustered rows and range tombstones are emitted in descending order. -/// Because of 3 and 4 the guarantee that a range tombstone is emitted before -/// any mutation fragment affected by it still holds. -/// Ordering of partitions themselves remains unchanged. -/// For more details see docs/design-notes/reverse-reads.md. -/// -/// The reader's schema (returned by `schema()`) is the reverse of `original`'s schema. -/// -/// \param original the reader to be reversed. 
-/// \param max_size the maximum amount of memory the reader is allowed to use -/// for reversing and conversely the maximum size of the results. The -/// reverse reader reads entire partitions into memory, before reversing -/// them. Since partitions can be larger than the available memory, we need -/// to enforce a limit on memory consumption. When reaching the soft limit -/// a warning will be logged. When reaching the hard limit the read will be -/// aborted. -/// \param slice serves as a convenience slice storage for reads that have to -/// store an edited slice somewhere. This is common for reads that work -/// with a native-reversed slice and so have to convert the one used in the -/// query -- which is in half-reversed format. -/// -/// FIXME: reversing should be done in the sstable layer, see #1413. -flat_mutation_reader -make_reversing_reader(flat_mutation_reader original, query::max_result_size max_size, std::unique_ptr slice = {}); - /// A cosumer function that is passed a flat_mutation_reader to be consumed from /// and returns a future<> resolved when the reader is fully consumed, and closed. /// Note: the function assumes ownership of the reader and must close it in all cases. 
diff --git a/readers/flat_mutation_reader_fwd.hh b/readers/flat_mutation_reader_fwd.hh new file mode 100644 index 0000000000..709c7679ab --- /dev/null +++ b/readers/flat_mutation_reader_fwd.hh @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2022-present ScyllaDB + */ + +/* + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +#pragma once +#include + +using namespace seastar; + +class mutation_source; +class position_in_partition; +class flat_mutation_reader_v2; + +namespace streamed_mutation { + class forwarding_tag; + using forwarding = bool_class; +} + +namespace mutation_reader { + class partition_range_forwarding_tag; + using forwarding = bool_class; +} diff --git a/flat_mutation_reader_v2.hh b/readers/flat_mutation_reader_v2.hh similarity index 82% rename from flat_mutation_reader_v2.hh rename to readers/flat_mutation_reader_v2.hh index 138ad5f5d2..7c4f588c0d 100644 --- a/flat_mutation_reader_v2.hh +++ b/readers/flat_mutation_reader_v2.hh @@ -10,23 +10,16 @@ #include #include +#include #include "dht/i_partitioner.hh" -#include "position_in_partition.hh" -#include "flat_mutation_reader.hh" #include "mutation_fragment_v2.hh" -#include "tracing/trace_state.hh" #include "mutation.hh" -#include "query_class_config.hh" #include "mutation_consumer_concepts.hh" - -#include -#include #include "reader_permit.hh" -#include - using seastar::future; +class flat_mutation_reader; /// \brief Represents a stream of mutation fragments. /// @@ -413,10 +406,6 @@ private: friend flat_mutation_reader downgrade_to_v1(flat_mutation_reader_v2); friend flat_mutation_reader_v2 upgrade_to_v2(flat_mutation_reader); public: - // Documented in mutation_reader::forwarding. 
- class partition_range_forwarding_tag; - using partition_range_forwarding = bool_class; - flat_mutation_reader_v2(std::unique_ptr impl) noexcept : _impl(std::move(impl)) {} flat_mutation_reader_v2(const flat_mutation_reader_v2&) = delete; flat_mutation_reader_v2(flat_mutation_reader_v2&&) = default; @@ -748,139 +737,10 @@ flat_mutation_reader_v2 transform(flat_mutation_reader_v2 r, T t) { return make_flat_mutation_reader_v2(std::move(r), std::move(t)); } -class delegating_reader_v2 : public flat_mutation_reader_v2::impl { - flat_mutation_reader_v2_opt _underlying_holder; - flat_mutation_reader_v2* _underlying; -public: - // when passed a lvalue reference to the reader - // we don't own it and the caller is responsible - // for evenetually closing the reader. - delegating_reader_v2(flat_mutation_reader_v2& r) - : impl(r.schema(), r.permit()) - , _underlying_holder() - , _underlying(&r) - { } - // when passed a rvalue reference to the reader - // we assume ownership of it and will close it - // in close(). 
- delegating_reader_v2(flat_mutation_reader_v2&& r) - : impl(r.schema(), r.permit()) - , _underlying_holder(std::move(r)) - , _underlying(&*_underlying_holder) - { } - virtual future<> fill_buffer() override { - if (is_buffer_full()) { - return make_ready_future<>(); - } - return _underlying->fill_buffer().then([this] { - _end_of_stream = _underlying->is_end_of_stream(); - _underlying->move_buffer_content_to(*this); - }); - } - virtual future<> fast_forward_to(position_range pr) override { - _end_of_stream = false; - forward_buffer_to(pr.start()); - return _underlying->fast_forward_to(std::move(pr)); - } - virtual future<> next_partition() override { - clear_buffer_to_next_partition(); - auto maybe_next_partition = make_ready_future<>(); - if (is_buffer_empty()) { - maybe_next_partition = _underlying->next_partition(); - } - return maybe_next_partition.then([this] { - _end_of_stream = _underlying->is_end_of_stream() && _underlying->is_buffer_empty(); - }); - } - virtual future<> fast_forward_to(const dht::partition_range& pr) override { - _end_of_stream = false; - clear_buffer(); - return _underlying->fast_forward_to(pr); - } - virtual future<> close() noexcept override { - return _underlying_holder ? _underlying_holder->close() : make_ready_future<>(); - } -}; -flat_mutation_reader_v2 make_delegating_reader_v2(flat_mutation_reader_v2&); - -// Adapts a v2 reader to v1 reader -flat_mutation_reader downgrade_to_v1(flat_mutation_reader_v2); - -// Adapts a v1 reader to v2 reader -flat_mutation_reader_v2 upgrade_to_v2(flat_mutation_reader); // Reads a single partition from a reader. Returns empty optional if there are no more partitions to be read. future read_mutation_from_flat_mutation_reader(flat_mutation_reader_v2&); -flat_mutation_reader_v2 make_forwardable(flat_mutation_reader_v2 m); - -flat_mutation_reader_v2 make_empty_flat_reader_v2(schema_ptr s, reader_permit permit); - -// All mutations should have the same schema. 
-flat_mutation_reader_v2 make_flat_mutation_reader_from_mutations_v2(schema_ptr schema, reader_permit permit, std::vector, - const dht::partition_range& pr = query::full_partition_range, streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no); - -// All mutations should have the same schema. -inline flat_mutation_reader_v2 make_flat_mutation_reader_from_mutations_v2(schema_ptr schema, reader_permit permit, std::vector ms, streamed_mutation::forwarding fwd) { - return make_flat_mutation_reader_from_mutations_v2(std::move(schema), std::move(permit), std::move(ms), query::full_partition_range, fwd); -} - -// All mutations should have the same schema. -flat_mutation_reader_v2 -make_flat_mutation_reader_from_mutations_v2(schema_ptr schema, - reader_permit permit, - std::vector ms, - const query::partition_slice& slice, - streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no); - -// All mutations should have the same schema. -flat_mutation_reader_v2 -make_flat_mutation_reader_from_mutations_v2(schema_ptr schema, - reader_permit permit, - std::vector ms, - const dht::partition_range& pr, - const query::partition_slice& slice, - streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no); - -/// Make a reader that enables the wrapped reader to work with multiple ranges. -/// -/// \param ranges An range vector that has to contain strictly monotonic -/// partition ranges, such that successively calling -/// `flat_mutation_reader::fast_forward_to()` with each one is valid. -/// An range vector range with 0 or 1 elements is also valid. -/// \param fwd_mr It is only respected when `ranges` contains 0 or 1 partition -/// ranges. Otherwise the reader is created with -/// mutation_reader::forwarding::yes. 
-flat_mutation_reader_v2 -make_flat_multi_range_reader(schema_ptr s, reader_permit permit, mutation_source source, const dht::partition_range_vector& ranges, - const query::partition_slice& slice, const io_priority_class& pc = default_priority_class(), - tracing::trace_state_ptr trace_state = nullptr, - flat_mutation_reader::partition_range_forwarding fwd_mr = flat_mutation_reader::partition_range_forwarding::yes); - -/// Make a reader that enables the wrapped reader to work with multiple ranges. -/// -/// Generator overload. The ranges returned by the generator have to satisfy the -/// same requirements as the `ranges` param of the vector overload. -flat_mutation_reader_v2 -make_flat_multi_range_reader( - schema_ptr s, - reader_permit permit, - mutation_source source, - std::function()> generator, - const query::partition_slice& slice, - const io_priority_class& pc = default_priority_class(), - tracing::trace_state_ptr trace_state = nullptr, - flat_mutation_reader::partition_range_forwarding fwd_mr = flat_mutation_reader::partition_range_forwarding::yes); - -flat_mutation_reader_v2 -make_flat_mutation_reader_from_fragments(schema_ptr, reader_permit, std::deque); - -flat_mutation_reader_v2 -make_flat_mutation_reader_from_fragments(schema_ptr, reader_permit, std::deque, const dht::partition_range& pr); - -flat_mutation_reader_v2 -make_flat_mutation_reader_from_fragments(schema_ptr, reader_permit, std::deque, const dht::partition_range& pr, const query::partition_slice& slice); - // Calls the consumer for each element of the reader's stream until end of stream // is reached or the consumer requests iteration to stop by returning stop_iteration::yes. // The consumer should accept mutation as the argument and return stop_iteration. 
@@ -901,9 +761,6 @@ future<> consume_partitions(flat_mutation_reader_v2& reader, Consumer consumer) }); } -flat_mutation_reader_v2 -make_generating_reader(schema_ptr s, reader_permit permit, std::function ()> get_next_fragment); - /// A cosumer function that is passed a flat_mutation_reader to be consumed from /// and returns a future<> resolved when the reader is fully consumed, and closed. /// Note: the function assumes ownership of the reader and must close it in all cases. diff --git a/readers/forwardable.hh b/readers/forwardable.hh new file mode 100644 index 0000000000..cd0aa46324 --- /dev/null +++ b/readers/forwardable.hh @@ -0,0 +1,14 @@ +/* + * Copyright (C) 2022-present ScyllaDB + */ + +/* + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +#pragma once + +class flat_mutation_reader; + +flat_mutation_reader make_forwardable(flat_mutation_reader m); + diff --git a/readers/forwardable_v2.hh b/readers/forwardable_v2.hh new file mode 100644 index 0000000000..e2dc76fdd9 --- /dev/null +++ b/readers/forwardable_v2.hh @@ -0,0 +1,14 @@ +/* + * Copyright (C) 2022-present ScyllaDB + */ + +/* + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +#pragma once + +class flat_mutation_reader_v2; + +flat_mutation_reader_v2 make_forwardable(flat_mutation_reader_v2 m); + diff --git a/readers/from_fragments.hh b/readers/from_fragments.hh new file mode 100644 index 0000000000..e86d177aed --- /dev/null +++ b/readers/from_fragments.hh @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2022-present ScyllaDB + * + * Modified by ScyllaDB + */ + +/* + * SPDX-License-Identifier: (AGPL-3.0-or-later and Apache-2.0) + */ + +#pragma once +#include "schema_fwd.hh" +#include +#include "dht/i_partitioner_fwd.hh" + +class flat_mutation_reader; +class reader_permit; +class mutation_fragment; +class ring_position; + +namespace query { + class partition_slice; +} + +flat_mutation_reader +make_flat_mutation_reader_from_fragments(schema_ptr, reader_permit, std::deque); + +flat_mutation_reader 
+make_flat_mutation_reader_from_fragments(schema_ptr, reader_permit, std::deque, const dht::partition_range& pr); + +flat_mutation_reader +make_flat_mutation_reader_from_fragments(schema_ptr, reader_permit, std::deque, const dht::partition_range& pr, const query::partition_slice& slice); + diff --git a/readers/from_fragments_v2.hh b/readers/from_fragments_v2.hh new file mode 100644 index 0000000000..d57819f10b --- /dev/null +++ b/readers/from_fragments_v2.hh @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2022-present ScyllaDB + */ + +/* + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +#pragma once +#include "schema_fwd.hh" +#include +#include "dht/i_partitioner_fwd.hh" + +class flat_mutation_reader_v2; +class reader_permit; +class mutation_fragment_v2; +class ring_position; + +namespace query { + class partition_slice; +} + +flat_mutation_reader_v2 +make_flat_mutation_reader_from_fragments(schema_ptr, reader_permit, std::deque); + +flat_mutation_reader_v2 +make_flat_mutation_reader_from_fragments(schema_ptr, reader_permit, std::deque, const dht::partition_range& pr); + +flat_mutation_reader_v2 +make_flat_mutation_reader_from_fragments(schema_ptr, reader_permit, std::deque, const dht::partition_range& pr, const query::partition_slice& slice); + diff --git a/readers/from_mutations.hh b/readers/from_mutations.hh new file mode 100644 index 0000000000..f564588340 --- /dev/null +++ b/readers/from_mutations.hh @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2022-present ScyllaDB + */ + +/* + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +#pragma once +#include "schema_fwd.hh" +#include "dht/i_partitioner_fwd.hh" +#include +#include "mutation_fragment_fwd.hh" + +class flat_mutation_reader; +class reader_permit; +class mutation; + +namespace query { + class partition_slice; + extern const dht::partition_range full_partition_range; +} + +// All mutations should have the same schema. 
+flat_mutation_reader make_flat_mutation_reader_from_mutations( + schema_ptr schema, + reader_permit permit, + std::vector, + const dht::partition_range& pr = query::full_partition_range, + streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no); + +// All mutations should have the same schema. +flat_mutation_reader make_flat_mutation_reader_from_mutations(schema_ptr schema, reader_permit permit, std::vector ms, streamed_mutation::forwarding fwd); + +// All mutations should have the same schema. +flat_mutation_reader +make_flat_mutation_reader_from_mutations(schema_ptr schema, + reader_permit permit, + std::vector ms, + const query::partition_slice& slice, + streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no); + +// All mutations should have the same schema. +flat_mutation_reader +make_flat_mutation_reader_from_mutations(schema_ptr schema, + reader_permit permit, + std::vector ms, + const dht::partition_range& pr, + const query::partition_slice& slice, + streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no); + diff --git a/readers/from_mutations_v2.hh b/readers/from_mutations_v2.hh new file mode 100644 index 0000000000..3b471d0e70 --- /dev/null +++ b/readers/from_mutations_v2.hh @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2022-present ScyllaDB + */ + +/* + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +#pragma once +#include "schema_fwd.hh" +#include +#include "dht/i_partitioner_fwd.hh" +#include "mutation_fragment_fwd.hh" + +class flat_mutation_reader_v2; +class reader_permit; +class mutation; + +namespace query { + class partition_slice; + extern const dht::partition_range full_partition_range; +} + +// All mutations should have the same schema. 
+flat_mutation_reader_v2 make_flat_mutation_reader_from_mutations_v2( + schema_ptr schema, + reader_permit permit, + std::vector, + const dht::partition_range& pr = query::full_partition_range, + streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no); + +// All mutations should have the same schema. +flat_mutation_reader_v2 make_flat_mutation_reader_from_mutations_v2( + schema_ptr schema, + reader_permit permit, + std::vector ms, + streamed_mutation::forwarding fwd); + +// All mutations should have the same schema. +flat_mutation_reader_v2 +make_flat_mutation_reader_from_mutations_v2( + schema_ptr schema, + reader_permit permit, + std::vector ms, + const query::partition_slice& slice, + streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no); + +// All mutations should have the same schema. +flat_mutation_reader_v2 +make_flat_mutation_reader_from_mutations_v2( + schema_ptr schema, + reader_permit permit, + std::vector ms, + const dht::partition_range& pr, + const query::partition_slice& slice, + streamed_mutation::forwarding fwd = streamed_mutation::forwarding::no); + diff --git a/readers/generating.hh b/readers/generating.hh new file mode 100644 index 0000000000..b7a30b7fa1 --- /dev/null +++ b/readers/generating.hh @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2022-present ScyllaDB + */ + +/* + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +#pragma once +#include "schema_fwd.hh" +#include +#include +#include + +using namespace seastar; + +class flat_mutation_reader; +class reader_permit; +class mutation_fragment; + +using mutation_fragment_opt = optimized_optional; + +flat_mutation_reader +make_generating_reader(schema_ptr s, reader_permit permit, std::function ()> get_next_fragment); + diff --git a/readers/generating_v2.hh b/readers/generating_v2.hh new file mode 100644 index 0000000000..53788ce1e9 --- /dev/null +++ b/readers/generating_v2.hh @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2022-present ScyllaDB + */ + +/* + * 
SPDX-License-Identifier: AGPL-3.0-or-later + */ + +#pragma once +#include "schema_fwd.hh" +#include +#include +#include "mutation_fragment_fwd.hh" + +using namespace seastar; + +class flat_mutation_reader_v2; +class reader_permit; + +flat_mutation_reader_v2 +make_generating_reader(schema_ptr s, reader_permit permit, std::function ()> get_next_fragment); diff --git a/readers/multi_range.hh b/readers/multi_range.hh new file mode 100644 index 0000000000..b60fbeb535 --- /dev/null +++ b/readers/multi_range.hh @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2022-present ScyllaDB + */ + +/* + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +#pragma once +#include "schema_fwd.hh" +#include "dht/i_partitioner_fwd.hh" +#include +#include +#include +#include "readers/flat_mutation_reader_fwd.hh" +#include "tracing/trace_state.hh" + +using namespace seastar; + +class flat_mutation_reader_v2; +class reader_permit; +class mutation_source; + +namespace query { + class partition_slice; +} + + +// Make a reader that enables the wrapped reader to work with multiple ranges. +/// +/// \param ranges An range vector that has to contain strictly monotonic +/// partition ranges, such that successively calling +/// `flat_mutation_reader::fast_forward_to()` with each one is valid. +/// An range vector range with 0 or 1 elements is also valid. +/// \param fwd_mr It is only respected when `ranges` contains 0 or 1 partition +/// ranges. Otherwise the reader is created with +/// mutation_reader::forwarding::yes. +flat_mutation_reader_v2 +make_flat_multi_range_reader( + schema_ptr s, reader_permit permit, mutation_source source, const dht::partition_range_vector& ranges, + const query::partition_slice& slice, const io_priority_class& pc = default_priority_class(), + tracing::trace_state_ptr trace_state = nullptr, + mutation_reader::forwarding fwd_mr = mutation_reader::forwarding::yes); + +/// Make a reader that enables the wrapped reader to work with multiple ranges. +/// +/// Generator overload. 
The ranges returned by the generator have to satisfy the +/// same requirements as the `ranges` param of the vector overload. +flat_mutation_reader_v2 +make_flat_multi_range_reader( + schema_ptr s, + reader_permit permit, + mutation_source source, + std::function()> generator, + const query::partition_slice& slice, + const io_priority_class& pc = default_priority_class(), + tracing::trace_state_ptr trace_state = nullptr, + mutation_reader::forwarding fwd_mr = mutation_reader::forwarding::yes); diff --git a/readers/mutation_reader.cc b/readers/mutation_reader.cc new file mode 100644 index 0000000000..68ab838d00 --- /dev/null +++ b/readers/mutation_reader.cc @@ -0,0 +1,403 @@ +/* + * Copyright (C) 2022-present ScyllaDB + */ + +/* + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +#include "readers/flat_mutation_reader.hh" +#include "readers/flat_mutation_reader_v2.hh" +#include "mutation_rebuilder.hh" +#include "mutation_fragment_stream_validator.hh" +#include "schema_upgrader.hh" + +logging::logger fmr_logger("flat_mutation_reader"); + +flat_mutation_reader& flat_mutation_reader::operator=(flat_mutation_reader&& o) noexcept { + if (_impl && _impl->is_close_required()) { + impl* ip = _impl.get(); + // Abort to enforce calling close() before readers are closed + // to prevent leaks and potential use-after-free due to background + // tasks left behind. + on_internal_error_noexcept(fmr_logger, format("{} [{}]: permit {}: was not closed before overwritten by move-assign", typeid(*ip).name(), fmt::ptr(ip), ip->_permit.description())); + abort(); + } + _impl = std::move(o._impl); + return *this; +} + +flat_mutation_reader::~flat_mutation_reader() { + if (_impl && _impl->is_close_required()) { + impl* ip = _impl.get(); + // Abort to enforce calling close() before readers are closed + // to prevent leaks and potential use-after-free due to background + // tasks left behind. 
+ on_internal_error_noexcept(fmr_logger, format("{} [{}]: permit {}: was not closed before destruction", typeid(*ip).name(), fmt::ptr(ip), ip->_permit.description())); + abort(); + } +} + +static size_t compute_buffer_size(const schema& s, const flat_mutation_reader::tracked_buffer& buffer) +{ + return boost::accumulate( + buffer + | boost::adaptors::transformed([&s] (const mutation_fragment& mf) { + return mf.memory_usage(); + }), size_t(0) + ); +} + +void flat_mutation_reader::impl::forward_buffer_to(const position_in_partition& pos) { + _buffer.erase(std::remove_if(_buffer.begin(), _buffer.end(), [this, &pos] (mutation_fragment& f) { + return !f.relevant_for_range_assuming_after(*_schema, pos); + }), _buffer.end()); + + _buffer_size = compute_buffer_size(*_schema, _buffer); +} + +void flat_mutation_reader::impl::clear_buffer_to_next_partition() { + auto next_partition_start = std::find_if(_buffer.begin(), _buffer.end(), [] (const mutation_fragment& mf) { + return mf.is_partition_start(); + }); + _buffer.erase(_buffer.begin(), next_partition_start); + + _buffer_size = compute_buffer_size(*_schema, _buffer); +} + + +template +future flat_mutation_reader::impl::fill_buffer_from(Source& source) { + if (source.is_buffer_empty()) { + if (source.is_end_of_stream()) { + return make_ready_future(true); + } + return source.fill_buffer().then([this, &source] { + return fill_buffer_from(source); + }); + } else { + while (!source.is_buffer_empty() && !is_buffer_full()) { + push_mutation_fragment(source.pop_mutation_fragment()); + } + return make_ready_future(source.is_end_of_stream() && source.is_buffer_empty()); + } +} + +template future flat_mutation_reader::impl::fill_buffer_from(flat_mutation_reader&); + +void flat_mutation_reader::do_upgrade_schema(const schema_ptr& s) { + *this = transform(std::move(*this), schema_upgrader(s)); +} + +void flat_mutation_reader::on_close_error(std::unique_ptr i, std::exception_ptr ep) noexcept { + impl* ip = i.get(); + 
on_internal_error_noexcept(fmr_logger, + format("Failed to close {} [{}]: permit {}: {}", typeid(*ip).name(), fmt::ptr(ip), ip->_permit.description(), ep)); +} + +invalid_mutation_fragment_stream::invalid_mutation_fragment_stream(std::runtime_error e) : std::runtime_error(std::move(e)) { +} + +static mutation_fragment_v2::kind to_mutation_fragment_kind_v2(mutation_fragment::kind k) { + switch (k) { + case mutation_fragment::kind::partition_start: + return mutation_fragment_v2::kind::partition_start; + case mutation_fragment::kind::static_row: + return mutation_fragment_v2::kind::static_row; + case mutation_fragment::kind::clustering_row: + return mutation_fragment_v2::kind::clustering_row; + case mutation_fragment::kind::range_tombstone: + return mutation_fragment_v2::kind::range_tombstone_change; + case mutation_fragment::kind::partition_end: + return mutation_fragment_v2::kind::partition_end; + } +} + +mutation_fragment_stream_validator::mutation_fragment_stream_validator(const ::schema& s) + : _schema(s) + , _prev_kind(mutation_fragment_v2::kind::partition_end) + , _prev_pos(position_in_partition::end_of_partition_tag_t{}) + , _prev_partition_key(dht::minimum_token(), partition_key::make_empty()) { +} + +bool mutation_fragment_stream_validator::operator()(const dht::decorated_key& dk) { + if (_prev_partition_key.less_compare(_schema, dk)) { + _prev_partition_key = dk; + return true; + } + return false; +} + +bool mutation_fragment_stream_validator::operator()(dht::token t) { + if (_prev_partition_key.token() <= t) { + _prev_partition_key._token = t; + return true; + } + return false; +} + +bool mutation_fragment_stream_validator::operator()(mutation_fragment_v2::kind kind, position_in_partition_view pos) { + if (_prev_kind == mutation_fragment_v2::kind::partition_end) { + const bool valid = (kind == mutation_fragment_v2::kind::partition_start); + if (valid) { + _prev_kind = mutation_fragment_v2::kind::partition_start; + _prev_pos = pos; + } + return valid; + } + 
auto cmp = position_in_partition::tri_compare(_schema); + auto res = cmp(_prev_pos, pos); + bool valid = true; + if (_prev_kind == mutation_fragment_v2::kind::range_tombstone_change) { + valid = res <= 0; + } else { + valid = res < 0; + } + if (valid) { + _prev_kind = kind; + _prev_pos = pos; + } + return valid; +} +bool mutation_fragment_stream_validator::operator()(mutation_fragment::kind kind, position_in_partition_view pos) { + return (*this)(to_mutation_fragment_kind_v2(kind), pos); +} + +bool mutation_fragment_stream_validator::operator()(const mutation_fragment_v2& mf) { + return (*this)(mf.mutation_fragment_kind(), mf.position()); +} +bool mutation_fragment_stream_validator::operator()(const mutation_fragment& mf) { + return (*this)(to_mutation_fragment_kind_v2(mf.mutation_fragment_kind()), mf.position()); +} + +bool mutation_fragment_stream_validator::operator()(mutation_fragment_v2::kind kind) { + bool valid = true; + switch (_prev_kind) { + case mutation_fragment_v2::kind::partition_start: + valid = kind != mutation_fragment_v2::kind::partition_start; + break; + case mutation_fragment_v2::kind::static_row: // fall-through + case mutation_fragment_v2::kind::clustering_row: // fall-through + case mutation_fragment_v2::kind::range_tombstone_change: + valid = kind != mutation_fragment_v2::kind::partition_start && + kind != mutation_fragment_v2::kind::static_row; + break; + case mutation_fragment_v2::kind::partition_end: + valid = kind == mutation_fragment_v2::kind::partition_start; + break; + } + if (valid) { + _prev_kind = kind; + } + return valid; +} +bool mutation_fragment_stream_validator::operator()(mutation_fragment::kind kind) { + return (*this)(to_mutation_fragment_kind_v2(kind)); +} + +bool mutation_fragment_stream_validator::on_end_of_stream() { + return _prev_kind == mutation_fragment_v2::kind::partition_end; +} + +void mutation_fragment_stream_validator::reset(dht::decorated_key dk) { + _prev_partition_key = dk; + _prev_pos = 
position_in_partition::for_partition_start(); + _prev_kind = mutation_fragment_v2::kind::partition_start; +} + +void mutation_fragment_stream_validator::reset(const mutation_fragment_v2& mf) { + _prev_pos = mf.position(); + _prev_kind = mf.mutation_fragment_kind(); +} +void mutation_fragment_stream_validator::reset(const mutation_fragment& mf) { + _prev_pos = mf.position(); + _prev_kind = to_mutation_fragment_kind_v2(mf.mutation_fragment_kind()); +} + +namespace { + +[[noreturn]] void on_validation_error(seastar::logger& l, const seastar::sstring& reason) { + try { + on_internal_error(l, reason); + } catch (std::runtime_error& e) { + throw invalid_mutation_fragment_stream(e); + } +} + +} + +bool mutation_fragment_stream_validating_filter::operator()(const dht::decorated_key& dk) { + if (_validation_level < mutation_fragment_stream_validation_level::token) { + return true; + } + if (_validation_level == mutation_fragment_stream_validation_level::token) { + if (_validator(dk.token())) { + return true; + } + on_validation_error(fmr_logger, format("[validator {} for {}] Unexpected token: previous {}, current {}", + static_cast(this), _name, _validator.previous_token(), dk.token())); + } else { + if (_validator(dk)) { + return true; + } + on_validation_error(fmr_logger, format("[validator {} for {}] Unexpected partition key: previous {}, current {}", + static_cast(this), _name, _validator.previous_partition_key(), dk)); + } +} + +mutation_fragment_stream_validating_filter::mutation_fragment_stream_validating_filter(sstring_view name, const schema& s, + mutation_fragment_stream_validation_level level) + : _validator(s) + , _name(format("{} ({}.{} {})", name, s.ks_name(), s.cf_name(), s.id())) + , _validation_level(level) +{ + if (fmr_logger.level() <= log_level::debug) { + std::string_view what; + switch (_validation_level) { + case mutation_fragment_stream_validation_level::partition_region: + what = "partition region"; + break; + case 
mutation_fragment_stream_validation_level::token: + what = "partition region and token"; + break; + case mutation_fragment_stream_validation_level::partition_key: + what = "partition region and partition key"; + break; + case mutation_fragment_stream_validation_level::clustering_key: + what = "partition region, partition key and clustering key"; + break; + } + fmr_logger.debug("[validator {} for {}] Will validate {} monotonicity.", static_cast(this), _name, what); + } +} + +bool mutation_fragment_stream_validating_filter::operator()(mutation_fragment_v2::kind kind, position_in_partition_view pos) { + bool valid = false; + + fmr_logger.debug("[validator {}] {}:{}", static_cast(this), kind, pos); + + if (_validation_level >= mutation_fragment_stream_validation_level::clustering_key) { + valid = _validator(kind, pos); + } else { + valid = _validator(kind); + } + + if (__builtin_expect(!valid, false)) { + if (_validation_level >= mutation_fragment_stream_validation_level::clustering_key) { + on_validation_error(fmr_logger, format("[validator {} for {}] Unexpected mutation fragment: partition key {}: previous {}:{}, current {}:{}", + static_cast(this), _name, _validator.previous_partition_key(), _validator.previous_mutation_fragment_kind(), _validator.previous_position(), kind, pos)); + } else if (_validation_level >= mutation_fragment_stream_validation_level::partition_key) { + on_validation_error(fmr_logger, format("[validator {} for {}] Unexpected mutation fragment: partition key {}: previous {}, current {}", + static_cast(this), _name, _validator.previous_partition_key(), _validator.previous_mutation_fragment_kind(), kind)); + } else { + on_validation_error(fmr_logger, format("[validator {} for {}] Unexpected mutation fragment: previous {}, current {}", + static_cast(this), _name, _validator.previous_mutation_fragment_kind(), kind)); + } + } + + return true; +} + +bool mutation_fragment_stream_validating_filter::operator()(mutation_fragment::kind kind, 
position_in_partition_view pos) { + return (*this)(to_mutation_fragment_kind_v2(kind), pos); +} + +bool mutation_fragment_stream_validating_filter::operator()(const mutation_fragment_v2& mv) { + return (*this)(mv.mutation_fragment_kind(), mv.position()); +} +bool mutation_fragment_stream_validating_filter::operator()(const mutation_fragment& mv) { + return (*this)(to_mutation_fragment_kind_v2(mv.mutation_fragment_kind()), mv.position()); +} + +bool mutation_fragment_stream_validating_filter::on_end_of_partition() { + return (*this)(mutation_fragment::kind::partition_end, position_in_partition_view(position_in_partition_view::end_of_partition_tag_t())); +} + +void mutation_fragment_stream_validating_filter::on_end_of_stream() { + fmr_logger.debug("[validator {}] EOS", static_cast(this)); + if (!_validator.on_end_of_stream()) { + on_validation_error(fmr_logger, format("[validator {} for {}] Stream ended with unclosed partition: {}", static_cast(this), _name, + _validator.previous_mutation_fragment_kind())); + } +} + +static size_t compute_buffer_size(const schema& s, const flat_mutation_reader_v2::tracked_buffer& buffer) +{ + return boost::accumulate( + buffer + | boost::adaptors::transformed([&s] (const mutation_fragment_v2& mf) { + return mf.memory_usage(); + }), size_t(0) + ); +} + +flat_mutation_reader_v2& flat_mutation_reader_v2::operator=(flat_mutation_reader_v2&& o) noexcept { + if (_impl && _impl->is_close_required()) { + impl* ip = _impl.get(); + // Abort to enforce calling close() before readers are closed + // to prevent leaks and potential use-after-free due to background + // tasks left behind. 
+ on_internal_error_noexcept(fmr_logger, format("{} [{}]: permit {}: was not closed before overwritten by move-assign", typeid(*ip).name(), fmt::ptr(ip), ip->_permit.description())); + abort(); + } + _impl = std::move(o._impl); + return *this; +} + +flat_mutation_reader_v2::~flat_mutation_reader_v2() { + if (_impl && _impl->is_close_required()) { + impl* ip = _impl.get(); + // Abort to enforce calling close() before readers are closed + // to prevent leaks and potential use-after-free due to background + // tasks left behind. + on_internal_error_noexcept(fmr_logger, format("{} [{}]: permit {}: was not closed before destruction", typeid(*ip).name(), fmt::ptr(ip), ip->_permit.description())); + abort(); + } +} + +void flat_mutation_reader_v2::impl::forward_buffer_to(const position_in_partition& pos) { + clear_buffer(); + _buffer_size = compute_buffer_size(*_schema, _buffer); +} + +void flat_mutation_reader_v2::impl::clear_buffer_to_next_partition() { + auto next_partition_start = std::find_if(_buffer.begin(), _buffer.end(), [] (const mutation_fragment_v2& mf) { + return mf.is_partition_start(); + }); + _buffer.erase(_buffer.begin(), next_partition_start); + + _buffer_size = compute_buffer_size(*_schema, _buffer); +} + +template +future flat_mutation_reader_v2::impl::fill_buffer_from(Source& source) { + if (source.is_buffer_empty()) { + if (source.is_end_of_stream()) { + return make_ready_future(true); + } + return source.fill_buffer().then([this, &source] { + return fill_buffer_from(source); + }); + } else { + while (!source.is_buffer_empty() && !is_buffer_full()) { + push_mutation_fragment(source.pop_mutation_fragment()); + } + return make_ready_future(source.is_end_of_stream() && source.is_buffer_empty()); + } +} + +template future flat_mutation_reader_v2::impl::fill_buffer_from(flat_mutation_reader_v2&); + +void flat_mutation_reader_v2::do_upgrade_schema(const schema_ptr& s) { + *this = transform(std::move(*this), schema_upgrader_v2(s)); +} + +void 
flat_mutation_reader_v2::on_close_error(std::unique_ptr i, std::exception_ptr ep) noexcept { + impl* ip = i.get(); + on_internal_error_noexcept(fmr_logger, + format("Failed to close {} [{}]: permit {}: {}", typeid(*ip).name(), fmt::ptr(ip), ip->_permit.description(), ep)); +} + +future read_mutation_from_flat_mutation_reader(flat_mutation_reader_v2& r) { + return r.consume(mutation_rebuilder_v2(r.schema())); +} diff --git a/flat_mutation_reader.cc b/readers/mutation_readers.cc similarity index 80% rename from flat_mutation_reader.cc rename to readers/mutation_readers.cc index 606ad12ca3..bd389fdb51 100644 --- a/flat_mutation_reader.cc +++ b/readers/mutation_readers.cc @@ -6,258 +6,110 @@ * SPDX-License-Identifier: AGPL-3.0-or-later */ -#include "flat_mutation_reader.hh" -#include "flat_mutation_reader_v2.hh" -#include "range_tombstone_assembler.hh" -#include "range_tombstone_change_generator.hh" -#include "mutation_fragment_stream_validator.hh" -#include "mutation_reader.hh" -#include "seastar/util/reference_wrapper.hh" +#include "clustering_key_filter.hh" #include "clustering_ranges_walker.hh" -#include "schema_upgrader.hh" -#include +#include "dht/i_partitioner.hh" +#include "mutation.hh" +#include "mutation_partition.hh" +#include "mutation_reader.hh" +#include "range_tombstone_assembler.hh" +#include "range_tombstone_splitter.hh" +#include "readers/delegating.hh" +#include "readers/delegating_v2.hh" +#include "readers/empty.hh" +#include "readers/empty_v2.hh" +#include "readers/flat_mutation_reader.hh" +#include "readers/flat_mutation_reader_v2.hh" +#include "readers/forwardable.hh" +#include "readers/forwardable_v2.hh" +#include "readers/from_fragments.hh" +#include "readers/from_fragments_v2.hh" +#include "readers/from_mutations.hh" +#include "readers/from_mutations_v2.hh" +#include "readers/generating.hh" +#include "readers/generating_v2.hh" +#include "readers/multi_range.hh" +#include "readers/nonforwardable.hh" +#include "readers/reversing.hh" +#include 
"readers/slice_mutations.hh" +#include #include -#include -#include -#include "utils/exceptions.hh" -#include "mutation_rebuilder.hh" -#include "range_tombstone_splitter.hh" -#include -#include -#include - -#include "clustering_key_filter.hh" - -logging::logger fmr_logger("flat_mutation_reader"); - -flat_mutation_reader& flat_mutation_reader::operator=(flat_mutation_reader&& o) noexcept { - if (_impl && _impl->is_close_required()) { - impl* ip = _impl.get(); - // Abort to enforce calling close() before readers are closed - // to prevent leaks and potential use-after-free due to background - // tasks left behind. - on_internal_error_noexcept(fmr_logger, format("{} [{}]: permit {}: was not closed before overwritten by move-assign", typeid(*ip).name(), fmt::ptr(ip), ip->_permit.description())); - abort(); - } - _impl = std::move(o._impl); - return *this; -} - -flat_mutation_reader::~flat_mutation_reader() { - if (_impl && _impl->is_close_required()) { - impl* ip = _impl.get(); - // Abort to enforce calling close() before readers are closed - // to prevent leaks and potential use-after-free due to background - // tasks left behind. 
- on_internal_error_noexcept(fmr_logger, format("{} [{}]: permit {}: was not closed before destruction", typeid(*ip).name(), fmt::ptr(ip), ip->_permit.description())); - abort(); - } -} - -static size_t compute_buffer_size(const schema& s, const flat_mutation_reader::tracked_buffer& buffer) -{ - return boost::accumulate( - buffer - | boost::adaptors::transformed([&s] (const mutation_fragment& mf) { - return mf.memory_usage(); - }), size_t(0) - ); -} - -void flat_mutation_reader::impl::forward_buffer_to(const position_in_partition& pos) { - _buffer.erase(std::remove_if(_buffer.begin(), _buffer.end(), [this, &pos] (mutation_fragment& f) { - return !f.relevant_for_range_assuming_after(*_schema, pos); - }), _buffer.end()); - - _buffer_size = compute_buffer_size(*_schema, _buffer); -} - -void flat_mutation_reader::impl::clear_buffer_to_next_partition() { - auto next_partition_start = std::find_if(_buffer.begin(), _buffer.end(), [] (const mutation_fragment& mf) { - return mf.is_partition_start(); - }); - _buffer.erase(_buffer.begin(), next_partition_start); - - _buffer_size = compute_buffer_size(*_schema, _buffer); -} - -flat_mutation_reader make_reversing_reader(flat_mutation_reader original, query::max_result_size max_size, std::unique_ptr slice) { - class partition_reversing_mutation_reader final : public flat_mutation_reader::impl { - flat_mutation_reader _source; - range_tombstone_list _range_tombstones; - std::stack _mutation_fragments; - mutation_fragment_opt _partition_end; - size_t _stack_size = 0; - const query::max_result_size _max_size; - bool _below_soft_limit = true; - std::unique_ptr _slice; // only stored, not used - private: - stop_iteration emit_partition() { - auto emit_range_tombstone = [&] { - // _range_tombstones uses the reverse schema already, so we can use `begin()` - auto it = _range_tombstones.begin(); - push_mutation_fragment(*_schema, _permit, _range_tombstones.pop(it)); - }; - position_in_partition::tri_compare cmp(*_schema); - while 
(!_mutation_fragments.empty() && !is_buffer_full()) { - auto& mf = _mutation_fragments.top(); - if (!_range_tombstones.empty() && cmp(_range_tombstones.begin()->position(), mf.position()) <= 0) { - emit_range_tombstone(); - } else { - _stack_size -= mf.memory_usage(); - push_mutation_fragment(std::move(mf)); - _mutation_fragments.pop(); - } - } - while (!_range_tombstones.empty() && !is_buffer_full()) { - emit_range_tombstone(); - } - if (is_buffer_full()) { - return stop_iteration::yes; - } - push_mutation_fragment(std::move(*std::exchange(_partition_end, std::nullopt))); - return stop_iteration::no; - } - future consume_partition_from_source() { - if (_source.is_buffer_empty()) { - if (_source.is_end_of_stream()) { - _end_of_stream = true; - return make_ready_future(stop_iteration::yes); - } - return _source.fill_buffer().then([] { return stop_iteration::no; }); - } - while (!_source.is_buffer_empty() && !is_buffer_full()) { - auto mf = _source.pop_mutation_fragment(); - if (mf.is_partition_start() || mf.is_static_row()) { - push_mutation_fragment(std::move(mf)); - } else if (mf.is_end_of_partition()) { - _partition_end = std::move(mf); - if (emit_partition()) { - return make_ready_future(stop_iteration::yes); - } - } else if (mf.is_range_tombstone()) { - auto&& rt = std::move(mf).as_range_tombstone(); - rt.reverse(); - _range_tombstones.apply(*_schema, std::move(rt)); - } else { - _mutation_fragments.emplace(std::move(mf)); - _stack_size += _mutation_fragments.top().memory_usage(); - if (_stack_size > _max_size.hard_limit || (_stack_size > _max_size.soft_limit && _below_soft_limit)) { - const partition_key* key = nullptr; - auto it = buffer().end(); - --it; - if (it->is_partition_start()) { - key = &it->as_partition_start().key().key(); - } else { - --it; - key = &it->as_partition_start().key().key(); - } - - if (_stack_size > _max_size.hard_limit) { - return make_exception_future(std::runtime_error(fmt::format( - "Memory usage of reversed read exceeds hard 
limit of {} (configured via max_memory_for_unlimited_query_hard_limit), while reading partition {}", - _max_size.hard_limit, - key->with_schema(*_schema)))); - } else { - fmr_logger.warn( - "Memory usage of reversed read exceeds soft limit of {} (configured via max_memory_for_unlimited_query_soft_limit), while reading partition {}", - _max_size.soft_limit, - key->with_schema(*_schema)); - _below_soft_limit = false; - } - } - } - } - return make_ready_future(is_buffer_full()); - } - public: - explicit partition_reversing_mutation_reader(flat_mutation_reader mr, query::max_result_size max_size, std::unique_ptr slice) - : flat_mutation_reader::impl(mr.schema()->make_reversed(), mr.permit()) - , _source(std::move(mr)) - , _range_tombstones(*_schema) - , _max_size(max_size) - , _slice(std::move(slice)) - { } - - virtual future<> fill_buffer() override { - return repeat([&] { - if (_partition_end) { - // We have consumed full partition from source, now it is - // time to emit it. - auto stop = emit_partition(); - if (stop) { - return make_ready_future(stop_iteration::yes); - } - } - return consume_partition_from_source(); - }); - } - - virtual future<> next_partition() override { - clear_buffer_to_next_partition(); - if (is_buffer_empty() && !is_end_of_stream()) { - while (!_mutation_fragments.empty()) { - _stack_size -= _mutation_fragments.top().memory_usage(); - _mutation_fragments.pop(); - } - _range_tombstones.clear(); - _partition_end = std::nullopt; - return _source.next_partition(); - } - return make_ready_future<>(); - } - - virtual future<> fast_forward_to(const dht::partition_range& pr) override { - clear_buffer(); - while (!_mutation_fragments.empty()) { - _mutation_fragments.pop(); - } - _stack_size = 0; - _partition_end = std::nullopt; - _end_of_stream = false; - return _source.fast_forward_to(pr); - } - - virtual future<> fast_forward_to(position_range) override { - return make_exception_future<>(make_backtraced_exception_ptr()); - } - - virtual future<> 
close() noexcept override { - return _source.close(); - } - }; - - return make_flat_mutation_reader(std::move(original), max_size, std::move(slice)); -} - -template -future flat_mutation_reader::impl::fill_buffer_from(Source& source) { - if (source.is_buffer_empty()) { - if (source.is_end_of_stream()) { - return make_ready_future(true); - } - return source.fill_buffer().then([this, &source] { - return fill_buffer_from(source); - }); - } else { - while (!source.is_buffer_empty() && !is_buffer_full()) { - push_mutation_fragment(source.pop_mutation_fragment()); - } - return make_ready_future(source.is_end_of_stream() && source.is_buffer_empty()); - } -} - -template future flat_mutation_reader::impl::fill_buffer_from(flat_mutation_reader&); +extern logging::logger fmr_logger; flat_mutation_reader make_delegating_reader(flat_mutation_reader& r) { return make_flat_mutation_reader(r); } +future<> delegating_reader::fill_buffer() { + if (is_buffer_full()) { + return make_ready_future<>(); + } + return _underlying->fill_buffer().then([this] { + _end_of_stream = _underlying->is_end_of_stream(); + _underlying->move_buffer_content_to(*this); + }); +} + +future<> delegating_reader::fast_forward_to(position_range pr) { + _end_of_stream = false; + forward_buffer_to(pr.start()); + return _underlying->fast_forward_to(std::move(pr)); +} + +future<> delegating_reader::next_partition() { + clear_buffer_to_next_partition(); + auto maybe_next_partition = make_ready_future<>(); + if (is_buffer_empty()) { + maybe_next_partition = _underlying->next_partition(); + } + return maybe_next_partition.then([this] { + _end_of_stream = _underlying->is_end_of_stream() && _underlying->is_buffer_empty(); + }); +} + +future<> delegating_reader::fast_forward_to(const dht::partition_range& pr) { + _end_of_stream = false; + clear_buffer(); + return _underlying->fast_forward_to(pr); +} + +future<> delegating_reader::close() noexcept { + return _underlying_holder ? 
_underlying_holder->close() : make_ready_future<>(); +} + flat_mutation_reader_v2 make_delegating_reader_v2(flat_mutation_reader_v2& r) { return make_flat_mutation_reader_v2(r); } +class empty_flat_reader final : public flat_mutation_reader::impl { +public: + empty_flat_reader(schema_ptr s, reader_permit permit) : impl(std::move(s), std::move(permit)) { _end_of_stream = true; } + virtual future<> fill_buffer() override { return make_ready_future<>(); } + virtual future<> next_partition() override { return make_ready_future<>(); } + virtual future<> fast_forward_to(const dht::partition_range& pr) override { return make_ready_future<>(); }; + virtual future<> fast_forward_to(position_range cr) override { return make_ready_future<>(); }; + virtual future<> close() noexcept override { return make_ready_future<>(); } +}; + +flat_mutation_reader make_empty_flat_reader(schema_ptr s, reader_permit permit) { + return make_flat_mutation_reader(std::move(s), std::move(permit)); +} + +class empty_flat_reader_v2 final : public flat_mutation_reader_v2::impl { +public: + empty_flat_reader_v2(schema_ptr s, reader_permit permit) : impl(std::move(s), std::move(permit)) { _end_of_stream = true; } + virtual future<> fill_buffer() override { return make_ready_future<>(); } + virtual future<> next_partition() override { return make_ready_future<>(); } + virtual future<> fast_forward_to(const dht::partition_range& pr) override { return make_ready_future<>(); }; + virtual future<> fast_forward_to(position_range cr) override { return make_ready_future<>(); }; + virtual future<> close() noexcept override { return make_ready_future<>(); } +}; + +flat_mutation_reader_v2 make_empty_flat_reader_v2(schema_ptr s, reader_permit permit) { + return make_flat_mutation_reader_v2(std::move(s), std::move(permit)); +} + flat_mutation_reader make_forwardable(flat_mutation_reader m) { class reader : public flat_mutation_reader::impl { flat_mutation_reader _underlying; @@ -434,6 +286,262 @@ 
flat_mutation_reader_v2 make_forwardable(flat_mutation_reader_v2 m) { return make_flat_mutation_reader_v2(std::move(m)); } +flat_mutation_reader make_slicing_filtering_reader(flat_mutation_reader rd, const dht::partition_range& pr, const query::partition_slice& slice) { + class reader : public flat_mutation_reader::impl { + flat_mutation_reader _rd; + const dht::partition_range* _pr; + const query::partition_slice* _slice; + dht::ring_position_comparator _cmp; + std::optional _ranges_walker; + std::optional _splitter; + + public: + reader(flat_mutation_reader rd, const dht::partition_range& pr, const query::partition_slice& slice) + : flat_mutation_reader::impl(rd.schema(), rd.permit()) + , _rd(std::move(rd)) + , _pr(&pr) + , _slice(&slice) + , _cmp(*_schema) { + } + + virtual future<> fill_buffer() override { + const auto consume_fn = [this] (mutation_fragment mf) { + push_mutation_fragment(std::move(mf)); + }; + + while (!is_buffer_full() && !is_end_of_stream()) { + co_await _rd.fill_buffer(); + while (!_rd.is_buffer_empty()) { + auto mf = _rd.pop_mutation_fragment(); + switch (mf.mutation_fragment_kind()) { + case mutation_fragment::kind::partition_start: { + auto& dk = mf.as_partition_start().key(); + if (!_pr->contains(dk, _cmp)) { + co_return co_await _rd.next_partition(); + } else { + _ranges_walker.emplace(*_schema, _slice->row_ranges(*_schema, dk.key()), false); + _splitter.emplace(*_schema, _permit, *_ranges_walker); + } + // fall-through + } + + case mutation_fragment::kind::static_row: + consume_fn(std::move(mf)); + break; + + case mutation_fragment::kind::partition_end: + _splitter->flush(position_in_partition::after_all_clustered_rows(), consume_fn); + consume_fn(std::move(mf)); + break; + + case mutation_fragment::kind::clustering_row: + _splitter->flush(mf.position(), consume_fn); + if (_ranges_walker->advance_to(mf.position())) { + consume_fn(std::move(mf)); + } + break; + + case mutation_fragment::kind::range_tombstone: + auto&& rt = 
mf.as_range_tombstone(); + _splitter->consume(rt, consume_fn); + break; + } + } + + _end_of_stream = _rd.is_end_of_stream(); + co_return; + } + } + + virtual future<> next_partition() override { + clear_buffer_to_next_partition(); + if (is_buffer_empty()) { + _end_of_stream = false; + return _rd.next_partition(); + } + + return make_ready_future<>(); + } + + virtual future<> fast_forward_to(const dht::partition_range& pr) override { + clear_buffer(); + _end_of_stream = false; + return _rd.fast_forward_to(pr); + } + + virtual future<> fast_forward_to(position_range pr) override { + forward_buffer_to(pr.start()); + _end_of_stream = false; + return _rd.fast_forward_to(std::move(pr)); + } + + virtual future<> close() noexcept override { + return _rd.close(); + } + }; + + return make_flat_mutation_reader(std::move(rd), pr, slice); +} + +std::vector slice_mutations(schema_ptr schema, std::vector ms, const query::partition_slice& slice) { + std::vector sliced_ms; + for (auto& m : ms) { + auto ck_ranges = query::clustering_key_filter_ranges::get_ranges(*schema, slice, m.key()); + auto mp = mutation_partition(std::move(m.partition()), *schema, std::move(ck_ranges)); + sliced_ms.emplace_back(schema, m.decorated_key(), std::move(mp)); + } + return sliced_ms; +} + +flat_mutation_reader make_reversing_reader(flat_mutation_reader original, query::max_result_size max_size, std::unique_ptr slice) { + class partition_reversing_mutation_reader final : public flat_mutation_reader::impl { + flat_mutation_reader _source; + range_tombstone_list _range_tombstones; + std::stack _mutation_fragments; + mutation_fragment_opt _partition_end; + size_t _stack_size = 0; + const query::max_result_size _max_size; + bool _below_soft_limit = true; + std::unique_ptr _slice; // only stored, not used + private: + stop_iteration emit_partition() { + auto emit_range_tombstone = [&] { + // _range_tombstones uses the reverse schema already, so we can use `begin()` + auto it = _range_tombstones.begin(); + 
push_mutation_fragment(*_schema, _permit, _range_tombstones.pop(it)); + }; + position_in_partition::tri_compare cmp(*_schema); + while (!_mutation_fragments.empty() && !is_buffer_full()) { + auto& mf = _mutation_fragments.top(); + if (!_range_tombstones.empty() && cmp(_range_tombstones.begin()->position(), mf.position()) <= 0) { + emit_range_tombstone(); + } else { + _stack_size -= mf.memory_usage(); + push_mutation_fragment(std::move(mf)); + _mutation_fragments.pop(); + } + } + while (!_range_tombstones.empty() && !is_buffer_full()) { + emit_range_tombstone(); + } + if (is_buffer_full()) { + return stop_iteration::yes; + } + push_mutation_fragment(std::move(*std::exchange(_partition_end, std::nullopt))); + return stop_iteration::no; + } + future consume_partition_from_source() { + if (_source.is_buffer_empty()) { + if (_source.is_end_of_stream()) { + _end_of_stream = true; + return make_ready_future(stop_iteration::yes); + } + return _source.fill_buffer().then([] { return stop_iteration::no; }); + } + while (!_source.is_buffer_empty() && !is_buffer_full()) { + auto mf = _source.pop_mutation_fragment(); + if (mf.is_partition_start() || mf.is_static_row()) { + push_mutation_fragment(std::move(mf)); + } else if (mf.is_end_of_partition()) { + _partition_end = std::move(mf); + if (emit_partition()) { + return make_ready_future(stop_iteration::yes); + } + } else if (mf.is_range_tombstone()) { + auto&& rt = std::move(mf).as_range_tombstone(); + rt.reverse(); + _range_tombstones.apply(*_schema, std::move(rt)); + } else { + _mutation_fragments.emplace(std::move(mf)); + _stack_size += _mutation_fragments.top().memory_usage(); + if (_stack_size > _max_size.hard_limit || (_stack_size > _max_size.soft_limit && _below_soft_limit)) { + const partition_key* key = nullptr; + auto it = buffer().end(); + --it; + if (it->is_partition_start()) { + key = &it->as_partition_start().key().key(); + } else { + --it; + key = &it->as_partition_start().key().key(); + } + + if (_stack_size > 
_max_size.hard_limit) { + return make_exception_future(std::runtime_error(fmt::format( + "Memory usage of reversed read exceeds hard limit of {} (configured via max_memory_for_unlimited_query_hard_limit), while reading partition {}", + _max_size.hard_limit, + key->with_schema(*_schema)))); + } else { + fmr_logger.warn( + "Memory usage of reversed read exceeds soft limit of {} (configured via max_memory_for_unlimited_query_soft_limit), while reading partition {}", + _max_size.soft_limit, + key->with_schema(*_schema)); + _below_soft_limit = false; + } + } + } + } + return make_ready_future(is_buffer_full()); + } + public: + explicit partition_reversing_mutation_reader(flat_mutation_reader mr, query::max_result_size max_size, std::unique_ptr slice) + : flat_mutation_reader::impl(mr.schema()->make_reversed(), mr.permit()) + , _source(std::move(mr)) + , _range_tombstones(*_schema) + , _max_size(max_size) + , _slice(std::move(slice)) + { } + + virtual future<> fill_buffer() override { + return repeat([&] { + if (_partition_end) { + // We have consumed full partition from source, now it is + // time to emit it. 
+ auto stop = emit_partition(); + if (stop) { + return make_ready_future(stop_iteration::yes); + } + } + return consume_partition_from_source(); + }); + } + + virtual future<> next_partition() override { + clear_buffer_to_next_partition(); + if (is_buffer_empty() && !is_end_of_stream()) { + while (!_mutation_fragments.empty()) { + _stack_size -= _mutation_fragments.top().memory_usage(); + _mutation_fragments.pop(); + } + _range_tombstones.clear(); + _partition_end = std::nullopt; + return _source.next_partition(); + } + return make_ready_future<>(); + } + + virtual future<> fast_forward_to(const dht::partition_range& pr) override { + clear_buffer(); + while (!_mutation_fragments.empty()) { + _mutation_fragments.pop(); + } + _stack_size = 0; + _partition_end = std::nullopt; + _end_of_stream = false; + return _source.fast_forward_to(pr); + } + + virtual future<> fast_forward_to(position_range) override { + return make_exception_future<>(make_backtraced_exception_ptr()); + } + + virtual future<> close() noexcept override { + return _source.close(); + } + }; + + return make_flat_mutation_reader(std::move(original), max_size, std::move(slice)); +} + flat_mutation_reader make_nonforwardable(flat_mutation_reader r, bool single_partition) { class reader : public flat_mutation_reader::impl { flat_mutation_reader _underlying; @@ -500,32 +608,459 @@ flat_mutation_reader make_nonforwardable(flat_mutation_reader r, bool single_par return make_flat_mutation_reader(std::move(r), single_partition); } -class empty_flat_reader final : public flat_mutation_reader_v2::impl { +template +class flat_multi_range_mutation_reader : public flat_mutation_reader_v2::impl { + std::optional _generator; + flat_mutation_reader_v2 _reader; + + const dht::partition_range* next() { + if (!_generator) { + return nullptr; + } + return (*_generator)(); + } + public: - empty_flat_reader(schema_ptr s, reader_permit permit) : impl(std::move(s), std::move(permit)) { _end_of_stream = true; } - virtual 
future<> fill_buffer() override { return make_ready_future<>(); } - virtual future<> next_partition() override { return make_ready_future<>(); } - virtual future<> fast_forward_to(const dht::partition_range& pr) override { return make_ready_future<>(); }; - virtual future<> fast_forward_to(position_range cr) override { return make_ready_future<>(); }; - virtual future<> close() noexcept override { return make_ready_future<>(); } + flat_multi_range_mutation_reader( + schema_ptr s, + reader_permit permit, + mutation_source source, + const dht::partition_range& first_range, + Generator generator, + const query::partition_slice& slice, + const io_priority_class& pc, + tracing::trace_state_ptr trace_state) + : impl(s, std::move(permit)) + , _generator(std::move(generator)) + , _reader(source.make_reader_v2(s, _permit, first_range, slice, pc, trace_state, streamed_mutation::forwarding::no, mutation_reader::forwarding::yes)) + { + } + + virtual future<> fill_buffer() override { + return do_until([this] { return is_end_of_stream() || !is_buffer_empty(); }, [this] { + return _reader.fill_buffer().then([this] () { + while (!_reader.is_buffer_empty()) { + push_mutation_fragment(_reader.pop_mutation_fragment()); + } + if (!_reader.is_end_of_stream()) { + return make_ready_future<>(); + } + if (auto r = next()) { + return _reader.fast_forward_to(*r); + } else { + _end_of_stream = true; + return make_ready_future<>(); + } + }); + }); + } + + virtual future<> fast_forward_to(const dht::partition_range& pr) override { + clear_buffer(); + _end_of_stream = false; + return _reader.fast_forward_to(pr).then([this] { + _generator.reset(); + }); + } + + virtual future<> fast_forward_to(position_range pr) override { + return make_exception_future<>(make_backtraced_exception_ptr()); + } + + virtual future<> next_partition() override { + clear_buffer_to_next_partition(); + if (is_buffer_empty() && !is_end_of_stream()) { + return _reader.next_partition(); + } + return make_ready_future<>(); 
+ } + + virtual future<> close() noexcept override { + return _reader.close(); + } }; -flat_mutation_reader make_empty_flat_reader(schema_ptr s, reader_permit permit) { - return downgrade_to_v1(make_flat_mutation_reader_v2(std::move(s), std::move(permit))); -} - -flat_mutation_reader_v2 make_empty_flat_reader_v2(schema_ptr s, reader_permit permit) { - return make_flat_mutation_reader_v2(std::move(s), std::move(permit)); -} - -std::vector slice_mutations(schema_ptr schema, std::vector ms, const query::partition_slice& slice) { - std::vector sliced_ms; - for (auto& m : ms) { - auto ck_ranges = query::clustering_key_filter_ranges::get_ranges(*schema, slice, m.key()); - auto mp = mutation_partition(std::move(m.partition()), *schema, std::move(ck_ranges)); - sliced_ms.emplace_back(schema, m.decorated_key(), std::move(mp)); +/// A reader that is empty when created but can be fast-forwarded. +/// +/// Useful when a reader has to be created without an initial read-range and it +/// has to be fast-forwardable. +/// Delays the creation of the underlying reader until it is first +/// fast-forwarded and thus a range is available. 
+class forwardable_empty_mutation_reader : public flat_mutation_reader_v2::impl { + mutation_source _source; + const query::partition_slice& _slice; + const io_priority_class& _pc; + tracing::trace_state_ptr _trace_state; + flat_mutation_reader_v2_opt _reader; +public: + forwardable_empty_mutation_reader(schema_ptr s, + reader_permit permit, + mutation_source source, + const query::partition_slice& slice, + const io_priority_class& pc, + tracing::trace_state_ptr trace_state) + : impl(s, std::move(permit)) + , _source(std::move(source)) + , _slice(slice) + , _pc(pc) + , _trace_state(std::move(trace_state)) { + _end_of_stream = true; } - return sliced_ms; + virtual future<> fill_buffer() override { + if (!_reader) { + return make_ready_future<>(); + } + if (_reader->is_buffer_empty()) { + if (_reader->is_end_of_stream()) { + _end_of_stream = true; + return make_ready_future<>(); + } else { + return _reader->fill_buffer().then([this] { return fill_buffer(); }); + } + } + _reader->move_buffer_content_to(*this); + return make_ready_future<>(); + } + virtual future<> fast_forward_to(const dht::partition_range& pr) override { + if (!_reader) { + _reader = _source.make_reader_v2(_schema, _permit, pr, _slice, _pc, std::move(_trace_state), streamed_mutation::forwarding::no, + mutation_reader::forwarding::yes); + _end_of_stream = false; + return make_ready_future<>(); + } + + clear_buffer(); + _end_of_stream = false; + return _reader->fast_forward_to(pr); + } + virtual future<> fast_forward_to(position_range pr) override { + return make_exception_future<>(make_backtraced_exception_ptr()); + } + virtual future<> next_partition() override { + if (!_reader) { + return make_ready_future<>(); + } + clear_buffer_to_next_partition(); + if (is_buffer_empty() && !is_end_of_stream()) { + return _reader->next_partition(); + } + return make_ready_future<>(); + } + virtual future<> close() noexcept override { + return _reader ? 
_reader->close() : make_ready_future<>(); + } +}; +flat_mutation_reader_v2 +make_flat_multi_range_reader(schema_ptr s, reader_permit permit, mutation_source source, const dht::partition_range_vector& ranges, + const query::partition_slice& slice, const io_priority_class& pc, + tracing::trace_state_ptr trace_state, + mutation_reader::forwarding fwd_mr) +{ + class adapter { + dht::partition_range_vector::const_iterator _it; + dht::partition_range_vector::const_iterator _end; + + public: + adapter(dht::partition_range_vector::const_iterator begin, dht::partition_range_vector::const_iterator end) : _it(begin), _end(end) { + } + const dht::partition_range* operator()() { + if (_it == _end) { + return nullptr; + } + return &*_it++; + } + }; + + if (ranges.empty()) { + if (fwd_mr) { + return make_flat_mutation_reader_v2(std::move(s), std::move(permit), std::move(source), slice, pc, + std::move(trace_state)); + } else { + return make_empty_flat_reader_v2(std::move(s), std::move(permit)); + } + } else if (ranges.size() == 1) { + return source.make_reader_v2(std::move(s), std::move(permit), ranges.front(), slice, pc, std::move(trace_state), streamed_mutation::forwarding::no, fwd_mr); + } else { + return make_flat_mutation_reader_v2>(std::move(s), std::move(permit), std::move(source), + ranges.front(), adapter(std::next(ranges.cbegin()), ranges.cend()), slice, pc, std::move(trace_state)); + } +} + +flat_mutation_reader_v2 +make_flat_multi_range_reader( + schema_ptr s, + reader_permit permit, + mutation_source source, + std::function()> generator, + const query::partition_slice& slice, + const io_priority_class& pc, + tracing::trace_state_ptr trace_state, + mutation_reader::forwarding fwd_mr) { + class adapter { + std::function()> _generator; + std::unique_ptr _previous; + std::unique_ptr _current; + + public: + explicit adapter(std::function()> generator) + : _generator(std::move(generator)) + , _previous(std::make_unique(dht::partition_range::make_singular({dht::token{}, 
partition_key::make_empty()}))) + , _current(std::make_unique(dht::partition_range::make_singular({dht::token{}, partition_key::make_empty()}))) { + } + const dht::partition_range* operator()() { + std::swap(_current, _previous); + if (auto next = _generator()) { + *_current = std::move(*next); + return _current.get(); + } else { + return nullptr; + } + } + }; + + auto adapted_generator = adapter(std::move(generator)); + auto* first_range = adapted_generator(); + if (!first_range) { + if (fwd_mr) { + return make_flat_mutation_reader_v2(std::move(s), std::move(permit), std::move(source), slice, pc, std::move(trace_state)); + } else { + return make_empty_flat_reader_v2(std::move(s), std::move(permit)); + } + } else { + return make_flat_mutation_reader_v2>(std::move(s), std::move(permit), std::move(source), + *first_range, std::move(adapted_generator), slice, pc, std::move(trace_state)); + } +} + + +/* + * This reader takes a get_next_fragment generator that produces mutation_fragment_v2_opt values, which are emitted by + * generating_reader_v2.
+ */ +class generating_reader_v2 final : public flat_mutation_reader_v2::impl { + std::function ()> _get_next_fragment; +public: + generating_reader_v2(schema_ptr s, reader_permit permit, std::function ()> get_next_fragment) + : impl(std::move(s), std::move(permit)), _get_next_fragment(std::move(get_next_fragment)) + { } + virtual future<> fill_buffer() override { + return do_until([this] { return is_end_of_stream() || is_buffer_full(); }, [this] { + return _get_next_fragment().then([this] (mutation_fragment_v2_opt mopt) { + if (!mopt) { + _end_of_stream = true; + } else { + push_mutation_fragment(std::move(*mopt)); + } + }); + }); + } + virtual future<> next_partition() override { + return make_exception_future<>(make_backtraced_exception_ptr()); + } + virtual future<> fast_forward_to(const dht::partition_range&) override { + return make_exception_future<>(make_backtraced_exception_ptr()); + } + virtual future<> fast_forward_to(position_range) override { + return make_exception_future<>(make_backtraced_exception_ptr()); + } + virtual future<> close() noexcept override { + return make_ready_future<>(); + } +}; + +flat_mutation_reader_v2 make_generating_reader(schema_ptr s, reader_permit permit, std::function ()> get_next_fragment) { + return make_flat_mutation_reader_v2(std::move(s), std::move(permit), std::move(get_next_fragment)); +} + +/* + * This reader takes a get_next_fragment generator that produces mutation_fragment_opt which is returned by + * generating_reader. 
+ * + */ +class generating_reader final : public flat_mutation_reader::impl { + std::function ()> _get_next_fragment; +public: + generating_reader(schema_ptr s, reader_permit permit, std::function ()> get_next_fragment) + : impl(std::move(s), std::move(permit)), _get_next_fragment(std::move(get_next_fragment)) + { } + virtual future<> fill_buffer() override { + return do_until([this] { return is_end_of_stream() || is_buffer_full(); }, [this] { + return _get_next_fragment().then([this] (mutation_fragment_opt mopt) { + if (!mopt) { + _end_of_stream = true; + } else { + push_mutation_fragment(std::move(*mopt)); + } + }); + }); + } + virtual future<> next_partition() override { + return make_exception_future<>(make_backtraced_exception_ptr()); + } + virtual future<> fast_forward_to(const dht::partition_range&) override { + return make_exception_future<>(make_backtraced_exception_ptr()); + } + virtual future<> fast_forward_to(position_range) override { + return make_exception_future<>(make_backtraced_exception_ptr()); + } + virtual future<> close() noexcept override { + return make_ready_future<>(); + } +}; + +flat_mutation_reader make_generating_reader(schema_ptr s, reader_permit permit, std::function ()> get_next_fragment) { + return make_flat_mutation_reader(std::move(s), std::move(permit), std::move(get_next_fragment)); +} + +flat_mutation_reader_v2 +make_flat_mutation_reader_from_mutations_v2(schema_ptr schema, reader_permit permit, std::vector ms, const query::partition_slice& slice, streamed_mutation::forwarding fwd) { + return make_flat_mutation_reader_from_mutations_v2(std::move(schema), std::move(permit), std::move(ms), query::full_partition_range, slice, fwd); +} + +flat_mutation_reader_v2 +make_flat_mutation_reader_from_mutations_v2(schema_ptr s, reader_permit permit, std::vector mutations, const dht::partition_range& pr, + const query::partition_slice& query_slice, streamed_mutation::forwarding fwd) { + class reader final : public 
flat_mutation_reader_v2::impl { + std::vector _mutations; + const dht::partition_range* _pr; + bool _reversed; + const dht::decorated_key* _dk = nullptr; + std::optional _cookie; + + private: + void maybe_emit_partition_start() { + if (_dk) { + consume(tombstone{}); // flush partition-start + } + } + public: + void consume_new_partition(const dht::decorated_key& dk) { + _dk = &dk; + } + void consume(tombstone t) { + push_mutation_fragment(*_schema, _permit, partition_start(*_dk, t)); + _dk = nullptr; + } + stop_iteration consume(static_row&& sr) { + maybe_emit_partition_start(); + push_mutation_fragment(*_schema, _permit, std::move(sr)); + return stop_iteration(is_buffer_full()); + } + stop_iteration consume(clustering_row&& cr) { + maybe_emit_partition_start(); + push_mutation_fragment(*_schema, _permit, std::move(cr)); + return stop_iteration(is_buffer_full()); + } + stop_iteration consume(range_tombstone_change&& rtc) { + maybe_emit_partition_start(); + push_mutation_fragment(*_schema, _permit, std::move(rtc)); + return stop_iteration(is_buffer_full()); + } + stop_iteration consume_end_of_partition() { + if (is_buffer_full()) { + return stop_iteration::yes; + } + maybe_emit_partition_start(); + push_mutation_fragment(*_schema, _permit, partition_end{}); + return stop_iteration::no; + } + void consume_end_of_stream() { } + + public: + reader(schema_ptr schema, reader_permit permit, std::vector mutations, const dht::partition_range& pr, bool reversed) + : impl(std::move(schema), std::move(permit)) + , _mutations(std::move(mutations)) + , _pr(&pr) + , _reversed(reversed) + { + std::reverse(_mutations.begin(), _mutations.end()); + } + virtual future<> fill_buffer() override { + if (_mutations.empty()) { + _end_of_stream = true; + return make_ready_future<>(); + } + + dht::ring_position_comparator cmp{*_schema}; + while (!_mutations.empty()) { + auto& mut = _mutations.back(); + if (_pr->before(mut.decorated_key(), cmp)) { + _mutations.pop_back(); + continue; + } + if 
(_pr->after(mut.decorated_key(), cmp)) { + _end_of_stream = true; + break; + } + if (!_cookie) { + _cookie.emplace(); + } + auto res = std::move(mut).consume(*this, _reversed ? consume_in_reverse::yes : consume_in_reverse::no, std::move(*_cookie)); + if (res.stop == stop_iteration::yes) { + _cookie = std::move(res.cookie); + break; + } else { + _cookie.reset(); + _mutations.pop_back(); + } + } + return make_ready_future<>(); + } + virtual future<> next_partition() override { + clear_buffer_to_next_partition(); + if (is_buffer_empty() && _cookie) { + _cookie.reset(); + _mutations.pop_back(); + } + return make_ready_future<>(); + } + virtual future<> fast_forward_to(const dht::partition_range& pr) override { + clear_buffer(); + _end_of_stream = false; + _cookie.reset(); + _pr = ≺ + return make_ready_future<>(); + } + virtual future<> fast_forward_to(position_range pr) override { + return make_exception_future<>(std::bad_function_call{}); + } + virtual future<> close() noexcept override { + return make_ready_future<>(); + } + }; + if (mutations.empty()) { + return make_empty_flat_reader_v2(std::move(s), std::move(permit)); + } + const auto reversed = query_slice.is_reversed(); + std::vector sliced_mutations; + if (reversed) { + sliced_mutations = slice_mutations(s->make_reversed(), std::move(mutations), query::half_reverse_slice(*s, query_slice)); + } else { + sliced_mutations = slice_mutations(s, std::move(mutations), query_slice); + } + auto res = make_flat_mutation_reader_v2(s, std::move(permit), std::move(sliced_mutations), pr, reversed); + if (fwd) { + return make_forwardable(std::move(res)); + } + return res; +} + +flat_mutation_reader_v2 +make_flat_mutation_reader_from_mutations_v2(schema_ptr s, reader_permit permit, std::vector mutations, const dht::partition_range& pr, streamed_mutation::forwarding fwd) { + return make_flat_mutation_reader_from_mutations_v2(s, std::move(permit), std::move(mutations), pr, s->full_slice(), fwd); +} + +flat_mutation_reader_v2 
make_flat_mutation_reader_from_mutations_v2( + schema_ptr schema, + reader_permit permit, + std::vector ms, + streamed_mutation::forwarding fwd) { + return make_flat_mutation_reader_from_mutations_v2(std::move(schema), std::move(permit), std::move(ms), query::full_partition_range, fwd); +} + +// All mutations should have the same schema. +flat_mutation_reader make_flat_mutation_reader_from_mutations(schema_ptr schema, reader_permit permit, std::vector ms, streamed_mutation::forwarding fwd) { + return make_flat_mutation_reader_from_mutations(std::move(schema), std::move(permit), std::move(ms), query::full_partition_range, fwd); } flat_mutation_reader @@ -760,484 +1295,6 @@ make_flat_mutation_reader_from_mutations(schema_ptr s, reader_permit permit, std return res; } -flat_mutation_reader_v2 -make_flat_mutation_reader_from_mutations_v2(schema_ptr schema, reader_permit permit, std::vector ms, const query::partition_slice& slice, streamed_mutation::forwarding fwd) { - return make_flat_mutation_reader_from_mutations_v2(std::move(schema), std::move(permit), std::move(ms), query::full_partition_range, slice, fwd); -} - -flat_mutation_reader_v2 -make_flat_mutation_reader_from_mutations_v2(schema_ptr s, reader_permit permit, std::vector mutations, const dht::partition_range& pr, - const query::partition_slice& query_slice, streamed_mutation::forwarding fwd) { - class reader final : public flat_mutation_reader_v2::impl { - std::vector _mutations; - const dht::partition_range* _pr; - bool _reversed; - const dht::decorated_key* _dk = nullptr; - std::optional _cookie; - - private: - void maybe_emit_partition_start() { - if (_dk) { - consume(tombstone{}); // flush partition-start - } - } - public: - void consume_new_partition(const dht::decorated_key& dk) { - _dk = &dk; - } - void consume(tombstone t) { - push_mutation_fragment(*_schema, _permit, partition_start(*_dk, t)); - _dk = nullptr; - } - stop_iteration consume(static_row&& sr) { - maybe_emit_partition_start(); - 
push_mutation_fragment(*_schema, _permit, std::move(sr)); - return stop_iteration(is_buffer_full()); - } - stop_iteration consume(clustering_row&& cr) { - maybe_emit_partition_start(); - push_mutation_fragment(*_schema, _permit, std::move(cr)); - return stop_iteration(is_buffer_full()); - } - stop_iteration consume(range_tombstone_change&& rtc) { - maybe_emit_partition_start(); - push_mutation_fragment(*_schema, _permit, std::move(rtc)); - return stop_iteration(is_buffer_full()); - } - stop_iteration consume_end_of_partition() { - if (is_buffer_full()) { - return stop_iteration::yes; - } - maybe_emit_partition_start(); - push_mutation_fragment(*_schema, _permit, partition_end{}); - return stop_iteration::no; - } - void consume_end_of_stream() { } - - public: - reader(schema_ptr schema, reader_permit permit, std::vector mutations, const dht::partition_range& pr, bool reversed) - : impl(std::move(schema), std::move(permit)) - , _mutations(std::move(mutations)) - , _pr(&pr) - , _reversed(reversed) - { - std::reverse(_mutations.begin(), _mutations.end()); - } - virtual future<> fill_buffer() override { - if (_mutations.empty()) { - _end_of_stream = true; - return make_ready_future<>(); - } - - dht::ring_position_comparator cmp{*_schema}; - while (!_mutations.empty()) { - auto& mut = _mutations.back(); - if (_pr->before(mut.decorated_key(), cmp)) { - _mutations.pop_back(); - continue; - } - if (_pr->after(mut.decorated_key(), cmp)) { - _end_of_stream = true; - break; - } - if (!_cookie) { - _cookie.emplace(); - } - auto res = std::move(mut).consume(*this, _reversed ? 
consume_in_reverse::yes : consume_in_reverse::no, std::move(*_cookie)); - if (res.stop == stop_iteration::yes) { - _cookie = std::move(res.cookie); - break; - } else { - _cookie.reset(); - _mutations.pop_back(); - } - } - return make_ready_future<>(); - } - virtual future<> next_partition() override { - clear_buffer_to_next_partition(); - if (is_buffer_empty() && _cookie) { - _cookie.reset(); - _mutations.pop_back(); - } - return make_ready_future<>(); - } - virtual future<> fast_forward_to(const dht::partition_range& pr) override { - clear_buffer(); - _end_of_stream = false; - _cookie.reset(); - _pr = ≺ - return make_ready_future<>(); - } - virtual future<> fast_forward_to(position_range pr) override { - return make_exception_future<>(std::bad_function_call{}); - } - virtual future<> close() noexcept override { - return make_ready_future<>(); - } - }; - if (mutations.empty()) { - return make_empty_flat_reader_v2(std::move(s), std::move(permit)); - } - const auto reversed = query_slice.is_reversed(); - std::vector sliced_mutations; - if (reversed) { - sliced_mutations = slice_mutations(s->make_reversed(), std::move(mutations), query::half_reverse_slice(*s, query_slice)); - } else { - sliced_mutations = slice_mutations(s, std::move(mutations), query_slice); - } - auto res = make_flat_mutation_reader_v2(s, std::move(permit), std::move(sliced_mutations), pr, reversed); - if (fwd) { - return make_forwardable(std::move(res)); - } - return res; -} - -flat_mutation_reader_v2 -make_flat_mutation_reader_from_mutations_v2(schema_ptr s, reader_permit permit, std::vector mutations, const dht::partition_range& pr, streamed_mutation::forwarding fwd) { - return make_flat_mutation_reader_from_mutations_v2(s, std::move(permit), std::move(mutations), pr, s->full_slice(), fwd); -} - -/// A reader that is empty when created but can be fast-forwarded. -/// -/// Useful when a reader has to be created without an initial read-range and it -/// has to be fast-forwardable. 
-/// Delays the creation of the underlying reader until it is first -/// fast-forwarded and thus a range is available. -class forwardable_empty_mutation_reader : public flat_mutation_reader_v2::impl { - mutation_source _source; - const query::partition_slice& _slice; - const io_priority_class& _pc; - tracing::trace_state_ptr _trace_state; - flat_mutation_reader_v2_opt _reader; -public: - forwardable_empty_mutation_reader(schema_ptr s, - reader_permit permit, - mutation_source source, - const query::partition_slice& slice, - const io_priority_class& pc, - tracing::trace_state_ptr trace_state) - : impl(s, std::move(permit)) - , _source(std::move(source)) - , _slice(slice) - , _pc(pc) - , _trace_state(std::move(trace_state)) { - _end_of_stream = true; - } - virtual future<> fill_buffer() override { - if (!_reader) { - return make_ready_future<>(); - } - if (_reader->is_buffer_empty()) { - if (_reader->is_end_of_stream()) { - _end_of_stream = true; - return make_ready_future<>(); - } else { - return _reader->fill_buffer().then([this] { return fill_buffer(); }); - } - } - _reader->move_buffer_content_to(*this); - return make_ready_future<>(); - } - virtual future<> fast_forward_to(const dht::partition_range& pr) override { - if (!_reader) { - _reader = _source.make_reader_v2(_schema, _permit, pr, _slice, _pc, std::move(_trace_state), streamed_mutation::forwarding::no, - mutation_reader::forwarding::yes); - _end_of_stream = false; - return make_ready_future<>(); - } - - clear_buffer(); - _end_of_stream = false; - return _reader->fast_forward_to(pr); - } - virtual future<> fast_forward_to(position_range pr) override { - return make_exception_future<>(make_backtraced_exception_ptr()); - } - virtual future<> next_partition() override { - if (!_reader) { - return make_ready_future<>(); - } - clear_buffer_to_next_partition(); - if (is_buffer_empty() && !is_end_of_stream()) { - return _reader->next_partition(); - } - return make_ready_future<>(); - } - virtual future<> close() 
noexcept override { - return _reader ? _reader->close() : make_ready_future<>(); - } -}; - -template -class flat_multi_range_mutation_reader : public flat_mutation_reader_v2::impl { - std::optional _generator; - flat_mutation_reader_v2 _reader; - - const dht::partition_range* next() { - if (!_generator) { - return nullptr; - } - return (*_generator)(); - } - -public: - flat_multi_range_mutation_reader( - schema_ptr s, - reader_permit permit, - mutation_source source, - const dht::partition_range& first_range, - Generator generator, - const query::partition_slice& slice, - const io_priority_class& pc, - tracing::trace_state_ptr trace_state) - : impl(s, std::move(permit)) - , _generator(std::move(generator)) - , _reader(source.make_reader_v2(s, _permit, first_range, slice, pc, trace_state, streamed_mutation::forwarding::no, mutation_reader::forwarding::yes)) - { - } - - virtual future<> fill_buffer() override { - return do_until([this] { return is_end_of_stream() || !is_buffer_empty(); }, [this] { - return _reader.fill_buffer().then([this] () { - while (!_reader.is_buffer_empty()) { - push_mutation_fragment(_reader.pop_mutation_fragment()); - } - if (!_reader.is_end_of_stream()) { - return make_ready_future<>(); - } - if (auto r = next()) { - return _reader.fast_forward_to(*r); - } else { - _end_of_stream = true; - return make_ready_future<>(); - } - }); - }); - } - - virtual future<> fast_forward_to(const dht::partition_range& pr) override { - clear_buffer(); - _end_of_stream = false; - return _reader.fast_forward_to(pr).then([this] { - _generator.reset(); - }); - } - - virtual future<> fast_forward_to(position_range pr) override { - return make_exception_future<>(make_backtraced_exception_ptr()); - } - - virtual future<> next_partition() override { - clear_buffer_to_next_partition(); - if (is_buffer_empty() && !is_end_of_stream()) { - return _reader.next_partition(); - } - return make_ready_future<>(); - } - - virtual future<> close() noexcept override { - return 
_reader.close(); - } -}; - -flat_mutation_reader_v2 -make_flat_multi_range_reader(schema_ptr s, reader_permit permit, mutation_source source, const dht::partition_range_vector& ranges, - const query::partition_slice& slice, const io_priority_class& pc, - tracing::trace_state_ptr trace_state, - mutation_reader::forwarding fwd_mr) -{ - class adapter { - dht::partition_range_vector::const_iterator _it; - dht::partition_range_vector::const_iterator _end; - - public: - adapter(dht::partition_range_vector::const_iterator begin, dht::partition_range_vector::const_iterator end) : _it(begin), _end(end) { - } - const dht::partition_range* operator()() { - if (_it == _end) { - return nullptr; - } - return &*_it++; - } - }; - - if (ranges.empty()) { - if (fwd_mr) { - return make_flat_mutation_reader_v2(std::move(s), std::move(permit), std::move(source), slice, pc, - std::move(trace_state)); - } else { - return make_empty_flat_reader_v2(std::move(s), std::move(permit)); - } - } else if (ranges.size() == 1) { - return source.make_reader_v2(std::move(s), std::move(permit), ranges.front(), slice, pc, std::move(trace_state), streamed_mutation::forwarding::no, fwd_mr); - } else { - return make_flat_mutation_reader_v2>(std::move(s), std::move(permit), std::move(source), - ranges.front(), adapter(std::next(ranges.cbegin()), ranges.cend()), slice, pc, std::move(trace_state)); - } -} - -flat_mutation_reader_v2 -make_flat_multi_range_reader( - schema_ptr s, - reader_permit permit, - mutation_source source, - std::function()> generator, - const query::partition_slice& slice, - const io_priority_class& pc, - tracing::trace_state_ptr trace_state, - mutation_reader::forwarding fwd_mr) { - class adapter { - std::function()> _generator; - std::unique_ptr _previous; - std::unique_ptr _current; - - public: - explicit adapter(std::function()> generator) - : _generator(std::move(generator)) - , _previous(std::make_unique(dht::partition_range::make_singular({dht::token{}, 
partition_key::make_empty()}))) - , _current(std::make_unique(dht::partition_range::make_singular({dht::token{}, partition_key::make_empty()}))) { - } - const dht::partition_range* operator()() { - std::swap(_current, _previous); - if (auto next = _generator()) { - *_current = std::move(*next); - return _current.get(); - } else { - return nullptr; - } - } - }; - - auto adapted_generator = adapter(std::move(generator)); - auto* first_range = adapted_generator(); - if (!first_range) { - if (fwd_mr) { - return make_flat_mutation_reader_v2(std::move(s), std::move(permit), std::move(source), slice, pc, std::move(trace_state)); - } else { - return make_empty_flat_reader_v2(std::move(s), std::move(permit)); - } - } else { - return make_flat_mutation_reader_v2>(std::move(s), std::move(permit), std::move(source), - *first_range, std::move(adapted_generator), slice, pc, std::move(trace_state)); - } -} - -flat_mutation_reader -make_flat_mutation_reader_from_fragments(schema_ptr schema, reader_permit permit, std::deque fragments) { - return make_flat_mutation_reader_from_fragments(std::move(schema), std::move(permit), std::move(fragments), query::full_partition_range); -} - -flat_mutation_reader -make_flat_mutation_reader_from_fragments(schema_ptr schema, reader_permit permit, std::deque fragments, const dht::partition_range& pr) { - class reader : public flat_mutation_reader::impl { - std::deque _fragments; - const dht::partition_range* _pr; - dht::ring_position_comparator _cmp; - - private: - bool end_of_range() const { - return _fragments.empty() || - (_fragments.front().is_partition_start() && _pr->after(_fragments.front().as_partition_start().key(), _cmp)); - } - - void do_fast_forward_to(const dht::partition_range& pr) { - clear_buffer(); - _pr = ≺ - _fragments.erase(_fragments.begin(), std::find_if(_fragments.begin(), _fragments.end(), [this] (const mutation_fragment& mf) { - return mf.is_partition_start() && !_pr->before(mf.as_partition_start().key(), _cmp); - })); - 
_end_of_stream = end_of_range(); - } - - public: - reader(schema_ptr schema, reader_permit permit, std::deque fragments, const dht::partition_range& pr) - : flat_mutation_reader::impl(std::move(schema), std::move(permit)) - , _fragments(std::move(fragments)) - , _pr(&pr) - , _cmp(*_schema) { - do_fast_forward_to(*_pr); - } - virtual future<> fill_buffer() override { - while (!(_end_of_stream = end_of_range()) && !is_buffer_full()) { - push_mutation_fragment(std::move(_fragments.front())); - _fragments.pop_front(); - } - return make_ready_future<>(); - } - virtual future<> next_partition() override { - clear_buffer_to_next_partition(); - if (is_buffer_empty()) { - while (!(_end_of_stream = end_of_range()) && !_fragments.front().is_partition_start()) { - _fragments.pop_front(); - } - } - return make_ready_future<>(); - } - virtual future<> fast_forward_to(position_range pr) override { - throw std::runtime_error("This reader can't be fast forwarded to another range."); - } - virtual future<> fast_forward_to(const dht::partition_range& pr) override { - do_fast_forward_to(pr); - return make_ready_future<>(); - } - virtual future<> close() noexcept override { - return make_ready_future<>(); - } - }; - return make_flat_mutation_reader(std::move(schema), std::move(permit), std::move(fragments), pr); -} - -flat_mutation_reader -make_flat_mutation_reader_from_fragments(schema_ptr schema, reader_permit permit, std::deque fragments, - const dht::partition_range& pr, const query::partition_slice& query_slice) { - const auto reversed = query_slice.is_reversed(); - if (reversed) { - schema = schema->make_reversed(); - } - auto slice = reversed ? 
query::half_reverse_slice(*schema, query_slice) : query_slice; - - std::optional ranges_walker; - std::optional splitter; - std::deque filtered; - for (auto&& mf : fragments) { - switch (mf.mutation_fragment_kind()) { - case mutation_fragment::kind::partition_start: - ranges_walker.emplace(*schema, slice.row_ranges(*schema, mf.as_partition_start().key().key()), false); - splitter.emplace(*schema, permit, *ranges_walker); - filtered.emplace_back(std::move(mf)); - break; - case mutation_fragment::kind::static_row: - filtered.push_back(std::move(mf)); - break; - case mutation_fragment::kind::partition_end: - splitter->flush(position_in_partition::after_all_clustered_rows(), [&] (mutation_fragment mf) { - filtered.emplace_back(std::move(mf)); - }); - filtered.push_back(std::move(mf)); - break; - case mutation_fragment::kind::clustering_row: - splitter->flush(mf.position(), [&] (mutation_fragment mf) { - filtered.emplace_back(std::move(mf)); - }); - if (ranges_walker->advance_to(mf.position())) { - filtered.push_back(std::move(mf)); - } - break; - case mutation_fragment::kind::range_tombstone: - splitter->consume(std::move(mf).as_range_tombstone(), [&] (mutation_fragment mf) { - filtered.emplace_back(std::move(mf)); - }); - break; - } - } - auto rd = make_flat_mutation_reader_from_fragments(std::move(schema), permit, std::move(filtered), pr); - if (reversed) { - rd = make_reversing_reader(std::move(rd), permit.max_result_size()); - } - return rd; -} - flat_mutation_reader_v2 make_flat_mutation_reader_from_fragments(schema_ptr schema, reader_permit permit, std::deque fragments) { return make_flat_mutation_reader_from_fragments(std::move(schema), std::move(permit), std::move(fragments), query::full_partition_range); @@ -1379,499 +1436,122 @@ make_flat_mutation_reader_from_fragments(schema_ptr schema, reader_permit permit return make_flat_mutation_reader_from_fragments(std::move(schema), permit, std::move(filtered), pr); } + flat_mutation_reader 
-make_slicing_filtering_reader(flat_mutation_reader rd, const dht::partition_range& pr, const query::partition_slice& slice) { +make_flat_mutation_reader_from_fragments(schema_ptr schema, reader_permit permit, std::deque fragments, + const dht::partition_range& pr, const query::partition_slice& query_slice) { + const auto reversed = query_slice.is_reversed(); + if (reversed) { + schema = schema->make_reversed(); + } + auto slice = reversed ? query::half_reverse_slice(*schema, query_slice) : query_slice; + + std::optional ranges_walker; + std::optional splitter; + std::deque filtered; + for (auto&& mf : fragments) { + switch (mf.mutation_fragment_kind()) { + case mutation_fragment::kind::partition_start: + ranges_walker.emplace(*schema, slice.row_ranges(*schema, mf.as_partition_start().key().key()), false); + splitter.emplace(*schema, permit, *ranges_walker); + filtered.emplace_back(std::move(mf)); + break; + case mutation_fragment::kind::static_row: + filtered.push_back(std::move(mf)); + break; + case mutation_fragment::kind::partition_end: + splitter->flush(position_in_partition::after_all_clustered_rows(), [&] (mutation_fragment mf) { + filtered.emplace_back(std::move(mf)); + }); + filtered.push_back(std::move(mf)); + break; + case mutation_fragment::kind::clustering_row: + splitter->flush(mf.position(), [&] (mutation_fragment mf) { + filtered.emplace_back(std::move(mf)); + }); + if (ranges_walker->advance_to(mf.position())) { + filtered.push_back(std::move(mf)); + } + break; + case mutation_fragment::kind::range_tombstone: + splitter->consume(std::move(mf).as_range_tombstone(), [&] (mutation_fragment mf) { + filtered.emplace_back(std::move(mf)); + }); + break; + } + } + auto rd = make_flat_mutation_reader_from_fragments(std::move(schema), permit, std::move(filtered), pr); + if (reversed) { + rd = make_reversing_reader(std::move(rd), permit.max_result_size()); + } + return rd; +} + +flat_mutation_reader +make_flat_mutation_reader_from_fragments(schema_ptr schema, 
reader_permit permit, std::deque fragments) { + return make_flat_mutation_reader_from_fragments(std::move(schema), std::move(permit), std::move(fragments), query::full_partition_range); +} + +flat_mutation_reader +make_flat_mutation_reader_from_fragments(schema_ptr schema, reader_permit permit, std::deque fragments, const dht::partition_range& pr) { class reader : public flat_mutation_reader::impl { - flat_mutation_reader _rd; + std::deque _fragments; const dht::partition_range* _pr; - const query::partition_slice* _slice; dht::ring_position_comparator _cmp; - std::optional _ranges_walker; - std::optional _splitter; + + private: + bool end_of_range() const { + return _fragments.empty() || + (_fragments.front().is_partition_start() && _pr->after(_fragments.front().as_partition_start().key(), _cmp)); + } + + void do_fast_forward_to(const dht::partition_range& pr) { + clear_buffer(); + _pr = ≺ + _fragments.erase(_fragments.begin(), std::find_if(_fragments.begin(), _fragments.end(), [this] (const mutation_fragment& mf) { + return mf.is_partition_start() && !_pr->before(mf.as_partition_start().key(), _cmp); + })); + _end_of_stream = end_of_range(); + } public: - reader(flat_mutation_reader rd, const dht::partition_range& pr, const query::partition_slice& slice) - : flat_mutation_reader::impl(rd.schema(), rd.permit()) - , _rd(std::move(rd)) - , _pr(&pr) - , _slice(&slice) - , _cmp(*_schema) { + reader(schema_ptr schema, reader_permit permit, std::deque fragments, const dht::partition_range& pr) + : flat_mutation_reader::impl(std::move(schema), std::move(permit)) + , _fragments(std::move(fragments)) + , _pr(&pr) + , _cmp(*_schema) { + do_fast_forward_to(*_pr); } - virtual future<> fill_buffer() override { - const auto consume_fn = [this] (mutation_fragment mf) { - push_mutation_fragment(std::move(mf)); - }; - - while (!is_buffer_full() && !is_end_of_stream()) { - co_await _rd.fill_buffer(); - while (!_rd.is_buffer_empty()) { - auto mf = _rd.pop_mutation_fragment(); - 
switch (mf.mutation_fragment_kind()) { - case mutation_fragment::kind::partition_start: { - auto& dk = mf.as_partition_start().key(); - if (!_pr->contains(dk, _cmp)) { - co_return co_await _rd.next_partition(); - } else { - _ranges_walker.emplace(*_schema, _slice->row_ranges(*_schema, dk.key()), false); - _splitter.emplace(*_schema, _permit, *_ranges_walker); - } - // fall-through - } - - case mutation_fragment::kind::static_row: - consume_fn(std::move(mf)); - break; - - case mutation_fragment::kind::partition_end: - _splitter->flush(position_in_partition::after_all_clustered_rows(), consume_fn); - consume_fn(std::move(mf)); - break; - - case mutation_fragment::kind::clustering_row: - _splitter->flush(mf.position(), consume_fn); - if (_ranges_walker->advance_to(mf.position())) { - consume_fn(std::move(mf)); - } - break; - - case mutation_fragment::kind::range_tombstone: - auto&& rt = mf.as_range_tombstone(); - _splitter->consume(rt, consume_fn); - break; - } - } - - _end_of_stream = _rd.is_end_of_stream(); - co_return; + while (!(_end_of_stream = end_of_range()) && !is_buffer_full()) { + push_mutation_fragment(std::move(_fragments.front())); + _fragments.pop_front(); } + return make_ready_future<>(); } - virtual future<> next_partition() override { clear_buffer_to_next_partition(); if (is_buffer_empty()) { - _end_of_stream = false; - return _rd.next_partition(); + while (!(_end_of_stream = end_of_range()) && !_fragments.front().is_partition_start()) { + _fragments.pop_front(); + } } - return make_ready_future<>(); } - - virtual future<> fast_forward_to(const dht::partition_range& pr) override { - clear_buffer(); - _end_of_stream = false; - return _rd.fast_forward_to(pr); - } - virtual future<> fast_forward_to(position_range pr) override { - forward_buffer_to(pr.start()); - _end_of_stream = false; - return _rd.fast_forward_to(std::move(pr)); + throw std::runtime_error("This reader can't be fast forwarded to another range."); + } + virtual future<> 
fast_forward_to(const dht::partition_range& pr) override { + do_fast_forward_to(pr); + return make_ready_future<>(); } - virtual future<> close() noexcept override { - return _rd.close(); + return make_ready_future<>(); } }; - - return make_flat_mutation_reader(std::move(rd), pr, slice); + return make_flat_mutation_reader(std::move(schema), std::move(permit), std::move(fragments), pr); } -/* - * This reader takes a get_next_fragment generator that produces mutation_fragment_opt which is returned by - * generating_reader. - * - */ -class generating_reader final : public flat_mutation_reader::impl { - std::function ()> _get_next_fragment; -public: - generating_reader(schema_ptr s, reader_permit permit, std::function ()> get_next_fragment) - : impl(std::move(s), std::move(permit)), _get_next_fragment(std::move(get_next_fragment)) - { } - virtual future<> fill_buffer() override { - return do_until([this] { return is_end_of_stream() || is_buffer_full(); }, [this] { - return _get_next_fragment().then([this] (mutation_fragment_opt mopt) { - if (!mopt) { - _end_of_stream = true; - } else { - push_mutation_fragment(std::move(*mopt)); - } - }); - }); - } - virtual future<> next_partition() override { - return make_exception_future<>(make_backtraced_exception_ptr()); - } - virtual future<> fast_forward_to(const dht::partition_range&) override { - return make_exception_future<>(make_backtraced_exception_ptr()); - } - virtual future<> fast_forward_to(position_range) override { - return make_exception_future<>(make_backtraced_exception_ptr()); - } - virtual future<> close() noexcept override { - return make_ready_future<>(); - } -}; - -flat_mutation_reader make_generating_reader(schema_ptr s, reader_permit permit, std::function ()> get_next_fragment) { - return make_flat_mutation_reader(std::move(s), std::move(permit), std::move(get_next_fragment)); -} - - -/* - * This reader takes a get_next_fragment generator that produces mutation_fragment_opt which is returned by - * 
generating_reader. - */ -class generating_reader_v2 final : public flat_mutation_reader_v2::impl { - std::function ()> _get_next_fragment; -public: - generating_reader_v2(schema_ptr s, reader_permit permit, std::function ()> get_next_fragment) - : impl(std::move(s), std::move(permit)), _get_next_fragment(std::move(get_next_fragment)) - { } - virtual future<> fill_buffer() override { - return do_until([this] { return is_end_of_stream() || is_buffer_full(); }, [this] { - return _get_next_fragment().then([this] (mutation_fragment_v2_opt mopt) { - if (!mopt) { - _end_of_stream = true; - } else { - push_mutation_fragment(std::move(*mopt)); - } - }); - }); - } - virtual future<> next_partition() override { - return make_exception_future<>(make_backtraced_exception_ptr()); - } - virtual future<> fast_forward_to(const dht::partition_range&) override { - return make_exception_future<>(make_backtraced_exception_ptr()); - } - virtual future<> fast_forward_to(position_range) override { - return make_exception_future<>(make_backtraced_exception_ptr()); - } - virtual future<> close() noexcept override { - return make_ready_future<>(); - } -}; - -flat_mutation_reader_v2 make_generating_reader(schema_ptr s, reader_permit permit, std::function ()> get_next_fragment) { - return make_flat_mutation_reader_v2(std::move(s), std::move(permit), std::move(get_next_fragment)); -} - -void flat_mutation_reader::do_upgrade_schema(const schema_ptr& s) { - *this = transform(std::move(*this), schema_upgrader(s)); -} - -void flat_mutation_reader::on_close_error(std::unique_ptr i, std::exception_ptr ep) noexcept { - impl* ip = i.get(); - on_internal_error_noexcept(fmr_logger, - format("Failed to close {} [{}]: permit {}: {}", typeid(*ip).name(), fmt::ptr(ip), ip->_permit.description(), ep)); -} - -invalid_mutation_fragment_stream::invalid_mutation_fragment_stream(std::runtime_error e) : std::runtime_error(std::move(e)) { -} - -static mutation_fragment_v2::kind 
to_mutation_fragment_kind_v2(mutation_fragment::kind k) { - switch (k) { - case mutation_fragment::kind::partition_start: - return mutation_fragment_v2::kind::partition_start; - case mutation_fragment::kind::static_row: - return mutation_fragment_v2::kind::static_row; - case mutation_fragment::kind::clustering_row: - return mutation_fragment_v2::kind::clustering_row; - case mutation_fragment::kind::range_tombstone: - return mutation_fragment_v2::kind::range_tombstone_change; - case mutation_fragment::kind::partition_end: - return mutation_fragment_v2::kind::partition_end; - } -} - -mutation_fragment_stream_validator::mutation_fragment_stream_validator(const ::schema& s) - : _schema(s) - , _prev_kind(mutation_fragment_v2::kind::partition_end) - , _prev_pos(position_in_partition::end_of_partition_tag_t{}) - , _prev_partition_key(dht::minimum_token(), partition_key::make_empty()) { -} - -bool mutation_fragment_stream_validator::operator()(const dht::decorated_key& dk) { - if (_prev_partition_key.less_compare(_schema, dk)) { - _prev_partition_key = dk; - return true; - } - return false; -} - -bool mutation_fragment_stream_validator::operator()(dht::token t) { - if (_prev_partition_key.token() <= t) { - _prev_partition_key._token = t; - return true; - } - return false; -} - -bool mutation_fragment_stream_validator::operator()(mutation_fragment_v2::kind kind, position_in_partition_view pos) { - if (_prev_kind == mutation_fragment_v2::kind::partition_end) { - const bool valid = (kind == mutation_fragment_v2::kind::partition_start); - if (valid) { - _prev_kind = mutation_fragment_v2::kind::partition_start; - _prev_pos = pos; - } - return valid; - } - auto cmp = position_in_partition::tri_compare(_schema); - auto res = cmp(_prev_pos, pos); - bool valid = true; - if (_prev_kind == mutation_fragment_v2::kind::range_tombstone_change) { - valid = res <= 0; - } else { - valid = res < 0; - } - if (valid) { - _prev_kind = kind; - _prev_pos = pos; - } - return valid; -} -bool 
mutation_fragment_stream_validator::operator()(mutation_fragment::kind kind, position_in_partition_view pos) { - return (*this)(to_mutation_fragment_kind_v2(kind), pos); -} - -bool mutation_fragment_stream_validator::operator()(const mutation_fragment_v2& mf) { - return (*this)(mf.mutation_fragment_kind(), mf.position()); -} -bool mutation_fragment_stream_validator::operator()(const mutation_fragment& mf) { - return (*this)(to_mutation_fragment_kind_v2(mf.mutation_fragment_kind()), mf.position()); -} - -bool mutation_fragment_stream_validator::operator()(mutation_fragment_v2::kind kind) { - bool valid = true; - switch (_prev_kind) { - case mutation_fragment_v2::kind::partition_start: - valid = kind != mutation_fragment_v2::kind::partition_start; - break; - case mutation_fragment_v2::kind::static_row: // fall-through - case mutation_fragment_v2::kind::clustering_row: // fall-through - case mutation_fragment_v2::kind::range_tombstone_change: - valid = kind != mutation_fragment_v2::kind::partition_start && - kind != mutation_fragment_v2::kind::static_row; - break; - case mutation_fragment_v2::kind::partition_end: - valid = kind == mutation_fragment_v2::kind::partition_start; - break; - } - if (valid) { - _prev_kind = kind; - } - return valid; -} -bool mutation_fragment_stream_validator::operator()(mutation_fragment::kind kind) { - return (*this)(to_mutation_fragment_kind_v2(kind)); -} - -bool mutation_fragment_stream_validator::on_end_of_stream() { - return _prev_kind == mutation_fragment_v2::kind::partition_end; -} - -void mutation_fragment_stream_validator::reset(dht::decorated_key dk) { - _prev_partition_key = dk; - _prev_pos = position_in_partition::for_partition_start(); - _prev_kind = mutation_fragment_v2::kind::partition_start; -} - -void mutation_fragment_stream_validator::reset(const mutation_fragment_v2& mf) { - _prev_pos = mf.position(); - _prev_kind = mf.mutation_fragment_kind(); -} -void mutation_fragment_stream_validator::reset(const mutation_fragment& 
mf) { - _prev_pos = mf.position(); - _prev_kind = to_mutation_fragment_kind_v2(mf.mutation_fragment_kind()); -} - -namespace { - -[[noreturn]] void on_validation_error(seastar::logger& l, const seastar::sstring& reason) { - try { - on_internal_error(l, reason); - } catch (std::runtime_error& e) { - throw invalid_mutation_fragment_stream(e); - } -} - -} - -bool mutation_fragment_stream_validating_filter::operator()(const dht::decorated_key& dk) { - if (_validation_level < mutation_fragment_stream_validation_level::token) { - return true; - } - if (_validation_level == mutation_fragment_stream_validation_level::token) { - if (_validator(dk.token())) { - return true; - } - on_validation_error(fmr_logger, format("[validator {} for {}] Unexpected token: previous {}, current {}", - static_cast(this), _name, _validator.previous_token(), dk.token())); - } else { - if (_validator(dk)) { - return true; - } - on_validation_error(fmr_logger, format("[validator {} for {}] Unexpected partition key: previous {}, current {}", - static_cast(this), _name, _validator.previous_partition_key(), dk)); - } -} - -mutation_fragment_stream_validating_filter::mutation_fragment_stream_validating_filter(sstring_view name, const schema& s, - mutation_fragment_stream_validation_level level) - : _validator(s) - , _name(format("{} ({}.{} {})", name, s.ks_name(), s.cf_name(), s.id())) - , _validation_level(level) -{ - if (fmr_logger.level() <= log_level::debug) { - std::string_view what; - switch (_validation_level) { - case mutation_fragment_stream_validation_level::partition_region: - what = "partition region"; - break; - case mutation_fragment_stream_validation_level::token: - what = "partition region and token"; - break; - case mutation_fragment_stream_validation_level::partition_key: - what = "partition region and partition key"; - break; - case mutation_fragment_stream_validation_level::clustering_key: - what = "partition region, partition key and clustering key"; - break; - } - 
fmr_logger.debug("[validator {} for {}] Will validate {} monotonicity.", static_cast(this), _name, what); - } -} - -bool mutation_fragment_stream_validating_filter::operator()(mutation_fragment_v2::kind kind, position_in_partition_view pos) { - bool valid = false; - - fmr_logger.debug("[validator {}] {}:{}", static_cast(this), kind, pos); - - if (_validation_level >= mutation_fragment_stream_validation_level::clustering_key) { - valid = _validator(kind, pos); - } else { - valid = _validator(kind); - } - - if (__builtin_expect(!valid, false)) { - if (_validation_level >= mutation_fragment_stream_validation_level::clustering_key) { - on_validation_error(fmr_logger, format("[validator {} for {}] Unexpected mutation fragment: partition key {}: previous {}:{}, current {}:{}", - static_cast(this), _name, _validator.previous_partition_key(), _validator.previous_mutation_fragment_kind(), _validator.previous_position(), kind, pos)); - } else if (_validation_level >= mutation_fragment_stream_validation_level::partition_key) { - on_validation_error(fmr_logger, format("[validator {} for {}] Unexpected mutation fragment: partition key {}: previous {}, current {}", - static_cast(this), _name, _validator.previous_partition_key(), _validator.previous_mutation_fragment_kind(), kind)); - } else { - on_validation_error(fmr_logger, format("[validator {} for {}] Unexpected mutation fragment: previous {}, current {}", - static_cast(this), _name, _validator.previous_mutation_fragment_kind(), kind)); - } - } - - return true; -} -bool mutation_fragment_stream_validating_filter::operator()(mutation_fragment::kind kind, position_in_partition_view pos) { - return (*this)(to_mutation_fragment_kind_v2(kind), pos); -} - -bool mutation_fragment_stream_validating_filter::operator()(const mutation_fragment_v2& mv) { - return (*this)(mv.mutation_fragment_kind(), mv.position()); -} -bool mutation_fragment_stream_validating_filter::operator()(const mutation_fragment& mv) { - return 
(*this)(to_mutation_fragment_kind_v2(mv.mutation_fragment_kind()), mv.position()); -} - -bool mutation_fragment_stream_validating_filter::on_end_of_partition() { - return (*this)(mutation_fragment::kind::partition_end, position_in_partition_view(position_in_partition_view::end_of_partition_tag_t())); -} - -void mutation_fragment_stream_validating_filter::on_end_of_stream() { - fmr_logger.debug("[validator {}] EOS", static_cast(this)); - if (!_validator.on_end_of_stream()) { - on_validation_error(fmr_logger, format("[validator {} for {}] Stream ended with unclosed partition: {}", static_cast(this), _name, - _validator.previous_mutation_fragment_kind())); - } -} - -flat_mutation_reader_v2& flat_mutation_reader_v2::operator=(flat_mutation_reader_v2&& o) noexcept { - if (_impl && _impl->is_close_required()) { - impl* ip = _impl.get(); - // Abort to enforce calling close() before readers are closed - // to prevent leaks and potential use-after-free due to background - // tasks left behind. - on_internal_error_noexcept(fmr_logger, format("{} [{}]: permit {}: was not closed before overwritten by move-assign", typeid(*ip).name(), fmt::ptr(ip), ip->_permit.description())); - abort(); - } - _impl = std::move(o._impl); - return *this; -} - -flat_mutation_reader_v2::~flat_mutation_reader_v2() { - if (_impl && _impl->is_close_required()) { - impl* ip = _impl.get(); - // Abort to enforce calling close() before readers are closed - // to prevent leaks and potential use-after-free due to background - // tasks left behind. 
- on_internal_error_noexcept(fmr_logger, format("{} [{}]: permit {}: was not closed before destruction", typeid(*ip).name(), fmt::ptr(ip), ip->_permit.description())); - abort(); - } -} - -static size_t compute_buffer_size(const schema& s, const flat_mutation_reader_v2::tracked_buffer& buffer) -{ - return boost::accumulate( - buffer - | boost::adaptors::transformed([&s] (const mutation_fragment_v2& mf) { - return mf.memory_usage(); - }), size_t(0) - ); -} - -void flat_mutation_reader_v2::impl::forward_buffer_to(const position_in_partition& pos) { - clear_buffer(); - _buffer_size = compute_buffer_size(*_schema, _buffer); -} - -void flat_mutation_reader_v2::impl::clear_buffer_to_next_partition() { - auto next_partition_start = std::find_if(_buffer.begin(), _buffer.end(), [] (const mutation_fragment_v2& mf) { - return mf.is_partition_start(); - }); - _buffer.erase(_buffer.begin(), next_partition_start); - - _buffer_size = compute_buffer_size(*_schema, _buffer); -} - -template -future flat_mutation_reader_v2::impl::fill_buffer_from(Source& source) { - if (source.is_buffer_empty()) { - if (source.is_end_of_stream()) { - return make_ready_future(true); - } - return source.fill_buffer().then([this, &source] { - return fill_buffer_from(source); - }); - } else { - while (!source.is_buffer_empty() && !is_buffer_full()) { - push_mutation_fragment(source.pop_mutation_fragment()); - } - return make_ready_future(source.is_end_of_stream() && source.is_buffer_empty()); - } -} - -template future flat_mutation_reader_v2::impl::fill_buffer_from(flat_mutation_reader_v2&); - -void flat_mutation_reader_v2::do_upgrade_schema(const schema_ptr& s) { - *this = transform(std::move(*this), schema_upgrader_v2(s)); -} - -future read_mutation_from_flat_mutation_reader(flat_mutation_reader_v2& r) { - return r.consume(mutation_rebuilder_v2(r.schema())); -} - -void flat_mutation_reader_v2::on_close_error(std::unique_ptr i, std::exception_ptr ep) noexcept { - impl* ip = i.get(); - 
on_internal_error_noexcept(fmr_logger, - format("Failed to close {} [{}]: permit {}: {}", typeid(*ip).name(), fmt::ptr(ip), ip->_permit.description(), ep)); -} flat_mutation_reader downgrade_to_v1(flat_mutation_reader_v2 r) { class transforming_reader : public flat_mutation_reader::impl { diff --git a/readers/nonforwardable.hh b/readers/nonforwardable.hh new file mode 100644 index 0000000000..39cdba5b13 --- /dev/null +++ b/readers/nonforwardable.hh @@ -0,0 +1,14 @@ +/* + * Copyright (C) 2022-present ScyllaDB + */ + +/* + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +#pragma once + +class flat_mutation_reader; + +flat_mutation_reader make_nonforwardable(flat_mutation_reader, bool); + diff --git a/readers/reversing.hh b/readers/reversing.hh new file mode 100644 index 0000000000..0193288a6b --- /dev/null +++ b/readers/reversing.hh @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2022-present ScyllaDB + */ + +/* + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +#pragma once +#include + +class flat_mutation_reader; + +namespace query { + struct max_result_size; + class partition_slice; +} + + +/// A reader that emits partitions in native reverse order. +/// +/// 1. The reader's schema() method will return a reversed schema (see +/// \ref schema::make_reversed()). +/// 2. Static row is still emitted first. +/// 3. Range tombstones' bounds are reversed (see \ref range_tombstone::reverse()). +/// 4. Clustered rows and range tombstones are emitted in descending order. +/// Because of 3 and 4 the guarantee that a range tombstone is emitted before +/// any mutation fragment affected by it still holds. +/// Ordering of partitions themselves remains unchanged. +/// For more details see docs/design-notes/reverse-reads.md. +/// +/// The reader's schema (returned by `schema()`) is the reverse of `original`'s schema. +/// +/// \param original the reader to be reversed. 
+/// \param max_size the maximum amount of memory the reader is allowed to use +/// for reversing and conversely the maximum size of the results. The +/// reverse reader reads entire partitions into memory, before reversing +/// them. Since partitions can be larger than the available memory, we need +/// to enforce a limit on memory consumption. When reaching the soft limit +/// a warning will be logged. When reaching the hard limit the read will be +/// aborted. +/// \param slice serves as a convenience slice storage for reads that have to +/// store an edited slice somewhere. This is common for reads that work +/// with a native-reversed slice and so have to convert the one used in the +/// query -- which is in half-reversed format. +/// +/// FIXME: reversing should be done in the sstable layer, see #1413. +flat_mutation_reader +make_reversing_reader(flat_mutation_reader original, query::max_result_size max_size, std::unique_ptr slice = {}); diff --git a/readers/slice_mutations.hh b/readers/slice_mutations.hh new file mode 100644 index 0000000000..c4d71f4c64 --- /dev/null +++ b/readers/slice_mutations.hh @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2022-present ScyllaDB + */ + +/* + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +#pragma once +#include +#include "schema_fwd.hh" + +class mutation; + +namespace query { + class partition_slice; +} + +std::vector slice_mutations(schema_ptr schema, std::vector ms, const query::partition_slice& slice); + diff --git a/repair/row_level.cc b/repair/row_level.cc index 8032f6db35..3ce0b49832 100644 --- a/repair/row_level.cc +++ b/repair/row_level.cc @@ -46,6 +46,7 @@ #include "db/batchlog_manager.hh" #include "cql3/untyped_result_set.hh" #include "idl/partition_checksum.dist.hh" +#include "readers/empty.hh" extern logging::logger rlogger; diff --git a/replica/database.cc b/replica/database.cc index bc1456ecc7..24fea58e4d 100644 --- a/replica/database.cc +++ b/replica/database.cc @@ -59,6 +59,7 @@ #include "tombstone_gc.hh" 
#include "data_dictionary/impl.hh" +#include "readers/multi_range.hh" using namespace std::chrono_literals; using namespace db; diff --git a/replica/memtable.cc b/replica/memtable.cc index 1cbb135ae6..a43844c47c 100644 --- a/replica/memtable.cc +++ b/replica/memtable.cc @@ -12,6 +12,8 @@ #include "partition_snapshot_reader.hh" #include "partition_builder.hh" #include "mutation_partition_view.hh" +#include "readers/empty_v2.hh" +#include "readers/forwardable_v2.hh" namespace replica { diff --git a/replica/table.cc b/replica/table.cc index 3213f267a9..4009994c09 100644 --- a/replica/table.cc +++ b/replica/table.cc @@ -43,6 +43,10 @@ #include #include #include "utils/error_injection.hh" +#include "readers/reversing.hh" +#include "readers/from_mutations.hh" +#include "readers/empty_v2.hh" +#include "readers/multi_range.hh" namespace replica { diff --git a/row_cache.cc b/row_cache.cc index 59cec7bbf2..8cc06fe710 100644 --- a/row_cache.cc +++ b/row_cache.cc @@ -20,6 +20,9 @@ #include "dirty_memory_manager.hh" #include "cache_flat_mutation_reader.hh" #include "real_dirty_memory_accounter.hh" +#include "readers/empty.hh" +#include "readers/forwardable.hh" +#include "readers/nonforwardable.hh" namespace cache { diff --git a/sstables/kl/reader.hh b/sstables/kl/reader.hh index 6202e74446..c81fd4c612 100644 --- a/sstables/kl/reader.hh +++ b/sstables/kl/reader.hh @@ -8,8 +8,9 @@ #pragma once -#include "flat_mutation_reader.hh" +#include "readers/flat_mutation_reader.hh" #include "sstables/progress_monitor.hh" +#include namespace sstables { namespace kl { diff --git a/sstables/mx/reader.hh b/sstables/mx/reader.hh index 6d77efd316..ee9a24f676 100644 --- a/sstables/mx/reader.hh +++ b/sstables/mx/reader.hh @@ -8,8 +8,10 @@ #pragma once -#include "flat_mutation_reader_v2.hh" +#include "readers/flat_mutation_reader_v2.hh" +#include "readers/flat_mutation_reader.hh" #include "sstables/progress_monitor.hh" +#include namespace sstables { namespace mx { diff --git 
a/sstables/sstable_mutation_reader.hh b/sstables/sstable_mutation_reader.hh index 9921b4bd80..6e2d65139b 100644 --- a/sstables/sstable_mutation_reader.hh +++ b/sstables/sstable_mutation_reader.hh @@ -22,7 +22,7 @@ #include "clustering_ranges_walker.hh" #include "binary_search.hh" #include "../dht/i_partitioner.hh" -#include "flat_mutation_reader_v2.hh" +#include "readers/flat_mutation_reader_v2.hh" #include "sstables/mx/partition_reversing_data_source.hh" namespace sstables { diff --git a/sstables/sstable_set.cc b/sstables/sstable_set.cc index 316cd66ecc..5e1e5be15b 100644 --- a/sstables/sstable_set.cc +++ b/sstables/sstable_set.cc @@ -21,6 +21,8 @@ #include "sstable_set_impl.hh" #include "replica/database.hh" +#include "readers/from_mutations.hh" +#include "readers/empty_v2.hh" namespace sstables { diff --git a/sstables/sstable_set.hh b/sstables/sstable_set.hh index 448c5c522b..c82f808556 100644 --- a/sstables/sstable_set.hh +++ b/sstables/sstable_set.hh @@ -8,12 +8,13 @@ #pragma once -#include "flat_mutation_reader.hh" -#include "flat_mutation_reader_v2.hh" +#include "readers/flat_mutation_reader.hh" +#include "readers/flat_mutation_reader_v2.hh" #include "sstables/progress_monitor.hh" #include "shared_sstable.hh" #include "dht/i_partitioner.hh" #include +#include #include namespace utils { diff --git a/sstables/sstables.cc b/sstables/sstables.cc index 72f0898ea0..176a93adea 100644 --- a/sstables/sstables.cc +++ b/sstables/sstables.cc @@ -73,6 +73,8 @@ #include "utils/bit_cast.hh" #include "utils/cached_file.hh" #include "tombstone_gc.hh" +#include "readers/reversing.hh" +#include "readers/forwardable.hh" thread_local disk_error_signal_type sstable_read_error; thread_local disk_error_signal_type sstable_write_error; diff --git a/streaming/stream_session.cc b/streaming/stream_session.cc index 62e1f7ac57..ed40b0f889 100644 --- a/streaming/stream_session.cc +++ b/streaming/stream_session.cc @@ -36,6 +36,7 @@ #include "mutation_source_metadata.hh" #include 
"streaming/stream_mutation_fragments_cmd.hh" #include "consumer.hh" +#include "readers/generating.hh" namespace streaming { diff --git a/streaming/stream_transfer_task.cc b/streaming/stream_transfer_task.cc index 558f741cd6..f232675710 100644 --- a/streaming/stream_transfer_task.cc +++ b/streaming/stream_transfer_task.cc @@ -16,7 +16,7 @@ #include "streaming/stream_reason.hh" #include "streaming/stream_mutation_fragments_cmd.hh" #include "mutation_reader.hh" -#include "flat_mutation_reader.hh" +#include "readers/flat_mutation_reader.hh" #include "mutation_fragment_stream_validator.hh" #include "frozen_mutation.hh" #include "mutation.hh" diff --git a/test/boost/flat_mutation_reader_test.cc b/test/boost/flat_mutation_reader_test.cc index 0ebc47c2db..4a01561314 100644 --- a/test/boost/flat_mutation_reader_test.cc +++ b/test/boost/flat_mutation_reader_test.cc @@ -15,7 +15,13 @@ #include "mutation.hh" #include "mutation_fragment.hh" #include "test/lib/mutation_source_test.hh" -#include "flat_mutation_reader.hh" +#include "readers/flat_mutation_reader.hh" +#include "readers/reversing.hh" +#include "readers/forwardable.hh" +#include "readers/delegating.hh" +#include "readers/multi_range.hh" +#include "readers/from_mutations.hh" +#include "readers/from_fragments.hh" #include "mutation_reader.hh" #include "schema_builder.hh" #include "replica/memtable.hh" @@ -32,6 +38,9 @@ #include "test/lib/random_schema.hh" #include +#include "readers/from_mutations_v2.hh" +#include "readers/from_fragments_v2.hh" +#include "readers/forwardable_v2.hh" struct mock_consumer { struct result { @@ -560,7 +569,7 @@ void test_flat_stream(schema_ptr s, std::vector muts, reversed_partiti return fmr.consume_in_thread(std::move(fsc)); } else { if (reversed) { - return with_closeable(make_reversing_reader(make_flat_mutation_reader(fmr), query::max_result_size(size_t(1) << 20)), + return with_closeable(make_reversing_reader(make_delegating_reader(fmr), query::max_result_size(size_t(1) << 20)), [fsc = 
std::move(fsc)] (flat_mutation_reader& reverse_reader) mutable { return reverse_reader.consume(std::move(fsc)); }).get0(); diff --git a/test/boost/frozen_mutation_test.cc b/test/boost/frozen_mutation_test.cc index 7e3c7360d6..6cd7acfc9e 100644 --- a/test/boost/frozen_mutation_test.cc +++ b/test/boost/frozen_mutation_test.cc @@ -20,6 +20,7 @@ #include "test/lib/mutation_source_test.hh" #include +#include "readers/from_mutations.hh" static schema_builder new_table() { return { "some_keyspace", "some_table" }; diff --git a/test/boost/memtable_test.cc b/test/boost/memtable_test.cc index 5188dbf90c..fdc6c2c75e 100644 --- a/test/boost/memtable_test.cc +++ b/test/boost/memtable_test.cc @@ -24,7 +24,7 @@ #include "test/lib/mutation_source_test.hh" #include "test/lib/mutation_assertions.hh" #include "test/lib/flat_mutation_reader_assertions.hh" -#include "flat_mutation_reader.hh" +#include "readers/flat_mutation_reader.hh" #include "test/lib/data_model.hh" #include "test/lib/eventually.hh" #include "test/lib/random_utils.hh" diff --git a/test/boost/multishard_combining_reader_as_mutation_source_test.cc b/test/boost/multishard_combining_reader_as_mutation_source_test.cc index 2bd322840c..c713bf4efc 100644 --- a/test/boost/multishard_combining_reader_as_mutation_source_test.cc +++ b/test/boost/multishard_combining_reader_as_mutation_source_test.cc @@ -27,6 +27,7 @@ #include "mutation_reader.hh" #include "schema_registry.hh" #include "service/priority_manager.hh" +#include "readers/forwardable_v2.hh" // It has to be a container that does not invalidate pointers static std::list keep_alive_sharder; diff --git a/test/boost/mutation_fragment_test.cc b/test/boost/mutation_fragment_test.cc index 06bb1af463..bb074b359b 100644 --- a/test/boost/mutation_fragment_test.cc +++ b/test/boost/mutation_fragment_test.cc @@ -26,6 +26,7 @@ #include "test/lib/simple_schema.hh" #include +#include "readers/from_mutations.hh" // A StreamedMutationConsumer which distributes fragments randomly into 
several mutations. class fragment_scatterer { diff --git a/test/boost/mutation_query_test.cc b/test/boost/mutation_query_test.cc index 85dee52733..c181a611ae 100644 --- a/test/boost/mutation_query_test.cc +++ b/test/boost/mutation_query_test.cc @@ -27,6 +27,7 @@ #include #include "schema_builder.hh" #include "partition_slice_builder.hh" +#include "readers/from_mutations.hh" using namespace std::literals::chrono_literals; diff --git a/test/boost/mutation_reader_test.cc b/test/boost/mutation_reader_test.cc index a468ad9744..64bef4b138 100644 --- a/test/boost/mutation_reader_test.cc +++ b/test/boost/mutation_reader_test.cc @@ -47,6 +47,12 @@ #include "mutation_rebuilder.hh" #include +#include "readers/from_mutations.hh" +#include "readers/forwardable_v2.hh" +#include "readers/forwardable.hh" +#include "readers/from_fragments_v2.hh" +#include "readers/empty.hh" +#include "readers/empty_v2.hh" static schema_ptr make_schema() { return schema_builder("ks", "cf") diff --git a/test/boost/mutation_test.cc b/test/boost/mutation_test.cc index c6b59d205f..776180ba6a 100644 --- a/test/boost/mutation_test.cc +++ b/test/boost/mutation_test.cc @@ -53,6 +53,9 @@ #include "types/user.hh" #include "concrete_types.hh" #include "mutation_rebuilder.hh" +#include "readers/from_mutations_v2.hh" +#include "readers/from_mutations.hh" +#include "readers/from_fragments_v2.hh" using namespace std::chrono_literals; diff --git a/test/boost/mutation_writer_test.cc b/test/boost/mutation_writer_test.cc index 72564c849f..43c2c4de0f 100644 --- a/test/boost/mutation_writer_test.cc +++ b/test/boost/mutation_writer_test.cc @@ -16,7 +16,8 @@ #include "mutation_fragment.hh" #include "mutation_rebuilder.hh" #include "test/lib/mutation_source_test.hh" -#include "flat_mutation_reader.hh" +#include "readers/flat_mutation_reader.hh" +#include "readers/from_mutations.hh" #include "mutation_writer/multishard_writer.hh" #include "mutation_writer/timestamp_based_splitting_writer.hh" #include 
"mutation_writer/partition_based_splitting_writer.hh" @@ -28,6 +29,9 @@ #include "test/lib/log.hh" #include +#include "readers/from_mutations_v2.hh" +#include "readers/empty_v2.hh" +#include "readers/generating_v2.hh" using namespace mutation_writer; diff --git a/test/boost/querier_cache_test.cc b/test/boost/querier_cache_test.cc index 3dce71cac1..56cde9102e 100644 --- a/test/boost/querier_cache_test.cc +++ b/test/boost/querier_cache_test.cc @@ -19,6 +19,8 @@ #include #include +#include "readers/from_mutations.hh" +#include "readers/empty_v2.hh" using namespace std::chrono_literals; diff --git a/test/boost/reader_concurrency_semaphore_test.cc b/test/boost/reader_concurrency_semaphore_test.cc index ff71112850..6f124ced57 100644 --- a/test/boost/reader_concurrency_semaphore_test.cc +++ b/test/boost/reader_concurrency_semaphore_test.cc @@ -15,6 +15,7 @@ #include #include #include +#include "readers/empty_v2.hh" SEASTAR_THREAD_TEST_CASE(test_reader_concurrency_semaphore_clear_inactive_reads) { simple_schema s; diff --git a/test/boost/row_cache_test.cc b/test/boost/row_cache_test.cc index 6d6d3e8404..193085fc97 100644 --- a/test/boost/row_cache_test.cc +++ b/test/boost/row_cache_test.cc @@ -33,6 +33,11 @@ #include "test/lib/random_utils.hh" #include +#include "readers/from_mutations.hh" +#include "readers/from_mutations_v2.hh" +#include "readers/delegating.hh" +#include "readers/delegating_v2.hh" +#include "readers/empty_v2.hh" using namespace std::chrono_literals; @@ -1308,7 +1313,6 @@ public: }); } }; - static std::vector updated_ring(std::vector& mutations) { std::vector result; for (auto&& m : mutations) { @@ -1583,7 +1587,6 @@ SEASTAR_TEST_CASE(test_cache_population_and_clear_race) { }); } - SEASTAR_TEST_CASE(test_mvcc) { return seastar::async([] { auto test = [&] (const mutation& m1, const mutation& m2, bool with_active_memtable_reader) { diff --git a/test/boost/sstable_compaction_test.cc b/test/boost/sstable_compaction_test.cc index a03af14f04..bcc9594adc 100644 
--- a/test/boost/sstable_compaction_test.cc +++ b/test/boost/sstable_compaction_test.cc @@ -67,6 +67,8 @@ #include "test/lib/reader_concurrency_semaphore.hh" #include "test/lib/sstable_utils.hh" #include "test/lib/random_utils.hh" +#include "readers/from_mutations_v2.hh" +#include "readers/from_fragments_v2.hh" namespace fs = std::filesystem; diff --git a/test/boost/sstable_datafile_test.cc b/test/boost/sstable_datafile_test.cc index aa84caf1b1..679744b1e6 100644 --- a/test/boost/sstable_datafile_test.cc +++ b/test/boost/sstable_datafile_test.cc @@ -59,6 +59,8 @@ #include "test/lib/reader_concurrency_semaphore.hh" #include "test/lib/sstable_utils.hh" #include "test/lib/random_utils.hh" +#include "readers/from_mutations_v2.hh" +#include "readers/from_fragments_v2.hh" namespace fs = std::filesystem; diff --git a/test/boost/sstable_set_test.cc b/test/boost/sstable_set_test.cc index 8e50ac6b4f..727fed84ff 100644 --- a/test/boost/sstable_set_test.cc +++ b/test/boost/sstable_set_test.cc @@ -13,6 +13,7 @@ #include "sstables/sstable_set.hh" #include "sstables/sstables.hh" #include "test/lib/simple_schema.hh" +#include "readers/from_mutations_v2.hh" static sstables::sstable_set make_sstable_set(schema_ptr schema, lw_shared_ptr all = {}, bool use_level_metadata = true) { return sstables::sstable_set(std::make_unique(schema, std::move(all), use_level_metadata), schema); diff --git a/test/boost/view_build_test.cc b/test/boost/view_build_test.cc index d9583f3820..e23cf2303c 100644 --- a/test/boost/view_build_test.cc +++ b/test/boost/view_build_test.cc @@ -31,6 +31,8 @@ #include "test/lib/random_utils.hh" #include "utils/ranges.hh" +#include "readers/from_mutations_v2.hh" + using namespace std::literals::chrono_literals; schema_ptr test_table_schema() { diff --git a/test/lib/flat_mutation_reader_assertions.hh b/test/lib/flat_mutation_reader_assertions.hh index 9307386b4e..ea1cdf7225 100644 --- a/test/lib/flat_mutation_reader_assertions.hh +++ 
b/test/lib/flat_mutation_reader_assertions.hh @@ -10,7 +10,8 @@ #include #include -#include "flat_mutation_reader_v2.hh" +#include "readers/flat_mutation_reader.hh" +#include "readers/flat_mutation_reader_v2.hh" #include "mutation_assertions.hh" #include "schema.hh" #include "test/lib/log.hh" diff --git a/test/lib/mutation_source_test.cc b/test/lib/mutation_source_test.cc index db1b850653..6db7594195 100644 --- a/test/lib/mutation_source_test.cc +++ b/test/lib/mutation_source_test.cc @@ -14,7 +14,7 @@ #include "counters.hh" #include "mutation_rebuilder.hh" #include "test/lib/simple_schema.hh" -#include "flat_mutation_reader.hh" +#include "readers/flat_mutation_reader.hh" #include "test/lib/flat_mutation_reader_assertions.hh" #include "mutation_query.hh" #include "mutation_rebuilder.hh" diff --git a/test/lib/normalizing_reader.hh b/test/lib/normalizing_reader.hh index 4190c9a40e..00c068f50b 100644 --- a/test/lib/normalizing_reader.hh +++ b/test/lib/normalizing_reader.hh @@ -10,7 +10,7 @@ #include "mutation_reader.hh" #include -#include "flat_mutation_reader.hh" +#include "readers/flat_mutation_reader.hh" /* * A helper class that wraps another flat_mutation_reader diff --git a/test/manual/enormous_table_scan_test.cc b/test/manual/enormous_table_scan_test.cc index e1a23ce5ba..1a2c42a8e2 100644 --- a/test/manual/enormous_table_scan_test.cc +++ b/test/manual/enormous_table_scan_test.cc @@ -28,6 +28,7 @@ #include "range.hh" #include "sstables/sstables.hh" #include "schema_builder.hh" +#include "readers/forwardable.hh" class enormous_table_reader final : public flat_mutation_reader::impl { // Reader for a table with 4.5 billion rows, all with partition key 0 and an incrementing clustering key diff --git a/test/perf/perf_mutation_readers.cc b/test/perf/perf_mutation_readers.cc index bea7f0f797..6a14ae6066 100644 --- a/test/perf/perf_mutation_readers.cc +++ b/test/perf/perf_mutation_readers.cc @@ -17,7 +17,9 @@ #include "test/perf/perf.hh" #include "mutation_reader.hh" 
-#include "flat_mutation_reader.hh" +#include "readers/flat_mutation_reader.hh" +#include "readers/from_mutations.hh" +#include "readers/empty_v2.hh" #include "replica/memtable.hh" namespace tests {