From 5d556115a180225e65eff6fa75d3fed974de66ea Mon Sep 17 00:00:00 2001 From: Nadav Har'El Date: Wed, 20 Jul 2022 15:26:06 +0300 Subject: [PATCH] cql, view: rename and explain bytes_with_action The structure "bytes_with_action" was very hard to understand because of its mysterious and general-sounding name, and no comments. In this patch I add a large comment explaining its purpose, and rename it to a more suitable name, view_key_and_action, which suggests that each such object is about one view key (where to add a view row), and an additional "action" that we need to take beyond adding the view row. This is the best I can do to make this code easier to understand without completely reorganizing it. Signed-off-by: Nadav Har'El --- column_computation.hh | 13 ++++++++++-- db/view/view.cc | 46 ++++++++++++++++++++++--------------------- db/view/view.hh | 34 +++++++++++++++++++++----------- schema.cc | 7 ++++--- 4 files changed, 62 insertions(+), 38 deletions(-) diff --git a/column_computation.hh b/column_computation.hh index f1c92f823d..70ce8d9f58 100644 --- a/column_computation.hh +++ b/column_computation.hh @@ -17,7 +17,7 @@ struct atomic_cell_view; struct tombstone; namespace db::view { -struct bytes_with_action; +struct view_key_and_action; } class column_computation; @@ -96,6 +96,15 @@ public: virtual bytes compute_value(const schema& schema, const partition_key& key) const override; }; +/* + * collection_column_computation is used for a secondary index on a collection + * column. In this case we don't have a single value to compute, but rather we + * want to return multiple values (e.g., all the keys in the collection). + * So this class does not implement the base class's compute_value() - + * instead it implements a new method compute_collection_values(), which + * can return multiple values. This new method is currently called only from + * the materialized-view code which uses collection_column_computation. + */ class collection_column_computation final : public column_computation { enum class kind { keys, @@ -132,5 +141,5 @@ public: return true; } - std::vector compute_values_with_action(const schema& schema, const partition_key& key, const clustering_row& row, const std::optional& existing) const; + std::vector compute_values_with_action(const schema& schema, const partition_key& key, const clustering_row& row, const std::optional& existing) const; }; diff --git a/db/view/view.cc b/db/view/view.cc index e7ada73b92..33eaa085af 100644 --- a/db/view/view.cc +++ b/db/view/view.cc @@ -491,21 +491,23 @@ row_marker view_updates::compute_row_marker(const clustering_row& base_row) cons namespace { -struct bytes_view_with_action { - managed_bytes_view _bytes_view; - bytes_with_action::action _action; - bytes_view_with_action(managed_bytes_view view, bytes_with_action::action action) - : _bytes_view(view) +// The following struct is identical to view_key_with_action, except the key +// is stored as a managed_bytes_view instead of bytes. +struct view_managed_key_view_and_action { + managed_bytes_view _key_view; + view_key_and_action::action _action; + view_managed_key_view_and_action(managed_bytes_view key_view, view_key_and_action::action action) + : _key_view(key_view) , _action(action) {} - bytes_view_with_action(managed_bytes_view view) - : _bytes_view(view) + view_managed_key_view_and_action(managed_bytes_view key_view) + : _key_view(key_view) {} - bytes_view_with_action(bytes_with_action&& bwa, std::deque& linearized_values) - : bytes_view_with_action(managed_bytes_view(linearized_values.emplace_back(std::move(bwa._view))), bwa._action) + view_managed_key_view_and_action(view_key_and_action&& bwa, std::deque& linearized_values) + : view_managed_key_view_and_action(managed_bytes_view(linearized_values.emplace_back(std::move(bwa._key_bytes))), bwa._action) {} - static managed_bytes_view get_view(const bytes_view_with_action& bvwa) { - return bvwa._bytes_view; + static managed_bytes_view get_key_view(const view_managed_key_view_and_action& bvwa) { + return bvwa._key_view; } }; @@ -537,7 +539,7 @@ public: , _existing(existing) {} - using vector_type = utils::small_vector; + using vector_type = utils::small_vector; vector_type operator()(const column_definition& cdef) { column_position++; @@ -614,8 +616,8 @@ view_updates::get_view_rows(const partition_key& base_key, const clustering_row& std::vector ret; auto compute_row = [&](Range&& pk, Range&& ck) { - partition_key pkey = partition_key::from_range(boost::adaptors::transform(pk, bytes_view_with_action::get_view)); - clustering_key ckey = clustering_key::from_range(boost::adaptors::transform(ck, bytes_view_with_action::get_view)); + partition_key pkey = partition_key::from_range(boost::adaptors::transform(pk, view_managed_key_view_and_action::get_key_view)); + clustering_key ckey = clustering_key::from_range(boost::adaptors::transform(ck, view_managed_key_view_and_action::get_key_view)); auto action = (action_column < pk.size() ? pk[action_column] : ck[action_column - pk.size()])._action; mutation_partition& partition = partition_for(std::move(pkey)); ret.push_back({&partition.clustered_row(*_view, std::move(ckey)), action}); @@ -623,9 +625,9 @@ view_updates::get_view_rows(const partition_key& base_key, const clustering_row& if (had_multiple_values_in_pk) { // cartesian_product expects std::vector>, while we have std::vector. - std::vector> pk_elems_, ck_elems_; + std::vector> pk_elems_, ck_elems_; auto std_vector_from_small_vector = boost::adaptors::transformed([](const auto& vector) { - return std::vector{vector.begin(), vector.end()}; + return std::vector{vector.begin(), vector.end()}; }); boost::copy(pk_elems | std_vector_from_small_vector, std::back_inserter(pk_elems_)); boost::copy(ck_elems | std_vector_from_small_vector, std::back_inserter(ck_elems_)); @@ -643,7 +645,7 @@ view_updates::get_view_rows(const partition_key& base_key, const clustering_row& ck_size = cartesian_product_size(ck_elems_); on_internal_error(vlogger, format("Computed sizes of possible partition keys and clustering keys don't match: {} != {}", pk_size, ck_size)); }; - for (std::vector& pk : cartesian_product_pk) { + for (std::vector& pk : cartesian_product_pk) { if (ck_it == cartesian_product_ck.end()) { throw_length_error(); } @@ -654,8 +656,8 @@ view_updates::get_view_rows(const partition_key& base_key, const clustering_row& throw_length_error(); } } else { - for (std::vector& pk : cartesian_product_pk) { - for (std::vector& ck : cartesian_product_ck) { + for (std::vector& pk : cartesian_product_pk) { + for (std::vector& ck : cartesian_product_ck) { compute_row(pk, ck); } } @@ -860,7 +862,7 @@ void view_updates::do_delete_old_entry(const partition_key& base_key, const clus for (const auto& [r, action] : view_rows) { const auto& col_ids = _base_info->base_non_pk_columns_in_view_pk(); if (_view_info.has_computed_column_depending_on_base_non_primary_key()) { - if (auto ts_tag = std::get_if(&action)) { + if (auto ts_tag = std::get_if(&action)) { r->apply(ts_tag->into_shadowable_tombstone(now)); } } else if (!col_ids.empty()) { @@ -1006,8 +1008,8 @@ void view_updates::update_entry_for_computed_column( struct visitor { deletable_row* row; gc_clock::time_point now; - void operator()(bytes_with_action::no_action) {} - void operator()(bytes_with_action::shadowable_tombstone_tag t) { + void operator()(view_key_and_action::no_action) {} + void operator()(view_key_and_action::shadowable_tombstone_tag t) { row->apply(t.into_shadowable_tombstone(now)); } void operator()(row_marker rm) { diff --git a/db/view/view.hh b/db/view/view.hh index 7f0ecef308..578df1fcf3 100644 --- a/db/view/view.hh +++ b/db/view/view.hh @@ -123,12 +123,24 @@ bool matches_view_filter(const schema& base, const view_info& view, const partit bool clustering_prefix_matches(const schema& base, const partition_key& key, const clustering_key_prefix& ck); -/** - * The liveness of rows resulting from an update to a table with an index over collection column - * depends on the liveness of the collection cells. bytes_with_action is used to return this information - * from the function computing the result for a particular computed column. +/* + * When a base-table update modifies a value in a materialized view's key + * key column, Scylla needs to create a new view row. When indexing a + * collection - Scylla needs to add multiple almost-identical rows with just + * a different key. Scylla may also need to take additional "actions" for each + * of those rows - namely deleting an old row or adding a row marker. + * + * So the following struct view_key_and_action holds one such row key and + * one action. The action can be: + * 1. "no_action" - Do nothing beyond adding the view row under the given + * key. The row's key is given, but its other columns are derived from + * the base table's existing row and and the update mutation.. + * 2. a row_marker - also add a CQL row marker, to allow a view row to live + * even if there is nothing in it besides the key. + * 3. a (shadowable) tombstone, to remove and old view row that this one + * replaces. */ -struct bytes_with_action { +struct view_key_and_action { struct no_action {}; struct shadowable_tombstone_tag { api::timestamp_type ts; @@ -138,14 +150,14 @@ struct bytes_with_action { }; using action = std::variant; - bytes _view; + bytes _key_bytes; action _action = no_action{}; - bytes_with_action(bytes view) - : _view(std::move(view)) + view_key_and_action(bytes key_bytes) + : _key_bytes(std::move(key_bytes)) {} - bytes_with_action(bytes view, action action) - : _view(std::move(view)) + view_key_and_action(bytes key_bytes, action action) + : _key_bytes(std::move(key_bytes)) , _action(action) {} @@ -178,7 +190,7 @@ private: row_marker compute_row_marker(const clustering_row& base_row) const; struct view_row_entry { deletable_row* _row; - bytes_with_action::action _action; + view_key_and_action::action _action; }; std::vector get_view_rows(const partition_key& base_key, const clustering_row& update, const std::optional& existing); bool can_skip_view_updates(const clustering_row& update, const clustering_row& existing) const; diff --git a/schema.cc b/schema.cc index 94dead3e9d..8d1bdb34f4 100644 --- a/schema.cc +++ b/schema.cc @@ -8,6 +8,7 @@ #include #include +#include "db/view/view.hh" #include "timestamp.hh" #include "utils/UUID_gen.hh" #include "cql3/column_identifier.hh" @@ -1841,7 +1842,7 @@ bytes collection_column_computation::compute_value(const schema&, const partitio throw std::runtime_error(fmt::format("{}: not supported", __PRETTY_FUNCTION__)); } -std::vector collection_column_computation::compute_values_with_action(const schema& schema, const partition_key& key, const clustering_row& update, const std::optional& existing) const { +std::vector collection_column_computation::compute_values_with_action(const schema& schema, const partition_key& key, const clustering_row& update, const std::optional& existing) const { using collection_kv = std::pair; auto serialize_cell = [_kind = _kind](const collection_kv& kv) -> bytes { using kind = collection_column_computation::kind; @@ -1857,7 +1858,7 @@ std::vector collection_column_computation::compute_ } }; - std::vector ret; + std::vector ret; auto compute_row_marker = [] (auto&& cell) -> row_marker { return cell.is_live_and_has_ttl() ? row_marker(cell.timestamp(), cell.ttl(), cell.expiry()) : row_marker(cell.timestamp()); @@ -1877,7 +1878,7 @@ std::vector collection_column_computation::compute_ } operation_ts -= 1; if (existing && existing->second.is_live()) { - db::view::bytes_with_action::shadowable_tombstone_tag tag{operation_ts}; + db::view::view_key_and_action::shadowable_tombstone_tag tag{operation_ts}; ret.push_back({serialize_cell(*existing), {tag}}); } };