cql, view: rename and explain bytes_with_action

The structure "bytes_with_action" was very hard to understand because of
its mysterious and general-sounding name, and no comments.

In this patch I add a large comment explaining its purpose, and rename
it to a more suitable name, view_key_and_action, which suggests that
each such object is about one view key (where to add a view row), and
an additional "action" that we need to take beyond adding the view row.

This is the best I can do to make this code easier to understand without
completely reorganizing it.

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
This commit is contained in:
Nadav Har'El
2022-07-20 15:26:06 +03:00
parent 8b00c91c13
commit 5d556115a1
4 changed files with 62 additions and 38 deletions

View File

@@ -17,7 +17,7 @@ struct atomic_cell_view;
struct tombstone;
namespace db::view {
struct bytes_with_action;
struct view_key_and_action;
}
class column_computation;
@@ -96,6 +96,15 @@ public:
virtual bytes compute_value(const schema& schema, const partition_key& key) const override;
};
/*
* collection_column_computation is used for a secondary index on a collection
* column. In this case we don't have a single value to compute, but rather we
* want to return multiple values (e.g., all the keys in the collection).
* So this class does not implement the base class's compute_value() -
* instead it implements a new method compute_collection_values(), which
* can return multiple values. This new method is currently called only from
* the materialized-view code which uses collection_column_computation.
*/
class collection_column_computation final : public column_computation {
enum class kind {
keys,
@@ -132,5 +141,5 @@ public:
return true;
}
std::vector<db::view::bytes_with_action> compute_values_with_action(const schema& schema, const partition_key& key, const clustering_row& row, const std::optional<clustering_row>& existing) const;
std::vector<db::view::view_key_and_action> compute_values_with_action(const schema& schema, const partition_key& key, const clustering_row& row, const std::optional<clustering_row>& existing) const;
};

View File

@@ -491,21 +491,23 @@ row_marker view_updates::compute_row_marker(const clustering_row& base_row) cons
namespace {
struct bytes_view_with_action {
managed_bytes_view _bytes_view;
bytes_with_action::action _action;
bytes_view_with_action(managed_bytes_view view, bytes_with_action::action action)
: _bytes_view(view)
// The following struct is identical to view_key_with_action, except the key
// is stored as a managed_bytes_view instead of bytes.
struct view_managed_key_view_and_action {
managed_bytes_view _key_view;
view_key_and_action::action _action;
view_managed_key_view_and_action(managed_bytes_view key_view, view_key_and_action::action action)
: _key_view(key_view)
, _action(action)
{}
bytes_view_with_action(managed_bytes_view view)
: _bytes_view(view)
view_managed_key_view_and_action(managed_bytes_view key_view)
: _key_view(key_view)
{}
bytes_view_with_action(bytes_with_action&& bwa, std::deque<bytes>& linearized_values)
: bytes_view_with_action(managed_bytes_view(linearized_values.emplace_back(std::move(bwa._view))), bwa._action)
view_managed_key_view_and_action(view_key_and_action&& bwa, std::deque<bytes>& linearized_values)
: view_managed_key_view_and_action(managed_bytes_view(linearized_values.emplace_back(std::move(bwa._key_bytes))), bwa._action)
{}
static managed_bytes_view get_view(const bytes_view_with_action& bvwa) {
return bvwa._bytes_view;
static managed_bytes_view get_key_view(const view_managed_key_view_and_action& bvwa) {
return bvwa._key_view;
}
};
@@ -537,7 +539,7 @@ public:
, _existing(existing)
{}
using vector_type = utils::small_vector<bytes_view_with_action, 1>;
using vector_type = utils::small_vector<view_managed_key_view_and_action, 1>;
vector_type operator()(const column_definition& cdef) {
column_position++;
@@ -614,8 +616,8 @@ view_updates::get_view_rows(const partition_key& base_key, const clustering_row&
std::vector<view_updates::view_row_entry> ret;
auto compute_row = [&]<typename Range>(Range&& pk, Range&& ck) {
partition_key pkey = partition_key::from_range(boost::adaptors::transform(pk, bytes_view_with_action::get_view));
clustering_key ckey = clustering_key::from_range(boost::adaptors::transform(ck, bytes_view_with_action::get_view));
partition_key pkey = partition_key::from_range(boost::adaptors::transform(pk, view_managed_key_view_and_action::get_key_view));
clustering_key ckey = clustering_key::from_range(boost::adaptors::transform(ck, view_managed_key_view_and_action::get_key_view));
auto action = (action_column < pk.size() ? pk[action_column] : ck[action_column - pk.size()])._action;
mutation_partition& partition = partition_for(std::move(pkey));
ret.push_back({&partition.clustered_row(*_view, std::move(ckey)), action});
@@ -623,9 +625,9 @@ view_updates::get_view_rows(const partition_key& base_key, const clustering_row&
if (had_multiple_values_in_pk) {
// cartesian_product expects std::vector<std::vector<>>, while we have std::vector<small_vector>.
std::vector<std::vector<bytes_view_with_action>> pk_elems_, ck_elems_;
std::vector<std::vector<view_managed_key_view_and_action>> pk_elems_, ck_elems_;
auto std_vector_from_small_vector = boost::adaptors::transformed([](const auto& vector) {
return std::vector<bytes_view_with_action>{vector.begin(), vector.end()};
return std::vector<view_managed_key_view_and_action>{vector.begin(), vector.end()};
});
boost::copy(pk_elems | std_vector_from_small_vector, std::back_inserter(pk_elems_));
boost::copy(ck_elems | std_vector_from_small_vector, std::back_inserter(ck_elems_));
@@ -643,7 +645,7 @@ view_updates::get_view_rows(const partition_key& base_key, const clustering_row&
ck_size = cartesian_product_size(ck_elems_);
on_internal_error(vlogger, format("Computed sizes of possible partition keys and clustering keys don't match: {} != {}", pk_size, ck_size));
};
for (std::vector<bytes_view_with_action>& pk : cartesian_product_pk) {
for (std::vector<view_managed_key_view_and_action>& pk : cartesian_product_pk) {
if (ck_it == cartesian_product_ck.end()) {
throw_length_error();
}
@@ -654,8 +656,8 @@ view_updates::get_view_rows(const partition_key& base_key, const clustering_row&
throw_length_error();
}
} else {
for (std::vector<bytes_view_with_action>& pk : cartesian_product_pk) {
for (std::vector<bytes_view_with_action>& ck : cartesian_product_ck) {
for (std::vector<view_managed_key_view_and_action>& pk : cartesian_product_pk) {
for (std::vector<view_managed_key_view_and_action>& ck : cartesian_product_ck) {
compute_row(pk, ck);
}
}
@@ -860,7 +862,7 @@ void view_updates::do_delete_old_entry(const partition_key& base_key, const clus
for (const auto& [r, action] : view_rows) {
const auto& col_ids = _base_info->base_non_pk_columns_in_view_pk();
if (_view_info.has_computed_column_depending_on_base_non_primary_key()) {
if (auto ts_tag = std::get_if<bytes_with_action::shadowable_tombstone_tag>(&action)) {
if (auto ts_tag = std::get_if<view_key_and_action::shadowable_tombstone_tag>(&action)) {
r->apply(ts_tag->into_shadowable_tombstone(now));
}
} else if (!col_ids.empty()) {
@@ -1006,8 +1008,8 @@ void view_updates::update_entry_for_computed_column(
struct visitor {
deletable_row* row;
gc_clock::time_point now;
void operator()(bytes_with_action::no_action) {}
void operator()(bytes_with_action::shadowable_tombstone_tag t) {
void operator()(view_key_and_action::no_action) {}
void operator()(view_key_and_action::shadowable_tombstone_tag t) {
row->apply(t.into_shadowable_tombstone(now));
}
void operator()(row_marker rm) {

View File

@@ -123,12 +123,24 @@ bool matches_view_filter(const schema& base, const view_info& view, const partit
bool clustering_prefix_matches(const schema& base, const partition_key& key, const clustering_key_prefix& ck);
/**
* The liveness of rows resulting from an update to a table with an index over collection column
* depends on the liveness of the collection cells. bytes_with_action is used to return this information
* from the function computing the result for a particular computed column.
/*
* When a base-table update modifies a value in a materialized view's key
* key column, Scylla needs to create a new view row. When indexing a
* collection - Scylla needs to add multiple almost-identical rows with just
* a different key. Scylla may also need to take additional "actions" for each
* of those rows - namely deleting an old row or adding a row marker.
*
* So the following struct view_key_and_action holds one such row key and
* one action. The action can be:
* 1. "no_action" - Do nothing beyond adding the view row under the given
* key. The row's key is given, but its other columns are derived from
* the base table's existing row and and the update mutation..
* 2. a row_marker - also add a CQL row marker, to allow a view row to live
* even if there is nothing in it besides the key.
* 3. a (shadowable) tombstone, to remove and old view row that this one
* replaces.
*/
struct bytes_with_action {
struct view_key_and_action {
struct no_action {};
struct shadowable_tombstone_tag {
api::timestamp_type ts;
@@ -138,14 +150,14 @@ struct bytes_with_action {
};
using action = std::variant<no_action, row_marker, shadowable_tombstone_tag>;
bytes _view;
bytes _key_bytes;
action _action = no_action{};
bytes_with_action(bytes view)
: _view(std::move(view))
view_key_and_action(bytes key_bytes)
: _key_bytes(std::move(key_bytes))
{}
bytes_with_action(bytes view, action action)
: _view(std::move(view))
view_key_and_action(bytes key_bytes, action action)
: _key_bytes(std::move(key_bytes))
, _action(action)
{}
@@ -178,7 +190,7 @@ private:
row_marker compute_row_marker(const clustering_row& base_row) const;
struct view_row_entry {
deletable_row* _row;
bytes_with_action::action _action;
view_key_and_action::action _action;
};
std::vector<view_row_entry> get_view_rows(const partition_key& base_key, const clustering_row& update, const std::optional<clustering_row>& existing);
bool can_skip_view_updates(const clustering_row& update, const clustering_row& existing) const;

View File

@@ -8,6 +8,7 @@
#include <seastar/core/on_internal_error.hh>
#include <map>
#include "db/view/view.hh"
#include "timestamp.hh"
#include "utils/UUID_gen.hh"
#include "cql3/column_identifier.hh"
@@ -1841,7 +1842,7 @@ bytes collection_column_computation::compute_value(const schema&, const partitio
throw std::runtime_error(fmt::format("{}: not supported", __PRETTY_FUNCTION__));
}
std::vector<db::view::bytes_with_action> collection_column_computation::compute_values_with_action(const schema& schema, const partition_key& key, const clustering_row& update, const std::optional<clustering_row>& existing) const {
std::vector<db::view::view_key_and_action> collection_column_computation::compute_values_with_action(const schema& schema, const partition_key& key, const clustering_row& update, const std::optional<clustering_row>& existing) const {
using collection_kv = std::pair<bytes_view, atomic_cell_view>;
auto serialize_cell = [_kind = _kind](const collection_kv& kv) -> bytes {
using kind = collection_column_computation::kind;
@@ -1857,7 +1858,7 @@ std::vector<db::view::bytes_with_action> collection_column_computation::compute_
}
};
std::vector<db::view::bytes_with_action> ret;
std::vector<db::view::view_key_and_action> ret;
auto compute_row_marker = [] (auto&& cell) -> row_marker {
return cell.is_live_and_has_ttl() ? row_marker(cell.timestamp(), cell.ttl(), cell.expiry()) : row_marker(cell.timestamp());
@@ -1877,7 +1878,7 @@ std::vector<db::view::bytes_with_action> collection_column_computation::compute_
}
operation_ts -= 1;
if (existing && existing->second.is_live()) {
db::view::bytes_with_action::shadowable_tombstone_tag tag{operation_ts};
db::view::view_key_and_action::shadowable_tombstone_tag tag{operation_ts};
ret.push_back({serialize_cell(*existing), {tag}});
}
};