schema: add computed info to column definition
Some columns may hold values that are not provided by the user, but are instead computed from other columns. A current example is the token column used in secondary indexes to provide proper ordering. To avoid hardcoding special cases in the execution stage, optional additional information about computed columns is stored in the column definition.
This commit is contained in:
54
column_computation.hh
Normal file
54
column_computation.hh
Normal file
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
* Copyright (C) 2019 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "bytes.hh"
|
||||
|
||||
class schema;
|
||||
class partition_key;
|
||||
class clustering_row;
|
||||
|
||||
class column_computation;
|
||||
using column_computation_ptr = std::unique_ptr<column_computation>;
|
||||
|
||||
/*
|
||||
* Column computation represents a computation performed in order to obtain a value for a computed column.
|
||||
* A description of computed columns is also available in docs/system_schema_keyspace.md. They hold values
|
||||
* not provided directly by the user, but rather computed: from other column values and possibly other sources.
|
||||
* This class is able to serialize/deserialize column computations and perform the computation itself,
|
||||
* based on given schema, partition key and clustering row. Responsibility for providing enough data
|
||||
* in the clustering row in order for computation to succeed belongs to the caller. In particular,
|
||||
* generating a value might involve performing a read-before-write if the computation is performed
|
||||
* on more values than are present in the update request.
|
||||
*/
|
||||
class column_computation {
|
||||
public:
|
||||
virtual ~column_computation() = default;
|
||||
|
||||
static column_computation_ptr deserialize(bytes_view raw);
|
||||
static column_computation_ptr deserialize(const Json::Value& json);
|
||||
|
||||
virtual column_computation_ptr clone() const = 0;
|
||||
|
||||
virtual bytes serialize() const = 0;
|
||||
virtual bytes_opt compute_value(const schema& schema, const partition_key& key, const clustering_row& row) const = 0;
|
||||
};
|
||||
20
schema.cc
20
schema.cc
@@ -497,12 +497,13 @@ sstring index_metadata::get_default_index_name(const sstring& cf_name,
|
||||
return cf_name + "_idx";
|
||||
}
|
||||
|
||||
column_definition::column_definition(bytes name, data_type type, column_kind kind, column_id component_index, column_view_virtual is_view_virtual, api::timestamp_type dropped_at)
|
||||
column_definition::column_definition(bytes name, data_type type, column_kind kind, column_id component_index, column_view_virtual is_view_virtual, column_computation_ptr computation, api::timestamp_type dropped_at)
|
||||
: _name(std::move(name))
|
||||
, _dropped_at(dropped_at)
|
||||
, _is_atomic(type->is_atomic())
|
||||
, _is_counter(type->is_counter())
|
||||
, _is_view_virtual(is_view_virtual)
|
||||
, _computation(std::move(computation))
|
||||
, type(std::move(type))
|
||||
, id(component_index)
|
||||
, kind(kind)
|
||||
@@ -516,6 +517,9 @@ std::ostream& operator<<(std::ostream& os, const column_definition& cd) {
|
||||
if (cd.is_view_virtual()) {
|
||||
os << ", view_virtual";
|
||||
}
|
||||
if (cd.is_computed()) {
|
||||
os << ", computed:" << cd.get_computation().serialize();
|
||||
}
|
||||
os << ", componentIndex=" << (cd.has_component_index() ? std::to_string(cd.component_index()) : "null");
|
||||
os << ", droppedAt=" << cd._dropped_at;
|
||||
os << "}";
|
||||
@@ -701,7 +705,7 @@ column_definition& schema_builder::find_column(const cql3::column_identifier& c)
|
||||
}
|
||||
|
||||
schema_builder& schema_builder::with_column(const column_definition& c) {
|
||||
return with_column(bytes(c.name()), data_type(c.type), column_kind(c.kind), c.position(), c.view_virtual());
|
||||
return with_column(bytes(c.name()), data_type(c.type), column_kind(c.kind), c.position(), c.view_virtual(), c.get_computation_ptr());
|
||||
}
|
||||
|
||||
schema_builder& schema_builder::with_column(bytes name, data_type type, column_kind kind, column_view_virtual is_view_virtual) {
|
||||
@@ -709,8 +713,8 @@ schema_builder& schema_builder::with_column(bytes name, data_type type, column_k
|
||||
return with_column(name, type, kind, 0, is_view_virtual);
|
||||
}
|
||||
|
||||
schema_builder& schema_builder::with_column(bytes name, data_type type, column_kind kind, column_id component_index, column_view_virtual is_view_virtual) {
|
||||
_raw._columns.emplace_back(name, type, kind, component_index, is_view_virtual);
|
||||
schema_builder& schema_builder::with_column(bytes name, data_type type, column_kind kind, column_id component_index, column_view_virtual is_view_virtual, column_computation_ptr computation) {
|
||||
_raw._columns.emplace_back(name, type, kind, component_index, is_view_virtual, std::move(computation));
|
||||
if (type->is_multi_cell()) {
|
||||
with_collection(name, type);
|
||||
} else if (type->is_counter()) {
|
||||
@@ -719,6 +723,10 @@ schema_builder& schema_builder::with_column(bytes name, data_type type, column_k
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Adds a computed column: a column with the given name, type and kind which carries
// the given computation. The column is registered with component index 0 and is not view-virtual.
// Returns *this, as forwarded from with_column(), to allow chaining.
schema_builder& schema_builder::with_computed_column(bytes name, data_type type, column_kind kind, column_computation_ptr computation) {
    // name and type are taken by value and not used afterwards, so hand them over
    // to with_column() (which also takes them by value) instead of copying them again.
    return with_column(std::move(name), std::move(type), kind, 0, column_view_virtual::no, std::move(computation));
}
|
||||
|
||||
schema_builder& schema_builder::remove_column(bytes name)
|
||||
{
|
||||
auto it = boost::range::find_if(_raw._columns, [&] (auto& column) {
|
||||
@@ -1244,6 +1252,10 @@ raw_view_info::raw_view_info(utils::UUID base_id, sstring base_name, bool includ
|
||||
, _where_clause(where_clause)
|
||||
{ }
|
||||
|
||||
// Deserializes a column computation from its raw byte representation.
// As written, the body rejects every input: it unconditionally throws
// std::runtime_error and never inspects `raw`.
column_computation_ptr column_computation::deserialize(bytes_view raw) {
    throw std::runtime_error{"Incorrect column computation value"};
}
|
||||
|
||||
bool operator==(const raw_view_info& x, const raw_view_info& y) {
|
||||
return x._base_id == y._base_id
|
||||
&& x._base_name == y._base_name
|
||||
|
||||
40
schema.hh
40
schema.hh
@@ -39,6 +39,7 @@
|
||||
#include "compress.hh"
|
||||
#include "compaction_strategy.hh"
|
||||
#include "caching_options.hh"
|
||||
#include "column_computation.hh"
|
||||
|
||||
using column_count_type = uint32_t;
|
||||
|
||||
@@ -217,6 +218,7 @@ private:
|
||||
bool _is_atomic;
|
||||
bool _is_counter;
|
||||
column_view_virtual _is_view_virtual;
|
||||
column_computation_ptr _computation;
|
||||
|
||||
struct thrift_bits {
|
||||
thrift_bits()
|
||||
@@ -232,6 +234,7 @@ public:
|
||||
column_definition(bytes name, data_type type, column_kind kind,
|
||||
column_id component_index = 0,
|
||||
column_view_virtual view_virtual = column_view_virtual::no,
|
||||
column_computation_ptr = nullptr,
|
||||
api::timestamp_type dropped_at = api::missing_timestamp);
|
||||
|
||||
data_type type;
|
||||
@@ -244,6 +247,35 @@ public:
|
||||
column_kind kind;
|
||||
::shared_ptr<cql3::column_specification> column_specification;
|
||||
|
||||
// NOTICE(sarna): This copy constructor is hand-written instead of default,
|
||||
// because it involves deep copying of the computation object.
|
||||
// Computation has a strict ownership policy provided by
|
||||
// unique_ptr, and as such cannot rely on default copying.
|
||||
column_definition(const column_definition& other)
|
||||
: _name(other._name)
|
||||
, _dropped_at(other._dropped_at)
|
||||
, _is_atomic(other._is_atomic)
|
||||
, _is_counter(other._is_counter)
|
||||
, _is_view_virtual(other._is_view_virtual)
|
||||
, _computation(other.get_computation_ptr())
|
||||
, _thrift_bits(other._thrift_bits)
|
||||
, type(other.type)
|
||||
, id(other.id)
|
||||
, kind(other.kind)
|
||||
, column_specification(other.column_specification)
|
||||
{}
|
||||
|
||||
column_definition& operator=(const column_definition& other) {
|
||||
if (this == &other) {
|
||||
return *this;
|
||||
}
|
||||
column_definition tmp(other);
|
||||
*this = std::move(tmp);
|
||||
return *this;
|
||||
}
|
||||
|
||||
column_definition& operator=(column_definition&& other) = default;
|
||||
|
||||
bool is_static() const { return kind == column_kind::static_column; }
|
||||
bool is_regular() const { return kind == column_kind::regular_column; }
|
||||
bool is_partition_key() const { return kind == column_kind::partition_key; }
|
||||
@@ -258,6 +290,14 @@ public:
|
||||
// These columns should be hidden from the user's SELECT queries.
|
||||
bool is_view_virtual() const { return _is_view_virtual == column_view_virtual::yes; }
|
||||
column_view_virtual view_virtual() const { return _is_view_virtual; }
|
||||
// Computed column values are generated from other columns (and possibly other sources) during updates.
|
||||
// Their values are still stored on disk, same as a regular columns.
|
||||
bool is_computed() const { return bool(_computation); }
|
||||
const column_computation& get_computation() const { return *_computation; }
|
||||
column_computation_ptr get_computation_ptr() const {
|
||||
return _computation ? _computation->clone() : nullptr;
|
||||
}
|
||||
void set_computed(column_computation_ptr computation) { _computation = std::move(computation); }
|
||||
// Columns hidden from CQL cannot be in any way retrieved by the user,
|
||||
// either explicitly or via the '*' operator, or functions, aggregates, etc.
|
||||
bool is_hidden_from_cql() const { return is_view_virtual(); }
|
||||
|
||||
@@ -239,7 +239,8 @@ public:
|
||||
column_definition& find_column(const cql3::column_identifier&);
|
||||
schema_builder& with_column(const column_definition& c);
|
||||
schema_builder& with_column(bytes name, data_type type, column_kind kind = column_kind::regular_column, column_view_virtual view_virtual = column_view_virtual::no);
|
||||
schema_builder& with_column(bytes name, data_type type, column_kind kind, column_id component_index, column_view_virtual view_virtual = column_view_virtual::no);
|
||||
schema_builder& with_column(bytes name, data_type type, column_kind kind, column_id component_index, column_view_virtual view_virtual = column_view_virtual::no, column_computation_ptr computation = nullptr);
|
||||
schema_builder& with_computed_column(bytes name, data_type type, column_kind kind, column_computation_ptr computation);
|
||||
schema_builder& remove_column(bytes name);
|
||||
schema_builder& without_column(sstring name, api::timestamp_type timestamp);
|
||||
schema_builder& without_column(sstring name, data_type, api::timestamp_type timestamp);
|
||||
|
||||
Reference in New Issue
Block a user