Merge 'cql3: allow SELECT of specific collection element' from Michael Litvak

This adds to the grammar the option to SELECT a specific element in a collection (map/set/list).

For example:
`SELECT map['key'] FROM table`
`SELECT map['key1']['key2'] FROM table`

This feature was implemented in Cassandra 4.0 and was requested by scylla users.

The behavior is mostly compatible with Cassandra, with these exceptions:
1. In SELECT, we allow a list subscript in a selector, while Cassandra allows only map and set subscripts.
2. In UPDATE, we allow a set subscript in a column condition, while Cassandra allows only map and list subscripts.
3. The slice syntax `SELECT m[a..b]` is not implemented yet.
4. Null subscript: `SELECT m[null]` returns null in Scylla, while Cassandra returns an error.

Fixes #7751

A backport was requested so that a user can use this feature.

Closes scylladb/scylladb#22051

* github.com:scylladb/scylladb:
  cql3: allow SELECT of specific collection key
  cql3: allow set subscript
This commit is contained in:
Avi Kivity
2025-01-01 14:48:40 +02:00
14 changed files with 241 additions and 41 deletions

View File

@@ -460,7 +460,9 @@ unaliasedSelector returns [uexpression s]
| f=functionName args=selectionFunctionArgs { tmp = function_call{std::move(f), std::move(args)}; }
| K_CAST '(' arg=unaliasedSelector K_AS t=native_type ')' { tmp = cast{.style = cast::cast_style::sql, .arg = std::move(arg), .type = std::move(t)}; }
)
( '.' fi=cident { tmp = field_selection{std::move(tmp), std::move(fi)}; } )*
( '.' fi=cident { tmp = field_selection{std::move(tmp), std::move(fi)}; }
| '[' sub=term ']' { tmp = subscript{std::move(tmp), std::move(sub)}; }
)*
{ $s = tmp; }
;

View File

@@ -141,7 +141,7 @@ get_value(const subscript& s, const evaluation_inputs& inputs) {
auto col_type = static_pointer_cast<const collection_type_impl>(type_of(s.val));
const auto deserialized = type_of(s.val)->deserialize(managed_bytes_view(*serialized));
const auto key = evaluate(s.sub, inputs);
auto&& key_type = col_type->is_map() ? col_type->name_comparator() : int32_type;
auto&& key_type = col_type->is_list() ? int32_type : col_type->name_comparator();
if (key.is_null()) {
// For m[null] return null.
// This is different from Cassandra - which treats m[null]
@@ -162,6 +162,15 @@ get_value(const subscript& s, const evaluation_inputs& inputs) {
});
});
return found == data_map.cend() ? std::nullopt : managed_bytes_opt(found->second.serialize_nonnull());
} else if (col_type->is_set()) {
const auto& data_set = value_cast<set_type_impl::native_type>(deserialized);
const auto found = key.view().with_linearized([&] (bytes_view key_bv) {
using entry = data_value;
return std::find_if(data_set.cbegin(), data_set.cend(), [&] (const entry& element) {
return key_type->compare(element.serialize_nonnull(), key_bv) == 0;
});
});
return found == data_set.cend() ? std::nullopt : managed_bytes_opt(found->serialize_nonnull());
} else if (col_type->is_list()) {
const auto& data_list = value_cast<list_type_impl::native_type>(deserialized);
auto key_deserialized = key.view().with_linearized([&] (bytes_view key_bv) {

View File

@@ -1191,18 +1191,24 @@ try_prepare_expression(const expression& expr, data_dictionary::database db, con
auto col_spec = column_specification_of(sub_col);
lw_shared_ptr<column_specification> subscript_column_spec;
data_type value_cmp;
if (sub_col_type.is_map()) {
subscript_column_spec = map_key_spec_of(*col_spec);
value_cmp = static_cast<const collection_type_impl&>(sub_col_type).value_comparator();
} else if (sub_col_type.is_set()) {
subscript_column_spec = set_value_spec_of(*col_spec);
value_cmp = static_cast<const collection_type_impl&>(sub_col_type).name_comparator();
} else if (sub_col_type.is_list()) {
subscript_column_spec = list_key_spec_of(*col_spec);
value_cmp = static_cast<const collection_type_impl&>(sub_col_type).value_comparator();
} else {
throw exceptions::invalid_request_exception(format("Column {} is not a map/list, cannot be subscripted", col_spec->name->text()));
throw exceptions::invalid_request_exception(format("Column {} is not a map/set/list, cannot be subscripted", col_spec->name->text()));
}
return subscript {
.val = sub_col,
.sub = prepare_expression(sub.sub, db, schema.ks_name(), &schema, std::move(subscript_column_spec)),
.type = static_cast<const collection_type_impl&>(sub_col_type).value_comparator(),
.type = value_cmp,
};
},
[&] (const unresolved_identifier& unin) -> std::optional<expression> {
@@ -1280,7 +1286,8 @@ test_assignment(const expression& expr, data_dictionary::database db, const sstr
return expression_test_assignment(col_val.col->type, receiver);
},
[&] (const subscript&) -> test_result {
on_internal_error(expr_logger, "subscripts are not yet reachable via test_assignment()");
// not implemented. issue #22075
return assignment_testable::test_result::NOT_ASSIGNABLE;
},
[&] (const unresolved_identifier& ui) -> test_result {
return unresolved_identifier_test_assignment(ui, db, keyspace, schema_opt, receiver);
@@ -1388,6 +1395,8 @@ static lw_shared_ptr<column_specification> get_lhs_receiver(const expression& pr
const column_value& sub_col = get_subscripted_column(col_val);
if (sub_col.col->type->is_map()) {
return map_value_spec_of(*sub_col.col->column_specification);
} else if (sub_col.col->type->is_set()) {
return set_value_spec_of(*sub_col.col->column_specification);
} else {
return list_value_spec_of(*sub_col.col->column_specification);
}

View File

@@ -41,7 +41,7 @@ selectable_processes_selection(const expr::expression& selectable) {
on_internal_error(slogger, "no way to express 'SELECT a binop b' in the grammar yet");
},
[] (const expr::subscript&) -> bool {
on_internal_error(slogger, "no way to express 'SELECT a[b]' in the grammar yet");
return true;
},
[&] (const expr::column_value& column) -> bool {
return false;

View File

@@ -382,7 +382,7 @@ protected:
.clustering_key = rs.current_clustering_key,
.static_and_regular_columns = rs.current,
.selection = &_sel,
.options = nullptr,
.options = rs._options,
.static_and_regular_timestamps = rs._timestamps,
.static_and_regular_ttls = rs._ttls,
.temporaries = {},
@@ -516,6 +516,7 @@ selection::collect_metadata(const schema& schema, const std::vector<prepared_sel
}
result_set_builder::result_set_builder(const selection& s, gc_clock::time_point now,
const query_options* options,
std::vector<size_t> group_by_cell_indices,
uint64_t limit, uint64_t per_partition_limit)
: _result_set(std::make_unique<result_set>(::make_shared<metadata>(*(s.get_result_metadata()))))
@@ -527,6 +528,7 @@ result_set_builder::result_set_builder(const selection& s, gc_clock::time_point
, _per_partition_remaining_previous_partition(per_partition_limit)
, _last_group(_group_by_cell_indices.size())
, _group_began(false)
, _options(options)
, _now(now)
{
if (s._collect_timestamps) {

View File

@@ -189,6 +189,7 @@ public:
std::vector<bytes> current_clustering_key;
std::vector<api::timestamp_type> _timestamps;
std::vector<int32_t> _ttls;
const query_options* _options;
private:
const gc_clock::time_point _now;
public:
@@ -244,6 +245,7 @@ public:
};
result_set_builder(const selection& s, gc_clock::time_point now,
const query_options* options = nullptr,
std::vector<size_t> group_by_cell_indices = {},
uint64_t limit = std::numeric_limits<uint64_t>::max(),
uint64_t per_partition_limit = std::numeric_limits<uint64_t>::max());

View File

@@ -499,7 +499,7 @@ select_statement::execute_without_checking_exception_message_aggregate_or_paged(
auto per_partition_limit = get_limit(options, _per_partition_limit, true);
if (aggregate || nonpaged_filtering) {
auto builder = cql3::selection::result_set_builder(*_selection, now, *_group_by_cell_indices, limit, per_partition_limit.value());
auto builder = cql3::selection::result_set_builder(*_selection, now, &options, *_group_by_cell_indices, limit, per_partition_limit.value());
coordinator_result<void> result_void = co_await utils::result_do_until(
[&p, &builder, limit] {
return p->is_exhausted() || (limit < builder.result_set_size());
@@ -916,7 +916,7 @@ select_statement::process_results_complex(foreign_ptr<lw_shared_ptr<query::resul
lw_shared_ptr<query::read_command> cmd,
const query_options& options,
gc_clock::time_point now) const {
cql3::selection::result_set_builder builder(*_selection, now);
cql3::selection::result_set_builder builder(*_selection, now, &options);
co_return co_await builder.with_thread_if_needed([&] {
if (_restrictions_need_filtering) {
results->ensure_counts();
@@ -1192,7 +1192,7 @@ indexed_table_select_statement::do_execute(query_processor& qp,
// the paging state between requesting data from replicas.
const bool aggregate = _selection->is_aggregate() || has_group_by();
if (aggregate) {
cql3::selection::result_set_builder builder(*_selection, now, *_group_by_cell_indices);
cql3::selection::result_set_builder builder(*_selection, now, &options, *_group_by_cell_indices);
std::unique_ptr<cql3::query_options> internal_options = std::make_unique<cql3::query_options>(cql3::query_options(options));
stop_iteration stop;
// page size is set to the internal count page size, regardless of the user-provided value
@@ -1371,9 +1371,9 @@ indexed_table_select_statement::read_posting_list(query_processor& qp,
int32_t page_size = options.get_page_size();
if (page_size <= 0 || !service::pager::query_pagers::may_need_paging(*_view_schema, page_size, *cmd, partition_ranges)) {
return qp.proxy().query_result(_view_schema, cmd, std::move(partition_ranges), options.get_consistency(), {timeout, state.get_permit(), state.get_client_state(), state.get_trace_state()})
.then(utils::result_wrap([this, now, selection = std::move(selection), partition_slice = std::move(partition_slice)] (service::storage_proxy::coordinator_query_result qr)
.then(utils::result_wrap([this, now, &options, selection = std::move(selection), partition_slice = std::move(partition_slice)] (service::storage_proxy::coordinator_query_result qr)
-> coordinator_result<::shared_ptr<cql_transport::messages::result_message::rows>> {
cql3::selection::result_set_builder builder(*selection, now);
cql3::selection::result_set_builder builder(*selection, now, &options);
query::result_view::consume(*qr.query_result,
std::move(partition_slice),
cql3::selection::result_set_builder::visitor(builder, *_view_schema, *selection));
@@ -1799,8 +1799,8 @@ mutation_fragments_select_statement::do_execute(query_processor& qp, service::qu
*command, key_ranges))) {
return do_query(erm_keepalive, {}, qp.proxy(), _schema, command, std::move(key_ranges), cl,
{timeout, state.get_permit(), state.get_client_state(), state.get_trace_state(), {}, {}})
.then(wrap_result_to_error_message([this, erm_keepalive, now, slice = command->slice] (service::storage_proxy_coordinator_query_result&& qr) mutable {
cql3::selection::result_set_builder builder(*_selection, now);
.then(wrap_result_to_error_message([this, erm_keepalive, now, &options, slice = command->slice] (service::storage_proxy_coordinator_query_result&& qr) mutable {
cql3::selection::result_set_builder builder(*_selection, now, &options);
query::result_view::consume(*qr.query_result, std::move(slice),
cql3::selection::result_set_builder::visitor(builder, *_schema, *_selection));
auto msg = ::make_shared<cql_transport::messages::result_message::rows>(result(builder.build()));
@@ -1982,6 +1982,10 @@ std::unique_ptr<prepared_statement> select_statement::prepare(data_dictionary::d
}
}
for (auto& ps : prepared_selectors) {
expr::fill_prepare_context(ps.expr, ctx);
}
for (auto& ps : prepared_selectors) {
aggregation_depth = std::max(aggregation_depth, expr::aggregation_depth(ps.expr));
}

View File

@@ -20,10 +20,12 @@ Querying data from data is done using a ``SELECT`` statement:
: [ BYPASS CACHE ]
: [ USING TIMEOUT `timeout` ]
select_clause: `selector` [ AS `identifier` ] ( ',' `selector` [ AS `identifier` ] )*
selector: `column_name`
selector: ( `column_name`
: | CAST '(' `selector` AS `cql_type` ')'
: | `function_name` '(' [ `selector` ( ',' `selector` )* ] ')'
: | COUNT '(' '*' ')'
: )
: ( '.' `field_name` | '[' `term` ']' )*
where_clause: `relation` ( AND `relation` )*
group_by_clause: `column_name` (',' `column_name` )*
relation: `column_name` `operator` `term`

View File

@@ -334,6 +334,10 @@ Further, maps support:
Note that for removing multiple elements in a ``map``, you remove from it a ``set`` of keys.
- Selecting one element::
SELECT favs['fruit'] FROM users WHERE id = 'jsmith';
Lastly, TTLs are allowed for both ``INSERT`` and ``UPDATE``, but in both cases, the TTL set only applies to the newly
inserted/updated elements. In other words::
@@ -376,6 +380,10 @@ Further, sets support:
UPDATE images SET tags = tags - { 'cat' } WHERE name = 'cat.jpg';
- Selecting an element (if the element doesn't exist, returns null)::
SELECT tags['gray'] FROM images;
Lastly, as for :ref:`maps <maps>`, TTLs, if used, only apply to the newly inserted values.
.. _lists:
@@ -432,6 +440,10 @@ Further, lists support:
UPDATE plays SET scores = scores - [ 12, 21 ] WHERE id = '123-afde';
- Selecting an element by its position in the list::
SELECT scores[1] FROM plays;
.. warning:: The append and prepend operations are not idempotent by nature. So, in particular, if one of these operations
times out, then retrying the operation is not safe, and it may (or may not) lead to appending/prepending the value
twice.

View File

@@ -443,6 +443,7 @@ future<query::mapreduce_result> mapreduce_service::execute_on_this_shard(
auto rs_builder = cql3::selection::result_set_builder(
*selection,
now,
nullptr,
std::vector<size_t>() // Represents empty GROUP BY indices.
);

View File

@@ -222,7 +222,7 @@ future<std::unique_ptr<cql3::result_set>> query_pager::fetch_page(uint32_t page_
future<result<std::unique_ptr<cql3::result_set>>> query_pager::fetch_page_result(uint32_t page_size,
gc_clock::time_point now, db::timeout_clock::time_point timeout) {
return do_with(
cql3::selection::result_set_builder(*_selection, now),
cql3::selection::result_set_builder(*_selection, now, &_options),
[this, page_size, now, timeout](auto& builder) {
return this->fetch_page_result(builder, page_size, now, timeout).then(utils::result_wrap([&builder] {
return builder.with_thread_if_needed([&builder] () -> result<std::unique_ptr<cql3::result_set>> {

View File

@@ -876,7 +876,7 @@ def testMultipleOperationOnSetWithinTheSameQuery(cql, test_keyspace):
execute(cql, table, "UPDATE %s SET s = s - ? , s = s + ? WHERE pk = ?", {3}, {3, 4}, 1)
assert_rows(execute(cql, table, "SELECT * FROM %s WHERE pk = 1") , [1, {0, 1, 2, 4}])
@pytest.mark.xfail(reason="Cassandra 4.0 feature of selecting part of map or list not yet supported. Issue #7751")
@pytest.mark.xfail(reason="Cassandra 4.0 feature of selecting a collection slice and subscript assignment not yet supported. Issue #22075")
def testMapOperation(cql, test_keyspace):
with create_table(cql, test_keyspace, "(k int, c int, l text, m map<text, text>, fm frozen<map<text, text>>, sm map<text, text> STATIC, fsm frozen<map<text, text>> STATIC, o int, PRIMARY KEY (k, c))") as table:
execute(cql, table, "INSERT INTO %s(k, c, l, m, fm, sm, fsm, o) VALUES (0, 0, 'foobar', ?, ?, ?, ?, 42)",
@@ -1028,7 +1028,7 @@ def testMapOperation(cql, test_keyspace):
[0, "foobar", {"1": "value1", "22": "value22"}, 42],
[0, "foobar", {"1": "value1_2"}, 42])
@pytest.mark.xfail(reason="Cassandra 4.0 feature of selecting part of map or list not yet supported. Issue #7751")
@pytest.mark.xfail(reason="Cassandra 4.0 feature of selecting a collection slice and subscript assignment not yet supported. Issue #22075")
def testMapOperationWithIntKey(cql, test_keyspace):
with create_table(cql, test_keyspace, "(k int, c int, l text, m map<int, text>, fm frozen<map<int, text>>, sm map<int, text> STATIC, fsm frozen<map<int, text>> STATIC, o int, PRIMARY KEY (k, c))") as table:
# used type "int" as map key intentionally since CQL parsing relies on "BigInteger"
@@ -1133,7 +1133,7 @@ def testMapOperationWithIntKey(cql, test_keyspace):
[0, "foobar", {1: "value1", 22: "value22"}, 42],
[0, "foobar", {1: "value1_2"}, 42])
@pytest.mark.xfail(reason="Cassandra 4.0 feature of selecting part of map or list not yet supported. Issue #7751")
@pytest.mark.xfail(reason="Cassandra 4.0 feature of selecting a collection slice not yet supported. Issue #22075")
def testMapOperationOnPartKey(cql, test_keyspace):
with create_table(cql, test_keyspace, "(k frozen<map<text, text>> PRIMARY KEY, l text, o int)") as table:
execute(cql, table, "INSERT INTO %s(k, l, o) VALUES (?, 'foobar', 42)", {"1": "value1", "22": "value22", "333": "value333"})
@@ -1168,7 +1168,7 @@ def testMapOperationOnPartKey(cql, test_keyspace):
assert_rows(execute(cql, table, "SELECT l, k, o FROM %s WHERE k = ?", {"1": "value1", "22": "value22", "333": "value333"}),
["foobar", {"1": "value1", "22": "value22", "333": "value333"}, 42])
@pytest.mark.xfail(reason="Cassandra 4.0 feature of selecting part of map or list not yet supported. Issue #7751")
@pytest.mark.xfail(reason="Cassandra 4.0 feature of selecting a collection slice not yet supported. Issue #22075")
def testMapOperationOnClustKey(cql, test_keyspace):
with create_table(cql, test_keyspace, "(k int, c frozen<map<text, text>>, l text, o int, PRIMARY KEY (k,c))") as table:
execute(cql, table, "INSERT INTO %s(k, c, l, o) VALUES (0, ?, 'foobar', 42)", {"1": "value1", "22": "value22", "333": "value333"})
@@ -1203,7 +1203,7 @@ def testMapOperationOnClustKey(cql, test_keyspace):
assert_rows(execute(cql, table, "SELECT k, l, c, o FROM %s WHERE k = 0 AND c = ?", {"1": "value1", "22": "value22", "333": "value333"}),
[0, "foobar", {"1": "value1", "22": "value22", "333": "value333"}, 42])
@pytest.mark.xfail(reason="Cassandra 4.0 feature of selecting part of map or list not yet supported. Issue #7751")
@pytest.mark.xfail(reason="Cassandra 4.0 feature of selecting a set slice not yet supported. Issue #22075")
def testSetOperation(cql, test_keyspace):
with create_table(cql, test_keyspace, "(k int, c int, l text, s set<text>, fs frozen<set<text>>, ss set<text> STATIC, fss frozen<set<text>> STATIC, o int, PRIMARY KEY (k, c))") as table:
execute(cql, table, "INSERT INTO %s(k, c, l, s, fs, ss, fss, o) VALUES (0, 0, 'foobar', ?, ?, ?, ?, 42)",
@@ -1273,7 +1273,7 @@ def testSetOperation(cql, test_keyspace):
[0, "foobar", {"1", "22", "22_2", "333"}, 42],
[0, "foobar", {"22", "333"}, 42])
@pytest.mark.xfail(reason="Cassandra 4.0 feature of selecting part of map or list not yet supported. Issue #7751")
@pytest.mark.xfail(reason="Cassandra 4.0 feature of selecting a collection slice not yet supported. Issue #22075")
def testCollectionSliceOnMV(cql, test_keyspace):
with create_table(cql, test_keyspace, "(k int, c int, l text, m map<text, text>, o int, PRIMARY KEY (k, c))") as table:
assert_invalid_message(cql, table, "Can only select columns by name when defining a materialized view (got m['abc'])",
@@ -1281,18 +1281,18 @@ def testCollectionSliceOnMV(cql, test_keyspace):
assert_invalid_message(cql, table, "Can only select columns by name when defining a materialized view (got m['abc'..'def'])",
"CREATE MATERIALIZED VIEW " + test_keyspace + ".view1 AS SELECT m['abc'..'def'] FROM %s WHERE k IS NOT NULL AND c IS NOT NULL AND m IS NOT NULL PRIMARY KEY (c, k)");
@pytest.mark.xfail(reason="Cassandra 4.0 feature of selecting part of map or list not yet supported. Issue #7751")
def testElementAccessOnList(cql, test_keyspace):
with create_table(cql, test_keyspace, "(pk int PRIMARY KEY, l list<int>)") as table:
execute(cql, table, "INSERT INTO %s (pk, l) VALUES (1, [1, 2, 3])");
# Scylla supports element access on list.
# def testElementAccessOnList(cql, test_keyspace):
# with create_table(cql, test_keyspace, "(pk int PRIMARY KEY, l list<int>)") as table:
# execute(cql, table, "INSERT INTO %s (pk, l) VALUES (1, [1, 2, 3])");
#
# assert_invalid_message(cql, table, "Element selection is only allowed on sets and maps, but l is a list",
# "SELECT pk, l[0] FROM %s");
#
# assert_invalid_message(cql, table, "Slice selection is only allowed on sets and maps, but l is a list",
# "SELECT pk, l[1..3] FROM %s");
assert_invalid_message(cql, table, "Element selection is only allowed on sets and maps, but l is a list",
"SELECT pk, l[0] FROM %s");
assert_invalid_message(cql, table, "Slice selection is only allowed on sets and maps, but l is a list",
"SELECT pk, l[1..3] FROM %s");
@pytest.mark.xfail(reason="Cassandra 4.0 feature of selecting part of map or list not yet supported. Issue #7751")
@pytest.mark.xfail(reason="Cassandra 4.0 feature of selecting a collection slice not yet supported. Issue #22075")
def testCollectionOperationResultSetMetadata(cql, test_keyspace):
with create_table(cql, test_keyspace, "(k int PRIMARY KEY, m map<text, text>, fm frozen<map<text, text>>, s set<text>, fs frozen<set<text>>)") as table:
execute(cql, table, "INSERT INTO %s (k, m, fm, s, fs) VALUES (?, ?, ?, ?, ?)",
@@ -1350,7 +1350,7 @@ def testCollectionOperationResultSetMetadata(cql, test_keyspace):
}
"""
@pytest.mark.xfail(reason="Cassandra 4.0 feature of selecting part of map or list not yet supported. Issue #7751")
@pytest.mark.xfail(reason="Cassandra 4.0 feature of selecting a collection slice not yet supported. Issue #22075")
def testFrozenCollectionNestedAccess(cql, test_keyspace):
with create_table(cql, test_keyspace, "(k int PRIMARY KEY, m map<text, frozen<map<text, set<int>>>>)") as table:
execute(cql, table, "INSERT INTO %s(k, m) VALUES (0, ?)", {"1": {"a": {1, 2, 4}, "b": {3}}, "2": {"a": {2, 4}}})
@@ -1368,7 +1368,7 @@ def testFrozenCollectionNestedAccess(cql, test_keyspace):
assert_rows(execute(cql, table, "SELECT m[?][?][?] FROM %s WHERE k = 0", "1", "a", 2), [2])
assert_rows(execute(cql, table, "SELECT m[?][?][?..?] FROM %s WHERE k = 0", "1", "a", 2, 3), [{2}])
@pytest.mark.xfail(reason="Cassandra 4.0 feature of selecting part of map or list not yet supported. Issue #7751")
@pytest.mark.xfail(reason="Cassandra 4.0 feature of selecting a collection slice not yet supported. Issue #22075")
def testUDTAndCollectionNestedAccess(cql, test_keyspace):
sm_tuple = collections.namedtuple('sm_tuple', ['s', 'm'])
with create_type(cql, test_keyspace, "(s set<int>, m map<text, text>)") as type_name:
@@ -1406,7 +1406,7 @@ def testUDTAndCollectionNestedAccess(cql, test_keyspace):
assert_rows(execute(cql, table, "SELECT v.m[..?] FROM %s WHERE k = 0", "b"), [{"a": "v1"}])
assert_rows(execute(cql, table, "SELECT v.m[?] FROM %s WHERE k = 0", "d"), ["v2"])
@pytest.mark.xfail(reason="Cassandra 4.0 feature of selecting part of map or list not yet supported. Issue #7751")
@pytest.mark.xfail(reason="Cassandra 4.0 feature of selecting a collection slice not yet supported. Issue #22075")
def testMapOverlappingSlices(cql, test_keyspace):
with create_table(cql, test_keyspace, "(k int PRIMARY KEY, m map<int,int>)") as table:
execute(cql, table, "INSERT INTO %s(k, m) VALUES (?, ?)", 0, {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5})
@@ -1431,7 +1431,7 @@ def testMapOverlappingSlices(cql, test_keyspace):
assert_rows(execute(cql, table, "SELECT m[1..3], m[2] FROM %s WHERE k=?", 0),
[{1: 1, 2: 2, 3: 3}, 2])
@pytest.mark.xfail(reason="Cassandra 4.0 feature of selecting part of map or list not yet supported. Issue #7751")
@pytest.mark.xfail(reason="Cassandra 4.0 feature of selecting a collection slice not yet supported. Issue #22075")
def testMapOverlappingSlicesWithDoubles(cql, test_keyspace):
with create_table(cql, test_keyspace, "(k int PRIMARY KEY, m map<double, double>)") as table:
execute(cql, table, "INSERT INTO %s(k, m) VALUES (?, ?)", 0, {0.0: 0.0, 1.1: 1.1, 2.2: 2.2, 3.0: 3.0, 4.4: 4.4, 5.5: 5.5})
@@ -1453,7 +1453,7 @@ def testMapOverlappingSlicesWithDoubles(cql, test_keyspace):
assert_rows(execute(cql, table, "SELECT m[1.1..3.0], m[2.2] FROM %s WHERE k=?", 0),
[{1.1: 1.1, 2.2: 2.2, 3.0: 3.0}, 2.2])
@pytest.mark.xfail(reason="Cassandra 4.0 feature of selecting part of map or list not yet supported. Issue #7751")
@pytest.mark.xfail(reason="Cassandra 4.0 feature of selecting a collection slice not yet supported. Issue #22075")
def testNestedAccessWithNestedMap(cql, test_keyspace):
with create_table(cql, test_keyspace, "(id text PRIMARY KEY, m map<float,frozen<map<int, text>>>)") as table:
execute(cql, table, "INSERT INTO %s (id,m) VALUES ('1', {1: {2: 'one-two'}})")
@@ -1507,7 +1507,7 @@ def testInsertingCollectionsWithInvalidElements(cql, test_keyspace):
assert_invalid_message(cql, table, "Invalid map literal for m: value (1, '1', 1.0, 1) is not of type frozen<tuple<int, text, double>>",
"INSERT INTO %s (k, m) VALUES (0, {1 : (1, '1', 1.0, 1)})")
@pytest.mark.xfail(reason="Cassandra 4.0 feature of selecting part of map or list not yet supported. Issue #7751")
@pytest.mark.xfail(reason="Cassandra 4.0 feature of selecting a collection slice not yet supported. Issue #22075")
def testSelectionOfEmptyCollections(cql, test_keyspace):
with create_table(cql, test_keyspace, "(k int PRIMARY KEY, m frozen<map<text, int>>, s frozen<set<int>>)") as table:
execute(cql, table, "INSERT INTO %s(k) VALUES (0)")

View File

@@ -317,8 +317,7 @@ def testOrderByForInClause(cql, test_keyspace):
assert_invalid_message(cql, table, "LIMIT must be strictly positive",
"SELECT v as c2 FROM %s where pk1 = ? AND pk2 IN (?, ?) ORDER BY c1 DESC , c2 DESC LIMIT 0; ", 1, 1, 2)
# Reproduces #7751
@pytest.mark.xfail(reason="Issue #7751")
@pytest.mark.skip(reason="Issue #22061")
def testOrderByForInClauseWithCollectionElementSelection(cql, test_keyspace):
with create_table(cql, test_keyspace, "(pk int, c frozen<set<int>>, v int, PRIMARY KEY (pk, c))") as table:
execute(cql, table, "INSERT INTO %s (pk, c, v) VALUES (0, {1, 2}, 0)")

View File

@@ -0,0 +1,158 @@
# Copyright 2024-present ScyllaDB
#
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
#############################################################################
# Tests for SELECT of a specific key in a collection column
#############################################################################
import pytest
import re
import time
from cassandra.protocol import InvalidRequest
from .util import unique_name, unique_key_int, unique_key_string, new_test_table, new_type, new_function
@pytest.fixture(scope="module")
def table1(cql, test_keyspace):
    """Module-scoped table with an int primary key `p` and a map<int, int> column `m`.

    Yields the keyspace-qualified table name; the table is dropped once the
    fixture is torn down.
    """
    table = ".".join([test_keyspace, unique_name()])
    create_stmt = f"CREATE TABLE {table} (p int PRIMARY KEY, m map<int, int>)"
    cql.execute(create_stmt)
    yield table
    # Teardown: runs after the last test that uses this fixture.
    drop_stmt = "DROP TABLE " + table
    cql.execute(drop_stmt)
def test_basic_int_key_selection(cql, table1):
    """Selecting m[key] on a map<int, int> yields the mapped value, or null
    when the key is not present in the map."""
    p = unique_key_int()
    cql.execute(f"INSERT INTO {table1}(p,m) VALUES ({p}, " + "{1:10,2:20})")
    # Each entry pairs a query with the exact rows it is expected to return.
    expectations = [
        (f"SELECT m[1] FROM {table1} WHERE p={p}", [(10,)]),
        (f"SELECT m[2] FROM {table1} WHERE p={p}", [(20,)]),
        (f"SELECT m[3] FROM {table1} WHERE p={p}", [(None,)]),
    ]
    for query, expected in expectations:
        assert list(cql.execute(query)) == expected
def test_basic_string_key_selection(cql, test_keyspace):
    """Selecting m['key'] on a map<text, int> yields the mapped value, or null
    when the key is not present in the map."""
    with new_test_table(cql, test_keyspace, 'p int PRIMARY KEY, m map<text, int>') as table:
        p = unique_key_int()
        cql.execute(f"INSERT INTO {table}(p,m) VALUES ({p}, " + "{'aa':10,'ab':20})")
        # Each entry pairs a query with the exact rows it is expected to return.
        expectations = [
            (f"SELECT m['aa'] FROM {table} WHERE p={p}", [(10,)]),
            (f"SELECT m['ab'] FROM {table} WHERE p={p}", [(20,)]),
            (f"SELECT m['ac'] FROM {table} WHERE p={p}", [(None,)]),
        ]
        for query, expected in expectations:
            assert list(cql.execute(query)) == expected
def test_subscript_type_mismatch(cql, table1):
    """A text subscript on a map whose keys are ints must raise InvalidRequest."""
    p = unique_key_int()
    insert_stmt = f"INSERT INTO {table1}(p,m) VALUES ({p}, " + "{1:10,2:20})"
    cql.execute(insert_stmt)
    mismatched_query = f"SELECT m['x'] FROM {table1} WHERE p={p}"
    with pytest.raises(InvalidRequest):
        cql.execute(mismatched_query)
def test_subscript_with_alias(cql, table1):
    """Subscript selectors can be aliased with AS, and the result row exposes
    the values under the alias names."""
    p = unique_key_int()
    cql.execute(f"INSERT INTO {table1}(p,m) VALUES ({p}, " + "{1:10,2:20})")
    rows = cql.execute(f"SELECT m[1] as m1, m[2] as m2 FROM {table1} WHERE p={p}")
    # Read the columns through their aliases rather than by position.
    aliased_values = [(row.m1, row.m2) for row in rows]
    assert aliased_values == [(10, 20)]
def test_frozen_map_subscript(cql, test_keyspace):
    """Subscript selection also works on a frozen<map<int, int>> column,
    returning null for a key that is not in the map."""
    with new_test_table(cql, test_keyspace, 'p int PRIMARY KEY, m frozen<map<int, int>>') as table:
        p = unique_key_int()
        cql.execute(f"INSERT INTO {table}(p,m) VALUES ({p}, " + "{1:10,2:20})")
        # Each entry pairs a query with the exact rows it is expected to return.
        expectations = [
            (f"SELECT m[1] FROM {table} WHERE p={p}", [(10,)]),
            (f"SELECT m[2] FROM {table} WHERE p={p}", [(20,)]),
            (f"SELECT m[3] FROM {table} WHERE p={p}", [(None,)]),
        ]
        for query, expected in expectations:
            assert list(cql.execute(query)) == expected
def test_nested_key_selection(cql, test_keyspace):
    """Chained subscripts m[k1][k2] reach into a map nested inside a map;
    a missing inner key yields null."""
    with new_test_table(cql, test_keyspace, 'p int PRIMARY KEY, m map<text, frozen<map<text, int>>>') as table:
        p = unique_key_int()
        cql.execute(f"INSERT INTO {table}(p, m) VALUES ({p}, " + "{'1': {'a': 10, 'b': 11}, '2': {'a': 12}})")
        # Each entry pairs a query with the exact rows it is expected to return.
        expectations = [
            (f"SELECT m['1']['a'] FROM {table} WHERE p={p}", [(10,)]),
            (f"SELECT m['1']['b'] FROM {table} WHERE p={p}", [(11,)]),
            (f"SELECT m['2']['a'] FROM {table} WHERE p={p}", [(12,)]),
            (f"SELECT m['2']['b'] FROM {table} WHERE p={p}", [(None,)]),
        ]
        for query, expected in expectations:
            assert list(cql.execute(query)) == expected
def test_prepare_key(cql, table1):
    """The subscript may be a bind marker in a prepared statement, either
    positional (?) or named (:name)."""
    p = unique_key_int()
    cql.execute(f"INSERT INTO {table1} (p,m) VALUES ({p}, " + "{1:10,2:20})")

    # Positional markers: the first bound value is the map key, the second is p.
    lookup1 = cql.prepare(f"SELECT m[?] FROM {table1} WHERE p = ?")
    for map_key, expected_value in ((1, 10), (2, 20), (3, None)):
        assert list(cql.execute(lookup1, [map_key, p])) == [(expected_value,)]

    # Named markers: two subscripts in one statement, bound by name.
    lookup2 = cql.prepare(f"SELECT m[:x1], m[:x2] FROM {table1} WHERE p = :key")
    named_values = {'x1':2, 'x2':1, 'key':p}
    assert list(cql.execute(lookup2, named_values)) == [(20,10)]
def test_null_map(cql, table1):
    """Subscripting a map column that was never set for the row yields null."""
    p = unique_key_int()
    # Insert only the key, leaving the map column unset.
    cql.execute(f"INSERT INTO {table1}(p) VALUES ({p})")
    result = list(cql.execute(f"SELECT m[1] FROM {table1} WHERE p={p}"))
    assert result == [(None,)]
# Scylla-only: Scylla returns null for m[null], whereas Cassandra returns an error.
def test_null_subscript(scylla_only, cql, table1):
    """Selecting m[null] on a populated map yields a null value."""
    p = unique_key_int()
    cql.execute(f"INSERT INTO {table1}(p,m) VALUES ({p}, " + "{1:10,2:20})")
    result = list(cql.execute(f"SELECT m[null] FROM {table1} WHERE p={p}"))
    assert result == [(None,)]
def test_subscript_and_field(cql, test_keyspace):
    """A field selection can follow a subscript: m[1].a on a map whose values
    are a frozen user-defined type."""
    # The table schema needs the type name, so the table context manager is
    # listed after the type context manager in the same `with`.
    with new_type(cql, test_keyspace, '(a int)') as typ, \
            new_test_table(cql, test_keyspace, f"p int PRIMARY KEY, m map<int, frozen<{typ}>>") as table:
        p = unique_key_int()
        cql.execute(f"INSERT INTO {table}(p,m) VALUES ({p}, " + "{1:{a:10}})")
        result = list(cql.execute(f"SELECT m[1].a FROM {table} WHERE p={p}"))
        assert result == [(10,)]
def test_field_and_subscript(cql, test_keyspace):
    """A subscript can follow a field selection: t.a[1] on a user-defined type
    whose field `a` is a frozen map."""
    # The table schema needs the type name, so the table context manager is
    # listed after the type context manager in the same `with`.
    with new_type(cql, test_keyspace, '(a frozen<map<int,int>>)') as typ, \
            new_test_table(cql, test_keyspace, f"p int PRIMARY KEY, t {typ}") as table:
        p = unique_key_int()
        cql.execute(f"INSERT INTO {table}(p,t) VALUES ({p}, " + "{a:{1:10}})")
        result = list(cql.execute(f"SELECT t.a[1] FROM {table} WHERE p={p}"))
        assert result == [(10,)]
def test_field_and_subscript_and_field(cql, test_keyspace):
    """Field selection and subscript can be chained in alternation: t.a[1].b
    walks UDT field -> map element -> UDT field."""
    # Inner type first: the outer type's definition embeds the inner type's name.
    with new_type(cql, test_keyspace, '(b int)') as typ1:
        with new_type(cql, test_keyspace, f"(a frozen<map<int,{typ1}>>)") as typ2:
            with new_test_table(cql, test_keyspace, f"p int PRIMARY KEY, t {typ2}") as table:
                p = unique_key_int()
                cql.execute(f"INSERT INTO {table}(p,t) VALUES ({p}, " + "{a:{1:{b:10}}})")
                result = list(cql.execute(f"SELECT t.a[1].b FROM {table} WHERE p={p}"))
                assert result == [(10,)]
def test_other_types_cannot_be_subscripted(cql, table1):
    """Subscripting a non-collection expression (an int column, or the result
    of token()) is rejected with an InvalidRequest whose message contains 'not a'."""
    non_collection_queries = (
        f"SELECT p[2] FROM {table1}",
        f"SELECT token(p)[2] FROM {table1}",
    )
    for query in non_collection_queries:
        with pytest.raises(InvalidRequest, match='not a'):
            cql.execute(query)
def test_udf_subscript(scylla_only, cql, test_keyspace, table1):
    """The subscript expression may itself be a function call: m[add_one(1)]
    looks up key 2. The helper function is written in Lua and the test takes
    the scylla_only fixture."""
    fn = "(k int) CALLED ON NULL INPUT RETURNS int LANGUAGE Lua AS 'return k+1'"
    with new_function(cql, test_keyspace, fn, 'add_one'):
        p = unique_key_int()
        cql.execute(f"INSERT INTO {table1}(p,m) VALUES ({p}, " + "{1:10,2:20})")
        result = list(cql.execute(f"SELECT m[add_one(1)] FROM {table1} WHERE p={p}"))
        assert result == [(20,)]
# Scylla-only: Cassandra does not support a subscript on a list.
def test_list_subscript(scylla_only, cql, test_keyspace):
    """Selecting l[i] on a list<int> yields the element at index i, or null
    when the index is past the end of the list."""
    with new_test_table(cql, test_keyspace, 'p int PRIMARY KEY, l list<int>') as table:
        p = unique_key_int()
        cql.execute(f"INSERT INTO {table}(p,l) VALUES ({p}, " + "[10,20])")
        # Each entry pairs a query with the exact rows it is expected to return.
        expectations = [
            (f"SELECT l[0] FROM {table} WHERE p={p}", [(10,)]),
            (f"SELECT l[1] FROM {table} WHERE p={p}", [(20,)]),
            (f"SELECT l[2] FROM {table} WHERE p={p}", [(None,)]),
            (f"SELECT l[10] FROM {table} WHERE p={p}", [(None,)]),
        ]
        for query, expected in expectations:
            assert list(cql.execute(query)) == expected
def test_set_subscript(cql, test_keyspace):
    """Selecting s[x] on a set<int> yields x when x is a member of the set,
    and null otherwise."""
    with new_test_table(cql, test_keyspace, 'p int PRIMARY KEY, s set<int>') as table:
        p = unique_key_int()
        cql.execute(f"INSERT INTO {table}(p,s) VALUES ({p}, " + "{10,20})")
        # Each entry pairs a query with the exact rows it is expected to return.
        expectations = [
            (f"SELECT s[0] FROM {table} WHERE p={p}", [(None,)]),
            (f"SELECT s[10] FROM {table} WHERE p={p}", [(10,)]),
            (f"SELECT s[11] FROM {table} WHERE p={p}", [(None,)]),
            (f"SELECT s[20] FROM {table} WHERE p={p}", [(20,)]),
        ]
        for query, expected in expectations:
            assert list(cql.execute(query)) == expected
# Scylla-only: Cassandra does not support the Lua language used for the UDF.
@pytest.mark.xfail(reason="#22075")
def test_subscript_function_arg(scylla_only, cql, test_keyspace, table1):
    """A subscript expression passed as a function argument: add_one(m[1])
    should return the mapped value plus one. Currently marked as an expected
    failure."""
    fn = "(k int) CALLED ON NULL INPUT RETURNS int LANGUAGE Lua AS 'return k+1'"
    with new_function(cql, test_keyspace, fn, 'add_one'):
        p = unique_key_int()
        cql.execute(f"INSERT INTO {table1}(p,m) VALUES ({p}, " + "{1:10,2:20})")
        result = list(cql.execute(f"SELECT add_one(m[1]) FROM {table1} WHERE p={p}"))
        assert result == [(11,)]