From 374be94faad03123aa946ee50e56bfcab6b7e61a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 18 Mar 2026 12:30:12 +0200 Subject: [PATCH 01/76] test: statement_restrictions: add index_selection regression test In preparation for refactoring statement_restrictions, add a simple and an exhaustive regression test, encoding the index selection algorithm into the test. We cannot change the index selection algorithm because then mixed-node clusters will alter the sorting key mid-query (if paging takes place). Because the exhaustive space has such a large stack frame, and because Address Sanitizer bloats the stack frame, increase it for debug builds. --- test/boost/statement_restrictions_test.cc | 799 ++++++++++++++++++++++ 1 file changed, 799 insertions(+) diff --git a/test/boost/statement_restrictions_test.cc b/test/boost/statement_restrictions_test.cc index f778abe105..04ded2d886 100644 --- a/test/boost/statement_restrictions_test.cc +++ b/test/boost/statement_restrictions_test.cc @@ -11,12 +11,14 @@ #include #include +#include #include #include "cql3/restrictions/statement_restrictions.hh" #include "cql3/expr/expr-utils.hh" #include "cql3/util.hh" +#include "index/secondary_index_manager.hh" #include "test/lib/cql_assertions.hh" #include "test/lib/cql_test_env.hh" #include "test/lib/test_utils.hh" @@ -366,6 +368,803 @@ SEASTAR_TEST_CASE(slice_single_column_mixed_order) { }); } +// Regression test: verifies that index selection (find_idx), uses_secondary_indexing, +// and need_filtering produce consistent results across all supported index types: +// - regular_values (standard column EQ) +// - keys (set CONTAINS, map CONTAINS KEY) +// - collection_values (map/list CONTAINS) +// - keys_and_values (map subscript EQ) +// - full (frozen collection EQ; round-trips to regular_values via serialization) +// - local vs global index scoring +SEASTAR_TEST_CASE(index_selection) { + return do_with_cql_env_thread([](cql_test_env& e) { + cquery_nofail(e, "CREATE TABLE ks.idx_test 
(" + " pk1 int, pk2 int," + " ck1 int, ck2 int," + " v1 int, v2 int, v3 int," + " s1 set," + " m1 map," + " l1 list," + " fs frozen>," + " PRIMARY KEY ((pk1, pk2), ck1, ck2)" + ")"); + // 10 indexes covering all target types. + cquery_nofail(e, "CREATE INDEX idx_v1 ON ks.idx_test(v1)"); + cquery_nofail(e, "CREATE INDEX idx_v2 ON ks.idx_test(v2)"); + cquery_nofail(e, "CREATE INDEX idx_v3_local ON ks.idx_test((pk1,pk2), v3)"); + cquery_nofail(e, "CREATE INDEX idx_ck1 ON ks.idx_test(ck1)"); + cquery_nofail(e, "CREATE INDEX idx_s1 ON ks.idx_test(s1)"); // keys (rewritten from VALUES for sets) + cquery_nofail(e, "CREATE INDEX idx_m1_values ON ks.idx_test(VALUES(m1))"); // collection_values + cquery_nofail(e, "CREATE INDEX idx_m1_keys ON ks.idx_test(KEYS(m1))"); // keys + cquery_nofail(e, "CREATE INDEX idx_m1_entries ON ks.idx_test(ENTRIES(m1))"); // keys_and_values + cquery_nofail(e, "CREATE INDEX idx_l1 ON ks.idx_test(l1)"); // collection_values + cquery_nofail(e, "CREATE INDEX idx_fs ON ks.idx_test(FULL(fs))"); // full -> round-trips to regular_values + + auto schema = e.local_db().find_schema("ks", "idx_test"); + auto& sim = e.data_dictionary().find_column_family(schema).get_index_manager(); + + struct expected { + std::string_view where_clause; + std::optional index_name; // nullopt = no index selected + bool uses_secondary_indexing; + bool need_filtering; + }; + + // Build statement_restrictions from a WHERE clause string and return the + // index-selection result, tagged with the WHERE clause (used by verify's messages). + auto check = [&](std::string_view where_clause) -> expected { + prepare_context ctx; + auto factors = where_clause.empty() + ? 
std::vector{} + : boolean_factors(cql3::util::where_clause_to_relations(where_clause, cql3::dialect{})); + auto sr = restrictions::analyze_statement_restrictions( + e.data_dictionary(), + schema, + statements::statement_type::SELECT, + expr::conjunction{std::move(factors)}, + ctx, + /*contains_only_static_columns=*/false, + /*for_view=*/false, + /*allow_filtering=*/true, + restrictions::check_indexes::yes); + auto [idx, restrictions_expr] = sr.find_idx(sim); + return {where_clause, + idx ? std::optional(idx->metadata().name()) : std::nullopt, + sr.uses_secondary_indexing(), + sr.need_filtering()}; + }; + + auto none = std::optional{}; + auto idx = [](const char* name) { return std::optional(name); }; + + auto verify = [](const expected& got, const expected& want) { + BOOST_CHECK_MESSAGE(got.index_name == want.index_name, + fmt::format("WHERE {}: index_name: got {} want {}", + got.where_clause, + got.index_name.value_or("(none)"), + want.index_name.value_or("(none)"))); + BOOST_CHECK_MESSAGE(got.uses_secondary_indexing == want.uses_secondary_indexing, + fmt::format("WHERE {}: uses_secondary_indexing: got {} want {}", + got.where_clause, + got.uses_secondary_indexing, + want.uses_secondary_indexing)); + BOOST_CHECK_MESSAGE(got.need_filtering == want.need_filtering, + fmt::format("WHERE {}: need_filtering: got {} want {}", + got.where_clause, + got.need_filtering, + want.need_filtering)); + }; + + // --- A. Regular column EQ (target_type: regular_values) --- + verify(check("v1 = 1"), {"", idx("idx_v1"), true, false}); + verify(check("v2 = 1"), {"", idx("idx_v2"), true, false}); + // WHERE-clause order tiebreak: first column in WHERE wins for equal scores. + verify(check("v1 = 1 AND v2 = 1"), {"", idx("idx_v1"), true, true}); + verify(check("v2 = 1 AND v1 = 1"), {"", idx("idx_v2"), true, true}); + // Slices (GT/LT) are not supported by standard secondary indexes. + verify(check("v1 > 1"), {"", none, false, true}); + + // --- B. 
Local vs global index scoring --- + // Local index with full PK scores 2, global scores 1. + verify(check("pk1 = 1 AND pk2 = 1 AND v3 = 1"), {"", idx("idx_v3_local"), true, false}); + // Local (score 2) beats global (score 1) even when global column appears first. + verify(check("pk1 = 1 AND pk2 = 1 AND v1 = 1 AND v3 = 1"), {"", idx("idx_v3_local"), true, true}); + // Local index without full PK gets score 0 and is never picked. + verify(check("v3 = 1"), {"", none, false, true}); + + // --- C. CK column index (search group ordering) --- + verify(check("ck1 = 1"), {"", idx("idx_ck1"), true, false}); + // CK group is iterated before non-PK group, regardless of WHERE order. + verify(check("ck1 = 1 AND v1 = 1"), {"", idx("idx_ck1"), true, true}); + verify(check("v1 = 1 AND ck1 = 1"), {"", idx("idx_ck1"), true, true}); + + // --- D. Set CONTAINS (target_type: keys, rewritten from VALUES for sets) --- + verify(check("s1 CONTAINS 1"), {"", idx("idx_s1"), true, false}); + + // --- E. Map indexes --- + // CONTAINS on map values (target_type: collection_values). + verify(check("m1 CONTAINS 'one'"), {"", idx("idx_m1_values"), true, false}); + // CONTAINS KEY on map keys (target_type: keys). + verify(check("m1 CONTAINS KEY 1"), {"", idx("idx_m1_keys"), true, false}); + // Subscript EQ on map entries (target_type: keys_and_values). + verify(check("m1[1] = 'one'"), {"", idx("idx_m1_entries"), true, false}); + + // --- F. List CONTAINS (target_type: collection_values) --- + verify(check("l1 CONTAINS 1"), {"", idx("idx_l1"), true, false}); + + // --- G. Frozen collection (FULL index, round-trips to regular_values) --- + verify(check("fs = {1}"), {"", idx("idx_fs"), true, false}); + // Same with full PK (local index on v3 is available but idx_fs is global, score 1). + verify(check("pk1 = 1 AND pk2 = 1 AND fs = {1}"), {"", idx("idx_fs"), true, false}); + + // --- H. 
Double CONTAINS on same column: CollectionYes && CollectionYes = No --- + verify(check("s1 CONTAINS 1 AND s1 CONTAINS 2"), {"", none, false, true}); + + // --- I. Collection + regular column tiebreak (WHERE-clause order) --- + verify(check("s1 CONTAINS 1 AND v1 = 1"), {"", idx("idx_s1"), true, true}); + verify(check("v1 = 1 AND s1 CONTAINS 1"), {"", idx("idx_v1"), true, true}); + + // --- J. CK group beats collection in non-PK group --- + verify(check("m1 CONTAINS 'one' AND ck1 = 1"), {"", idx("idx_ck1"), true, true}); + + // --- K. Edge cases --- + // Full PK only: no secondary index needed. + verify(check("pk1 = 1 AND pk2 = 1"), {"", none, false, false}); + // No restrictions at all. + verify(check(""), {"", none, false, false}); + // Token restriction with a regular column: index is used. + verify(check("token(pk1, pk2) > 0 AND v1 = 1"), {"", idx("idx_v1"), true, false}); + }); +} + +// Exhaustive combinatorial test: iterates over all 2^N subsets of N restriction +// fragments and, for each subset, verifies a broad set of statement_restrictions +// public APIs. This catches any refactoring that accidentally changes observable +// behaviour for *any* combination of restriction types. 
+// +// Restriction fragments (15 independent bits, 2^15 = 32768 combinations): +// bit 0: pk1 = 1 +// bit 1: pk2 = 2 +// bit 2: ck1 = 3 (single-column EQ) +// bit 3: ck2 > 4 (single-column slice) +// bit 4: ck1 IN (3, 6) (single-column IN; includes CK1_EQ value 3) +// bit 5: (ck1, ck2) = (7, 8) (multi-column EQ) +// bit 6: (ck1, ck2) > (9, 10) (multi-column slice) +// bit 7: (ck1, ck2) IN ((11, 12), (13, 14)) (multi-column IN) +// bit 8: v1 = 15 (global index comb_v1, target: regular_values) +// bit 9: v3 = 16 (local index comb_v3_local, target: regular_values) +// bit 10: s1 CONTAINS 17 (global index comb_s1, target: keys — set) +// bit 11: m1 CONTAINS 'alpha' (global index comb_m1_values, target: collection_values) +// bit 12: m2 CONTAINS KEY 18 (global index comb_m2_keys, target: keys — map) +// bit 13: m3[19] = 'beta' (global index comb_m3_entries, target: keys_and_values) +// bit 14: fs = {20, 21} (global index comb_fs, target: full — frozen collection) +SEASTAR_TEST_CASE(combinatorial_restrictions) { + // ASAN's fake-stack shadow buffer for the large lambda below is ~248 KiB; + // bump the thread stack so it doesn't overflow under sanitized builds. 
+ seastar::thread_attributes tattr; +#if defined(__SANITIZE_ADDRESS__) || __has_feature(address_sanitizer) + tattr.stack_size = 2 * 1024 * 1024; +#endif + return do_with_cql_env_thread([](cql_test_env& e) { + cquery_nofail(e, "CREATE TABLE ks.comb (" + " pk1 int, pk2 int," + " ck1 int, ck2 int," + " v1 int, v2 int, v3 int," + " s1 set," + " m1 map," + " m2 map," + " m3 map," + " fs frozen>," + " PRIMARY KEY ((pk1, pk2), ck1, ck2)" + ")"); + cquery_nofail(e, "CREATE INDEX comb_pk1 ON ks.comb(pk1)"); + cquery_nofail(e, "CREATE INDEX comb_v1 ON ks.comb(v1)"); + cquery_nofail(e, "CREATE INDEX comb_v3_local ON ks.comb((pk1,pk2), v3)"); + cquery_nofail(e, "CREATE INDEX comb_s1 ON ks.comb(s1)"); + cquery_nofail(e, "CREATE INDEX comb_ck1 ON ks.comb(ck1)"); + cquery_nofail(e, "CREATE INDEX comb_m1_values ON ks.comb(VALUES(m1))"); + cquery_nofail(e, "CREATE INDEX comb_m2_keys ON ks.comb(KEYS(m2))"); + cquery_nofail(e, "CREATE INDEX comb_m3_entries ON ks.comb(ENTRIES(m3))"); + cquery_nofail(e, "CREATE INDEX comb_fs ON ks.comb(FULL(fs))"); + + auto schema = e.local_db().find_schema("ks", "comb"); + auto& sim = e.data_dictionary().find_column_family(schema).get_index_manager(); + const auto& pk1_def = *schema->get_column_definition("pk1"); + const auto& pk2_def = *schema->get_column_definition("pk2"); + const auto& ck1_def = *schema->get_column_definition("ck1"); + const auto& ck2_def = *schema->get_column_definition("ck2"); + const auto& v1_def = *schema->get_column_definition("v1"); + const auto& v3_def = *schema->get_column_definition("v3"); + const auto& s1_def = *schema->get_column_definition("s1"); + const auto& m1_def = *schema->get_column_definition("m1"); + const auto& m2_def = *schema->get_column_definition("m2"); + const auto& m3_def = *schema->get_column_definition("m3"); + const auto& fs_def = *schema->get_column_definition("fs"); + + // Every restriction fragment is an independent bit in the mask. 
+ // This includes CK restriction variants, giving us exhaustive + // coverage of all 2^15 = 32768 combinations. + enum frag : unsigned { + PK1 = 1u << 0, // pk1 = 1 (global index, regular_values) + PK2 = 1u << 1, // pk2 = 2 (no index) + CK1_EQ = 1u << 2, // ck1 = 3 + CK2_SLICE = 1u << 3, // ck2 > 4 + CK1_IN = 1u << 4, // ck1 IN (3, 6) + MULTI_EQ = 1u << 5, // (ck1, ck2) = (7, 8) + MULTI_SLICE= 1u << 6, // (ck1, ck2) > (9, 10) + MULTI_IN = 1u << 7, // (ck1, ck2) IN ((11, 12), (13, 14)) + V1 = 1u << 8, // v1 = 15 (global index, regular_values) + V3 = 1u << 9, // v3 = 16 (local index, regular_values) + S1 = 1u << 10, // s1 CONTAINS 17 (global index, keys — set) + M_VAL = 1u << 11, // m1 CONTAINS 'alpha' (global index, collection_values) + M_KEY = 1u << 12, // m2 CONTAINS KEY 18 (global index, keys — map) + M_ENT = 1u << 13, // m3[19] = 'beta' (global index, keys_and_values) + FS = 1u << 14, // fs = {20, 21} (global index, full) + }; + constexpr unsigned N_FRAG = 15; + constexpr unsigned FRAG_TOTAL = 1u << N_FRAG; + + constexpr unsigned SINGLE_CK_MASK = CK1_EQ | CK2_SLICE | CK1_IN; + constexpr unsigned MULTI_CK_MASK = MULTI_EQ | MULTI_SLICE | MULTI_IN; + + struct fragment_info { + unsigned bit; + const char* clause; + }; + // Each fragment uses unique values so that conjunction intersections + // are predictable. CK1_IN includes the CK1_EQ value (3) to ensure + // the intersection is non-empty when both are present. 
+ const fragment_info fragments[] = { + {PK1, "pk1 = 1"}, + {PK2, "pk2 = 2"}, + {CK1_EQ, "ck1 = 3"}, + {CK2_SLICE, "ck2 > 4"}, + {CK1_IN, "ck1 IN (3, 6)"}, + {MULTI_EQ, "(ck1, ck2) = (7, 8)"}, + {MULTI_SLICE, "(ck1, ck2) > (9, 10)"}, + {MULTI_IN, "(ck1, ck2) IN ((11, 12), (13, 14))"}, + {V1, "v1 = 15"}, + {V3, "v3 = 16"}, + {S1, "s1 CONTAINS 17"}, + {M_VAL, "m1 CONTAINS 'alpha'"}, + {M_KEY, "m2 CONTAINS KEY 18"}, + {M_ENT, "m3[19] = 'beta'"}, + {FS, "fs = {20, 21}"}, + }; + + unsigned total_tested = 0; + unsigned total_illegal = 0; + + for (unsigned mask = 0; mask < FRAG_TOTAL; ++mask) { + // --- Illegality detection --- + // Rule 1: Mixing single-column and multi-column CK restrictions + // is always illegal. + // Rule 2: At most one multi-column CK restriction type allowed. + bool has_single_ck = (mask & SINGLE_CK_MASK) != 0; + bool has_multi_ck = (mask & MULTI_CK_MASK) != 0; + unsigned multi_ck_count = std::popcount(mask & MULTI_CK_MASK); + bool is_illegal = (has_single_ck && has_multi_ck) + || (multi_ck_count > 1); + + // Build WHERE clause from all set bits. + std::string where_clause; + for (auto& f : fragments) { + if (mask & f.bit) { + if (!where_clause.empty()) { + where_clause += " AND "; + } + where_clause += f.clause; + } + } + + auto ctx_msg = [&](std::string_view api) { + return fmt::format("mask=0x{:04x} WHERE [{}]: {}", + mask, where_clause, api); + }; + + prepare_context ctx; + auto where_expr = where_clause.empty() + ? 
expr::expression(expr::conjunction{}) + : cql3::util::where_clause_to_relations(where_clause, cql3::dialect{}); + + std::optional sr; + try { + sr.emplace(restrictions::analyze_statement_restrictions( + e.data_dictionary(), + schema, + statements::statement_type::SELECT, + where_expr, + ctx, + /*contains_only_static_columns=*/false, + /*for_view=*/false, + /*allow_filtering=*/true, + restrictions::check_indexes::yes)); + } catch (const exceptions::invalid_request_exception&) { + } + + if (is_illegal) { + BOOST_CHECK_MESSAGE(!sr, + ctx_msg("expected exception for illegal CK combination")); + ++total_illegal; + ++total_tested; + continue; + } + BOOST_REQUIRE_MESSAGE(sr, + ctx_msg("unexpected exception for legal CK combination")); + + // --- Derived CK properties --- + bool has_multi_column = has_multi_ck; + + // Which CK columns are restricted? + bool has_ck1 = (mask & (CK1_EQ | CK1_IN | MULTI_EQ | MULTI_SLICE | MULTI_IN)) != 0; + bool has_ck2 = (mask & (CK2_SLICE | MULTI_EQ | MULTI_SLICE | MULTI_IN)) != 0; + bool has_any_ck = has_ck1 || has_ck2; + unsigned ck_count = (has_ck1 ? 1u : 0u) + (has_ck2 ? 1u : 0u); + + // clustering_key_restrictions_has_IN: any IN binop present + bool has_ck_in = (mask & (CK1_IN | MULTI_IN)) != 0; + + // clustering_key_restrictions_has_only_eq: no non-EQ binop in CK + // restrictions. Vacuously true when no CK restrictions. + bool has_only_eq = !(mask & (CK2_SLICE | CK1_IN | MULTI_SLICE | MULTI_IN)); + + // clustering_key_restrictions_need_filtering (internal predicate, + // before ORing with has_partition_key_unrestricted_components): + // For multi-column restrictions, always false. + // For single-column: true when there's a CK gap (ck2 restricted + // without ck1 being restricted by EQ or IN). + bool ck_need_filtering_internal = !has_multi_column + && (mask & CK2_SLICE) + && !(mask & (CK1_EQ | CK1_IN)); + + // has_eq_restriction_on_column: recognizes column_value and + // tuple_constructor LHS with oper_t::EQ. 
+ // CK1_EQ → true for ck1. MULTI_EQ → true for both ck1 and ck2. + // IN, slice → false. + bool ck1_has_eq = (mask & CK1_EQ) || (mask & MULTI_EQ); + bool ck2_has_eq = (mask & MULTI_EQ) != 0; + + // comb_pk1 index selection: + // Requires: pk1 restricted with EQ and PK incomplete + // (_is_key_range). PK restrictions are iterated first in + // _index_restrictions, so comb_pk1 (global, score 1) beats + // all same-score indexes that come later. + bool selects_comb_pk1 = (mask & PK1) && !(mask & PK2); + + // comb_ck1 index selection: + // Requires: !full_pk, single-column EQ on ck1 (not IN — index + // only supports EQ), no CK1_IN (makes conjunction unsupported), + // no multi-column. + // In legal combos, CK1_EQ set → no MULTI_* possible. + // When pk1 is also restricted, comb_pk1 takes priority (PK + // group comes before CK group in _index_restrictions). + bool selects_comb_ck1 = (mask & CK1_EQ) && !(mask & CK1_IN); + + // Index clustering range multiplier: + // CK1_IN alone produces 2 IN values → 2 ranges. + // CK1_EQ + CK1_IN: intersection narrows to 1. + // Multi-column: not added to prefix, multiplier 1. + unsigned idx_range_multiplier = ((mask & CK1_IN) && !(mask & CK1_EQ)) ? 2 : 1; + + // --- Partition key APIs --- + bool has_pk1 = (mask & PK1) != 0; + bool has_pk2 = (mask & PK2) != 0; + bool full_pk = has_pk1 && has_pk2; + + BOOST_CHECK_MESSAGE( + sr->partition_key_restrictions_is_empty() == (!has_pk1 && !has_pk2), + ctx_msg("partition_key_restrictions_is_empty")); + + BOOST_CHECK_MESSAGE( + sr->partition_key_restrictions_is_all_eq() == true, + ctx_msg("partition_key_restrictions_is_all_eq")); + + BOOST_CHECK_MESSAGE( + sr->has_partition_key_unrestricted_components() == (!has_pk1 || !has_pk2), + ctx_msg("has_partition_key_unrestricted_components")); + + unsigned pk_restricted = (has_pk1 ? 1u : 0u) + (has_pk2 ? 
1u : 0u); + BOOST_CHECK_MESSAGE( + sr->partition_key_restrictions_size() == pk_restricted, + ctx_msg(fmt::format("partition_key_restrictions_size: got {} want {}", + sr->partition_key_restrictions_size(), pk_restricted))); + + BOOST_CHECK_MESSAGE( + sr->has_token_restrictions() == false, + ctx_msg("has_token_restrictions")); + + BOOST_CHECK_MESSAGE( + sr->key_is_in_relation() == false, + ctx_msg("key_is_in_relation")); + + // is_key_range: true unless full PK is specified with EQ + BOOST_CHECK_MESSAGE( + sr->is_key_range() == !full_pk, + ctx_msg("is_key_range")); + + // --- Clustering key APIs --- + BOOST_CHECK_MESSAGE( + sr->has_clustering_columns_restriction() == has_any_ck, + ctx_msg("has_clustering_columns_restriction")); + + BOOST_CHECK_MESSAGE( + sr->clustering_columns_restrictions_size() == ck_count, + ctx_msg(fmt::format("clustering_columns_restrictions_size: got {} want {}", + sr->clustering_columns_restrictions_size(), ck_count))); + + BOOST_CHECK_MESSAGE( + sr->has_unrestricted_clustering_columns() == (ck_count < 2), + ctx_msg("has_unrestricted_clustering_columns")); + + BOOST_CHECK_MESSAGE( + sr->clustering_key_restrictions_has_IN() == has_ck_in, + ctx_msg("clustering_key_restrictions_has_IN")); + + BOOST_CHECK_MESSAGE( + sr->clustering_key_restrictions_has_only_eq() == has_only_eq, + ctx_msg("clustering_key_restrictions_has_only_eq")); + + // ck_restrictions_need_filtering: + // = has_any_ck && (!full_pk || ck_need_filtering_internal) + // The internal predicate captures column-gap / non-prefix issues; + // has_partition_key_unrestricted_components() is ORed in by the + // outer ck_restrictions_need_filtering(). 
+ bool ck_needs_filter = has_any_ck && (!full_pk || ck_need_filtering_internal); + BOOST_CHECK_MESSAGE( + sr->ck_restrictions_need_filtering() == ck_needs_filter, + ctx_msg("ck_restrictions_need_filtering")); + + // --- Non-primary-key APIs --- + bool has_v1 = (mask & V1) != 0; + bool has_v3 = (mask & V3) != 0; + bool has_s1 = (mask & S1) != 0; + bool has_m_val = (mask & M_VAL) != 0; + bool has_m_key = (mask & M_KEY) != 0; + bool has_m_ent = (mask & M_ENT) != 0; + bool has_fs = (mask & FS) != 0; + bool has_nonpk = has_v1 || has_v3 || has_s1 + || has_m_val || has_m_key || has_m_ent || has_fs; + + BOOST_CHECK_MESSAGE( + sr->has_non_primary_key_restriction() == has_nonpk, + ctx_msg("has_non_primary_key_restriction")); + + // --- Per-column restriction checks --- + BOOST_CHECK_MESSAGE( + sr->is_restricted(&pk1_def) == has_pk1, + ctx_msg("is_restricted(pk1)")); + BOOST_CHECK_MESSAGE( + sr->is_restricted(&pk2_def) == has_pk2, + ctx_msg("is_restricted(pk2)")); + BOOST_CHECK_MESSAGE( + sr->is_restricted(&ck1_def) == has_ck1, + ctx_msg("is_restricted(ck1)")); + BOOST_CHECK_MESSAGE( + sr->is_restricted(&ck2_def) == has_ck2, + ctx_msg("is_restricted(ck2)")); + BOOST_CHECK_MESSAGE( + sr->is_restricted(&v1_def) == has_v1, + ctx_msg("is_restricted(v1)")); + BOOST_CHECK_MESSAGE( + sr->is_restricted(&v3_def) == has_v3, + ctx_msg("is_restricted(v3)")); + BOOST_CHECK_MESSAGE( + sr->is_restricted(&s1_def) == has_s1, + ctx_msg("is_restricted(s1)")); + BOOST_CHECK_MESSAGE( + sr->is_restricted(&m1_def) == has_m_val, + ctx_msg("is_restricted(m1)")); + BOOST_CHECK_MESSAGE( + sr->is_restricted(&m2_def) == has_m_key, + ctx_msg("is_restricted(m2)")); + BOOST_CHECK_MESSAGE( + sr->is_restricted(&m3_def) == has_m_ent, + ctx_msg("is_restricted(m3)")); + BOOST_CHECK_MESSAGE( + sr->is_restricted(&fs_def) == has_fs, + ctx_msg("is_restricted(fs)")); + + // has_eq_restriction_on_column: + // pk1/pk2 always EQ when present. + // ck1/ck2 depend on the CK restriction type. 
+ // v1/v3/fs are EQ, s1 is CONTAINS, m1 CONTAINS, m2 CONTAINS KEY, + // m3[1]='a' is subscript EQ (not recognized), fs is regular EQ. + BOOST_CHECK_MESSAGE( + sr->has_eq_restriction_on_column(pk1_def) == has_pk1, + ctx_msg("has_eq_restriction_on_column(pk1)")); + BOOST_CHECK_MESSAGE( + sr->has_eq_restriction_on_column(pk2_def) == has_pk2, + ctx_msg("has_eq_restriction_on_column(pk2)")); + BOOST_CHECK_MESSAGE( + sr->has_eq_restriction_on_column(ck1_def) == ck1_has_eq, + ctx_msg("has_eq_restriction_on_column(ck1)")); + BOOST_CHECK_MESSAGE( + sr->has_eq_restriction_on_column(ck2_def) == ck2_has_eq, + ctx_msg("has_eq_restriction_on_column(ck2)")); + BOOST_CHECK_MESSAGE( + sr->has_eq_restriction_on_column(v1_def) == has_v1, + ctx_msg("has_eq_restriction_on_column(v1)")); + BOOST_CHECK_MESSAGE( + sr->has_eq_restriction_on_column(v3_def) == has_v3, + ctx_msg("has_eq_restriction_on_column(v3)")); + // s1 CONTAINS is not EQ: + BOOST_CHECK_MESSAGE( + sr->has_eq_restriction_on_column(s1_def) == false, + ctx_msg("has_eq_restriction_on_column(s1)")); + // m1 CONTAINS is not EQ: + BOOST_CHECK_MESSAGE( + sr->has_eq_restriction_on_column(m1_def) == false, + ctx_msg("has_eq_restriction_on_column(m1)")); + // m2 CONTAINS KEY is not EQ: + BOOST_CHECK_MESSAGE( + sr->has_eq_restriction_on_column(m2_def) == false, + ctx_msg("has_eq_restriction_on_column(m2)")); + // m3[1] = 'a' is a subscript EQ — not recognized by + // has_eq_restriction_on_column (needs column_value/tuple_constructor LHS). + BOOST_CHECK_MESSAGE( + sr->has_eq_restriction_on_column(m3_def) == false, + ctx_msg("has_eq_restriction_on_column(m3)")); + // fs = {1,2} is a regular EQ on frozen collection: + BOOST_CHECK_MESSAGE( + sr->has_eq_restriction_on_column(fs_def) == has_fs, + ctx_msg("has_eq_restriction_on_column(fs)")); + + // --- Index selection --- + auto [idx_opt, idx_expr] = sr->find_idx(sim); + + // Determine expected index. 
The scoring algorithm: + // - do_find_idx iterates _index_restrictions (PK group, then + // CK group, then non-PK group in WHERE-clause order). + // - Multi-column restrictions are skipped (line 1358). + // - Score: local index with full PK = 2, global = 1, local + // without full PK = 0. + // - Strict > for tiebreaking → first with highest score wins. + // + // The PK index (comb_pk1) can be selected when: + // (a) pk1 is restricted with EQ, and + // (b) PK is incomplete (_is_key_range), which triggers + // _uses_secondary_indexing via _has_queriable_pk_index. + // PK restrictions are iterated first in _index_restrictions, so + // comb_pk1 (score 1) beats all later global indexes (tie → first + // wins). + // + // The CK index (comb_ck1) can only be selected when: + // (a) ck1 is restricted with single-column EQ (not IN — the + // index only supports EQ, not IN), + // (b) PK is incomplete (_is_key_range), and + // (c) no multi-column CK restriction (_has_multi_column blocks + // the CK-index path at line 1151). + // (d) CK1_EQ + CK1_IN conjunction makes the index unsupported + // (IN child fails is_supported_by). + // (e) pk1 is NOT restricted (otherwise comb_pk1 wins first). + // + // When comb_ck1 qualifies it is iterated after PK but before + // non-PK in _index_restrictions, and its score 1 ties with any + // non-PK global, so it wins. + + std::optional expected_idx; + + if (selects_comb_pk1) { + expected_idx = "comb_pk1"; + } else if (selects_comb_ck1 && !full_pk) { + expected_idx = "comb_ck1"; + } else if (full_pk && has_v3) { + // Local index scores 2, beats any global (score 1). + expected_idx = "comb_v3_local"; + } else if (has_v1) { + expected_idx = "comb_v1"; + } else if (has_s1) { + // v3 without full_pk scores 0 and is skipped. 
+ expected_idx = "comb_s1"; + } else if (has_m_val) { + expected_idx = "comb_m1_values"; + } else if (has_m_key) { + expected_idx = "comb_m2_keys"; + } else if (has_m_ent) { + expected_idx = "comb_m3_entries"; + } else if (has_fs) { + expected_idx = "comb_fs"; + } + // else: no indexable column (v3 alone without full_pk scores 0) + + bool uses_idx = expected_idx.has_value(); + + BOOST_CHECK_MESSAGE( + (idx_opt ? std::optional(idx_opt->metadata().name()) : std::nullopt) == expected_idx, + ctx_msg(fmt::format("find_idx: got {} want {}", + idx_opt ? idx_opt->metadata().name() : "(none)", + expected_idx.value_or("(none)")))); + BOOST_CHECK_MESSAGE( + sr->uses_secondary_indexing() == uses_idx, + ctx_msg(fmt::format("uses_secondary_indexing: got {} want {}", + sr->uses_secondary_indexing(), uses_idx))); + + // --- need_filtering --- + // Filtering is needed when: + // - PK is not fully specified (partial PK needs filtering unless + // using index) + // - CK has a gap (ck2 without ck1) needs filtering + // - Non-PK restrictions that aren't consumed by the index need + // filtering + // - When using an index, remaining restrictions beyond the + // indexed one need filtering + // - Multi-column CK restrictions that can't be converted to + // bounds need filtering + // The exact logic is complex; we check invariants. + bool need_filt = sr->need_filtering(); + + // 1. If no restrictions at all, no filtering needed. + if (mask == 0) { + BOOST_CHECK_MESSAGE(!need_filt, ctx_msg("need_filtering: empty should be false")); + } + // 2. If only the full PK is specified (no CK, no non-PK), no filtering. + if (mask == (PK1 | PK2)) { + BOOST_CHECK_MESSAGE(!need_filt, ctx_msg("need_filtering: full PK only should be false")); + } + // 3. Single indexed column (no CK): no filtering needed. + if (mask == PK1 || mask == V1 || mask == S1) { + BOOST_CHECK_MESSAGE(!need_filt, ctx_msg("need_filtering: single indexed column should be false")); + } + // 4. 
If using index + has extra non-PK restrictions, filtering is needed. + if (uses_idx) { + int non_pk_indexed_count = (has_v1 ? 1 : 0) + (has_v3 ? 1 : 0) + (has_s1 ? 1 : 0) + + (has_m_val ? 1 : 0) + (has_m_key ? 1 : 0) + (has_m_ent ? 1 : 0) + + (has_fs ? 1 : 0); + if (non_pk_indexed_count > 1) { + BOOST_CHECK_MESSAGE(need_filt, + ctx_msg("need_filtering: multiple non-PK restrictions with index should need filtering")); + } + } + // 5. Partial PK with no index needs filtering. + if ((has_pk1 != has_pk2) && !uses_idx) { + BOOST_CHECK_MESSAGE(need_filt, + ctx_msg("need_filtering: partial PK without index should need filtering")); + } + // 6. CK gap (ck2 without ck1, single-column only) needs filtering. + if (ck_need_filtering_internal) { + BOOST_CHECK_MESSAGE(need_filt, + ctx_msg("need_filtering: CK gap should need filtering")); + } + // 7. Non-PK restriction without index needs filtering. + if (has_nonpk && !uses_idx) { + BOOST_CHECK_MESSAGE(need_filt, + ctx_msg("need_filtering: non-PK restriction without index should need filtering")); + } + + // --- pk_restrictions_need_filtering --- + bool pk_needs_filter = sr->pk_restrictions_need_filtering(); + if (!uses_idx && has_pk1 != has_pk2) { + BOOST_CHECK_MESSAGE(pk_needs_filter, + ctx_msg("pk_restrictions_need_filtering: partial PK should need filtering")); + } + if (!has_pk1 && !has_pk2) { + BOOST_CHECK_MESSAGE(!pk_needs_filter, + ctx_msg("pk_restrictions_need_filtering: no PK should be false")); + } + if (full_pk) { + BOOST_CHECK_MESSAGE(!pk_needs_filter, + ctx_msg("pk_restrictions_need_filtering: full PK should be false")); + } + + // --- is_empty --- + BOOST_CHECK_MESSAGE( + sr->is_empty() == (mask == 0), + ctx_msg("is_empty")); + + // --- get_not_null_columns: none of our fragments use IS NOT NULL --- + BOOST_CHECK_MESSAGE( + sr->get_not_null_columns().empty(), + ctx_msg("get_not_null_columns should be empty")); + + // --- get_partition_key_ranges --- + // Always returns exactly 1 range. 
Full PK → singular range + // (specific partition). Otherwise → open-ended range. + { + auto pk_ranges = sr->get_partition_key_ranges(query_options({})); + BOOST_CHECK_MESSAGE(pk_ranges.size() == 1, + ctx_msg(fmt::format("get_partition_key_ranges: {} ranges, want 1", + pk_ranges.size()))); + if (full_pk) { + BOOST_CHECK_MESSAGE(pk_ranges.size() == 1 && pk_ranges[0].is_singular(), + ctx_msg("get_partition_key_ranges: full PK should yield singular range")); + } else { + BOOST_CHECK_MESSAGE(pk_ranges.size() == 1 && !pk_ranges[0].is_singular(), + ctx_msg("get_partition_key_ranges: incomplete PK should yield non-singular range")); + } + } + + // --- get_clustering_bounds --- + // Expected range count: + // Empty CK restrictions → 1 open-ended range. + // Multi-column: MULTI_IN → 2 singular, else → 1. + // Single-column: CK1_IN without CK1_EQ → 2 (IN values expand), + // else → 1 (CK1_EQ narrows intersection to 1 value). + { + auto ck_bounds = sr->get_clustering_bounds(query_options({})); + unsigned expected_ck_bounds; + if (!(mask & (SINGLE_CK_MASK | MULTI_CK_MASK))) { + // No CK restrictions → 1 open-ended range. + expected_ck_bounds = 1; + } else if (mask & MULTI_CK_MASK) { + expected_ck_bounds = (mask & MULTI_IN) ? 2 : 1; + } else { + expected_ck_bounds = ((mask & CK1_IN) && !(mask & CK1_EQ)) ? 2 : 1; + } + BOOST_CHECK_MESSAGE(ck_bounds.size() == expected_ck_bounds, + ctx_msg(fmt::format("get_clustering_bounds: {} ranges, want {}", + ck_bounds.size(), expected_ck_bounds))); + // With no CK restrictions the range should be open-ended. 
+ if (!(mask & (SINGLE_CK_MASK | MULTI_CK_MASK))) { + BOOST_CHECK_MESSAGE( + ck_bounds.size() == 1 + && !ck_bounds[0].start() + && !ck_bounds[0].end(), + ctx_msg("get_clustering_bounds: no CK should be open-ended")); + } + } + + // --- Index table range APIs --- + if (uses_idx) { + bool is_local_idx = (expected_idx == "comb_v3_local"); + const auto& view_schema = *sr->get_view_schema(); + + if (is_local_idx) { + // --- get_local_index_clustering_ranges --- + // Local index CK prefix = (indexed_col, base_ck1, ...). + // The indexed column is always EQ (1 value); CK IN values + // multiply via the base CK appended to the prefix. + auto local_ranges = sr->get_local_index_clustering_ranges(query_options({}), view_schema); + unsigned expected_local = 1 * idx_range_multiplier; + BOOST_CHECK_MESSAGE(!local_ranges.empty(), + ctx_msg("get_local_index_clustering_ranges should not be empty")); + BOOST_CHECK_MESSAGE(local_ranges.size() == expected_local, + ctx_msg(fmt::format("get_local_index_clustering_ranges: {} ranges, want {}", + local_ranges.size(), expected_local))); + } else { + // --- get_global_index_clustering_ranges --- + // Global index CK prefix = (token, pk1, pk2, ...base CK...). + // With full PK: CK IN values expand the prefix. + // Without full PK: prefix is empty → 1 open-ended range. + auto global_ranges = sr->get_global_index_clustering_ranges(query_options({}), view_schema); + BOOST_CHECK_MESSAGE(!global_ranges.empty(), + ctx_msg("get_global_index_clustering_ranges should not be empty")); + if (full_pk) { + unsigned expected_global = 1 * idx_range_multiplier; + BOOST_CHECK_MESSAGE(global_ranges.size() == expected_global, + ctx_msg(fmt::format( + "get_global_index_clustering_ranges (full PK): {} ranges, want {}", + global_ranges.size(), expected_global))); + } else { + // Without full PK the prefix has no token/PK entries, + // so we get 1 open-ended range. 
+ BOOST_CHECK_MESSAGE(global_ranges.size() == 1, + ctx_msg(fmt::format( + "get_global_index_clustering_ranges (!full PK): {} ranges, want 1", + global_ranges.size()))); + } + + // --- get_global_index_token_clustering_ranges --- + // For modern (non-v1) indexes the token column is + // long_type, so this dispatches to the same + // get_single_column_clustering_bounds as + // get_global_index_clustering_ranges. + auto token_ranges = sr->get_global_index_token_clustering_ranges(query_options({}), view_schema); + BOOST_CHECK_MESSAGE(token_ranges.size() == global_ranges.size(), + ctx_msg(fmt::format( + "get_global_index_token_clustering_ranges: {} ranges, want {} (same as global)", + token_ranges.size(), global_ranges.size()))); + } + } + + ++total_tested; + } + + BOOST_TEST_MESSAGE(fmt::format("Tested {} restriction combinations ({} legal, {} illegal, 2^{} = {} total)", + total_tested, total_tested - total_illegal, total_illegal, N_FRAG, FRAG_TOTAL)); + }, {}, tattr); +} + + // Currently expression doesn't have operator==(). // Implementing it is ugly, because there are shared pointers and the term base class. // For testing purposes checking stringified expressions is enough. From eec0b20dbc9298fc964e6fff8ef9f5716f78b14f Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sat, 19 Oct 2024 22:17:56 +0300 Subject: [PATCH 02/76] cql3: statement_restrictions: prepare statement_restrictions for capturing `this` Prevent copying/moving, which can change the object's address, and instead enforce using shared_ptr. Most of the code is already using shared_ptr, so the changes aren't very large. To forbid non-shared_ptr construction, the constructors are annotated with a private_tag tag class.
--- cql3/restrictions/statement_restrictions.cc | 18 ++++-- cql3/restrictions/statement_restrictions.hh | 21 +++++-- cql3/statements/modification_statement.cc | 2 +- cql3/statements/modification_statement.hh | 2 +- .../prune_materialized_view_statement.hh | 2 +- cql3/statements/raw/select_statement.hh | 2 +- cql3/statements/select_statement.cc | 14 ++--- cql3/statements/select_statement.hh | 4 +- cql3/statements/update_statement.hh | 2 +- service/pager/query_pagers.cc | 2 +- test/boost/statement_restrictions_test.cc | 14 ++--- test/vector_search/filter_test.cc | 60 +++++++++---------- 12 files changed, 81 insertions(+), 62 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 3c5426c3a9..a99edff7a1 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -778,7 +778,7 @@ bool has_only_eq_binops(const expression& e) { return non_eq_binop == nullptr; } -statement_restrictions::statement_restrictions(schema_ptr schema, bool allow_filtering) +statement_restrictions::statement_restrictions(private_tag, schema_ptr schema, bool allow_filtering) : _schema(schema) , _partition_range_is_simple(true) { } @@ -1058,7 +1058,8 @@ static std::vector extract_clustering_prefix_restrictions( return prefix; } -statement_restrictions::statement_restrictions(data_dictionary::database db, +statement_restrictions::statement_restrictions(private_tag, + data_dictionary::database db, schema_ptr schema, statements::statement_type type, const expr::expression& where_clause, @@ -1067,7 +1068,7 @@ statement_restrictions::statement_restrictions(data_dictionary::database db, bool for_view, bool allow_filtering, check_indexes do_check_indexes) - : statement_restrictions(schema, allow_filtering) + : statement_restrictions(private_tag{}, schema, allow_filtering) { _check_indexes = do_check_indexes; for (auto&& relation_expr : boolean_factors(where_clause)) { @@ -2884,7 +2885,7 @@ 
const std::unordered_set statement_restrictions::get_n return _not_null_columns; } -statement_restrictions +shared_ptr analyze_statement_restrictions( data_dictionary::database db, schema_ptr schema, @@ -2895,7 +2896,14 @@ analyze_statement_restrictions( bool for_view, bool allow_filtering, check_indexes do_check_indexes) { - return statement_restrictions(db, std::move(schema), type, where_clause, ctx, selects_only_static_columns, for_view, allow_filtering, do_check_indexes); + return make_shared(statement_restrictions::private_tag{}, db, std::move(schema), type, where_clause, ctx, selects_only_static_columns, for_view, allow_filtering, do_check_indexes); +} + +shared_ptr +make_trivial_statement_restrictions( + schema_ptr schema, + bool allow_filtering) { + return make_shared(statement_restrictions::private_tag{}, std::move(schema), allow_filtering); } } // namespace restrictions diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index 5af95ee78f..120f1e363d 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -32,6 +32,7 @@ using check_indexes = bool_class; * The restrictions corresponding to the relations specified on the where-clause of CQL query. */ class statement_restrictions { + struct private_tag {}; // Tag for private constructor private: schema_ptr _schema; @@ -130,9 +131,10 @@ public: * @param cfm the column family meta data * @return a new empty StatementRestrictions. 
*/ - statement_restrictions(schema_ptr schema, bool allow_filtering); + statement_restrictions(private_tag, schema_ptr schema, bool allow_filtering); - friend statement_restrictions analyze_statement_restrictions( +public: + friend shared_ptr analyze_statement_restrictions( data_dictionary::database db, schema_ptr schema, statements::statement_type type, @@ -142,9 +144,15 @@ public: bool for_view, bool allow_filtering, check_indexes do_check_indexes); + friend shared_ptr make_trivial_statement_restrictions( + schema_ptr schema, + bool allow_filtering); -private: - statement_restrictions(data_dictionary::database db, + // Important: objects of this class captures `this` extensively and so must remain non-copyable. + statement_restrictions(const statement_restrictions&) = delete; + statement_restrictions& operator=(const statement_restrictions&) = delete; + statement_restrictions(private_tag, + data_dictionary::database db, schema_ptr schema, statements::statement_type type, const expr::expression& where_clause, @@ -416,7 +424,7 @@ public: bool is_empty() const; }; -statement_restrictions analyze_statement_restrictions( +shared_ptr analyze_statement_restrictions( data_dictionary::database db, schema_ptr schema, statements::statement_type type, @@ -427,6 +435,9 @@ statement_restrictions analyze_statement_restrictions( bool allow_filtering, check_indexes do_check_indexes); +shared_ptr make_trivial_statement_restrictions( + schema_ptr schema, + bool allow_filtering); // Extracts all binary operators which have the given column on their left hand side. // Extracts only single-column restrictions. 
diff --git a/cql3/statements/modification_statement.cc b/cql3/statements/modification_statement.cc index 1c6cd4a28a..bb687a8220 100644 --- a/cql3/statements/modification_statement.cc +++ b/cql3/statements/modification_statement.cc @@ -626,7 +626,7 @@ modification_statement::prepare(data_dictionary::database db, prepare_context& c // Since this cache is only meaningful for LWT queries, just clear the ids // if it's not a conditional statement so that the AST nodes don't // participate in the caching mechanism later. - if (!prepared_stmt->has_conditions() && prepared_stmt->_restrictions.has_value()) { + if (!prepared_stmt->has_conditions() && prepared_stmt->_restrictions) { ctx.clear_pk_function_calls_cache(); } prepared_stmt->_may_use_token_aware_routing = ctx.get_partition_key_bind_indexes(*schema).size() != 0; diff --git a/cql3/statements/modification_statement.hh b/cql3/statements/modification_statement.hh index 9b88930821..5543ac87d5 100644 --- a/cql3/statements/modification_statement.hh +++ b/cql3/statements/modification_statement.hh @@ -94,7 +94,7 @@ private: std::optional _is_raw_counter_shard_write; protected: - std::optional _restrictions; + shared_ptr _restrictions; public: typedef std::optional> json_cache_opt; diff --git a/cql3/statements/prune_materialized_view_statement.hh b/cql3/statements/prune_materialized_view_statement.hh index 6d952b4cd3..69fefb6b4d 100644 --- a/cql3/statements/prune_materialized_view_statement.hh +++ b/cql3/statements/prune_materialized_view_statement.hh @@ -19,7 +19,7 @@ public: uint32_t bound_terms, lw_shared_ptr parameters, ::shared_ptr selection, - ::shared_ptr restrictions, + ::shared_ptr restrictions, ::shared_ptr> group_by_cell_indices, bool is_reversed, ordering_comparator_type ordering_comparator, diff --git a/cql3/statements/raw/select_statement.hh b/cql3/statements/raw/select_statement.hh index 075503fea1..bcd559d6af 100644 --- a/cql3/statements/raw/select_statement.hh +++ b/cql3/statements/raw/select_statement.hh @@ 
-109,7 +109,7 @@ public: std::unique_ptr prepare(data_dictionary::database db, cql_stats& stats, const cql_config& cfg, bool for_view); private: std::vector maybe_jsonize_select_clause(std::vector select, data_dictionary::database db, schema_ptr schema); - ::shared_ptr prepare_restrictions( + ::shared_ptr prepare_restrictions( data_dictionary::database db, schema_ptr schema, prepare_context& ctx, diff --git a/cql3/statements/select_statement.cc b/cql3/statements/select_statement.cc index c2d25c83f5..da8a068e55 100644 --- a/cql3/statements/select_statement.cc +++ b/cql3/statements/select_statement.cc @@ -1027,7 +1027,7 @@ view_indexed_table_select_statement::prepare(data_dictionary::database db, uint32_t bound_terms, lw_shared_ptr parameters, ::shared_ptr selection, - ::shared_ptr restrictions, + ::shared_ptr restrictions, ::shared_ptr> group_by_cell_indices, bool is_reversed, ordering_comparator_type ordering_comparator, @@ -1607,7 +1607,7 @@ public: uint32_t bound_terms, lw_shared_ptr parameters, ::shared_ptr selection, - ::shared_ptr restrictions, + ::shared_ptr restrictions, ::shared_ptr> group_by_cell_indices, bool is_reversed, ordering_comparator_type ordering_comparator, @@ -1645,7 +1645,7 @@ private: uint32_t bound_terms, lw_shared_ptr parameters, ::shared_ptr selection, - ::shared_ptr restrictions, + ::shared_ptr restrictions, ::shared_ptr> group_by_cell_indices, bool is_reversed, parallelized_select_statement::ordering_comparator_type ordering_comparator, @@ -2076,7 +2076,7 @@ static select_statement::ordering_comparator_type get_similarity_ordering_compar ::shared_ptr vector_indexed_table_select_statement::prepare(data_dictionary::database db, schema_ptr schema, uint32_t bound_terms, lw_shared_ptr parameters, ::shared_ptr selection, - ::shared_ptr restrictions, ::shared_ptr> group_by_cell_indices, bool is_reversed, + ::shared_ptr restrictions, ::shared_ptr> group_by_cell_indices, bool is_reversed, ordering_comparator_type ordering_comparator, 
prepared_ann_ordering_type prepared_ann_ordering, std::optional limit, std::optional per_partition_limit, cql_stats& stats, const secondary_index::index& index, std::unique_ptr attrs) { @@ -2589,7 +2589,7 @@ std::unique_ptr select_statement::prepare(data_dictionary::d return make_unique(audit_info(), std::move(stmt), ctx, std::move(partition_key_bind_indices), std::move(warnings)); } -::shared_ptr +::shared_ptr select_statement::prepare_restrictions(data_dictionary::database db, schema_ptr schema, prepare_context& ctx, @@ -2599,8 +2599,8 @@ select_statement::prepare_restrictions(data_dictionary::database db, restrictions::check_indexes do_check_indexes) { try { - return ::make_shared(restrictions::analyze_statement_restrictions(db, schema, statement_type::SELECT, _where_clause, ctx, - selection->contains_only_static_columns(), for_view, allow_filtering, do_check_indexes)); + return restrictions::analyze_statement_restrictions(db, schema, statement_type::SELECT, _where_clause, ctx, + selection->contains_only_static_columns(), for_view, allow_filtering, do_check_indexes); } catch (const exceptions::unrecognized_entity_exception& e) { if (contains_alias(e.entity)) { throw exceptions::invalid_request_exception(format("Aliases aren't allowed in the WHERE clause (name: '{}')", e.entity)); diff --git a/cql3/statements/select_statement.hh b/cql3/statements/select_statement.hh index 0f1d333f01..87ce5accd9 100644 --- a/cql3/statements/select_statement.hh +++ b/cql3/statements/select_statement.hh @@ -200,7 +200,7 @@ public: uint32_t bound_terms, lw_shared_ptr parameters, ::shared_ptr selection, - ::shared_ptr restrictions, + ::shared_ptr restrictions, ::shared_ptr> group_by_cell_indices, bool is_reversed, ordering_comparator_type ordering_comparator, @@ -372,7 +372,7 @@ public: static ::shared_ptr prepare(data_dictionary::database db, schema_ptr schema, uint32_t bound_terms, lw_shared_ptr parameters, ::shared_ptr selection, - ::shared_ptr restrictions, ::shared_ptr> 
group_by_cell_indices, bool is_reversed, + ::shared_ptr restrictions, ::shared_ptr> group_by_cell_indices, bool is_reversed, ordering_comparator_type ordering_comparator, prepared_ann_ordering_type prepared_ann_ordering, std::optional limit, std::optional per_partition_limit, cql_stats& stats, const secondary_index::index& index, std::unique_ptr attrs); diff --git a/cql3/statements/update_statement.hh b/cql3/statements/update_statement.hh index 356df3f19f..2dfbf49150 100644 --- a/cql3/statements/update_statement.hh +++ b/cql3/statements/update_statement.hh @@ -66,7 +66,7 @@ public: : update_statement(std::move(audit_info), statement_type::INSERT, bound_terms, s, std::move(attrs), stats) , _value(std::move(v)) , _default_unset(default_unset) { - _restrictions = restrictions::statement_restrictions(s, false); + _restrictions = cql3::restrictions::make_trivial_statement_restrictions(s, false); } private: virtual void execute_operations_for_key(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params, const json_cache_opt& json_cache) const override; diff --git a/service/pager/query_pagers.cc b/service/pager/query_pagers.cc index f68854631c..42b09c1949 100644 --- a/service/pager/query_pagers.cc +++ b/service/pager/query_pagers.cc @@ -493,7 +493,7 @@ std::unique_ptr service::pager::query_pagers::pager // If partition row limit is applied to paging, we still need to fall back // to filtering the results to avoid extraneous rows on page breaks. 
if (!filtering_restrictions && cmd->slice.partition_row_limit() < query::max_rows_if_set) { - filtering_restrictions = ::make_shared(s, true); + filtering_restrictions = cql3::restrictions::make_trivial_statement_restrictions(s, true); } if (filtering_restrictions) { return std::make_unique(proxy, std::move(s), std::move(selection), state, diff --git a/test/boost/statement_restrictions_test.cc b/test/boost/statement_restrictions_test.cc index 04ded2d886..977f7b8dfb 100644 --- a/test/boost/statement_restrictions_test.cc +++ b/test/boost/statement_restrictions_test.cc @@ -45,7 +45,7 @@ query::clustering_row_ranges slice( /*for_view=*/false, /*allow_filtering=*/true, restrictions::check_indexes::yes) - .get_clustering_bounds(query_options({})); + ->get_clustering_bounds(query_options({})); } /// Overload that parses the WHERE clause from string. Named differently to disambiguate when where_clause is @@ -427,11 +427,11 @@ SEASTAR_TEST_CASE(index_selection) { /*for_view=*/false, /*allow_filtering=*/true, restrictions::check_indexes::yes); - auto [idx, restrictions_expr] = sr.find_idx(sim); + auto [idx, restrictions_expr] = sr->find_idx(sim); return {where_clause, idx ? std::optional(idx->metadata().name()) : std::nullopt, - sr.uses_secondary_indexing(), - sr.need_filtering()}; + sr->uses_secondary_indexing(), + sr->need_filtering()}; }; auto none = std::optional{}; @@ -667,9 +667,9 @@ SEASTAR_TEST_CASE(combinatorial_restrictions) { ? 
expr::expression(expr::conjunction{}) : cql3::util::where_clause_to_relations(where_clause, cql3::dialect{}); - std::optional sr; + shared_ptr sr; try { - sr.emplace(restrictions::analyze_statement_restrictions( + sr = restrictions::analyze_statement_restrictions( e.data_dictionary(), schema, statements::statement_type::SELECT, @@ -678,7 +678,7 @@ SEASTAR_TEST_CASE(combinatorial_restrictions) { /*contains_only_static_columns=*/false, /*for_view=*/false, /*allow_filtering=*/true, - restrictions::check_indexes::yes)); + restrictions::check_indexes::yes); } catch (const exceptions::invalid_request_exception&) { } diff --git a/test/vector_search/filter_test.cc b/test/vector_search/filter_test.cc index 28d1d83d76..902020bae8 100644 --- a/test/vector_search/filter_test.cc +++ b/test/vector_search/filter_test.cc @@ -23,7 +23,7 @@ using namespace cql3; namespace { /// Helper to create statement_restrictions from a WHERE clause string -restrictions::statement_restrictions make_restrictions( +shared_ptr make_restrictions( std::string_view where_clause, cql_test_env& env, const sstring& table_name = "t", const sstring& keyspace_name = "ks") { prepare_context ctx; @@ -63,8 +63,8 @@ SEASTAR_TEST_CASE(to_json_empty_restrictions) { cquery_nofail(e, "create table ks.t(pk int, ck int, v vector, primary key(pk, ck))"); auto schema = e.local_db().find_schema("ks", "t"); - restrictions::statement_restrictions restr(schema, false); - auto json = rjson::print(vector_search::prepare_filter(restr, false).to_json(query_options({}))); + shared_ptr restr = restrictions::make_trivial_statement_restrictions(schema, false); + auto json = rjson::print(vector_search::prepare_filter(*restr, false).to_json(query_options({}))); BOOST_CHECK_EQUAL(json, "{}"); }); @@ -75,7 +75,7 @@ SEASTAR_TEST_CASE(to_json_with_allow_filtering) { cquery_nofail(e, "create table ks.t(pk int, ck int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=1", e); - auto json = get_restrictions_json(restr, 
true); + auto json = get_restrictions_json(*restr, true); auto expected = R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":1}],"allow_filtering":true})json"; BOOST_CHECK_EQUAL(json, expected); @@ -87,7 +87,7 @@ SEASTAR_TEST_CASE(to_json_single_column_eq) { cquery_nofail(e, "create table ks.t(pk int, ck int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=42", e); - auto json = get_restrictions_json(restr, false); + auto json = get_restrictions_json(*restr, false); auto expected = R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":42}],"allow_filtering":false})json"; BOOST_CHECK_EQUAL(json, expected); @@ -99,7 +99,7 @@ SEASTAR_TEST_CASE(to_json_single_column_lt) { cquery_nofail(e, "create table ks.t(pk int, ck int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=1 and ck<100", e); - auto json = get_restrictions_json(restr, true); + auto json = get_restrictions_json(*restr, true); auto expected = R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":1},{"type":"<","lhs":"ck","rhs":100}],"allow_filtering":true})json"; BOOST_CHECK_EQUAL(json, expected); @@ -111,7 +111,7 @@ SEASTAR_TEST_CASE(to_json_single_column_gt) { cquery_nofail(e, "create table ks.t(pk int, ck int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=1 and ck>50", e); - auto json = get_restrictions_json(restr, true); + auto json = get_restrictions_json(*restr, true); auto expected = R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":1},{"type":">","lhs":"ck","rhs":50}],"allow_filtering":true})json"; BOOST_CHECK_EQUAL(json, expected); @@ -123,7 +123,7 @@ SEASTAR_TEST_CASE(to_json_single_column_lte) { cquery_nofail(e, "create table ks.t(pk int, ck int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=1 and ck<=75", e); - auto json = get_restrictions_json(restr, true); + auto json = get_restrictions_json(*restr, true); auto expected = 
R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":1},{"type":"<=","lhs":"ck","rhs":75}],"allow_filtering":true})json"; BOOST_CHECK_EQUAL(json, expected); @@ -135,7 +135,7 @@ SEASTAR_TEST_CASE(to_json_single_column_gte) { cquery_nofail(e, "create table ks.t(pk int, ck int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=1 and ck>=25", e); - auto json = get_restrictions_json(restr, true); + auto json = get_restrictions_json(*restr, true); auto expected = R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":1},{"type":">=","lhs":"ck","rhs":25}],"allow_filtering":true})json"; BOOST_CHECK_EQUAL(json, expected); @@ -147,7 +147,7 @@ SEASTAR_TEST_CASE(to_json_single_column_in) { cquery_nofail(e, "create table ks.t(pk int, ck int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=1 and ck in (1, 2, 3)", e); - auto json = get_restrictions_json(restr, true); + auto json = get_restrictions_json(*restr, true); auto expected = R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":1},{"type":"IN","lhs":"ck","rhs":[1,2,3]}],"allow_filtering":true})json"; BOOST_CHECK_EQUAL(json, expected); @@ -159,7 +159,7 @@ SEASTAR_TEST_CASE(to_json_string_value) { cquery_nofail(e, "create table ks.t(pk text, ck int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk='hello'", e); - auto json = get_restrictions_json(restr, false); + auto json = get_restrictions_json(*restr, false); auto expected = R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":"hello"}],"allow_filtering":false})json"; BOOST_CHECK_EQUAL(json, expected); @@ -171,7 +171,7 @@ SEASTAR_TEST_CASE(to_json_multi_column_eq) { cquery_nofail(e, "create table ks.t(pk int, ck1 int, ck2 int, v vector, primary key(pk, ck1, ck2))"); auto restr = make_restrictions("pk=1 and (ck1, ck2)=(10, 20)", e); - auto json = get_restrictions_json(restr, true); + auto json = get_restrictions_json(*restr, true); auto expected = 
R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":1},{"type":"()==()","lhs":["ck1","ck2"],"rhs":[10,20]}],"allow_filtering":true})json"; BOOST_CHECK_EQUAL(json, expected); @@ -183,7 +183,7 @@ SEASTAR_TEST_CASE(to_json_multi_column_lt) { cquery_nofail(e, "create table ks.t(pk int, ck1 int, ck2 int, v vector, primary key(pk, ck1, ck2))"); auto restr = make_restrictions("pk=1 and (ck1, ck2)<(10, 20)", e); - auto json = get_restrictions_json(restr, true); + auto json = get_restrictions_json(*restr, true); auto expected = R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":1},{"type":"()<()","lhs":["ck1","ck2"],"rhs":[10,20]}],"allow_filtering":true})json"; BOOST_CHECK_EQUAL(json, expected); @@ -195,7 +195,7 @@ SEASTAR_TEST_CASE(to_json_multi_column_gt) { cquery_nofail(e, "create table ks.t(pk int, ck1 int, ck2 int, v vector, primary key(pk, ck1, ck2))"); auto restr = make_restrictions("pk=1 and (ck1, ck2)>(10, 20)", e); - auto json = get_restrictions_json(restr, true); + auto json = get_restrictions_json(*restr, true); auto expected = R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":1},{"type":"()>()","lhs":["ck1","ck2"],"rhs":[10,20]}],"allow_filtering":true})json"; BOOST_CHECK_EQUAL(json, expected); @@ -207,7 +207,7 @@ SEASTAR_TEST_CASE(to_json_multi_column_lte) { cquery_nofail(e, "create table ks.t(pk int, ck1 int, ck2 int, v vector, primary key(pk, ck1, ck2))"); auto restr = make_restrictions("pk=1 and (ck1, ck2)<=(10, 20)", e); - auto json = get_restrictions_json(restr, true); + auto json = get_restrictions_json(*restr, true); auto expected = R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":1},{"type":"()<=()","lhs":["ck1","ck2"],"rhs":[10,20]}],"allow_filtering":true})json"; BOOST_CHECK_EQUAL(json, expected); @@ -219,7 +219,7 @@ SEASTAR_TEST_CASE(to_json_multi_column_gte) { cquery_nofail(e, "create table ks.t(pk int, ck1 int, ck2 int, v vector, primary key(pk, ck1, ck2))"); auto restr = make_restrictions("pk=1 and (ck1, ck2)>=(10, 20)", e); - auto 
json = get_restrictions_json(restr, true); + auto json = get_restrictions_json(*restr, true); auto expected = R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":1},{"type":"()>=()","lhs":["ck1","ck2"],"rhs":[10,20]}],"allow_filtering":true})json"; BOOST_CHECK_EQUAL(json, expected); @@ -231,7 +231,7 @@ SEASTAR_TEST_CASE(to_json_multi_column_in) { cquery_nofail(e, "create table ks.t(pk int, ck1 int, ck2 int, v vector, primary key(pk, ck1, ck2))"); auto restr = make_restrictions("pk=1 and (ck1, ck2) in ((1, 2), (3, 4))", e); - auto json = get_restrictions_json(restr, true); + auto json = get_restrictions_json(*restr, true); auto expected = R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":1},{"type":"()IN()","lhs":["ck1","ck2"],"rhs":[[1,2],[3,4]]}],"allow_filtering":true})json"; BOOST_CHECK_EQUAL(json, expected); @@ -243,7 +243,7 @@ SEASTAR_TEST_CASE(to_json_multiple_restrictions) { cquery_nofail(e, "create table ks.t(pk int, ck int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=1 and ck>=10 and ck<100", e); - auto json = get_restrictions_json(restr, true); + auto json = get_restrictions_json(*restr, true); auto expected = R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":1},{"type":">=","lhs":"ck","rhs":10},{"type":"<","lhs":"ck","rhs":100}],"allow_filtering":true})json"; BOOST_CHECK_EQUAL(json, expected); @@ -255,7 +255,7 @@ SEASTAR_TEST_CASE(to_json_with_boolean_value) { cquery_nofail(e, "create table ks.t(pk int, ck boolean, v vector, primary key(pk, ck))"); auto restr = make_restrictions("ck=true", e); - auto json = get_restrictions_json(restr, true); + auto json = get_restrictions_json(*restr, true); auto expected = R"json({"restrictions":[{"type":"==","lhs":"ck","rhs":true}],"allow_filtering":true})json"; BOOST_CHECK_EQUAL(json, expected); @@ -267,7 +267,7 @@ SEASTAR_TEST_CASE(to_json_bind_marker_partition_key) { cquery_nofail(e, "create table ks.t(pk int, ck int, v vector, primary key(pk, ck))"); auto restr = 
make_restrictions("pk=?", e); - auto filter = vector_search::prepare_filter(restr, false); + auto filter = vector_search::prepare_filter(*restr, false); std::vector bind_values = {raw_value::make_value(int32_type->decompose(42))}; auto options = make_query_options(std::move(bind_values)); @@ -283,7 +283,7 @@ SEASTAR_TEST_CASE(to_json_bind_marker_clustering_key) { cquery_nofail(e, "create table ks.t(pk int, ck int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=? and ck>?", e); - auto filter = vector_search::prepare_filter(restr, true); + auto filter = vector_search::prepare_filter(*restr, true); std::vector bind_values = { raw_value::make_value(int32_type->decompose(1)), @@ -301,7 +301,7 @@ SEASTAR_TEST_CASE(to_json_bind_marker_different_values) { cquery_nofail(e, "create table ks.t(pk int, ck int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=?", e); - auto filter = vector_search::prepare_filter(restr, false); + auto filter = vector_search::prepare_filter(*restr, false); std::vector bind_values1 = {raw_value::make_value(int32_type->decompose(100))}; auto options1 = make_query_options(std::move(bind_values1)); @@ -322,7 +322,7 @@ SEASTAR_TEST_CASE(to_json_bind_marker_string_value) { cquery_nofail(e, "create table ks.t(pk text, ck int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=?", e); - auto filter = vector_search::prepare_filter(restr, false); + auto filter = vector_search::prepare_filter(*restr, false); std::vector bind_values = {raw_value::make_value(utf8_type->decompose("hello_world"))}; auto options = make_query_options(std::move(bind_values)); @@ -338,7 +338,7 @@ SEASTAR_TEST_CASE(to_json_mixed_literals_and_bind_markers) { cquery_nofail(e, "create table ks.t(pk int, ck int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=1 and ck>?", e); - auto filter = vector_search::prepare_filter(restr, true); + auto filter = vector_search::prepare_filter(*restr, true); std::vector 
bind_values = {raw_value::make_value(int32_type->decompose(25))}; auto options = make_query_options(std::move(bind_values)); @@ -354,7 +354,7 @@ SEASTAR_TEST_CASE(to_json_bind_marker_in_list) { cquery_nofail(e, "create table ks.t(pk int, ck int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=1 and ck in ?", e); - auto filter = vector_search::prepare_filter(restr, true); + auto filter = vector_search::prepare_filter(*restr, true); auto list_type = list_type_impl::get_instance(int32_type, true); auto list_val = make_list_value(list_type, {data_value(10), data_value(20), data_value(30)}); @@ -373,7 +373,7 @@ SEASTAR_TEST_CASE(to_json_bind_marker_multi_column) { cquery_nofail(e, "create table ks.t(pk int, ck1 int, ck2 int, v vector, primary key(pk, ck1, ck2))"); auto restr = make_restrictions("pk=1 and (ck1, ck2)>?", e); - auto filter = vector_search::prepare_filter(restr, true); + auto filter = vector_search::prepare_filter(*restr, true); auto tuple_type = tuple_type_impl::get_instance({int32_type, int32_type}); auto tuple_val = make_tuple_value(tuple_type, {data_value(10), data_value(20)}); @@ -392,7 +392,7 @@ SEASTAR_TEST_CASE(to_json_no_bind_markers_uses_cache) { cquery_nofail(e, "create table ks.t(pk int, ck int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=42", e); - auto filter = vector_search::prepare_filter(restr, false); + auto filter = vector_search::prepare_filter(*restr, false); auto options1 = query_options({}); auto json1 = rjson::print(filter.to_json(options1)); @@ -412,7 +412,7 @@ SEASTAR_TEST_CASE(to_json_nonprimary_key_eq) { cquery_nofail(e, "create table ks.t(pk int, ck int, r int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=1 and r=42", e); - auto json = get_restrictions_json(restr, true); + auto json = get_restrictions_json(*restr, true); auto expected = R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":1},{"type":"==","lhs":"r","rhs":42}],"allow_filtering":true})json"; 
BOOST_CHECK_EQUAL(json, expected); @@ -424,7 +424,7 @@ SEASTAR_TEST_CASE(to_json_nonprimary_key_range) { cquery_nofail(e, "create table ks.t(pk int, ck int, r int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=1 and r>10 and r<100", e); - auto json = get_restrictions_json(restr, true); + auto json = get_restrictions_json(*restr, true); auto expected = R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":1},{"type":">","lhs":"r","rhs":10},{"type":"<","lhs":"r","rhs":100}],"allow_filtering":true})json"; BOOST_CHECK_EQUAL(json, expected); @@ -436,7 +436,7 @@ SEASTAR_TEST_CASE(to_json_nonprimary_key_bind_marker) { cquery_nofail(e, "create table ks.t(pk int, ck int, r int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=1 and r=?", e); - auto filter = vector_search::prepare_filter(restr, true); + auto filter = vector_search::prepare_filter(*restr, true); std::vector bind_values = {raw_value::make_value(int32_type->decompose(99))}; auto options = make_query_options(std::move(bind_values)); From 926886fcfb0243e78fc04ae74df11d28239cebea Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 23 Sep 2024 22:38:27 +0300 Subject: [PATCH 03/76] cql3: statement_restrictions: wrap get_partition_key_ranges statement_restrictions::get_partition_key_ranges() re-interprets the expressions used to specify the partition key. This means that the analysis phase (determining what those expressions are and how they are to be used) and the execution phase (using them) are in separate places. This makes it very hard to refactor while preserving correctness. As a first step in unifying the two phases, we move the selection of the strategy (using token, cartesian product, or single partition) from execution to analysis, by making the if-tree return a function to be executed at execution time, rather than running the if-tree itself at execution time. 
--- cql3/restrictions/statement_restrictions.cc | 15 +++++++++++++++ cql3/restrictions/statement_restrictions.hh | 6 ++++++ 2 files changed, 21 insertions(+) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index a99edff7a1..71d30dbe9d 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1253,6 +1253,8 @@ statement_restrictions::statement_restrictions(private_tag, } } } + + _get_partition_key_ranges_fn = build_partition_key_ranges_fn(); } bool @@ -1918,8 +1920,15 @@ dht::partition_range_vector partition_ranges_from_EQs( } // anonymous namespace dht::partition_range_vector statement_restrictions::get_partition_key_ranges(const query_options& options) const { + return _get_partition_key_ranges_fn(options); +} + +get_partition_key_ranges_fn_t +statement_restrictions::build_partition_key_ranges_fn() const { if (_partition_range_restrictions.empty()) { + return [] (const query_options& options) -> dht::partition_range_vector{ return {dht::partition_range::make_open_ended_both_sides()}; + }; } if (has_partition_token(_partition_range_restrictions[0], *_schema)) { if (_partition_range_restrictions.size() != 1) { @@ -1927,12 +1936,18 @@ dht::partition_range_vector statement_restrictions::get_partition_key_ranges(con rlogger, format("Unexpected size of token restrictions: {}", _partition_range_restrictions.size())); } + return [&] (const query_options& options) { return partition_ranges_from_token(_partition_range_restrictions[0], options, *_schema); + }; } else if (_partition_range_is_simple) { + return [&] (const query_options& options) { // Special case to avoid extra allocations required for a Cartesian product. 
return partition_ranges_from_EQs(_partition_range_restrictions, options, *_schema); + }; } + return [&] (const query_options& options) { return partition_ranges_from_singles(_partition_range_restrictions, options, *_schema); + }; } namespace { diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index 120f1e363d..414bb3ea77 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -28,6 +28,10 @@ namespace restrictions { ///have an index-manager, or even a table object. using check_indexes = bool_class; +// A function that returns the partition key ranges for a query. It is the solver of +// WHERE clause fragments such as WHERE token(pk) > 1 or WHERE pk1 IN :list1 AND pk2 IN :list2. +using get_partition_key_ranges_fn_t = std::function; + /** * The restrictions corresponding to the relations specified on the where-clause of CQL query. */ @@ -124,6 +128,7 @@ private: schema_ptr _view_schema; std::optional _idx_opt; expr::expression _idx_restrictions = expr::conjunction({}); + get_partition_key_ranges_fn_t _get_partition_key_ranges_fn; public: /** * Creates a new empty StatementRestrictions. @@ -324,6 +329,7 @@ private: unsigned int num_clustering_prefix_columns_that_need_not_be_filtered() const; void calculate_column_defs_for_filtering_and_erase_restrictions_used_for_index(data_dictionary::database db); + get_partition_key_ranges_fn_t build_partition_key_ranges_fn() const; public: /** * Returns the specified range of the partition key. From fcf7c4c90ddcafb45a6cceeb6521d5a58233998c Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sat, 19 Oct 2024 23:01:45 +0300 Subject: [PATCH 04/76] cql3: statement_restrictions: move value_list, value_set to header file They don't really need to be public, but will be used in intermediate storage. 
--- cql3/restrictions/statement_restrictions.cc | 7 ------- cql3/restrictions/statement_restrictions.hh | 8 ++++++++ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 71d30dbe9d..83866f4207 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -55,13 +55,6 @@ using namespace expr; static logging::logger rlogger("restrictions"); static auto& expr_logger = rlogger; // compatibility with code moved from expression.cc -/// A set of discrete values. -using value_list = std::vector; // Sorted and deduped using value comparator. - -/// General set of values. Empty set and single-element sets are always value_list. interval is -/// never singular and never has start > end. Universal set is a interval with both bounds null. -using value_set = std::variant>; - /// A set of all column values that would satisfy an expression. The _token_values variant finds /// matching values for the partition token function call instead of the column. /// diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index 414bb3ea77..9645dc3dce 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -23,6 +23,14 @@ namespace cql3 { namespace restrictions { +/// A set of discrete values. +using value_list = std::vector; // Sorted and deduped using value comparator. + +/// General set of values. Empty set and single-element sets are always value_list. interval is +/// never singular and never has start > end. Universal set is a interval with both bounds null. +using value_set = std::variant>; + + ///In some cases checking if columns have indexes is undesired of even ///impossible, because e.g. the query runs on a pseudo-table, which does not ///have an index-manager, or even a table object. 
From 531f137ed376d31c62d2965df17981dd200fdada Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 20 Oct 2024 16:49:57 +0300 Subject: [PATCH 05/76] cql3: statement_restrictions: split _partition_range_restrictions into three cases _partition_range_restrictions are a vector of expressions, one per partition key column, except that it can be empty if there is no restriction on the partition that can be translated to a read command, and if the restriction is on a token range, the first element only is used. Separate the three cases into distinct structs. After this, additional work can be done utilizing the specialization. --- cql3/restrictions/statement_restrictions.cc | 67 +++++++++++++-------- cql3/restrictions/statement_restrictions.hh | 18 +++++- 2 files changed, 58 insertions(+), 27 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 83866f4207..28cd1f9bd8 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -28,6 +28,7 @@ #include "dht/i_partitioner.hh" #include "db/schema_tables.hh" #include "types/tuple.hh" +#include "utils/overloaded_functor.hh" namespace { struct maybe_column_definition { @@ -791,7 +792,7 @@ void with_current_binary_operator( } /// Every token, or if no tokens, an EQ/IN of every single PK column. 
-static std::vector extract_partition_range( +static partition_range_restrictions extract_partition_range( const expr::expression& where_clause, schema_ptr schema) { using namespace expr; struct extract_partition_range_visitor { @@ -901,12 +902,14 @@ static std::vector extract_partition_range( expr::visit(v, where_clause); if (v.tokens) { - return {std::move(*v.tokens)}; + return token_range_restrictions{.token_restrictions = std::move(*v.tokens)}; } if (v.single_column.size() == schema->partition_key_size()) { - return v.single_column | std::views::values | std::ranges::to(); + return single_column_partition_range_restrictions{ + .per_column_restrictions = v.single_column | std::views::values | std::ranges::to(), + }; } - return {}; + return no_partition_range_restrictions{}; } /// Extracts where_clause atoms with clustering-column LHS and copies them to a vector. These elements define the @@ -1918,29 +1921,29 @@ dht::partition_range_vector statement_restrictions::get_partition_key_ranges(con get_partition_key_ranges_fn_t statement_restrictions::build_partition_key_ranges_fn() const { - if (_partition_range_restrictions.empty()) { + return std::visit(overloaded_functor{ + [&] (const no_partition_range_restrictions&) -> get_partition_key_ranges_fn_t { return [] (const query_options& options) -> dht::partition_range_vector{ return {dht::partition_range::make_open_ended_both_sides()}; }; - } - if (has_partition_token(_partition_range_restrictions[0], *_schema)) { - if (_partition_range_restrictions.size() != 1) { - on_internal_error( - rlogger, - format("Unexpected size of token restrictions: {}", _partition_range_restrictions.size())); + }, + [&] (const token_range_restrictions& r) -> get_partition_key_ranges_fn_t { + return [&] (const query_options& options) -> dht::partition_range_vector { + return partition_ranges_from_token(r.token_restrictions, options, *_schema); + }; + }, + [&] (const single_column_partition_range_restrictions& r) -> 
get_partition_key_ranges_fn_t { + if (_partition_range_is_simple) { + return [&] (const query_options& options) { + // Special case to avoid extra allocations required for a Cartesian product. + return partition_ranges_from_EQs(r.per_column_restrictions, options, *_schema); + }; + } else { + return [&] (const query_options& options) { + return partition_ranges_from_singles(r.per_column_restrictions, options, *_schema); + }; } - return [&] (const query_options& options) { - return partition_ranges_from_token(_partition_range_restrictions[0], options, *_schema); - }; - } else if (_partition_range_is_simple) { - return [&] (const query_options& options) { - // Special case to avoid extra allocations required for a Cartesian product. - return partition_ranges_from_EQs(_partition_range_restrictions, options, *_schema); - }; - } - return [&] (const query_options& options) { - return partition_ranges_from_singles(_partition_range_restrictions, options, *_schema); - }; + }}, _partition_range_restrictions); } namespace { @@ -2716,10 +2719,13 @@ void statement_restrictions::prepare_indexed_global(const schema& idx_tbl_schema // avoids indexing when _partition_range_is_simple. See _idx_tbl_ck_prefix blurb for its composition. 
_idx_tbl_ck_prefix = std::vector(1 + _schema->partition_key_size(), expr::conjunction({})); _idx_tbl_ck_prefix->reserve(_idx_tbl_ck_prefix->size() + idx_tbl_schema.clustering_key_size()); - for (const auto& e : _partition_range_restrictions) { + auto *single_column_partition_key_restrictions = std::get_if(&_partition_range_restrictions); + if (single_column_partition_key_restrictions) { + for (const auto& e : single_column_partition_key_restrictions->per_column_restrictions) { const auto col = expr::as(find(e, oper_t::EQ)->lhs).col; const auto pos = _schema->position(*col) + 1; (*_idx_tbl_ck_prefix)[pos] = replace_column_def(e, &idx_tbl_schema.clustering_column_at(pos)); + } } if (std::ranges::any_of(*_idx_tbl_ck_prefix | std::views::drop(1), is_empty_restriction)) { @@ -2865,7 +2871,7 @@ sstring statement_restrictions::to_string() const { return _where ? expr::to_string(*_where) : ""; } -static void validate_primary_key_restrictions(const query_options& options, const std::vector& restrictions) { +static void validate_primary_key_restrictions(const query_options& options, std::span restrictions) { for (const auto& r: restrictions) { for_each_expression(r, [&](const binary_operator& binop) { if (binop.op != oper_t::EQ && binop.op != oper_t::IN) { @@ -2884,7 +2890,16 @@ static void validate_primary_key_restrictions(const query_options& options, cons } void statement_restrictions::validate_primary_key(const query_options& options) const { - validate_primary_key_restrictions(options, _partition_range_restrictions); + std::visit(overloaded_functor{ + [&] (const no_partition_range_restrictions&) { + }, + [&] (const token_range_restrictions& r) { + validate_primary_key_restrictions(options, std::span(&r.token_restrictions, 1)); + }, + [&] (const single_column_partition_range_restrictions& r) { + validate_primary_key_restrictions(options, r.per_column_restrictions); + } + }, _partition_range_restrictions); validate_primary_key_restrictions(options, 
_clustering_prefix_restrictions); } diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index 9645dc3dce..df5cf6cd70 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -40,6 +40,22 @@ using check_indexes = bool_class; // WHERE clause fragments such as WHERE token(pk) > 1 or WHERE pk1 IN :list1 AND pk2 IN :list2. using get_partition_key_ranges_fn_t = std::function; +struct no_partition_range_restrictions { +}; + +struct token_range_restrictions { + expr::expression token_restrictions = expr::conjunction({}); +}; + +struct single_column_partition_range_restrictions { + std::vector per_column_restrictions; +}; + +using partition_range_restrictions = std::variant< + no_partition_range_restrictions, + token_range_restrictions, + single_column_partition_range_restrictions>; + /** * The restrictions corresponding to the relations specified on the where-clause of CQL query. */ @@ -126,7 +142,7 @@ private: /// binary_operators on token. If single-column restrictions define the partition range, each element holds /// restrictions for one partition column. Each partition column has a corresponding element, but the elements /// are in arbitrary order. - std::vector _partition_range_restrictions; + partition_range_restrictions _partition_range_restrictions; bool _partition_range_is_simple; ///< False iff _partition_range_restrictions imply a Cartesian product. From c73f3ac55f507ffcc05a40d114c66406e0e53cc9 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 22 Oct 2024 18:24:56 +0300 Subject: [PATCH 06/76] cql3: statement_restrictions: do not collect subscripted partition key columns An indexed SELECT of the from SELECT ... WHERE pk['sub'] = ? is impossible because our indexes do not support frozen maps, and partition key collections must be frozen. Stop collecting such constructs for the purpose of determining the partition range. 
This reduces having to deal with combinations of restrictions on the column and its entries later on. In case we start supporting indexes on frozen maps, leave an on_internal_error to remind us. --- cql3/restrictions/statement_restrictions.cc | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 28cd1f9bd8..38854aa5ac 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -846,15 +846,9 @@ static partition_range_restrictions extract_partition_range( void operator()(const subscript& sub) { const column_value& cval = get_subscripted_column(sub.val); - - with_current_binary_operator(*this, [&] (const binary_operator& b) { - if (cval.col->is_partition_key() && (b.op == oper_t::EQ || b.op == oper_t::IN)) { - const auto [it, inserted] = single_column.try_emplace(cval.col, b); - if (!inserted) { - it->second = make_conjunction(std::move(it->second), b); - } - } - }); + if (cval.col->is_partition_key()) { + on_internal_error(rlogger, "extract_partition_range(subscript)"); + } } void operator()(const constant&) {} From 941011bb4a4d49fe950c11e209ac807ed1b52f73 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 22 Oct 2024 22:26:17 +0300 Subject: [PATCH 07/76] cql3: statement_restrictions: pre-analyze partition key columns The expression tree for partition keys is analyzed during runtime: in partition_ranges_from_singles() (for example), we call find_binop and get_subscripted_column() to understand the expression structure. This analysis is problematic because it has to match the analysis during prepare time; and they have to evolve in lock step. Here, we move the analysis to the prepare stage. This is done by augmenting the expression into a new predicate struct.
It contains the original expression (as a fallback for paths not yet converted), as well as a solve_for function which contains a function built at prepare time that embeds all the necessary analysis. We introduce the `predicate` type which is an augmentation of boolean expressions. In addition to the expression, we remember what column the expression is on, and a function that computes what values the column can take on that would make the expression true. The field that says what column the predicate is about is typed as a variant since later on we will have predicates on non-columns (the token, or a clustering prefix). Note that currently the function engages in some run-time analysis of its own, since it calls possible_lhs_values that itself does analysis, but this is a step in the right direction. --- cql3/restrictions/statement_restrictions.cc | 100 ++++++++++++++++---- cql3/restrictions/statement_restrictions.hh | 29 +++++- 2 files changed, 110 insertions(+), 19 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 38854aa5ac..b85163b80a 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -133,6 +133,22 @@ bool contains_multi_column_restriction(const expression&); bool has_only_eq_binops(const expression&); +static +value_set +solve(const predicate& ac, const query_options& options) { + if (ac.solve_for) { + return ac.solve_for(options); + } + + return std::visit( + overloaded_functor{ + [&] (const on_column& oc) { + return possible_column_values(oc.column, ac.filter, options); + }, + }, + ac.on); +} + namespace { const value_set empty_value_set = value_list{}; @@ -219,6 +235,53 @@ interval to_range(oper_t op, const clustering_key_prefix& return to_range(op, val); } +static +data_type +type(const predicate& p) { + return std::visit( + overloaded_functor{ + [] (const on_column& oc) { return 
oc.column->type->without_reversed().shared_from_this(); }, + }, + p.on); +} + +static +predicate +make_conjunction(predicate a, predicate b) { + if (a.on != b.on) { + on_internal_error(rlogger, "make_conjunction: merging predicate targets"); + } + + auto& sa = a.solve_for; + auto& sb = b.solve_for; + + auto sa_and_sb = std::invoke([&] -> solve_for_t { + if (sa && sb) { + return [sa = std::move(sa), sb = std::move(sb), type = type(a)] (const query_options& options) { + return intersection(sa(options), sb(options), type.get()); + }; + } else { + return {}; + } + }); + + return predicate{ + .solve_for = std::move(sa_and_sb), + .filter = make_conjunction(std::move(a.filter), std::move(b.filter)), + .on = a.on, + .is_singleton = false, // Even if both columns are singletons, the conjunction of them can return zero values. + }; +} + +static +const column_definition* +require_on_single_column(const predicate& p) { + if (auto* pcol = std::get_if(&p.on)) { + return pcol->column; + } + on_internal_error(rlogger, "require_on_single_column: predicate is not on a single column"); +} + // When cdef == nullptr it finds possible token values instead of column values. // When finding token values the table_schema_opt argument has to point to a valid schema, // but it isn't used when finding values for column. 
@@ -798,7 +861,7 @@ static partition_range_restrictions extract_partition_range( struct extract_partition_range_visitor { schema_ptr table_schema; std::optional tokens; - std::unordered_map single_column; + std::unordered_map single_column; const binary_operator* current_binary_operator = nullptr; void operator()(const conjunction& c) { @@ -832,9 +895,15 @@ static partition_range_restrictions extract_partition_range( auto s = &cv; with_current_binary_operator(*this, [&] (const binary_operator& b) { if (s->col->is_partition_key() && (b.op == oper_t::EQ || b.op == oper_t::IN)) { - const auto [it, inserted] = single_column.try_emplace(s->col, b); + auto a = predicate{ + .solve_for = std::bind_front(possible_column_values, s->col, b), + .filter = b, + .on = on_column{s->col}, + .is_singleton = b.op == oper_t::EQ, + }; + const auto [it, inserted] = single_column.try_emplace(s->col, std::move(a)); if (!inserted) { - it->second = make_conjunction(std::move(it->second), b); + it->second = make_conjunction(std::move(it->second), std::move(a)); } } }); @@ -1859,30 +1928,26 @@ void error_if_exceeds_clustering_key_limit(size_t size, size_t clustering_limit) /// Computes partition-key ranges from expressions, which contains EQ/IN for every partition column. 
dht::partition_range_vector partition_ranges_from_singles( - const std::vector& expressions, const query_options& options, const schema& schema) { + const std::vector& expressions, const query_options& options, const schema& schema) { const size_t size_limit = options.get_cql_config().restrictions.partition_key_restrictions_max_cartesian_product_size; // Each element is a vector of that column's possible values: std::vector> column_values(schema.partition_key_size()); size_t product_size = 1; for (const auto& e : expressions) { - if (const auto arbitrary_binop = find_binop(e, [] (const binary_operator&) { return true; })) { - if (auto cv = expr::as_if(&arbitrary_binop->lhs)) { - const value_set vals = possible_column_values(cv->col, e, options); + const value_set vals = solve(e, options); if (auto lst = std::get_if(&vals)) { if (lst->empty()) { return {}; } product_size *= lst->size(); error_if_exceeds_partition_key_limit(product_size, size_limit); - column_values[schema.position(*cv->col)] = std::move(*lst); + column_values[schema.position(*require_on_single_column(e))] = std::move(*lst); } else { throw exceptions::invalid_request_exception( "Only EQ and IN relation are supported on the partition key " "(unless you use the token() function or ALLOW FILTERING)"); } - } - } } cartesian_product cp(column_values); dht::partition_range_vector ranges; @@ -1894,15 +1959,14 @@ dht::partition_range_vector partition_ranges_from_singles( /// Computes partition-key ranges from EQ restrictions on each partition column. Returns a single singleton range if /// the EQ restrictions are not mutually conflicting. Otherwise, returns an empty vector. 
dht::partition_range_vector partition_ranges_from_EQs( - const std::vector& eq_expressions, const query_options& options, const schema& schema) { + const std::vector& eq_expressions, const query_options& options, const schema& schema) { std::vector pk_value(schema.partition_key_size()); for (const auto& e : eq_expressions) { - const auto col = expr::get_subscripted_column(find(e, oper_t::EQ)->lhs).col; - const auto vals = std::get(possible_column_values(col, e, options)); + const auto vals = std::get(solve(e, options)); if (vals.empty()) { // Case of C=1 AND C=2. return {}; } - pk_value[schema.position(*col)] = std::move(vals[0]); + pk_value[schema.position(*require_on_single_column(e))] = std::move(vals[0]); } return {range_from_bytes(schema, pk_value)}; } @@ -2716,9 +2780,9 @@ void statement_restrictions::prepare_indexed_global(const schema& idx_tbl_schema auto *single_column_partition_key_restrictions = std::get_if(&_partition_range_restrictions); if (single_column_partition_key_restrictions) { for (const auto& e : single_column_partition_key_restrictions->per_column_restrictions) { - const auto col = expr::as(find(e, oper_t::EQ)->lhs).col; + const auto col = expr::as(find(e.filter, oper_t::EQ)->lhs).col; const auto pos = _schema->position(*col) + 1; - (*_idx_tbl_ck_prefix)[pos] = replace_column_def(e, &idx_tbl_schema.clustering_column_at(pos)); + (*_idx_tbl_ck_prefix)[pos] = replace_column_def(e.filter, &idx_tbl_schema.clustering_column_at(pos)); } } @@ -2865,7 +2929,7 @@ sstring statement_restrictions::to_string() const { return _where ? 
expr::to_string(*_where) : ""; } -static void validate_primary_key_restrictions(const query_options& options, std::span restrictions) { +static void validate_primary_key_restrictions(const query_options& options, std::ranges::range auto&& restrictions) { for (const auto& r: restrictions) { for_each_expression(r, [&](const binary_operator& binop) { if (binop.op != oper_t::EQ && binop.op != oper_t::IN) { @@ -2891,7 +2955,7 @@ void statement_restrictions::validate_primary_key(const query_options& options) validate_primary_key_restrictions(options, std::span(&r.token_restrictions, 1)); }, [&] (const single_column_partition_range_restrictions& r) { - validate_primary_key_restrictions(options, r.per_column_restrictions); + validate_primary_key_restrictions(options, r.per_column_restrictions | std::views::transform(&predicate::filter)); } }, _partition_range_restrictions); validate_primary_key_restrictions(options, _clustering_prefix_restrictions); diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index df5cf6cd70..6ca485af2c 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -30,6 +30,33 @@ using value_list = std::vector; // Sorted and deduped using value /// never singular and never has start > end. Universal set is a interval with both bounds null. using value_set = std::variant>; +// For some boolean expression (say (X = 3) = TRUE, this represents a function that solves for X. +// (here, it would return 3). The expression is obtained by equating some factors of the WHERE +// clause to TRUE. +using solve_for_t = std::function; + +struct on_column { + const column_definition* column; + + bool operator==(const on_column&) const = default; +}; + +// A predicate on a column or a combination of columns. 
The WHERE clause analyzer +// will attempt to convert predicates (that return true or false for a particular row) +// to solvers (that return the set of column values that satisfy the predicate) when possible. +struct predicate { + // A function that returns the set of values that satisfy the filter. Can be unset, + // in which case the filter must be interpreted. + solve_for_t solve_for; + // The original filter for this column. + expr::expression filter; + // What column the predicate can be solved for + std::variant< + on_column // solving for a single column: e.g. c1 = 3 + > on; + // Whether the returned value_set will resolve to a single value. + bool is_singleton = false; +}; ///In some cases checking if columns have indexes is undesired of even ///impossible, because e.g. the query runs on a pseudo-table, which does not @@ -48,7 +75,7 @@ struct token_range_restrictions { }; struct single_column_partition_range_restrictions { - std::vector per_column_restrictions; + std::vector per_column_restrictions; }; using partition_range_restrictions = std::variant< From 6fce090e30873f58a78a994b1d5e674c86d90377 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 22 Oct 2024 23:08:55 +0300 Subject: [PATCH 08/76] cql3: statement_restrictions: pre-analyze token range restrictions Convert token range restrictions to the predicate format we introduced earlier, where we have a function to solve for the token range rather than running the analysis at runtime. Again the truth is that the function will delegate to possible_partition_token_values() which actually will do the analysis at runtime, but it's one step closer. We add a new variant element for predicate::on, since it doesn't fit the existing element (the token isn't a column). 
--- cql3/restrictions/statement_restrictions.cc | 20 ++++++++++++++++---- cql3/restrictions/statement_restrictions.hh | 12 ++++++++++-- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index b85163b80a..cd49fa26c3 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -145,6 +145,9 @@ solve(const predicate& ac, const query_options& options) { [&] (const on_column& oc) { return possible_column_values(oc.column, ac.filter, options); }, + [&] (const on_partition_key_token& pkt) { + return possible_partition_token_values(ac.filter, options, *pkt.schema); + }, }, ac.on); } @@ -241,6 +244,7 @@ type(const predicate& p) { return std::visit( overloaded_functor{ [] (const on_column& oc) { return oc.column->type->without_reversed().shared_from_this(); }, + [] (const on_partition_key_token&) { return long_type; }, }, p.on); } @@ -965,7 +969,15 @@ static partition_range_restrictions extract_partition_range( expr::visit(v, where_clause); if (v.tokens) { - return token_range_restrictions{.token_restrictions = std::move(*v.tokens)}; + return token_range_restrictions{ + .token_restrictions = predicate{ + // It's not really a column, but... + .solve_for = std::bind(possible_partition_token_values, *v.tokens, std::placeholders::_1, std::ref(*schema)), + .filter = *v.tokens, + .on = on_partition_key_token{schema.get()}, + .is_singleton = false, // It could return a single token, but it's not important to track it + }, + }; } if (v.single_column.size() == schema->partition_key_size()) { return single_column_partition_range_restrictions{ @@ -1879,10 +1891,10 @@ namespace { using namespace expr; /// Computes partition-key ranges from token atoms in ex. 
-dht::partition_range_vector partition_ranges_from_token(const expr::expression& ex, +dht::partition_range_vector partition_ranges_from_token(const predicate& ex, const query_options& options, const schema& table_schema) { - auto values = possible_partition_token_values(ex, options, table_schema); + auto values = solve(ex, options); if (values == value_set(value_list{})) { return {}; } @@ -2952,7 +2964,7 @@ void statement_restrictions::validate_primary_key(const query_options& options) [&] (const no_partition_range_restrictions&) { }, [&] (const token_range_restrictions& r) { - validate_primary_key_restrictions(options, std::span(&r.token_restrictions, 1)); + validate_primary_key_restrictions(options, std::span(&r.token_restrictions.filter, 1)); }, [&] (const single_column_partition_range_restrictions& r) { validate_primary_key_restrictions(options, r.per_column_restrictions | std::views::transform(&predicate::filter)); diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index 6ca485af2c..c15c9bf877 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -41,6 +41,13 @@ struct on_column { bool operator==(const on_column&) const = default; }; +// Placeholder type indicating we're solving for the partition key token. +struct on_partition_key_token { + const ::schema* schema; + + bool operator==(const on_partition_key_token&) const = default; +}; + // A predicate on a column or a combination of columns. The WHERE clause analyzer // will attempt to convert predicates (that return true or false for a particular row) // to solvers (that return the set of column values that satisfy the predicate) when possible. @@ -52,7 +59,8 @@ struct predicate { expr::expression filter; // What column the predicate can be solved for std::variant< - on_column // solving for a single column: e.g. c1 = 3 + on_column, // solving for a single column: e.g. 
c1 = 3 + on_partition_key_token // solving for the token, e.g. token(pk1, pk2) >= :var > on; // Whether the returned value_set will resolve to a single value. bool is_singleton = false; @@ -71,7 +79,7 @@ struct no_partition_range_restrictions { }; struct token_range_restrictions { - expr::expression token_restrictions = expr::conjunction({}); + predicate token_restrictions; }; struct single_column_partition_range_restrictions { From 620df7103fd5aa309495ae9fa1cb2bcafa5ad529 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 22 Oct 2024 23:47:15 +0300 Subject: [PATCH 09/76] cql3: statement_restrictions: do not pass view schema back and forth For indexed queries, statement_restrictions calculates _view_schema, which is passed via get_view_schema() to indexed_select_statement(), which passes it right back to statement_restrictions via one of three functions to calculate clustering ranges. Avoid the back-and-forth and use the stored value. Using a different value would be broken. This change allows unifying the signatures of the four functions that get clustering ranges. 
--- cql3/restrictions/statement_restrictions.cc | 18 +++++++----------- cql3/restrictions/statement_restrictions.hh | 6 +++--- cql3/statements/select_statement.cc | 6 +++--- test/boost/statement_restrictions_test.cc | 7 +++---- 4 files changed, 16 insertions(+), 21 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index cd49fa26c3..96cccabcfb 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -2894,27 +2894,24 @@ unsigned int statement_restrictions::num_clustering_prefix_columns_that_need_not } std::vector statement_restrictions::get_global_index_clustering_ranges( - const query_options& options, - const schema& idx_tbl_schema) const { + const query_options& options) const { if (!_idx_tbl_ck_prefix) { on_internal_error( rlogger, "statement_restrictions::get_global_index_clustering_ranges called with unprepared index"); } // Multi column restrictions are not added to _idx_tbl_ck_prefix, they are handled later by filtering. - return get_single_column_clustering_bounds(options, idx_tbl_schema, *_idx_tbl_ck_prefix); + return get_single_column_clustering_bounds(options, *_view_schema, *_idx_tbl_ck_prefix); } std::vector statement_restrictions::get_global_index_token_clustering_ranges( - const query_options& options, - const schema& idx_tbl_schema -) const { + const query_options& options) const { if (!_idx_tbl_ck_prefix.has_value()) { on_internal_error( rlogger, "statement_restrictions::get_global_index_token_clustering_ranges called with unprepared index"); } - const column_definition& token_column = idx_tbl_schema.clustering_column_at(0); + const column_definition& token_column = _view_schema->clustering_column_at(0); // In old indexes the token column was of type blob. // This causes problems with sorting and must be handled separately. 
@@ -2922,19 +2919,18 @@ std::vector statement_restrictions::get_global_index_to return get_index_v1_token_range_clustering_bounds(options, token_column, _idx_tbl_ck_prefix->at(0)); } - return get_single_column_clustering_bounds(options, idx_tbl_schema, *_idx_tbl_ck_prefix); + return get_single_column_clustering_bounds(options, *_view_schema, *_idx_tbl_ck_prefix); } std::vector statement_restrictions::get_local_index_clustering_ranges( - const query_options& options, - const schema& idx_tbl_schema) const { + const query_options& options) const { if (!_idx_tbl_ck_prefix.has_value()) { on_internal_error( rlogger, "statement_restrictions::get_local_index_clustering_ranges called with unprepared index"); } // Multi column restrictions are not added to _idx_tbl_ck_prefix, they are handled later by filtering. - return get_single_column_clustering_bounds(options, idx_tbl_schema, *_idx_tbl_ck_prefix); + return get_single_column_clustering_bounds(options, *_view_schema, *_idx_tbl_ck_prefix); } sstring statement_restrictions::to_string() const { diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index c15c9bf877..051b084021 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -471,15 +471,15 @@ private: public: /// Calculates clustering ranges for querying a global-index table. std::vector get_global_index_clustering_ranges( - const query_options& options, const schema& idx_tbl_schema) const; + const query_options& options) const; /// Calculates clustering ranges for querying a global-index table for queries with token restrictions present. std::vector get_global_index_token_clustering_ranges( - const query_options& options, const schema& idx_tbl_schema) const; + const query_options& options) const; /// Calculates clustering ranges for querying a local-index table. 
std::vector get_local_index_clustering_ranges( - const query_options& options, const schema& idx_tbl_schema) const; + const query_options& options) const; sstring to_string() const; diff --git a/cql3/statements/select_statement.cc b/cql3/statements/select_statement.cc index da8a068e55..ecc71b4161 100644 --- a/cql3/statements/select_statement.cc +++ b/cql3/statements/select_statement.cc @@ -1374,11 +1374,11 @@ query::partition_slice view_indexed_table_select_statement::get_partition_slice_ // Only EQ restrictions on base partition key can be used in an index view query if (pk_restrictions_is_single && _restrictions->partition_key_restrictions_is_all_eq()) { partition_slice_builder.with_ranges( - _restrictions->get_global_index_clustering_ranges(options, *_view_schema)); + _restrictions->get_global_index_clustering_ranges(options)); } else if (_restrictions->has_token_restrictions()) { // Restrictions like token(p1, p2) < 0 have all partition key components restricted, but require special handling. 
partition_slice_builder.with_ranges( - _restrictions->get_global_index_token_clustering_ranges(options, *_view_schema)); + _restrictions->get_global_index_token_clustering_ranges(options)); } } @@ -1389,7 +1389,7 @@ query::partition_slice view_indexed_table_select_statement::get_partition_slice_ partition_slice_builder partition_slice_builder{*_view_schema}; partition_slice_builder.with_ranges( - _restrictions->get_local_index_clustering_ranges(options, *_view_schema)); + _restrictions->get_local_index_clustering_ranges(options)); return partition_slice_builder.build(); } diff --git a/test/boost/statement_restrictions_test.cc b/test/boost/statement_restrictions_test.cc index 977f7b8dfb..0230f504ee 100644 --- a/test/boost/statement_restrictions_test.cc +++ b/test/boost/statement_restrictions_test.cc @@ -1106,14 +1106,13 @@ SEASTAR_TEST_CASE(combinatorial_restrictions) { // --- Index table range APIs --- if (uses_idx) { bool is_local_idx = (expected_idx == "comb_v3_local"); - const auto& view_schema = *sr->get_view_schema(); if (is_local_idx) { // --- get_local_index_clustering_ranges --- // Local index CK prefix = (indexed_col, base_ck1, ...). // The indexed column is always EQ (1 value); CK IN values // multiply via the base CK appended to the prefix. - auto local_ranges = sr->get_local_index_clustering_ranges(query_options({}), view_schema); + auto local_ranges = sr->get_local_index_clustering_ranges(query_options({})); unsigned expected_local = 1 * idx_range_multiplier; BOOST_CHECK_MESSAGE(!local_ranges.empty(), ctx_msg("get_local_index_clustering_ranges should not be empty")); @@ -1125,7 +1124,7 @@ SEASTAR_TEST_CASE(combinatorial_restrictions) { // Global index CK prefix = (token, pk1, pk2, ...base CK...). // With full PK: CK IN values expand the prefix. // Without full PK: prefix is empty → 1 open-ended range. 
- auto global_ranges = sr->get_global_index_clustering_ranges(query_options({}), view_schema); + auto global_ranges = sr->get_global_index_clustering_ranges(query_options({})); BOOST_CHECK_MESSAGE(!global_ranges.empty(), ctx_msg("get_global_index_clustering_ranges should not be empty")); if (full_pk) { @@ -1148,7 +1147,7 @@ SEASTAR_TEST_CASE(combinatorial_restrictions) { // long_type, so this dispatches to the same // get_single_column_clustering_bounds as // get_global_index_clustering_ranges. - auto token_ranges = sr->get_global_index_token_clustering_ranges(query_options({}), view_schema); + auto token_ranges = sr->get_global_index_token_clustering_ranges(query_options({})); BOOST_CHECK_MESSAGE(token_ranges.size() == global_ranges.size(), ctx_msg(fmt::format( "get_global_index_token_clustering_ranges: {} ranges, want {} (same as global)", From 1039ed9ed29c003ab4ce51456a9932ec16acb2ff Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 23 Oct 2024 00:28:18 +0300 Subject: [PATCH 10/76] cql3: statement_restrictions: wrap functions that return clustering ranges During prepare time, build functions for use during execution time. Currently, the wrappers are very shallow, and practically all the work is done at execution time. But the stage is set for more peeling. The index clustering ranges had on_internal_error()s if an index was not used. They're converted to returning a null function. If executed (which is never supposed to happen), it will throw a bad_function_call. 
--- cql3/restrictions/statement_restrictions.cc | 64 +++++++++++++++------ cql3/restrictions/statement_restrictions.hh | 12 ++++ 2 files changed, 60 insertions(+), 16 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 96cccabcfb..10a2bc655e 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1326,6 +1326,11 @@ statement_restrictions::statement_restrictions(private_tag, } _get_partition_key_ranges_fn = build_partition_key_ranges_fn(); + + _get_clustering_bounds_fn = build_get_clustering_bounds_fn(); + _get_global_index_clustering_ranges_fn = build_get_global_index_clustering_ranges_fn(); + _get_global_index_token_clustering_ranges_fn = build_get_global_index_token_clustering_ranges_fn(); + _get_local_index_clustering_ranges_fn = build_get_local_index_clustering_ranges_fn(); } bool @@ -2635,7 +2640,9 @@ query::clustering_range range_from_raw_bounds( } // anonymous namespace -std::vector statement_restrictions::get_clustering_bounds(const query_options& options) const { +get_clustering_bounds_fn_t +statement_restrictions::build_get_clustering_bounds_fn() const { + return [&] (const query_options& options) -> std::vector { if (_clustering_prefix_restrictions.empty()) { return {query::clustering_range::make_open_ended_both_sides()}; } @@ -2674,6 +2681,11 @@ std::vector statement_restrictions::get_clustering_boun } else { return get_single_column_clustering_bounds(options, *_schema, _clustering_prefix_restrictions); } + }; +} + +std::vector statement_restrictions::get_clustering_bounds(const query_options& options) const { + return _get_clustering_bounds_fn(options); } namespace { @@ -2893,22 +2905,27 @@ unsigned int statement_restrictions::num_clustering_prefix_columns_that_need_not return count; } -std::vector statement_restrictions::get_global_index_clustering_ranges( - const query_options& options) const { +get_clustering_bounds_fn_t 
+statement_restrictions::build_get_global_index_clustering_ranges_fn() const { if (!_idx_tbl_ck_prefix) { - on_internal_error( - rlogger, "statement_restrictions::get_global_index_clustering_ranges called with unprepared index"); + return {}; } + return [&] (const query_options& options) { // Multi column restrictions are not added to _idx_tbl_ck_prefix, they are handled later by filtering. return get_single_column_clustering_bounds(options, *_view_schema, *_idx_tbl_ck_prefix); + }; } -std::vector statement_restrictions::get_global_index_token_clustering_ranges( - const query_options& options) const { +std::vector statement_restrictions::get_global_index_clustering_ranges( + const query_options& options) const { + return _get_global_index_clustering_ranges_fn(options); +} + +get_clustering_bounds_fn_t +statement_restrictions::build_get_global_index_token_clustering_ranges_fn() const { if (!_idx_tbl_ck_prefix.has_value()) { - on_internal_error( - rlogger, "statement_restrictions::get_global_index_token_clustering_ranges called with unprepared index"); + return {}; } const column_definition& token_column = _view_schema->clustering_column_at(0); @@ -2916,21 +2933,36 @@ std::vector statement_restrictions::get_global_index_to // In old indexes the token column was of type blob. // This causes problems with sorting and must be handled separately. 
if (token_column.type != long_type) { + return [&] (const query_options& options) { return get_index_v1_token_range_clustering_bounds(options, token_column, _idx_tbl_ck_prefix->at(0)); + }; } + return [&] (const query_options& options) { return get_single_column_clustering_bounds(options, *_view_schema, *_idx_tbl_ck_prefix); + }; +} + +std::vector statement_restrictions::get_global_index_token_clustering_ranges( + const query_options& options) const { + return _get_global_index_token_clustering_ranges_fn(options); +} + +get_clustering_bounds_fn_t +statement_restrictions::build_get_local_index_clustering_ranges_fn() const { + if (!_idx_tbl_ck_prefix.has_value()) { + return {}; + } + + return [&] (const query_options& options) { + // Multi column restrictions are not added to _idx_tbl_ck_prefix, they are handled later by filtering. + return get_single_column_clustering_bounds(options, *_view_schema, *_idx_tbl_ck_prefix); + }; } std::vector statement_restrictions::get_local_index_clustering_ranges( const query_options& options) const { - if (!_idx_tbl_ck_prefix.has_value()) { - on_internal_error( - rlogger, "statement_restrictions::get_local_index_clustering_ranges called with unprepared index"); - } - - // Multi column restrictions are not added to _idx_tbl_ck_prefix, they are handled later by filtering. - return get_single_column_clustering_bounds(options, *_view_schema, *_idx_tbl_ck_prefix); + return _get_local_index_clustering_ranges_fn(options); } sstring statement_restrictions::to_string() const { diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index 051b084021..cae17acd28 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -75,6 +75,10 @@ using check_indexes = bool_class; // WHERE clause fragments such as WHERE token(pk) > 1 or WHERE pk1 IN :list1 AND pk2 IN :list2. 
using get_partition_key_ranges_fn_t = std::function; +// A function that returns the clustering key ranges for a query. It is the solver of +// WHERE clause fragments such as WHERE ck > 1 or WHERE (ck1, ck2) > (1, 2). +using get_clustering_bounds_fn_t = std::function (const query_options& options)>; + struct no_partition_range_restrictions { }; @@ -188,6 +192,10 @@ private: std::optional _idx_opt; expr::expression _idx_restrictions = expr::conjunction({}); get_partition_key_ranges_fn_t _get_partition_key_ranges_fn; + get_clustering_bounds_fn_t _get_clustering_bounds_fn; + get_clustering_bounds_fn_t _get_global_index_clustering_ranges_fn; + get_clustering_bounds_fn_t _get_global_index_token_clustering_ranges_fn; + get_clustering_bounds_fn_t _get_local_index_clustering_ranges_fn; public: /** * Creates a new empty StatementRestrictions. @@ -389,6 +397,10 @@ private: unsigned int num_clustering_prefix_columns_that_need_not_be_filtered() const; void calculate_column_defs_for_filtering_and_erase_restrictions_used_for_index(data_dictionary::database db); get_partition_key_ranges_fn_t build_partition_key_ranges_fn() const; + get_clustering_bounds_fn_t build_get_clustering_bounds_fn() const; + get_clustering_bounds_fn_t build_get_global_index_clustering_ranges_fn() const; + get_clustering_bounds_fn_t build_get_global_index_token_clustering_ranges_fn() const; + get_clustering_bounds_fn_t build_get_local_index_clustering_ranges_fn() const; public: /** * Returns the specified range of the partition key. From dcdd2f7e72221957b1e4c8be3b641f33e7eebede Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 23 Oct 2024 12:58:12 +0300 Subject: [PATCH 11/76] cql3: statement_restrictions: push down clustering prefix wrapper one level This allows us to tackle each case separately. 
--- cql3/restrictions/statement_restrictions.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 10a2bc655e..4e4e8e5170 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -2642,11 +2642,13 @@ query::clustering_range range_from_raw_bounds( get_clustering_bounds_fn_t statement_restrictions::build_get_clustering_bounds_fn() const { - return [&] (const query_options& options) -> std::vector { if (_clustering_prefix_restrictions.empty()) { + return [&] (const query_options& options) -> std::vector { return {query::clustering_range::make_open_ended_both_sides()}; + }; } if (find_binop(_clustering_prefix_restrictions[0], is_multi_column)) { + return [&] (const query_options& options) -> std::vector { bool all_natural = true, all_reverse = true; ///< Whether column types are reversed or natural. for (auto& r : _clustering_prefix_restrictions) { // TODO: move to constructor, do only once. using namespace expr; @@ -2678,10 +2680,12 @@ statement_restrictions::build_get_clustering_bounds_fn() const { } } return bounds; + }; } else { + return [&] (const query_options& options) -> std::vector { return get_single_column_clustering_bounds(options, *_schema, _clustering_prefix_restrictions); + }; } - }; } std::vector statement_restrictions::get_clustering_bounds(const query_options& options) const { From 325497d4603d1701af1c8e8da9d1af5046fdb1ca Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 23 Oct 2024 00:59:36 +0300 Subject: [PATCH 12/76] cql3: statement_restrictions: hide value_for() value_for() is a general function that solves for values that satisfy an expression set to TRUE. This goes against our goal to prepare solvers for all the expressions we use. 
Fortunately, it's only called with one expression, which comes from statement_restrictions, so we can add an accessor that provides the expression from our own state. Later, we'll be able to do prepare-time work on it. --- cql3/restrictions/statement_restrictions.cc | 11 +++++++++++ cql3/restrictions/statement_restrictions.hh | 7 +++---- cql3/statements/select_statement.cc | 9 ++------- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 4e4e8e5170..2e896d6032 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -802,6 +802,7 @@ bool is_empty_restriction(const expression& e) { return !contains_non_conjunction; } +static bytes_opt value_for(const column_definition& cdef, const expression& e, const query_options& options) { value_set possible_vals = possible_column_values(&cdef, e, options); return std::visit(overloaded_functor { @@ -2969,6 +2970,16 @@ std::vector statement_restrictions::get_local_index_clu return _get_local_index_clustering_ranges_fn(options); } +bytes_opt +statement_restrictions::value_for_index_partition_key(const query_options& options) const { + const column_definition* cdef = _schema->get_column_definition(to_bytes(_idx_opt->target_column())); + if (!cdef) { + throw exceptions::invalid_request_exception("Indexed column not found in schema"); + } + + return value_for(*cdef, _idx_restrictions, options); +} + sstring statement_restrictions::to_string() const { return _where ? 
expr::to_string(*_where) : ""; } diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index cae17acd28..a8f1fdbbcc 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -493,6 +493,9 @@ public: std::vector get_local_index_clustering_ranges( const query_options& options) const; + /// Finds the value of partition key of the index table + bytes_opt value_for_index_partition_key(const query_options&) const; + sstring to_string() const; /// Checks that the primary key restrictions don't contain null values, throws invalid_request_exception otherwise. @@ -528,10 +531,6 @@ std::vector extract_single_column_restrictions_for_column(cons // Checks whether this expression is empty - doesn't restrict anything bool is_empty_restriction(const expr::expression&); -// Finds the value of the given column in the expression -// In case of multpiple possible values calls on_internal_error -bytes_opt value_for(const column_definition&, const expr::expression&, const query_options&); - } } diff --git a/cql3/statements/select_statement.cc b/cql3/statements/select_statement.cc index ecc71b4161..aee118f610 100644 --- a/cql3/statements/select_statement.cc +++ b/cql3/statements/select_statement.cc @@ -1139,7 +1139,7 @@ lw_shared_ptr view_indexed_table_select_stat auto& last_base_pk = last_pos.partition; auto* last_base_ck = last_pos.position.has_key() ? 
&last_pos.position.key() : nullptr; - bytes_opt indexed_column_value = restrictions::value_for(*cdef, _used_index_restrictions, options); + bytes_opt indexed_column_value = _restrictions->value_for_index_partition_key(options); auto index_pk = [&]() { if (_index.metadata().local()) { @@ -1350,12 +1350,7 @@ dht::partition_range_vector view_indexed_table_select_statement::get_partition_r dht::partition_range_vector view_indexed_table_select_statement::get_partition_ranges_for_global_index_posting_list(const query_options& options) const { dht::partition_range_vector partition_ranges; - const column_definition* cdef = _schema->get_column_definition(to_bytes(_index.target_column())); - if (!cdef) { - throw exceptions::invalid_request_exception("Indexed column not found in schema"); - } - - bytes_opt value = restrictions::value_for(*cdef, _used_index_restrictions, options); + bytes_opt value = _restrictions->value_for_index_partition_key(options); if (value) { auto pk = partition_key::from_single_value(*_view_schema, *value); auto dk = dht::decorate_key(*_view_schema, pk); From 201ed5383785631f56559c0848986a7f0f6bd1b0 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 23 Oct 2024 01:12:09 +0300 Subject: [PATCH 13/76] cql3: statement_restrictions: wrap value_for_index_partition_key() To allow more work to be carried out during prepare time, wrap the body in an std::function, which will be called at execution time. Currently we actually do the work during execution time; but the way is prepared. 
--- cql3/restrictions/statement_restrictions.cc | 15 +++++++++++++-- cql3/restrictions/statement_restrictions.hh | 5 +++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 2e896d6032..04a41ec3e7 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1332,6 +1332,7 @@ statement_restrictions::statement_restrictions(private_tag, _get_global_index_clustering_ranges_fn = build_get_global_index_clustering_ranges_fn(); _get_global_index_token_clustering_ranges_fn = build_get_global_index_token_clustering_ranges_fn(); _get_local_index_clustering_ranges_fn = build_get_local_index_clustering_ranges_fn(); + _value_for_index_partition_key_fn = build_value_for_index_partition_key_fn(); } bool @@ -2970,14 +2971,24 @@ std::vector statement_restrictions::get_local_index_clu return _get_local_index_clustering_ranges_fn(options); } -bytes_opt -statement_restrictions::value_for_index_partition_key(const query_options& options) const { +get_singleton_value_fn_t +statement_restrictions::build_value_for_index_partition_key_fn() const { + if (!_idx_opt) { + return {}; + } const column_definition* cdef = _schema->get_column_definition(to_bytes(_idx_opt->target_column())); if (!cdef) { throw exceptions::invalid_request_exception("Indexed column not found in schema"); } + return [this, cdef] (const query_options& options) { return value_for(*cdef, _idx_restrictions, options); + }; +} + +bytes_opt +statement_restrictions::value_for_index_partition_key(const query_options& options) const { + return _value_for_index_partition_key_fn(options); } sstring statement_restrictions::to_string() const { diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index a8f1fdbbcc..a9345452a3 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh 
@@ -79,6 +79,9 @@ using get_partition_key_ranges_fn_t = std::function 1 or WHERE (ck1, ck2) > (1, 2). using get_clustering_bounds_fn_t = std::function (const query_options& options)>; +// A function that returns a singleton value, usable for a key (e.g. bytes_opt) +using get_singleton_value_fn_t = std::function; + struct no_partition_range_restrictions { }; @@ -196,6 +199,7 @@ private: get_clustering_bounds_fn_t _get_global_index_clustering_ranges_fn; get_clustering_bounds_fn_t _get_global_index_token_clustering_ranges_fn; get_clustering_bounds_fn_t _get_local_index_clustering_ranges_fn; + get_singleton_value_fn_t _value_for_index_partition_key_fn; public: /** * Creates a new empty StatementRestrictions. @@ -401,6 +405,7 @@ private: get_clustering_bounds_fn_t build_get_global_index_clustering_ranges_fn() const; get_clustering_bounds_fn_t build_get_global_index_token_clustering_ranges_fn() const; get_clustering_bounds_fn_t build_get_local_index_clustering_ranges_fn() const; + get_singleton_value_fn_t build_value_for_index_partition_key_fn() const; public: /** * Returns the specified range of the partition key. From c4ab0ddb85baecb9753cf004f0fbe8e951583e86 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 3 Nov 2024 19:24:17 +0200 Subject: [PATCH 14/76] cql3: statement_restrictions: pre-analyze single-column clustering key restrictions Change _clustering_prefix_restrictions and _idx_tbl_ck_prefix (the latter is the equivalent of the former, for indexed queries), to use predicate instead of expressions. This lets us do more of the work of solving restrictions during prepare time. We only handle single-column restrictions here. Multi-column restrictions use the existing path. 
We introduce two helpers: - value_set_to_singleton() converts a restriction solution to a singleton when we know that's the only possible answer - replace_column_def() overload for predicate, similar to the existing overload for expressions There is a wart in get_single_column_clustering_bounds(): we arrive at this point with the two vectors possibly pointing at different columns. Previously, possible_lhs_values() did this check while solving. We now check for it here. The predicate::on variant gets another member, for clustering key prefixes. Since everything is still handled by the legacy paths, we mostly error out. --- cql3/restrictions/statement_restrictions.cc | 180 ++++++++++++++++---- cql3/restrictions/statement_restrictions.hh | 13 +- 2 files changed, 153 insertions(+), 40 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 04a41ec3e7..e9a9b3cdc0 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -148,6 +148,9 @@ solve(const predicate& ac, const query_options& options) { [&] (const on_partition_key_token& pkt) { return possible_partition_token_values(ac.filter, options, *pkt.schema); }, + [&] (const on_clustering_key_prefix& ockp) -> value_set { + on_internal_error(rlogger, "asked to directly solve for clustering key prefix"); + }, }, ac.on); } @@ -185,6 +188,18 @@ value_set intersection(value_set a, value_set b, const abstract_type* type) { return std::visit(intersection_visitor{type}, std::move(a), std::move(b)); } +static +managed_bytes +value_set_to_singleton(const value_set& vs) { + if (std::holds_alternative(vs)) { + const auto& vl = std::get(vs); + if (vl.size() == 1) { + return vl.front(); + } + } + throw std::logic_error("value_set_to_singleton: value_set is not a singleton"); +} + template value_list to_sorted_vector(Range r, const serialized_compare& comparator) { value_list tmp(r.begin(), r.end()); // Need random-access
range to sort (r is not necessarily random-access). @@ -245,6 +260,7 @@ type(const predicate& p) { overloaded_functor{ [] (const on_column& oc) { return oc.column->type->without_reversed().shared_from_this(); }, [] (const on_partition_key_token&) { return long_type; }, + [] (const on_clustering_key_prefix&) -> data_type { on_internal_error(rlogger, "type: asked for clustering key prefix type"); }, }, p.on); } @@ -495,6 +511,17 @@ interval to_range(const value_set& s) { }, s); } +/// Replaces every column_definition in an expression with this one. Throws if any LHS is not a single +/// column_value. +static +predicate +replace_column_def(predicate p, const column_definition* col) { + // Note: does not replace and `col` embedded in the p.solve_for + p.filter = expr::replace_column_def(p.filter, col); + p.on = on_column{col}; + return p; +} + namespace { constexpr inline secondary_index::index::supports_expression_v operator&&(secondary_index::index::supports_expression_v v1, secondary_index::index::supports_expression_v v2) { using namespace secondary_index; @@ -991,7 +1018,7 @@ static partition_range_restrictions extract_partition_range( /// Extracts where_clause atoms with clustering-column LHS and copies them to a vector. These elements define the /// boundaries of any clustering slice that can possibly meet where_clause. This vector can be calculated before /// binding expression markers, since LHS and operator are always known. -static std::vector extract_clustering_prefix_restrictions( +static std::vector extract_clustering_prefix_restrictions( const expr::expression& where_clause, schema_ptr schema) { using namespace expr; @@ -999,10 +1026,10 @@ static std::vector extract_clustering_prefix_restrictions( /// conjunction to combine subexpressions. struct visitor { schema_ptr table_schema; - std::vector multi; ///< All multi-column restrictions. + std::vector multi; ///< All multi-column restrictions. 
/// All single-clustering-column restrictions, grouped by column. Each value is either an atom or a /// conjunction of atoms. - std::unordered_map single; + std::unordered_map single; const binary_operator* current_binary_operator = nullptr; void operator()(const conjunction& c) { @@ -1019,13 +1046,21 @@ static std::vector extract_clustering_prefix_restrictions( } void operator()(const tuple_constructor& tc) { + std::vector prefix; for (auto& e : tc.elements) { - if (!expr::is(e)) { + if (auto cv = expr::as_if(&e)) { + prefix.push_back(cv->col); + } else { on_internal_error(rlogger, fmt::format("extract_clustering_prefix_restrictions: tuple of non-column_value: {}", tc)); } } with_current_binary_operator(*this, [&] (const binary_operator& b) { - multi.push_back(b); + multi.push_back(predicate{ + .solve_for = nullptr, // FIXME: implement + .filter = b, + .on = on_clustering_key_prefix{prefix}, + .is_singleton = false, + }); }); } @@ -1033,9 +1068,15 @@ static std::vector extract_clustering_prefix_restrictions( auto s = &cv; with_current_binary_operator(*this, [&] (const binary_operator& b) { if (s->col->is_clustering_key()) { - const auto [it, inserted] = single.try_emplace(s->col, b); + auto a = predicate{ + .solve_for = std::bind_front(possible_column_values, s->col, b), + .filter = b, + .on = on_column{s->col}, + .is_singleton = b.op == oper_t::EQ, + }; + const auto [it, inserted] = single.try_emplace(s->col, std::move(a)); if (!inserted) { - it->second = make_conjunction(std::move(it->second), b); + it->second = make_conjunction(std::move(it->second), std::move(a)); } } }); @@ -1046,9 +1087,15 @@ static std::vector extract_clustering_prefix_restrictions( with_current_binary_operator(*this, [&] (const binary_operator& b) { if (cval.col->is_clustering_key()) { - const auto [it, inserted] = single.try_emplace(cval.col, b); + auto a = predicate{ + .solve_for = std::bind_front(possible_column_values, cval.col, b), + .filter = b, + .on = on_column{cval.col}, + 
.is_singleton = b.op == oper_t::EQ, + }; + const auto [it, inserted] = single.try_emplace(cval.col, std::move(a)); if (!inserted) { - it->second = make_conjunction(std::move(it->second), b); + it->second = make_conjunction(std::move(it->second), std::move(a)); } } }); @@ -1111,19 +1158,19 @@ static std::vector extract_clustering_prefix_restrictions( return std::move(v.multi); } - std::vector prefix; + std::vector prefix; for (const auto& col : schema->clustering_key_columns()) { const auto found = v.single.find(&col); if (found == v.single.end()) { // Any further restrictions are skipping the CK order. break; } - if (find_needs_filtering(found->second)) { // This column's restriction doesn't define a clear bound. + if (find_needs_filtering(found->second.filter)) { // This column's restriction doesn't define a clear bound. // TODO: if this is a conjunction of filtering and non-filtering atoms, we could split them and add the // latter to the prefix. break; } prefix.push_back(found->second); - if (has_slice(found->second)) { + if (has_slice(found->second.filter)) { break; } } @@ -2323,9 +2370,9 @@ struct multi_column_range_accumulator { std::vector get_multi_column_clustering_bounds( const query_options& options, schema_ptr schema, - const std::vector& multi_column_restrictions) { + const std::vector& multi_column_restrictions) { multi_column_range_accumulator acc{options, schema}; - for (const auto& restr : multi_column_restrictions) { + for (const auto& restr : multi_column_restrictions | std::views::transform(&predicate::filter)) { expr::visit(acc, restr); } return acc.ranges; @@ -2340,14 +2387,16 @@ query::clustering_range reverse_if_reqd(query::clustering_range r, const abstrac std::vector get_single_column_clustering_bounds( const query_options& options, const schema& schema, - const std::vector& single_column_restrictions) { + const std::vector& single_column_restrictions) { const size_t size_limit = 
options.get_cql_config().restrictions.clustering_key_restrictions_max_cartesian_product_size; size_t product_size = 1; std::vector> prior_column_values; // Equality values of columns seen so far. for (size_t i = 0; i < single_column_restrictions.size(); ++i) { - auto values = possible_column_values( - &schema.clustering_column_at(i), // This should be the LHS of restrictions[i]. + if (&schema.clustering_column_at(i) != require_on_single_column(single_column_restrictions[i])) { + break; + } + auto values = solve( single_column_restrictions[i], options); if (auto list = std::get_if(&values)) { @@ -2414,7 +2463,7 @@ std::vector get_single_column_clustering_bounds( static std::vector get_index_v1_token_range_clustering_bounds( const query_options& options, const column_definition& token_column, - const expression& token_restriction) { + const predicate& token_restriction) { // A workaround in order to make possible_column_values work properly. // possible_column_values looks at the column type and uses this type's comparator. @@ -2425,10 +2474,10 @@ static std::vector get_index_v1_token_range_clustering_ // and use this restriction to calculate possible lhs values. column_definition token_column_bigint = token_column; token_column_bigint.type = long_type; - expression new_token_restrictions = replace_column_def(token_restriction, &token_column_bigint); + predicate new_token_restrictions = replace_column_def(token_restriction, &token_column_bigint); std::variant> values = - possible_column_values(&token_column_bigint, new_token_restrictions, options); + new_token_restrictions.solve_for(options); return std::visit(overloaded_functor { [](const value_list& list) { @@ -2618,9 +2667,9 @@ std::vector get_equivalent_ranges( /// Extracts raw multi-column bounds from exprs; last one wins. 
query::clustering_range range_from_raw_bounds( - const std::vector& exprs, const query_options& options, const schema& schema) { + const std::vector& exprs, const query_options& options, const schema& schema) { opt_bound lb, ub; - for (const auto& e : exprs) { + for (const auto& e : exprs | std::views::transform(&predicate::filter)) { if (auto b = find_clustering_order(e)) { cql3::raw_value tup_val = expr::evaluate(b->rhs, options); if (tup_val.is_null()) { @@ -2649,10 +2698,10 @@ statement_restrictions::build_get_clustering_bounds_fn() const { return {query::clustering_range::make_open_ended_both_sides()}; }; } - if (find_binop(_clustering_prefix_restrictions[0], is_multi_column)) { + if (find_binop(_clustering_prefix_restrictions[0].filter, is_multi_column)) { // FIXME: adjust for solve_for return [&] (const query_options& options) -> std::vector { bool all_natural = true, all_reverse = true; ///< Whether column types are reversed or natural. - for (auto& r : _clustering_prefix_restrictions) { // TODO: move to constructor, do only once. + for (auto& r : _clustering_prefix_restrictions | std::views::transform(&predicate::filter)) { // TODO: move to constructor, do only once. using namespace expr; const auto& binop = expr::as(r); if (is_clustering_order(binop)) { @@ -2798,45 +2847,89 @@ void statement_restrictions::prepare_indexed_global(const schema& idx_tbl_schema // This means that p1 and p2 can have many different values (token is a hash, can have collisions). // Clustering prefix ends after token_restriction, all further restrictions have to be filtered. 
expr::expression token_restriction = replace_partition_token(_partition_key_restrictions, token_column, *_schema); - _idx_tbl_ck_prefix = std::vector{std::move(token_restriction)}; + _idx_tbl_ck_prefix = std::vector{predicate{ + .solve_for = nullptr, // FIXME: adjust for solve_for + .filter = std::move(token_restriction), + .on = on_column{token_column}, + .is_singleton = false, // FIXME: could be a singleton token. Not very important. + }}; return; } // If we're here, it means the index cannot be on a partition column: process_partition_key_restrictions() // avoids indexing when _partition_range_is_simple. See _idx_tbl_ck_prefix blurb for its composition. - _idx_tbl_ck_prefix = std::vector(1 + _schema->partition_key_size(), expr::conjunction({})); + _idx_tbl_ck_prefix = std::vector(1 + _schema->partition_key_size(), predicate{ + .solve_for = nullptr, // FIXME: this is all overwritten later. Should be refactored. + .filter = expr::expression(expr::conjunction{}), + .on = on_column{nullptr}, // Illegal but will be overwritten + .is_singleton = false, + }); _idx_tbl_ck_prefix->reserve(_idx_tbl_ck_prefix->size() + idx_tbl_schema.clustering_key_size()); auto *single_column_partition_key_restrictions = std::get_if(&_partition_range_restrictions); if (single_column_partition_key_restrictions) { for (const auto& e : single_column_partition_key_restrictions->per_column_restrictions) { - const auto col = expr::as(find(e.filter, oper_t::EQ)->lhs).col; + const auto col = require_on_single_column(e); const auto pos = _schema->position(*col) + 1; - (*_idx_tbl_ck_prefix)[pos] = replace_column_def(e.filter, &idx_tbl_schema.clustering_column_at(pos)); + (*_idx_tbl_ck_prefix)[pos] = replace_column_def(e, &idx_tbl_schema.clustering_column_at(pos)); } } - if (std::ranges::any_of(*_idx_tbl_ck_prefix | std::views::drop(1), is_empty_restriction)) { + if (std::ranges::any_of(*_idx_tbl_ck_prefix | std::views::drop(1) | std::views::transform(&predicate::filter), is_empty_restriction)) { // 
If the partition key is not fully restricted, the index clustering key is of no use. - (*_idx_tbl_ck_prefix) = std::vector(); + (*_idx_tbl_ck_prefix) = std::vector(); return; } add_clustering_restrictions_to_idx_ck_prefix(idx_tbl_schema); auto pk_expressions = (*_idx_tbl_ck_prefix) + | std::views::transform(&predicate::filter) | std::views::drop(1) // skip the token restriction | std::views::take(_schema->partition_key_size()) // take only the partition key restrictions | std::views::transform(expr::as) // we know it's an EQ | std::views::transform(std::mem_fn(&expr::binary_operator::rhs)) // "solve" for the column value | std::ranges::to(); + auto pk_solvers = (*_idx_tbl_ck_prefix) + | std::views::drop(1) // skip the token restriction + | std::views::take(_schema->partition_key_size()) // take only the partition key restrictions + | std::views::transform(&predicate::solve_for) + | std::ranges::to(); + + auto is_singleton = std::ranges::all_of( + (*_idx_tbl_ck_prefix) + | std::views::drop(1) + | std::views::take(_schema->partition_key_size()), + &predicate::is_singleton); + + if (!is_singleton) { + on_internal_error(rlogger, "Inconsistency in singleton calculation in indexed query"); + } + auto token_func = make_shared(_schema); - (*_idx_tbl_ck_prefix)[0] = binary_operator( + auto token_expr = binary_operator( column_value(token_column), oper_t::EQ, expr::function_call{.func = std::move(token_func), .args = std::move(pk_expressions)}); + + auto token_solver = [this, pk_solvers = std::move(pk_solvers)] (const query_options& options) -> value_set { + auto pk_values = pk_solvers + | std::views::transform([&] (auto&& solver) { return solver(options); }) + | std::views::transform(value_set_to_singleton) + | std::ranges::to>(); + auto pk = partition_key::from_exploded(pk_values); + auto tok = dht::get_token(*_schema, pk); + return value_list{managed_bytes(serialized(dht::token::to_int64(tok)))}; + }; + + (*_idx_tbl_ck_prefix)[0] = predicate{ + .solve_for = 
std::move(token_solver), + .filter = std::move(token_expr), + .on = on_column{token_column}, + .is_singleton = is_singleton, + }; } void statement_restrictions::prepare_indexed_local(const schema& idx_tbl_schema) { @@ -2845,7 +2938,7 @@ void statement_restrictions::prepare_indexed_local(const schema& idx_tbl_schema) } // Local index clustering key is (indexed column, base clustering key) - _idx_tbl_ck_prefix = std::vector(); + _idx_tbl_ck_prefix = std::vector(); _idx_tbl_ck_prefix->reserve(1 + _clustering_prefix_restrictions.size()); const column_definition& indexed_column = idx_tbl_schema.column_at(column_kind::clustering_key, 0); @@ -2858,7 +2951,12 @@ void statement_restrictions::prepare_indexed_local(const schema& idx_tbl_schema) // Translate the restriction to use column from the index schema and add it expr::expression replaced_idx_restriction = replace_column_def(idx_col_restriction_expr, &indexed_column); - _idx_tbl_ck_prefix->push_back(replaced_idx_restriction); + _idx_tbl_ck_prefix->push_back(predicate{ + .solve_for = std::bind_front(possible_column_values, &indexed_column, replaced_idx_restriction), + .filter = replaced_idx_restriction, + .on = on_column{&indexed_column}, + .is_singleton = false, // Could be true, but not important. + }); // Add restrictions for the clustering key add_clustering_restrictions_to_idx_ck_prefix(idx_tbl_schema); @@ -2866,16 +2964,24 @@ void statement_restrictions::prepare_indexed_local(const schema& idx_tbl_schema) void statement_restrictions::add_clustering_restrictions_to_idx_ck_prefix(const schema& idx_tbl_schema) { for (const auto& e : _clustering_prefix_restrictions) { - if (find_binop(_clustering_prefix_restrictions[0], is_multi_column)) { + if (find_binop(_clustering_prefix_restrictions[0].filter, is_multi_column)) { // TODO: We could handle single-element tuples, eg. `(c)>=(123)`. 
break; } - const auto any_binop = find_binop(e, [] (auto&&) { return true; }); + const auto any_binop = find_binop(e.filter, [] (auto&&) { return true; }); if (!any_binop) { break; } const auto col = expr::as(any_binop->lhs).col; - _idx_tbl_ck_prefix->push_back(replace_column_def(e, idx_tbl_schema.get_column_definition(col->name()))); + auto col_in_index = idx_tbl_schema.get_column_definition(col->name()); + auto replaced = replace_column_def(e.filter, col_in_index); + auto a = predicate{ + .solve_for = std::bind_front(possible_column_values, col_in_index, replaced), + .filter = replaced, + .on = on_column{col_in_index}, + .is_singleton = false, // FIXME: could be a singleton token. Not very important. + }; + _idx_tbl_ck_prefix->push_back(std::move(a)); } } @@ -3024,7 +3130,7 @@ void statement_restrictions::validate_primary_key(const query_options& options) validate_primary_key_restrictions(options, r.per_column_restrictions | std::views::transform(&predicate::filter)); } }, _partition_range_restrictions); - validate_primary_key_restrictions(options, _clustering_prefix_restrictions); + validate_primary_key_restrictions(options, _clustering_prefix_restrictions | std::views::transform(&predicate::filter)); } diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index a9345452a3..babec09b5a 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -48,6 +48,12 @@ struct on_partition_key_token { bool operator==(const on_partition_key_token&) const = default; }; +struct on_clustering_key_prefix { + std::vector columns; + + bool operator==(const on_clustering_key_prefix&) const = default; +}; + // A predicate on a column or a combination of columns. The WHERE clause analyzer // will attempt to convert predicates (that return true or false for a particular row) // to solvers (that return the set of column values that satisfy the predicate) when possible. 
@@ -60,7 +66,8 @@ struct predicate { // What column the predicate can be solved for std::variant< on_column, // solving for a single column: e.g. c1 = 3 - on_partition_key_token // solving for the token, e.g. token(pk1, pk2) >= :var + on_partition_key_token, // solving for the token, e.g. token(pk1, pk2) >= :var + on_clustering_key_prefix // solving for a clustering key prefix: e.g. (ck1, ck2) >= (3, 4) > on; // Whether the returned value_set will resolve to a single value. bool is_singleton = false; @@ -167,7 +174,7 @@ private: /// 4.4 elements other than the last have only EQ or IN atoms /// 4.5 the last element has only EQ, IN, or is_slice() atoms /// 5. if multi-column, then each element is a binary_operator - std::vector _clustering_prefix_restrictions; + std::vector _clustering_prefix_restrictions; /// Like _clustering_prefix_restrictions, but for the indexing table (if this is an index-reading statement). /// Recall that the index-table CK is (token, PK, CK) of the base table for a global index and (indexed column, @@ -176,7 +183,7 @@ private: /// Elements are conjunctions of single-column binary operators with the same LHS. /// Element order follows the indexing-table clustering key. /// In case of a global index the first element's (token restriction) RHS is a dummy value, it is filled later. - std::optional> _idx_tbl_ck_prefix; + std::optional> _idx_tbl_ck_prefix; /// Parts of _where defining the partition range. /// From b520e741280024ae6b9abcba328e68baa2568bd3 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 13 Nov 2024 19:24:37 +0200 Subject: [PATCH 15/76] cql3: statement_restrictions: push multi-column post-processing into get_multi_column_clustering_bounds() Doing this splits the multi-column processing code into a preparation phase and an evaluation phase in a single call, making it easier to further split prepare/evaluate. 
--- cql3/restrictions/statement_restrictions.cc | 40 ++++++++++++--------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index e9a9b3cdc0..6751f7fc8f 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -2366,16 +2366,35 @@ struct multi_column_range_accumulator { } }; +std::vector get_equivalent_ranges( + const query::clustering_range& cql_order_range, const schema& schema); + /// Calculates clustering bounds for the multi-column case. std::vector get_multi_column_clustering_bounds( const query_options& options, schema_ptr schema, - const std::vector& multi_column_restrictions) { + const std::vector& multi_column_restrictions, + bool all_natural, bool all_reverse) { multi_column_range_accumulator acc{options, schema}; for (const auto& restr : multi_column_restrictions | std::views::transform(&predicate::filter)) { expr::visit(acc, restr); } - return acc.ranges; + auto bounds = std::move(acc.ranges); + + if (!all_natural && !all_reverse) { + std::vector bounds_in_clustering_order; + for (const auto& b : bounds) { + const auto eqv = get_equivalent_ranges(b, *schema); + bounds_in_clustering_order.insert(bounds_in_clustering_order.end(), eqv.cbegin(), eqv.cend()); + } + return bounds_in_clustering_order; + } + if (all_reverse) { + for (auto& crange : bounds) { + crange = query::clustering_range(crange.end(), crange.start()); + } + } + return bounds; } /// Reverses the range if the type is reversed. Why don't we have interval::reverse()?? 
@@ -2716,21 +2735,8 @@ statement_restrictions::build_get_clustering_bounds_fn() const { } } } - auto bounds = get_multi_column_clustering_bounds(options, _schema, _clustering_prefix_restrictions); - if (!all_natural && !all_reverse) { - std::vector bounds_in_clustering_order; - for (const auto& b : bounds) { - const auto eqv = get_equivalent_ranges(b, *_schema); - bounds_in_clustering_order.insert(bounds_in_clustering_order.end(), eqv.cbegin(), eqv.cend()); - } - return bounds_in_clustering_order; - } - if (all_reverse) { - for (auto& crange : bounds) { - crange = query::clustering_range(crange.end(), crange.start()); - } - } - return bounds; + return get_multi_column_clustering_bounds(options, _schema, _clustering_prefix_restrictions, + all_natural, all_reverse); }; } else { return [&] (const query_options& options) -> std::vector { From c60e3d5cf7c2434ac58ea045c8cb45be91f7018e Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 13 Nov 2024 22:59:43 +0200 Subject: [PATCH 16/76] cql3: statement_restrictions: multi-key clustering restrictions one layer deeper For the multi column binary operator case, perform more of the work at prepare time in preparation for consolidating the analysis. --- cql3/restrictions/statement_restrictions.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 6751f7fc8f..07aa8a527e 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -2718,13 +2718,14 @@ statement_restrictions::build_get_clustering_bounds_fn() const { }; } if (find_binop(_clustering_prefix_restrictions[0].filter, is_multi_column)) { // FIXME: adjust for solve_for - return [&] (const query_options& options) -> std::vector { bool all_natural = true, all_reverse = true; ///< Whether column types are reversed or natural. 
for (auto& r : _clustering_prefix_restrictions | std::views::transform(&predicate::filter)) { // TODO: move to constructor, do only once. using namespace expr; const auto& binop = expr::as(r); if (is_clustering_order(binop)) { + return [this] (const query_options& options) -> std::vector { return {range_from_raw_bounds(_clustering_prefix_restrictions, options, *_schema)}; + }; } for (auto& element : expr::as(binop.lhs).elements) { auto& cv = expr::as(element); @@ -2735,6 +2736,7 @@ statement_restrictions::build_get_clustering_bounds_fn() const { } } } + return [this, all_natural, all_reverse] (const query_options& options) -> std::vector { return get_multi_column_clustering_bounds(options, _schema, _clustering_prefix_restrictions, all_natural, all_reverse); }; From ea26186043fc4d7fda65d0960756271e19947f08 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 28 Nov 2024 19:55:15 +0200 Subject: [PATCH 17/76] cql3: statement_restrictions: make get_multi_column_clustering_bounds a builder Lay the groundwork for analyzing multi column clustering bounds by splitting the function into prepare-time and execute-time parts. To start with, all of the work is done at query time, but later patches will move bits into prepare time. --- cql3/restrictions/statement_restrictions.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 07aa8a527e..fcef330704 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -2370,11 +2370,12 @@ std::vector get_equivalent_ranges( const query::clustering_range& cql_order_range, const schema& schema); /// Calculates clustering bounds for the multi-column case. 
-std::vector get_multi_column_clustering_bounds( - const query_options& options, +std::function (const query_options&)> +build_get_multi_column_clustering_bounds_fn( schema_ptr schema, const std::vector& multi_column_restrictions, bool all_natural, bool all_reverse) { + return [schema, multi_column_restrictions, all_natural, all_reverse] (const query_options& options) -> std::vector { multi_column_range_accumulator acc{options, schema}; for (const auto& restr : multi_column_restrictions | std::views::transform(&predicate::filter)) { expr::visit(acc, restr); @@ -2395,6 +2396,7 @@ std::vector get_multi_column_clustering_bounds( } } return bounds; + }; } /// Reverses the range if the type is reversed. Why don't we have interval::reverse()?? @@ -2736,10 +2738,8 @@ statement_restrictions::build_get_clustering_bounds_fn() const { } } } - return [this, all_natural, all_reverse] (const query_options& options) -> std::vector { - return get_multi_column_clustering_bounds(options, _schema, _clustering_prefix_restrictions, + return build_get_multi_column_clustering_bounds_fn(_schema, _clustering_prefix_restrictions, all_natural, all_reverse); - }; } else { return [&] (const query_options& options) -> std::vector { return get_single_column_clustering_bounds(options, *_schema, _clustering_prefix_restrictions); From e646b763e7e059022571d8a22184cc1524ad8ca1 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 28 Nov 2024 20:41:38 +0200 Subject: [PATCH 18/76] cql3: statement_restrictions: split multi_column_range_accumulator into prepare-time and query-time phases multi_column_range_accumulator analyzes an expression containing multi-column restrictions of the form (a, b) > (?, ?) and simultaneously analyzes them and solves for the set of intervals that satisfy those restrictions. Split this into a prepare-time phase (that generates "builders", functions that operate on the accumulator), and a query phase that executes the builders. 
Importantly, the expression visitor ends up on the prepare phase, so it can be merged with other parts of the analysis. Helper functions of the visitor are made static, since they need to run during the query phase but the visitor only exists during the prepare phase. --- cql3/restrictions/statement_restrictions.cc | 38 +++++++++++++++------ 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index fcef330704..4b77face82 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -2222,14 +2222,20 @@ struct range_less { } }; -/// An expression visitor that translates multi-column atoms into clustering ranges. + struct multi_column_range_accumulator { - const query_options& options; - const schema_ptr schema; std::vector ranges{query::clustering_range::make_open_ended_both_sides()}; +}; + +/// An expression visitor that translates multi-column atoms into functions that accumulate +/// clustering ranges into multi_column_range_accumulator. 
+struct multi_column_range_accumulator_builder { + const schema_ptr schema; + std::vector> builders; const clustering_key_prefix::prefix_equal_tri_compare prefix3cmp = get_unreversed_tri_compare(*schema); void operator()(const binary_operator& binop) { + builders.emplace_back([binop, schema = schema, prefix3cmp = prefix3cmp] (multi_column_range_accumulator& acc, const query_options& options) { auto& lhs = expr::as(binop.lhs); if (is_compare(binop.op)) { auto opt_values = expr::get_tuple_elements(expr::evaluate(binop.rhs, options), *type_of(binop.rhs)); @@ -2240,7 +2246,7 @@ struct multi_column_range_accumulator { opt_values[i], "Invalid null value in condition for column {}", col.col->name_as_text()); } - intersect_all(to_range(binop.op, clustering_key_prefix(std::move(values)))); + intersect_all(acc, prefix3cmp, to_range(binop.op, clustering_key_prefix(std::move(values)))); } else if (binop.op == oper_t::IN) { const cql3::raw_value tup = expr::evaluate(binop.rhs, options); utils::chunked_vector> tuple_elems; @@ -2259,10 +2265,11 @@ struct multi_column_range_accumulator { "Invalid null value in condition for column {}", col.col->name_as_text()); } } - process_in_values(std::move(tuple_elems)); + process_in_values(acc, prefix3cmp, schema, std::move(tuple_elems)); } else { on_internal_error(rlogger, format("multi_column_range_accumulator: unexpected atom {}", binop)); } + }); } void operator()(const conjunction& c) { @@ -2270,6 +2277,9 @@ struct multi_column_range_accumulator { } void operator()(const constant& v) { + // We might have resolved this at prepare time, but... 
+ builders.emplace_back([v] (multi_column_range_accumulator& acc, const query_options& options) { + auto& ranges = acc.ranges; std::optional bool_val = get_bool_value(v); if (!bool_val.has_value()) { on_internal_error(rlogger, "non-bool constant encountered outside binary operator"); @@ -2278,6 +2288,7 @@ struct multi_column_range_accumulator { if (*bool_val == false) { ranges.clear(); } + }); } void operator()(const column_value&) { @@ -2333,7 +2344,8 @@ struct multi_column_range_accumulator { } /// Intersects each range with v. If any intersection is empty, clears ranges. - void intersect_all(const query::clustering_range& v) { + static void intersect_all(multi_column_range_accumulator& acc, const clustering_key_prefix::prefix_equal_tri_compare& prefix3cmp, const query::clustering_range& v) { + auto& ranges = acc.ranges; for (auto& r : ranges) { auto intrs = intersection(r, v, prefix3cmp); if (!intrs) { @@ -2346,7 +2358,8 @@ struct multi_column_range_accumulator { template requires std::convertible_to - void process_in_values(Range in_values) { + static void process_in_values(multi_column_range_accumulator& acc, const clustering_key_prefix::prefix_equal_tri_compare& prefix3cmp, const schema_ptr& schema, Range in_values) { + auto& ranges = acc.ranges; if (ranges.empty()) { return; // Shortcircuit an easy case. 
} @@ -2375,10 +2388,15 @@ build_get_multi_column_clustering_bounds_fn( schema_ptr schema, const std::vector& multi_column_restrictions, bool all_natural, bool all_reverse) { - return [schema, multi_column_restrictions, all_natural, all_reverse] (const query_options& options) -> std::vector { - multi_column_range_accumulator acc{options, schema}; + multi_column_range_accumulator_builder acc_builder{schema}; for (const auto& restr : multi_column_restrictions | std::views::transform(&predicate::filter)) { - expr::visit(acc, restr); + expr::visit(acc_builder, restr); + } + auto range_builders = std::move(acc_builder.builders); + return [schema, range_builders, all_natural, all_reverse] (const query_options& options) -> std::vector { + multi_column_range_accumulator acc; + for (auto& builder : range_builders) { + builder(acc, options); } auto bounds = std::move(acc.ranges); From 2c75123bbd56aa939d72ec8d9a51a706361a4694 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 28 Nov 2024 21:17:57 +0200 Subject: [PATCH 19/76] cql3: statement_restrictions: adjust signature of range_from_raw_bounds The get_clustering_bounds() family works in terms of vectors of clustering ranges (to support IN) and in fact the only caller converts it to a vector. Converting it immediately simplifies later patching. --- cql3/restrictions/statement_restrictions.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 4b77face82..6de6b022be 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -2705,7 +2705,7 @@ std::vector get_equivalent_ranges( } /// Extracts raw multi-column bounds from exprs; last one wins. 
-query::clustering_range range_from_raw_bounds( +std::vector range_from_raw_bounds( const std::vector& exprs, const query_options& options, const schema& schema) { opt_bound lb, ub; for (const auto& e : exprs | std::views::transform(&predicate::filter)) { @@ -2725,7 +2725,7 @@ query::clustering_range range_from_raw_bounds( } } } - return {lb, ub}; + return {{lb, ub}}; } } // anonymous namespace @@ -2744,7 +2744,7 @@ statement_restrictions::build_get_clustering_bounds_fn() const { const auto& binop = expr::as(r); if (is_clustering_order(binop)) { return [this] (const query_options& options) -> std::vector { - return {range_from_raw_bounds(_clustering_prefix_restrictions, options, *_schema)}; + return range_from_raw_bounds(_clustering_prefix_restrictions, options, *_schema); }; } for (auto& element : expr::as(binop.lhs).elements) { From 56ae02d8a3784c0b2e10781e043cf3f49f44d136 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 28 Nov 2024 21:20:22 +0200 Subject: [PATCH 20/76] cql3: statement_restrictions: split range_from_raw_bounds into prepare phase and query phase range_from_raw_bounds() processes restrictions of the form (a, b) > SCYLLA_CLUSTERING_BOUND(?, ?) indicating that comparisons respect whether columns are reversed or not. Iterate over expressions during the prepare phase only, generating "builder" functions to be executed during the query phase. --- cql3/restrictions/statement_restrictions.cc | 26 ++++++++++++++------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 6de6b022be..bd989e9b9c 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -2705,11 +2705,14 @@ std::vector get_equivalent_ranges( } /// Extracts raw multi-column bounds from exprs; last one wins. 
-std::vector range_from_raw_bounds( - const std::vector& exprs, const query_options& options, const schema& schema) { - opt_bound lb, ub; +get_clustering_bounds_fn_t +build_range_from_raw_bounds_fn( + const std::vector& exprs, const schema& schema) { + std::vector> range_builders; for (const auto& e : exprs | std::views::transform(&predicate::filter)) { if (auto b = find_clustering_order(e)) { + range_builders.emplace_back([bb = *b, &schema] (const query_options& options) { + auto* b = &bb; cql3::raw_value tup_val = expr::evaluate(b->rhs, options); if (tup_val.is_null()) { on_internal_error(rlogger, format("range_from_raw_bounds: unexpected atom {}", *b)); @@ -2717,6 +2720,15 @@ std::vector range_from_raw_bounds( const auto r = to_range( b->op, clustering_key_prefix::from_optional_exploded(schema, expr::get_tuple_elements(tup_val, *type_of(b->rhs)))); + return r; + }); + } + } + return [range_builders] (const query_options& options) -> std::vector { + opt_bound lb, ub; + for (auto& builder : range_builders) { + auto r = builder(options); + if (r.start()) { lb = r.start(); } @@ -2724,8 +2736,8 @@ std::vector range_from_raw_bounds( ub = r.end(); } } - } - return {{lb, ub}}; + return {{lb, ub}}; + }; } } // anonymous namespace @@ -2743,9 +2755,7 @@ statement_restrictions::build_get_clustering_bounds_fn() const { using namespace expr; const auto& binop = expr::as(r); if (is_clustering_order(binop)) { - return [this] (const query_options& options) -> std::vector { - return range_from_raw_bounds(_clustering_prefix_restrictions, options, *_schema); - }; + return build_range_from_raw_bounds_fn(_clustering_prefix_restrictions, *_schema); } for (auto& element : expr::as(binop.lhs).elements) { auto& cv = expr::as(element); From 0a16d90acbc2ec45d38e8ed750f8462f086fbbe9 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Fri, 29 Nov 2024 18:41:09 +0200 Subject: [PATCH 21/76] cql3: statement_restrictions: don't handle boolean constants in multi_column_range_accumulator_builder In 
statement_restriction's constructor, we check that all the boolean factors are relations. This means the code to handle a constant here is dead code. Remove it; while it's good to handle it, it should be handled at the top level, not in multi-column restriction processing. --- cql3/restrictions/statement_restrictions.cc | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index bd989e9b9c..91ebc51f8f 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -2277,18 +2277,7 @@ struct multi_column_range_accumulator_builder { } void operator()(const constant& v) { - // We might have resolved this at prepare time, but... - builders.emplace_back([v] (multi_column_range_accumulator& acc, const query_options& options) { - auto& ranges = acc.ranges; - std::optional bool_val = get_bool_value(v); - if (!bool_val.has_value()) { - on_internal_error(rlogger, "non-bool constant encountered outside binary operator"); - } - - if (*bool_val == false) { - ranges.clear(); - } - }); + on_internal_error(rlogger, "constant encountered outside binary operator"); } void operator()(const column_value&) { From 135809d97b622d25c15f3e621ab0a9b0a07ffa5d Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Fri, 29 Nov 2024 19:00:05 +0200 Subject: [PATCH 22/76] cql3: statement_restrictions: pre-analyze column in value_for() Since we pre-analyze the column, return a built function, and remove the corresponding lambda from the caller. 
--- cql3/restrictions/statement_restrictions.cc | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 91ebc51f8f..3e181eb788 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -830,8 +830,16 @@ bool is_empty_restriction(const expression& e) { } static -bytes_opt value_for(const column_definition& cdef, const expression& e, const query_options& options) { - value_set possible_vals = possible_column_values(&cdef, e, options); +std::function +build_value_for_fn(const column_definition& cdef, const expression& e) { + auto ac = predicate{ + .solve_for = std::bind_front(possible_column_values, &cdef, e), + .filter = e, + .on = on_column{&cdef}, + .is_singleton = false, // Code below assumes 0 or 1 results. + }; + return [ac] (const query_options& options) -> bytes_opt { + value_set possible_vals = solve(ac, options); return std::visit(overloaded_functor { [&](const value_list& val_list) -> bytes_opt { if (val_list.empty()) { @@ -839,15 +847,16 @@ bytes_opt value_for(const column_definition& cdef, const expression& e, const qu } if (val_list.size() != 1) { - on_internal_error(expr_logger, format("expr::value_for - multiple possible values for column: {}", e)); + on_internal_error(expr_logger, format("expr::value_for - multiple possible values for column: {}", ac.filter)); } return to_bytes(val_list.front()); }, [&](const interval&) -> bytes_opt { - on_internal_error(expr_logger, format("expr::value_for - possible values are a range: {}", e)); + on_internal_error(expr_logger, format("expr::value_for - possible values are a range: {}", ac.filter)); } }, possible_vals); + }; } bool contains_multi_column_restriction(const expression& e) { @@ -3112,9 +3121,7 @@ statement_restrictions::build_value_for_index_partition_key_fn() const { throw exceptions::invalid_request_exception("Indexed 
column not found in schema"); } - return [this, cdef] (const query_options& options) { - return value_for(*cdef, _idx_restrictions, options); - }; + return build_value_for_fn(*cdef, _idx_restrictions); } bytes_opt From 96e8414963fe111de7c1b928d481dbd432e4239b Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 5 Dec 2024 19:11:31 +0200 Subject: [PATCH 23/76] cql3: statement_restrictions: add solver for token restriction on index possible_column_values() knows how to find the values that the token can take, so add a solve_for implementation for tokens. --- cql3/restrictions/statement_restrictions.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 3e181eb788..b067cc92cd 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -2882,8 +2882,8 @@ void statement_restrictions::prepare_indexed_global(const schema& idx_tbl_schema // Clustering prefix ends after token_restriction, all further restrictions have to be filtered. expr::expression token_restriction = replace_partition_token(_partition_key_restrictions, token_column, *_schema); _idx_tbl_ck_prefix = std::vector{predicate{ - .solve_for = nullptr, // FIXME: adjust for solve_for - .filter = std::move(token_restriction), + .solve_for = std::bind_front(possible_column_values, token_column, token_restriction), + .filter = token_restriction, .on = on_column{token_column}, .is_singleton = false, // FIXME: could be a singleton token. Not very important. }}; From e42ad62561861156b4a2477ba2e95b2954750dd4 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 5 Dec 2024 20:44:34 +0200 Subject: [PATCH 24/76] cql3: statement_restrictions: prepare solver for multi-column restrictions Multi-column restrictions (a, b) > (:v1, :v2) do not obey normal comparison rules. 
For example, given (a, b) > (5, 1) AND a <= 5 We see that (a, b) = (5, 2) satisfies the constraint, but if we tried to solve for the interval ( (5, 1), (5) ] We'd have to conclude that (5,1) <= (5). It's possible to extend the CQL type system to support this, but that would be a lot of work, and in fact the current code doesn't depend on it (by solving these intersections in its own code path, multi_column_range_accumulator_builder's prefix3cmp). So, we just mark such solvers as non-comparable, and generate an internal error if we try to compare them in make_conjunction. --- cql3/restrictions/statement_restrictions.cc | 8 +++++++- cql3/restrictions/statement_restrictions.hh | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index b067cc92cd..f6785b78f1 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -272,6 +272,10 @@ make_conjunction(predicate a, predicate b) { on_internal_error(rlogger, "make_conjunction: merging predicate targets"); } + if (!a.comparable && !b.comparable) { + on_internal_error(rlogger, "make_conjunction: merging non-comparable columns"); + } + auto& sa = a.solve_for; auto& sb = b.solve_for; @@ -290,6 +294,7 @@ make_conjunction(predicate a, predicate b) { .filter = make_conjunction(std::move(a.filter), std::move(b.filter)), .on = a.on, .is_singleton = false, // Even if both columns are singletons, the conjunction of them can return zero values. + .comparable = a.comparable && b.comparable, // Result is only comparable if both inputs follow CQL comparison semantics. 
}; } @@ -1065,10 +1070,11 @@ static std::vector extract_clustering_prefix_restrictions( } with_current_binary_operator(*this, [&] (const binary_operator& b) { multi.push_back(predicate{ - .solve_for = nullptr, // FIXME: implement + .solve_for = std::bind_front(possible_column_values, /* col */ nullptr, b), .filter = b, .on = on_clustering_key_prefix{prefix}, .is_singleton = false, + .comparable = false, }); }); } diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index babec09b5a..98228c55d5 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -71,6 +71,8 @@ struct predicate { > on; // Whether the returned value_set will resolve to a single value. bool is_singleton = false; + // Whether the returned value_set follows CQL comparison semantics + bool comparable = true; }; ///In some cases checking if columns have indexes is undesired of even From e0445269e55b523715998ff62445d47419ab57c3 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 5 Dec 2024 21:00:41 +0200 Subject: [PATCH 25/76] cql3: statement_restrictions: reorder possible_lhs_values parameters By moving query_options to the end, we can use std::bind_front to convert it from a build-time to a run-time function that depends only on the query_options. --- cql3/restrictions/statement_restrictions.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index f6785b78f1..aea2d021c1 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -314,8 +314,8 @@ require_on_single_column(const predicate& p) { // the partition token. 
static value_set possible_lhs_values(const column_definition* cdef, const expression& expr, - const query_options& options, - const schema* table_schema_opt) { + const schema* table_schema_opt, + const query_options& options) { const auto type = cdef ? &cdef->type->without_reversed() : long_type.get(); return expr::visit(overloaded_functor{ [] (const constant& constant_val) { @@ -330,7 +330,7 @@ static value_set possible_lhs_values(const column_definition* cdef, [&] (const conjunction& conj) { return std::ranges::fold_left(conj.children, unbounded_value_set, [&](value_set&& acc, const expression& child) { return intersection( - std::move(acc), possible_lhs_values(cdef, child, options, table_schema_opt), type); + std::move(acc), possible_lhs_values(cdef, child, table_schema_opt, options), type); }); }, [&] (const binary_operator& oper) -> value_set { @@ -497,11 +497,11 @@ static value_set possible_lhs_values(const column_definition* cdef, } value_set possible_column_values(const column_definition* col, const expression& e, const query_options& options) { - return possible_lhs_values(col, e, options, nullptr); + return possible_lhs_values(col, e, nullptr, options); } value_set possible_partition_token_values(const expression& e, const query_options& options, const schema& table_schema) { - return possible_lhs_values(nullptr, e, options, &table_schema); + return possible_lhs_values(nullptr, e, &table_schema, options); } interval to_range(const value_set& s) { From c6f6e81fe522c2ba84164a5647bff7eb136e31b9 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 5 Dec 2024 21:16:59 +0200 Subject: [PATCH 26/76] cql3: statement_restrictions: remove fallback path in solve() All query plans that try to solve for the possible values a column (or token, or column-tuple) can take have been converted to set analyzed_column::solve_for. Recognize that by removing the fallback path. 
This removes the last possible_column_values() call that isn't bound (using std::bind_front), and will allow moving it to prepare time. --- cql3/restrictions/statement_restrictions.cc | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index aea2d021c1..7115878dcb 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -140,19 +140,7 @@ solve(const predicate& ac, const query_options& options) { return ac.solve_for(options); } - return std::visit( - overloaded_functor{ - [&] (const on_column& oc) { - return possible_column_values(oc.column, ac.filter, options); - }, - [&] (const on_partition_key_token& pkt) { - return possible_partition_token_values(ac.filter, options, *pkt.schema); - }, - [&] (const on_clustering_key_prefix& ockp) -> value_set { - on_internal_error(rlogger, "asked to directly solve for clustering key prefix"); - }, - }, - ac.on); + on_internal_error(rlogger, "solve: no solve_for function"); } namespace { From b26e6f7330803bbc61fa63439bac7d926b1cde26 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 5 Dec 2024 21:28:41 +0200 Subject: [PATCH 27/76] cql3: statement_restrictions: pass schema to possible_column_values() This unifies the signature with possible_lhs_values(), paving the way to deduplicating the two functions. We always have the schema and may as well pass it. 
--- cql3/restrictions/statement_restrictions.cc | 26 ++++++++++----------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 7115878dcb..d5f2143f61 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -67,7 +67,7 @@ static auto& expr_logger = rlogger; // compatibility with code moved from expres /// - `A=1 AND A<=0` restricts A to an empty list; no value is able to satisfy the expression /// - `A>=NULL` also restricts A to an empty list; all comparisons to NULL are false /// - an expression without A "restricts" A to unbounded range -extern value_set possible_column_values(const column_definition*, const expression&, const query_options&); +extern value_set possible_column_values(const column_definition*, const expression&, const schema*, const query_options&); extern value_set possible_partition_token_values(const expression&, const query_options&, const schema& table_schema); /// Turns value_set into a range, unless it's a multi-valued list (in which case this throws). 
@@ -484,8 +484,8 @@ static value_set possible_lhs_values(const column_definition* cdef, }, expr); } -value_set possible_column_values(const column_definition* col, const expression& e, const query_options& options) { - return possible_lhs_values(col, e, nullptr, options); +value_set possible_column_values(const column_definition* col, const expression& e, const schema* table_schema, const query_options& options) { + return possible_lhs_values(col, e, table_schema, options); } value_set possible_partition_token_values(const expression& e, const query_options& options, const schema& table_schema) { @@ -824,9 +824,9 @@ bool is_empty_restriction(const expression& e) { static std::function -build_value_for_fn(const column_definition& cdef, const expression& e) { +build_value_for_fn(const column_definition& cdef, const expression& e, const schema& s) { auto ac = predicate{ - .solve_for = std::bind_front(possible_column_values, &cdef, e), + .solve_for = std::bind_front(possible_column_values, &cdef, e, &s), .filter = e, .on = on_column{&cdef}, .is_singleton = false, // Code below assumes 0 or 1 results. 
@@ -930,7 +930,7 @@ static partition_range_restrictions extract_partition_range( with_current_binary_operator(*this, [&] (const binary_operator& b) { if (s->col->is_partition_key() && (b.op == oper_t::EQ || b.op == oper_t::IN)) { auto a = predicate{ - .solve_for = std::bind_front(possible_column_values, s->col, b), + .solve_for = std::bind_front(possible_column_values, s->col, b, table_schema.get()), .filter = b, .on = on_column{s->col}, .is_singleton = b.op == oper_t::EQ, @@ -1058,7 +1058,7 @@ static std::vector extract_clustering_prefix_restrictions( } with_current_binary_operator(*this, [&] (const binary_operator& b) { multi.push_back(predicate{ - .solve_for = std::bind_front(possible_column_values, /* col */ nullptr, b), + .solve_for = std::bind_front(possible_column_values, /* col */ nullptr, b, table_schema.get()), .filter = b, .on = on_clustering_key_prefix{prefix}, .is_singleton = false, @@ -1072,7 +1072,7 @@ static std::vector extract_clustering_prefix_restrictions( with_current_binary_operator(*this, [&] (const binary_operator& b) { if (s->col->is_clustering_key()) { auto a = predicate{ - .solve_for = std::bind_front(possible_column_values, s->col, b), + .solve_for = std::bind_front(possible_column_values, s->col, b, table_schema.get()), .filter = b, .on = on_column{s->col}, .is_singleton = b.op == oper_t::EQ, @@ -1091,7 +1091,7 @@ static std::vector extract_clustering_prefix_restrictions( with_current_binary_operator(*this, [&] (const binary_operator& b) { if (cval.col->is_clustering_key()) { auto a = predicate{ - .solve_for = std::bind_front(possible_column_values, cval.col, b), + .solve_for = std::bind_front(possible_column_values, cval.col, b, table_schema.get()), .filter = b, .on = on_column{cval.col}, .is_singleton = b.op == oper_t::EQ, @@ -2876,7 +2876,7 @@ void statement_restrictions::prepare_indexed_global(const schema& idx_tbl_schema // Clustering prefix ends after token_restriction, all further restrictions have to be filtered. 
expr::expression token_restriction = replace_partition_token(_partition_key_restrictions, token_column, *_schema); _idx_tbl_ck_prefix = std::vector{predicate{ - .solve_for = std::bind_front(possible_column_values, token_column, token_restriction), + .solve_for = std::bind_front(possible_column_values, token_column, token_restriction, _schema.get()), .filter = token_restriction, .on = on_column{token_column}, .is_singleton = false, // FIXME: could be a singleton token. Not very important. @@ -2980,7 +2980,7 @@ void statement_restrictions::prepare_indexed_local(const schema& idx_tbl_schema) // Translate the restriction to use column from the index schema and add it expr::expression replaced_idx_restriction = replace_column_def(idx_col_restriction_expr, &indexed_column); _idx_tbl_ck_prefix->push_back(predicate{ - .solve_for = std::bind_front(possible_column_values, &indexed_column, replaced_idx_restriction), + .solve_for = std::bind_front(possible_column_values, &indexed_column, replaced_idx_restriction, _schema.get()), .filter = replaced_idx_restriction, .on = on_column{&indexed_column}, .is_singleton = false, // Could be true, but not important. @@ -3004,7 +3004,7 @@ void statement_restrictions::add_clustering_restrictions_to_idx_ck_prefix(const auto col_in_index = idx_tbl_schema.get_column_definition(col->name()); auto replaced = replace_column_def(e.filter, col_in_index); auto a = predicate{ - .solve_for = std::bind_front(possible_column_values, col_in_index, replaced), + .solve_for = std::bind_front(possible_column_values, col_in_index, replaced, &idx_tbl_schema), .filter = replaced, .on = on_column{col_in_index}, .is_singleton = false, // FIXME: could be a singleton token. Not very important. 
@@ -3115,7 +3115,7 @@ statement_restrictions::build_value_for_index_partition_key_fn() const { throw exceptions::invalid_request_exception("Indexed column not found in schema"); } - return build_value_for_fn(*cdef, _idx_restrictions); + return build_value_for_fn(*cdef, _idx_restrictions, *_schema); } bytes_opt From c1fc596203c48da4a2e89309811cc177acae4139 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 5 Dec 2024 21:31:38 +0200 Subject: [PATCH 28/76] cql3: statement_restrictions: remove possible_column_values replace with now-identical possible_lhs_values. This paves the way to have only one solver function (after we remove possible_partition_token_values). --- cql3/restrictions/statement_restrictions.cc | 38 ++++++--------------- 1 file changed, 11 insertions(+), 27 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index d5f2143f61..2906d53acc 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -56,18 +56,6 @@ using namespace expr; static logging::logger rlogger("restrictions"); static auto& expr_logger = rlogger; // compatibility with code moved from expression.cc -/// A set of all column values that would satisfy an expression. The _token_values variant finds -/// matching values for the partition token function call instead of the column. 
-/// -/// An expression restricts possible values of a column or token: -/// - `A>5` restricts A from below -/// - `A>5 AND A>6 AND B<10 AND A=12 AND B>0` restricts A to 12 and B to between 0 and 10 -/// - `A IN (1, 3, 5)` restricts A to 1, 3, or 5 -/// - `A IN (1, 3, 5) AND A>3` restricts A to just 5 -/// - `A=1 AND A<=0` restricts A to an empty list; no value is able to satisfy the expression -/// - `A>=NULL` also restricts A to an empty list; all comparisons to NULL are false -/// - an expression without A "restricts" A to unbounded range -extern value_set possible_column_values(const column_definition*, const expression&, const schema*, const query_options&); extern value_set possible_partition_token_values(const expression&, const query_options&, const schema& table_schema); /// Turns value_set into a range, unless it's a multi-valued list (in which case this throws). @@ -484,10 +472,6 @@ static value_set possible_lhs_values(const column_definition* cdef, }, expr); } -value_set possible_column_values(const column_definition* col, const expression& e, const schema* table_schema, const query_options& options) { - return possible_lhs_values(col, e, table_schema, options); -} - value_set possible_partition_token_values(const expression& e, const query_options& options, const schema& table_schema) { return possible_lhs_values(nullptr, e, &table_schema, options); } @@ -826,7 +810,7 @@ static std::function build_value_for_fn(const column_definition& cdef, const expression& e, const schema& s) { auto ac = predicate{ - .solve_for = std::bind_front(possible_column_values, &cdef, e, &s), + .solve_for = std::bind_front(possible_lhs_values, &cdef, e, &s), .filter = e, .on = on_column{&cdef}, .is_singleton = false, // Code below assumes 0 or 1 results. 
@@ -930,7 +914,7 @@ static partition_range_restrictions extract_partition_range( with_current_binary_operator(*this, [&] (const binary_operator& b) { if (s->col->is_partition_key() && (b.op == oper_t::EQ || b.op == oper_t::IN)) { auto a = predicate{ - .solve_for = std::bind_front(possible_column_values, s->col, b, table_schema.get()), + .solve_for = std::bind_front(possible_lhs_values, s->col, b, table_schema.get()), .filter = b, .on = on_column{s->col}, .is_singleton = b.op == oper_t::EQ, @@ -1058,7 +1042,7 @@ static std::vector extract_clustering_prefix_restrictions( } with_current_binary_operator(*this, [&] (const binary_operator& b) { multi.push_back(predicate{ - .solve_for = std::bind_front(possible_column_values, /* col */ nullptr, b, table_schema.get()), + .solve_for = std::bind_front(possible_lhs_values, /* col */ nullptr, b, table_schema.get()), .filter = b, .on = on_clustering_key_prefix{prefix}, .is_singleton = false, @@ -1072,7 +1056,7 @@ static std::vector extract_clustering_prefix_restrictions( with_current_binary_operator(*this, [&] (const binary_operator& b) { if (s->col->is_clustering_key()) { auto a = predicate{ - .solve_for = std::bind_front(possible_column_values, s->col, b, table_schema.get()), + .solve_for = std::bind_front(possible_lhs_values, s->col, b, table_schema.get()), .filter = b, .on = on_column{s->col}, .is_singleton = b.op == oper_t::EQ, @@ -1091,7 +1075,7 @@ static std::vector extract_clustering_prefix_restrictions( with_current_binary_operator(*this, [&] (const binary_operator& b) { if (cval.col->is_clustering_key()) { auto a = predicate{ - .solve_for = std::bind_front(possible_column_values, cval.col, b, table_schema.get()), + .solve_for = std::bind_front(possible_lhs_values, cval.col, b, table_schema.get()), .filter = b, .on = on_column{cval.col}, .is_singleton = b.op == oper_t::EQ, @@ -2496,10 +2480,10 @@ static std::vector get_index_v1_token_range_clustering_ const column_definition& token_column, const predicate& 
token_restriction) { - // A workaround in order to make possible_column_values work properly. - // possible_column_values looks at the column type and uses this type's comparator. + // A workaround in order to make possible_lhs_values work properly. + // possible_lhs_values looks at the column type and uses this type's comparator. // This is a problem because when using blob's comparator, -4 is greater than 4. - // This makes possible_column_values think that an expression like token(p) > -4 and token(p) < 4 + // This makes possible_lhs_values think that an expression like token(p) > -4 and token(p) < 4 // is impossible to fulfill. // Create a fake token column with the type set to bigint, translate the restriction to use this column // and use this restriction to calculate possible lhs values. @@ -2876,7 +2860,7 @@ void statement_restrictions::prepare_indexed_global(const schema& idx_tbl_schema // Clustering prefix ends after token_restriction, all further restrictions have to be filtered. expr::expression token_restriction = replace_partition_token(_partition_key_restrictions, token_column, *_schema); _idx_tbl_ck_prefix = std::vector{predicate{ - .solve_for = std::bind_front(possible_column_values, token_column, token_restriction, _schema.get()), + .solve_for = std::bind_front(possible_lhs_values, token_column, token_restriction, _schema.get()), .filter = token_restriction, .on = on_column{token_column}, .is_singleton = false, // FIXME: could be a singleton token. Not very important. 
@@ -2980,7 +2964,7 @@ void statement_restrictions::prepare_indexed_local(const schema& idx_tbl_schema) // Translate the restriction to use column from the index schema and add it expr::expression replaced_idx_restriction = replace_column_def(idx_col_restriction_expr, &indexed_column); _idx_tbl_ck_prefix->push_back(predicate{ - .solve_for = std::bind_front(possible_column_values, &indexed_column, replaced_idx_restriction, _schema.get()), + .solve_for = std::bind_front(possible_lhs_values, &indexed_column, replaced_idx_restriction, _schema.get()), .filter = replaced_idx_restriction, .on = on_column{&indexed_column}, .is_singleton = false, // Could be true, but not important. @@ -3004,7 +2988,7 @@ void statement_restrictions::add_clustering_restrictions_to_idx_ck_prefix(const auto col_in_index = idx_tbl_schema.get_column_definition(col->name()); auto replaced = replace_column_def(e.filter, col_in_index); auto a = predicate{ - .solve_for = std::bind_front(possible_column_values, col_in_index, replaced, &idx_tbl_schema), + .solve_for = std::bind_front(possible_lhs_values, col_in_index, replaced, &idx_tbl_schema), .filter = replaced, .on = on_column{col_in_index}, .is_singleton = false, // FIXME: could be a singleton token. Not very important. From 9cbb1b851ebc127a391d8bc88487a28aa8171577 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 5 Dec 2024 21:34:01 +0200 Subject: [PATCH 29/76] cql3: statement_restrictions: remove possible_partition_token_values() It's just a call to possible_lhs_values() with a different signature. Now possible_lhs_values() is our only solver. 
--- cql3/restrictions/statement_restrictions.cc | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 2906d53acc..03a6dec7c0 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -56,8 +56,6 @@ using namespace expr; static logging::logger rlogger("restrictions"); static auto& expr_logger = rlogger; // compatibility with code moved from expression.cc -extern value_set possible_partition_token_values(const expression&, const query_options&, const schema& table_schema); - /// Turns value_set into a range, unless it's a multi-valued list (in which case this throws). extern interval to_range(const value_set&); @@ -472,10 +470,6 @@ static value_set possible_lhs_values(const column_definition* cdef, }, expr); } -value_set possible_partition_token_values(const expression& e, const query_options& options, const schema& table_schema) { - return possible_lhs_values(nullptr, e, &table_schema, options); -} - interval to_range(const value_set& s) { return std::visit(overloaded_functor{ [] (const interval& r) { return r; }, @@ -986,7 +980,7 @@ static partition_range_restrictions extract_partition_range( return token_range_restrictions{ .token_restrictions = predicate{ // It's not really a column, but... 
- .solve_for = std::bind(possible_partition_token_values, *v.tokens, std::placeholders::_1, std::ref(*schema)), + .solve_for = std::bind_front(possible_lhs_values, /* col */ nullptr, *v.tokens, schema.get()), .filter = *v.tokens, .on = on_partition_key_token{schema.get()}, .is_singleton = false, // It could return a single token, but it's not important to track it From 63f9362c89757867d288efe2053d364ff60a5fad Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Fri, 6 Dec 2024 20:55:12 +0200 Subject: [PATCH 30/76] cql3: statement_restrictions: fold add_restriction() into its caller The goal is to simplify flow-control where the order in which variables are updated depends on their location in the source. With functions, this is difficult. --- cql3/restrictions/statement_restrictions.cc | 48 ++++++++++----------- cql3/restrictions/statement_restrictions.hh | 1 - 2 files changed, 23 insertions(+), 26 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 03a6dec7c0..dac8231fd7 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1179,7 +1179,29 @@ statement_restrictions::statement_restrictions(private_tag, } expr::binary_operator prepared_restriction = expr::validate_and_prepare_new_restriction(*relation_binop, db, schema, ctx); - add_restriction(prepared_restriction, schema, allow_filtering, for_view); + + auto& restr = prepared_restriction; + if (restr.op == expr::oper_t::IS_NOT) { + // Handle IS NOT NULL restrictions separately + add_is_not_restriction(restr, schema, for_view); + } else if (is_multi_column(restr)) { + // Multi column restrictions are only allowed on clustering columns + add_multi_column_clustering_key_restriction(restr); + } else if (has_partition_token(restr, *_schema)) { + // Token always restricts the partition key + add_token_partition_key_restriction(restr); + } else if (is_single_column_restriction(restr)) { + const 
column_definition* def = get_the_only_column(restr).col; + if (def->is_partition_key()) { + add_single_column_parition_key_restriction(restr, schema, allow_filtering, for_view); + } else if (def->is_clustering_key()) { + add_single_column_clustering_key_restriction(restr, schema, allow_filtering); + } else { + add_single_column_nonprimary_key_restriction(restr); + } + } else { + throw exceptions::invalid_request_exception(format("Unhandled restriction: {}", restr)); + } if (prepared_restriction.op != expr::oper_t::IS_NOT) { _where = _where.has_value() ? make_conjunction(std::move(*_where), prepared_restriction) : prepared_restriction; @@ -1559,30 +1581,6 @@ void statement_restrictions::calculate_column_defs_for_filtering_and_erase_restr _column_defs_for_filtering = std::move(column_defs_for_filtering); } -void statement_restrictions::add_restriction(const expr::binary_operator& restr, schema_ptr schema, bool allow_filtering, bool for_view) { - if (restr.op == expr::oper_t::IS_NOT) { - // Handle IS NOT NULL restrictions separately - add_is_not_restriction(restr, schema, for_view); - } else if (is_multi_column(restr)) { - // Multi column restrictions are only allowed on clustering columns - add_multi_column_clustering_key_restriction(restr); - } else if (has_partition_token(restr, *_schema)) { - // Token always restricts the partition key - add_token_partition_key_restriction(restr); - } else if (is_single_column_restriction(restr)) { - const column_definition* def = get_the_only_column(restr).col; - if (def->is_partition_key()) { - add_single_column_parition_key_restriction(restr, schema, allow_filtering, for_view); - } else if (def->is_clustering_key()) { - add_single_column_clustering_key_restriction(restr, schema, allow_filtering); - } else { - add_single_column_nonprimary_key_restriction(restr); - } - } else { - throw exceptions::invalid_request_exception(format("Unhandled restriction: {}", restr)); - } -} - void 
statement_restrictions::add_is_not_restriction(const expr::binary_operator& restr, schema_ptr schema, bool for_view) { const expr::column_value* lhs_col_def = expr::as_if(&restr.lhs); // The "IS NOT NULL" restriction is only supported (and diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index 98228c55d5..b33f47bafc 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -373,7 +373,6 @@ public: schema_ptr get_view_schema() const { return _view_schema; } private: std::pair, expr::expression> do_find_idx(const secondary_index::secondary_index_manager& sim) const; - void add_restriction(const expr::binary_operator& restr, schema_ptr schema, bool allow_filtering, bool for_view); void add_is_not_restriction(const expr::binary_operator& restr, schema_ptr schema, bool for_view); void add_single_column_parition_key_restriction(const expr::binary_operator& restr, schema_ptr schema, bool allow_filtering, bool for_view); void add_token_partition_key_restriction(const expr::binary_operator& restr); From fa130051a69e11e03b885230398fb0c6b84f2b71 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Fri, 6 Dec 2024 20:55:12 +0200 Subject: [PATCH 31/76] cql3: statement_restrictions: fold add_is_not_restriction() into its caller The goal is to simplify flow-control where the order in which variables are updated depends on their location in the source. With functions, this is difficult. 
--- cql3/restrictions/statement_restrictions.cc | 38 +++++++++------------ cql3/restrictions/statement_restrictions.hh | 1 - 2 files changed, 17 insertions(+), 22 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index dac8231fd7..7a0d69dbe1 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1183,7 +1183,23 @@ statement_restrictions::statement_restrictions(private_tag, auto& restr = prepared_restriction; if (restr.op == expr::oper_t::IS_NOT) { // Handle IS NOT NULL restrictions separately - add_is_not_restriction(restr, schema, for_view); + const expr::column_value* lhs_col_def = expr::as_if(&restr.lhs); + // The "IS NOT NULL" restriction is only supported (and + // mandatory) for materialized view creation: + if (lhs_col_def == nullptr) { + throw exceptions::invalid_request_exception("IS NOT only supports single column"); + } + // currently, the grammar only allows the NULL argument to be + // "IS NOT", so this assertion should not be able to fail + if (!expr::is(restr.rhs) || !expr::as(restr.rhs).is_null()) { + throw exceptions::invalid_request_exception("Only IS NOT NULL is supported"); + } + + _not_null_columns.insert(lhs_col_def->col); + + if (!for_view) { + throw exceptions::invalid_request_exception(format("restriction '{}' is only supported in materialized view creation", restr)); + } } else if (is_multi_column(restr)) { // Multi column restrictions are only allowed on clustering columns add_multi_column_clustering_key_restriction(restr); @@ -1581,26 +1597,6 @@ void statement_restrictions::calculate_column_defs_for_filtering_and_erase_restr _column_defs_for_filtering = std::move(column_defs_for_filtering); } -void statement_restrictions::add_is_not_restriction(const expr::binary_operator& restr, schema_ptr schema, bool for_view) { - const expr::column_value* lhs_col_def = expr::as_if(&restr.lhs); - // The "IS NOT NULL" restriction is 
only supported (and - // mandatory) for materialized view creation: - if (lhs_col_def == nullptr) { - throw exceptions::invalid_request_exception("IS NOT only supports single column"); - } - // currently, the grammar only allows the NULL argument to be - // "IS NOT", so this assertion should not be able to fail - if (!expr::is(restr.rhs) || !expr::as(restr.rhs).is_null()) { - throw exceptions::invalid_request_exception("Only IS NOT NULL is supported"); - } - - _not_null_columns.insert(lhs_col_def->col); - - if (!for_view) { - throw exceptions::invalid_request_exception(format("restriction '{}' is only supported in materialized view creation", restr)); - } -} - void statement_restrictions::add_single_column_parition_key_restriction(const expr::binary_operator& restr, schema_ptr schema, bool allow_filtering, bool for_view) { // View definition allows PK slices, because it's not a performance problem. if (restr.op != expr::oper_t::EQ && restr.op != expr::oper_t::IN && !allow_filtering && !for_view) { diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index b33f47bafc..c533659972 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -373,7 +373,6 @@ public: schema_ptr get_view_schema() const { return _view_schema; } private: std::pair, expr::expression> do_find_idx(const secondary_index::secondary_index_manager& sim) const; - void add_is_not_restriction(const expr::binary_operator& restr, schema_ptr schema, bool for_view); void add_single_column_parition_key_restriction(const expr::binary_operator& restr, schema_ptr schema, bool allow_filtering, bool for_view); void add_token_partition_key_restriction(const expr::binary_operator& restr); void add_single_column_clustering_key_restriction(const expr::binary_operator& restr, schema_ptr schema, bool allow_filtering); From 8990346c75a230f4289e23648eb80c936f20c88e Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Fri, 6 
Dec 2024 22:20:48 +0200 Subject: [PATCH 32/76] cql3: statement_restrictions: avoid early return in add_multi_column_clustering_key_restriction() Prepare for inlining it into its caller, which doesn't work easily if there's an early return. --- cql3/restrictions/statement_restrictions.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 7a0d69dbe1..cf350c8856 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1661,10 +1661,9 @@ void statement_restrictions::add_single_column_clustering_key_restriction(const } void statement_restrictions::add_multi_column_clustering_key_restriction(const expr::binary_operator& restr) { - if (is_empty_restriction(_clustering_columns_restrictions)) { + if (is_empty_restriction(_clustering_columns_restrictions)) { _clustering_columns_restrictions = restr; - return; - } + } else { if (!find_binop(_clustering_columns_restrictions, [] (const expr::binary_operator& b) { return expr::is(b.lhs); @@ -1717,6 +1716,7 @@ void statement_restrictions::add_multi_column_clustering_key_restriction(const e } else { throw exceptions::invalid_request_exception(format("Unsupported multi-column relation: ", restr)); } + } } void statement_restrictions::add_single_column_nonprimary_key_restriction(const expr::binary_operator& restr) { From be3239fc5871dc42c7daae087d0ea7abca0d40d9 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Fri, 6 Dec 2024 20:55:12 +0200 Subject: [PATCH 33/76] cql3: statement_restrictions: fold add_multi_column_clustering_key_restriction() into its caller The goal is to simplify flow-control where the order in which variables are updated depends on their location in the source. With functions, this is difficult. 
--- cql3/restrictions/statement_restrictions.cc | 116 ++++++++++---------- cql3/restrictions/statement_restrictions.hh | 1 - 2 files changed, 56 insertions(+), 61 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index cf350c8856..493d7b6ec9 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1202,7 +1202,62 @@ statement_restrictions::statement_restrictions(private_tag, } } else if (is_multi_column(restr)) { // Multi column restrictions are only allowed on clustering columns - add_multi_column_clustering_key_restriction(restr); + if (is_empty_restriction(_clustering_columns_restrictions)) { + _clustering_columns_restrictions = restr; + } else { + + if (!find_binop(_clustering_columns_restrictions, [] (const expr::binary_operator& b) { + return expr::is(b.lhs); + })) { + throw exceptions::invalid_request_exception("Mixing single column relations and multi column relations on clustering columns is not allowed"); + } + + if (restr.op == expr::oper_t::EQ) { + throw exceptions::invalid_request_exception(format("{} cannot be restricted by more than one relation if it includes an Equal", + expr::get_columns_in_commons(_clustering_columns_restrictions, restr))); + } else if (restr.op == expr::oper_t::IN) { + throw exceptions::invalid_request_exception(format("{} cannot be restricted by more than one relation if it includes a IN", + expr::get_columns_in_commons(_clustering_columns_restrictions, restr))); + } else if (is_slice(restr.op)) { + if (!expr::has_slice(_clustering_columns_restrictions)) { + throw exceptions::invalid_request_exception(format("Column \"{}\" cannot be restricted by both an equality and an inequality relation", + expr::get_columns_in_commons(_clustering_columns_restrictions, restr))); + } + + const expr::binary_operator* other_slice = expr::find_in_expression(_clustering_columns_restrictions, [](const 
expr::binary_operator){return true;}); + if (other_slice == nullptr) { + on_internal_error(rlogger, "add_multi_column_clustering_key_restriction: _clustering_columns_restrictions is empty!"); + } + + // Don't allow to mix plain and SCYLLA_CLUSTERING_BOUND bounds + if (other_slice->order != restr.order) { + static auto order2str = [](auto o) { return o == expr::comparison_order::cql ? "plain" : "SCYLLA_CLUSTERING_BOUND"; }; + throw exceptions::invalid_request_exception( + format("Invalid combination of restrictions ({} / {})", + order2str(other_slice->order), order2str(restr.order))); + } + + // Here check that there aren't two < <= or two > and >= + auto is_greater = [](expr::oper_t op) {return op == expr::oper_t::GT || op == expr::oper_t::GTE; }; + auto is_less = [](expr::oper_t op) {return op == expr::oper_t::LT || op == expr::oper_t::LTE; }; + + if (is_greater(restr.op) && is_greater(other_slice->op)) { + throw exceptions::invalid_request_exception(format( + "More than one restriction was found for the start bound on {}", + expr::get_columns_in_commons(restr, *other_slice))); + } + + if (is_less(restr.op) && is_less(other_slice->op)) { + throw exceptions::invalid_request_exception(format( + "More than one restriction was found for the end bound on {}", + expr::get_columns_in_commons(restr, *other_slice))); + } + + _clustering_columns_restrictions = expr::make_conjunction(_clustering_columns_restrictions, restr); + } else { + throw exceptions::invalid_request_exception(format("Unsupported multi-column relation: ", restr)); + } + } } else if (has_partition_token(restr, *_schema)) { // Token always restricts the partition key add_token_partition_key_restriction(restr); @@ -1660,65 +1715,6 @@ void statement_restrictions::add_single_column_clustering_key_restriction(const _clustering_columns_restrictions = expr::make_conjunction(_clustering_columns_restrictions, restr); } -void statement_restrictions::add_multi_column_clustering_key_restriction(const 
expr::binary_operator& restr) { - if (is_empty_restriction(_clustering_columns_restrictions)) { - _clustering_columns_restrictions = restr; - } else { - - if (!find_binop(_clustering_columns_restrictions, [] (const expr::binary_operator& b) { - return expr::is(b.lhs); - })) { - throw exceptions::invalid_request_exception("Mixing single column relations and multi column relations on clustering columns is not allowed"); - } - - if (restr.op == expr::oper_t::EQ) { - throw exceptions::invalid_request_exception(format("{} cannot be restricted by more than one relation if it includes an Equal", - expr::get_columns_in_commons(_clustering_columns_restrictions, restr))); - } else if (restr.op == expr::oper_t::IN) { - throw exceptions::invalid_request_exception(format("{} cannot be restricted by more than one relation if it includes a IN", - expr::get_columns_in_commons(_clustering_columns_restrictions, restr))); - } else if (is_slice(restr.op)) { - if (!expr::has_slice(_clustering_columns_restrictions)) { - throw exceptions::invalid_request_exception(format("Column \"{}\" cannot be restricted by both an equality and an inequality relation", - expr::get_columns_in_commons(_clustering_columns_restrictions, restr))); - } - - const expr::binary_operator* other_slice = expr::find_in_expression(_clustering_columns_restrictions, [](const expr::binary_operator){return true;}); - if (other_slice == nullptr) { - on_internal_error(rlogger, "add_multi_column_clustering_key_restriction: _clustering_columns_restrictions is empty!"); - } - - // Don't allow to mix plain and SCYLLA_CLUSTERING_BOUND bounds - if (other_slice->order != restr.order) { - static auto order2str = [](auto o) { return o == expr::comparison_order::cql ? 
"plain" : "SCYLLA_CLUSTERING_BOUND"; }; - throw exceptions::invalid_request_exception( - format("Invalid combination of restrictions ({} / {})", - order2str(other_slice->order), order2str(restr.order))); - } - - // Here check that there aren't two < <= or two > and >= - auto is_greater = [](expr::oper_t op) {return op == expr::oper_t::GT || op == expr::oper_t::GTE; }; - auto is_less = [](expr::oper_t op) {return op == expr::oper_t::LT || op == expr::oper_t::LTE; }; - - if (is_greater(restr.op) && is_greater(other_slice->op)) { - throw exceptions::invalid_request_exception(format( - "More than one restriction was found for the start bound on {}", - expr::get_columns_in_commons(restr, *other_slice))); - } - - if (is_less(restr.op) && is_less(other_slice->op)) { - throw exceptions::invalid_request_exception(format( - "More than one restriction was found for the end bound on {}", - expr::get_columns_in_commons(restr, *other_slice))); - } - - _clustering_columns_restrictions = expr::make_conjunction(_clustering_columns_restrictions, restr); - } else { - throw exceptions::invalid_request_exception(format("Unsupported multi-column relation: ", restr)); - } - } -} - void statement_restrictions::add_single_column_nonprimary_key_restriction(const expr::binary_operator& restr) { _nonprimary_key_restrictions = expr::make_conjunction(_nonprimary_key_restrictions, restr); } diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index c533659972..76a3a5f0d3 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -376,7 +376,6 @@ private: void add_single_column_parition_key_restriction(const expr::binary_operator& restr, schema_ptr schema, bool allow_filtering, bool for_view); void add_token_partition_key_restriction(const expr::binary_operator& restr); void add_single_column_clustering_key_restriction(const expr::binary_operator& restr, schema_ptr schema, bool allow_filtering); - 
void add_multi_column_clustering_key_restriction(const expr::binary_operator& restr); void add_single_column_nonprimary_key_restriction(const expr::binary_operator& restr); void process_partition_key_restrictions(bool for_view, bool allow_filtering, statements::statement_type type); From 24cd98e4544c07858eb5ffa39c2b27799edb96aa Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Fri, 6 Dec 2024 20:55:12 +0200 Subject: [PATCH 34/76] cql3: statement_restrictions: fold add_token_partition_key_restriction() into its caller The goal is to simplify flow-control where the order in which variables are updated depends on their location in the source. With functions, this is difficult. --- cql3/restrictions/statement_restrictions.cc | 26 +++++++++------------ cql3/restrictions/statement_restrictions.hh | 1 - 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 493d7b6ec9..b341516918 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1260,7 +1260,17 @@ statement_restrictions::statement_restrictions(private_tag, } } else if (has_partition_token(restr, *_schema)) { // Token always restricts the partition key - add_token_partition_key_restriction(restr); + if (!partition_key_restrictions_is_empty() && !has_token_restrictions()) { + throw exceptions::invalid_request_exception( + seastar::format("Columns \"{}\" cannot be restricted by both a normal relation and a token relation", + fmt::join(expr::get_sorted_column_defs(_partition_key_restrictions) | + std::views::transform([](auto* p) { + return maybe_column_definition{p}; + }), + ", "))); + } + + _partition_key_restrictions = expr::make_conjunction(_partition_key_restrictions, restr); } else if (is_single_column_restriction(restr)) { const column_definition* def = get_the_only_column(restr).col; if (def->is_partition_key()) { @@ -1673,20 +1683,6 @@ void 
statement_restrictions::add_single_column_parition_key_restriction(const ex _partition_range_is_simple &= !find(restr, expr::oper_t::IN); } -void statement_restrictions::add_token_partition_key_restriction(const expr::binary_operator& restr) { - if (!partition_key_restrictions_is_empty() && !has_token_restrictions()) { - throw exceptions::invalid_request_exception( - seastar::format("Columns \"{}\" cannot be restricted by both a normal relation and a token relation", - fmt::join(expr::get_sorted_column_defs(_partition_key_restrictions) | - std::views::transform([](auto* p) { - return maybe_column_definition{p}; - }), - ", "))); - } - - _partition_key_restrictions = expr::make_conjunction(_partition_key_restrictions, restr); -} - void statement_restrictions::add_single_column_clustering_key_restriction(const expr::binary_operator& restr, schema_ptr schema, bool allow_filtering) { if (find_binop(_clustering_columns_restrictions, [] (const expr::binary_operator& b) { return expr::is(b.lhs); diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index 76a3a5f0d3..dbdaa26622 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -374,7 +374,6 @@ public: private: std::pair, expr::expression> do_find_idx(const secondary_index::secondary_index_manager& sim) const; void add_single_column_parition_key_restriction(const expr::binary_operator& restr, schema_ptr schema, bool allow_filtering, bool for_view); - void add_token_partition_key_restriction(const expr::binary_operator& restr); void add_single_column_clustering_key_restriction(const expr::binary_operator& restr, schema_ptr schema, bool allow_filtering); void add_single_column_nonprimary_key_restriction(const expr::binary_operator& restr); From 1d631f7bac023d20da3ded22d227008a5e39788a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Fri, 6 Dec 2024 20:55:12 +0200 Subject: [PATCH 35/76] cql3: statement_restrictions: fold 
add_single_column_partition_key_restriction() into its caller The goal is to simplify flow-control where the order in which variables are updated depends on their location in the source. With functions, this is difficult. --- cql3/restrictions/statement_restrictions.cc | 40 ++++++++++----------- cql3/restrictions/statement_restrictions.hh | 1 - 2 files changed, 18 insertions(+), 23 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index b341516918..e470f061b9 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1274,7 +1274,24 @@ statement_restrictions::statement_restrictions(private_tag, } else if (is_single_column_restriction(restr)) { const column_definition* def = get_the_only_column(restr).col; if (def->is_partition_key()) { - add_single_column_parition_key_restriction(restr, schema, allow_filtering, for_view); + // View definition allows PK slices, because it's not a performance problem. 
+ if (restr.op != expr::oper_t::EQ && restr.op != expr::oper_t::IN && !allow_filtering && !for_view) { + throw exceptions::invalid_request_exception( + "Only EQ and IN relation are supported on the partition key " + "(unless you use the token() function or ALLOW FILTERING)"); + } + if (has_token_restrictions()) { + throw exceptions::invalid_request_exception( + seastar::format("Columns \"{}\" cannot be restricted by both a normal relation and a token relation", + fmt::join(expr::get_sorted_column_defs(_partition_key_restrictions) | + std::views::transform([](auto* p) { + return maybe_column_definition{p}; + }), + ", "))); + } + + _partition_key_restrictions = expr::make_conjunction(_partition_key_restrictions, restr); + _partition_range_is_simple &= !find(restr, expr::oper_t::IN); } else if (def->is_clustering_key()) { add_single_column_clustering_key_restriction(restr, schema, allow_filtering); } else { @@ -1662,27 +1679,6 @@ void statement_restrictions::calculate_column_defs_for_filtering_and_erase_restr _column_defs_for_filtering = std::move(column_defs_for_filtering); } -void statement_restrictions::add_single_column_parition_key_restriction(const expr::binary_operator& restr, schema_ptr schema, bool allow_filtering, bool for_view) { - // View definition allows PK slices, because it's not a performance problem. 
- if (restr.op != expr::oper_t::EQ && restr.op != expr::oper_t::IN && !allow_filtering && !for_view) { - throw exceptions::invalid_request_exception( - "Only EQ and IN relation are supported on the partition key " - "(unless you use the token() function or ALLOW FILTERING)"); - } - if (has_token_restrictions()) { - throw exceptions::invalid_request_exception( - seastar::format("Columns \"{}\" cannot be restricted by both a normal relation and a token relation", - fmt::join(expr::get_sorted_column_defs(_partition_key_restrictions) | - std::views::transform([](auto* p) { - return maybe_column_definition{p}; - }), - ", "))); - } - - _partition_key_restrictions = expr::make_conjunction(_partition_key_restrictions, restr); - _partition_range_is_simple &= !find(restr, expr::oper_t::IN); -} - void statement_restrictions::add_single_column_clustering_key_restriction(const expr::binary_operator& restr, schema_ptr schema, bool allow_filtering) { if (find_binop(_clustering_columns_restrictions, [] (const expr::binary_operator& b) { return expr::is(b.lhs); diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index dbdaa26622..9dfe80e2ec 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -373,7 +373,6 @@ public: schema_ptr get_view_schema() const { return _view_schema; } private: std::pair, expr::expression> do_find_idx(const secondary_index::secondary_index_manager& sim) const; - void add_single_column_parition_key_restriction(const expr::binary_operator& restr, schema_ptr schema, bool allow_filtering, bool for_view); void add_single_column_clustering_key_restriction(const expr::binary_operator& restr, schema_ptr schema, bool allow_filtering); void add_single_column_nonprimary_key_restriction(const expr::binary_operator& restr); From 196574191453ce299091eef77d90c1c7b3849c45 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Fri, 6 Dec 2024 20:55:12 +0200 Subject: [PATCH 
36/76] cql3: statement_restrictions: fold add_single_column_clustering_key_restriction() into its caller The goal is to simplify flow-control where the order in which variables are updated depends on their location in the source. With functions, this is difficult. --- cql3/restrictions/statement_restrictions.cc | 54 ++++++++++----------- cql3/restrictions/statement_restrictions.hh | 1 - 2 files changed, 25 insertions(+), 30 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index e470f061b9..05c19b2d46 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1293,7 +1293,31 @@ statement_restrictions::statement_restrictions(private_tag, _partition_key_restrictions = expr::make_conjunction(_partition_key_restrictions, restr); _partition_range_is_simple &= !find(restr, expr::oper_t::IN); } else if (def->is_clustering_key()) { - add_single_column_clustering_key_restriction(restr, schema, allow_filtering); + if (find_binop(_clustering_columns_restrictions, [] (const expr::binary_operator& b) { + return expr::is(b.lhs); + })) { + throw exceptions::invalid_request_exception( + "Mixing single column relations and multi column relations on clustering columns is not allowed"); + } + + const column_definition* new_column = get_the_only_column(restr).col; + const column_definition* last_column = expr::get_last_column_def(_clustering_columns_restrictions); + + if (last_column != nullptr && !allow_filtering) { + if (has_slice(_clustering_columns_restrictions) && schema->position(*new_column) > schema->position(*last_column)) { + throw exceptions::invalid_request_exception(format("Clustering column \"{}\" cannot be restricted (preceding column \"{}\" is restricted by a non-EQ relation)", + new_column->name_as_text(), last_column->name_as_text())); + } + + if (schema->position(*new_column) < schema->position(*last_column)) { + if (has_slice(restr)) { + throw 
exceptions::invalid_request_exception(format("PRIMARY KEY column \"{}\" cannot be restricted (preceding column \"{}\" is restricted by a non-EQ relation)", + last_column->name_as_text(), new_column->name_as_text())); + } + } + } + + _clustering_columns_restrictions = expr::make_conjunction(_clustering_columns_restrictions, restr); } else { add_single_column_nonprimary_key_restriction(restr); } @@ -1679,34 +1703,6 @@ void statement_restrictions::calculate_column_defs_for_filtering_and_erase_restr _column_defs_for_filtering = std::move(column_defs_for_filtering); } -void statement_restrictions::add_single_column_clustering_key_restriction(const expr::binary_operator& restr, schema_ptr schema, bool allow_filtering) { - if (find_binop(_clustering_columns_restrictions, [] (const expr::binary_operator& b) { - return expr::is(b.lhs); - })) { - throw exceptions::invalid_request_exception( - "Mixing single column relations and multi column relations on clustering columns is not allowed"); - } - - const column_definition* new_column = get_the_only_column(restr).col; - const column_definition* last_column = expr::get_last_column_def(_clustering_columns_restrictions); - - if (last_column != nullptr && !allow_filtering) { - if (has_slice(_clustering_columns_restrictions) && schema->position(*new_column) > schema->position(*last_column)) { - throw exceptions::invalid_request_exception(format("Clustering column \"{}\" cannot be restricted (preceding column \"{}\" is restricted by a non-EQ relation)", - new_column->name_as_text(), last_column->name_as_text())); - } - - if (schema->position(*new_column) < schema->position(*last_column)) { - if (has_slice(restr)) { - throw exceptions::invalid_request_exception(format("PRIMARY KEY column \"{}\" cannot be restricted (preceding column \"{}\" is restricted by a non-EQ relation)", - last_column->name_as_text(), new_column->name_as_text())); - } - } - } - - _clustering_columns_restrictions = 
expr::make_conjunction(_clustering_columns_restrictions, restr); -} - void statement_restrictions::add_single_column_nonprimary_key_restriction(const expr::binary_operator& restr) { _nonprimary_key_restrictions = expr::make_conjunction(_nonprimary_key_restrictions, restr); } diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index 9dfe80e2ec..10b8793830 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -373,7 +373,6 @@ public: schema_ptr get_view_schema() const { return _view_schema; } private: std::pair, expr::expression> do_find_idx(const secondary_index::secondary_index_manager& sim) const; - void add_single_column_clustering_key_restriction(const expr::binary_operator& restr, schema_ptr schema, bool allow_filtering); void add_single_column_nonprimary_key_restriction(const expr::binary_operator& restr); void process_partition_key_restrictions(bool for_view, bool allow_filtering, statements::statement_type type); From 35f14544dc1c8bbe2e963bd6f6a4fb02fe7e5265 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Fri, 6 Dec 2024 20:55:12 +0200 Subject: [PATCH 37/76] cql3: statement_restrictions: fold add_single_column_nonprimary_key_restriction() into its caller The goal is to simplify flow-control where the order in which variables are updated depends on their location in the source. With functions, this is difficult. 
--- cql3/restrictions/statement_restrictions.cc | 6 +----- cql3/restrictions/statement_restrictions.hh | 1 - 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 05c19b2d46..e31e4bf74d 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1319,7 +1319,7 @@ statement_restrictions::statement_restrictions(private_tag, _clustering_columns_restrictions = expr::make_conjunction(_clustering_columns_restrictions, restr); } else { - add_single_column_nonprimary_key_restriction(restr); + _nonprimary_key_restrictions = expr::make_conjunction(_nonprimary_key_restrictions, restr); } } else { throw exceptions::invalid_request_exception(format("Unhandled restriction: {}", restr)); @@ -1703,10 +1703,6 @@ void statement_restrictions::calculate_column_defs_for_filtering_and_erase_restr _column_defs_for_filtering = std::move(column_defs_for_filtering); } -void statement_restrictions::add_single_column_nonprimary_key_restriction(const expr::binary_operator& restr) { - _nonprimary_key_restrictions = expr::make_conjunction(_nonprimary_key_restrictions, restr); -} - void statement_restrictions::process_partition_key_restrictions(bool for_view, bool allow_filtering, statements::statement_type type) { // If there is a queryable index, no special condition are required on the other restrictions. 
// But we still need to know 2 things: diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index 10b8793830..55518a9878 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -373,7 +373,6 @@ public: schema_ptr get_view_schema() const { return _view_schema; } private: std::pair, expr::expression> do_find_idx(const secondary_index::secondary_index_manager& sim) const; - void add_single_column_nonprimary_key_restriction(const expr::binary_operator& restr); void process_partition_key_restrictions(bool for_view, bool allow_filtering, statements::statement_type type); From 694c1aed988195a9e51cbb68dff3eb38a4cc1e2a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Fri, 6 Dec 2024 22:52:25 +0200 Subject: [PATCH 38/76] cql3: statement_restrictions: refactor IS NOT NULL processing Move some code to a helper, but don't let it mutate state. --- cql3/restrictions/statement_restrictions.cc | 32 ++++++++++++--------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index e31e4bf74d..130d30c22e 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1158,6 +1158,23 @@ static std::vector extract_clustering_prefix_restrictions( return prefix; } +static +const column_definition* +extract_column_from_is_not_null_restriction(const expr::binary_operator& restr) { + const expr::column_value* lhs_col_def = expr::as_if(&restr.lhs); + // The "IS NOT NULL" restriction is only supported (and + // mandatory) for materialized view creation: + if (lhs_col_def == nullptr) { + throw exceptions::invalid_request_exception("IS NOT only supports single column"); + } + // currently, the grammar only allows the NULL argument to be + // "IS NOT", so this assertion should not be able to fail + if (!expr::is(restr.rhs) || 
!expr::as(restr.rhs).is_null()) { + throw exceptions::invalid_request_exception("Only IS NOT NULL is supported"); + } + return lhs_col_def->col; +} + statement_restrictions::statement_restrictions(private_tag, data_dictionary::database db, schema_ptr schema, @@ -1183,19 +1200,8 @@ statement_restrictions::statement_restrictions(private_tag, auto& restr = prepared_restriction; if (restr.op == expr::oper_t::IS_NOT) { // Handle IS NOT NULL restrictions separately - const expr::column_value* lhs_col_def = expr::as_if(&restr.lhs); - // The "IS NOT NULL" restriction is only supported (and - // mandatory) for materialized view creation: - if (lhs_col_def == nullptr) { - throw exceptions::invalid_request_exception("IS NOT only supports single column"); - } - // currently, the grammar only allows the NULL argument to be - // "IS NOT", so this assertion should not be able to fail - if (!expr::is(restr.rhs) || !expr::as(restr.rhs).is_null()) { - throw exceptions::invalid_request_exception("Only IS NOT NULL is supported"); - } - - _not_null_columns.insert(lhs_col_def->col); + auto col = extract_column_from_is_not_null_restriction(restr); + _not_null_columns.insert(col); if (!for_view) { throw exceptions::invalid_request_exception(format("restriction '{}' is only supported in materialized view creation", restr)); From 92a43557dc352d9b31caa480e86cc37fed4f30db Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Fri, 28 Mar 2025 21:15:55 +0300 Subject: [PATCH 39/76] cql3: statement_restrictions: split _where to boolean factors in preparation for predicates conversion Expressions are a tree-like structure so a single expression is sufficient (for complicated ones, a conjunction is used), but predicates are flat. Prepare for conversion to predicates by storing the expressions that will correspond to predicates, namely the boolean factors of the WHERE clause. 
--- cql3/restrictions/statement_restrictions.cc | 44 +++++++++++---------- cql3/restrictions/statement_restrictions.hh | 4 +- test/boost/statement_restrictions_test.cc | 18 ++++----- 3 files changed, 34 insertions(+), 32 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 130d30c22e..c1824057f1 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -667,7 +667,7 @@ bool has_eq_restriction_on_column(const column_definition& column, const express return eq_restriction_search_res != nullptr; } -std::vector extract_single_column_restrictions_for_column(const expression& expr, +std::vector extract_single_column_restrictions_for_column(std::span exprs, const column_definition& column) { struct visitor { std::vector restrictions; @@ -725,7 +725,9 @@ std::vector extract_single_column_restrictions_for_column(const expr .current_binary_operator = nullptr, }; - expr::visit(v, expr); + for (auto& e : exprs) { + expr::visit(v, e); + } return std::move(v.restrictions); } @@ -784,7 +786,7 @@ single_column_restrictions_map get_single_column_restrictions_map(const expressi std::vector sorted_defs = get_sorted_column_defs(e); for (const column_definition* cdef : sorted_defs) { expression col_restrictions = conjunction { - .children = extract_single_column_restrictions_for_column(e, *cdef) + .children = extract_single_column_restrictions_for_column(std::span(&e, 1), *cdef) }; result.emplace(cdef, std::move(col_restrictions)); } @@ -868,7 +870,7 @@ void with_current_binary_operator( /// Every token, or if no tokens, an EQ/IN of every single PK column. 
static partition_range_restrictions extract_partition_range( - const expr::expression& where_clause, schema_ptr schema) { + std::span where_clause, schema_ptr schema) { using namespace expr; struct extract_partition_range_visitor { schema_ptr table_schema; @@ -975,7 +977,10 @@ static partition_range_restrictions extract_partition_range( .table_schema = schema }; - expr::visit(v, where_clause); + for (auto& e : where_clause) { + expr::visit(v, e); + } + if (v.tokens) { return token_range_restrictions{ .token_restrictions = predicate{ @@ -999,7 +1004,7 @@ static partition_range_restrictions extract_partition_range( /// boundaries of any clustering slice that can possibly meet where_clause. This vector can be calculated before /// binding expression markers, since LHS and operator are always known. static std::vector extract_clustering_prefix_restrictions( - const expr::expression& where_clause, schema_ptr schema) { + std::span where_clause, schema_ptr schema) { using namespace expr; /// Collects all clustering-column restrictions from an expression. Presumes the expression only uses @@ -1133,7 +1138,9 @@ static std::vector extract_clustering_prefix_restrictions( .table_schema = schema }; - expr::visit(v, where_clause); + for (auto& e : where_clause) { + expr::visit(v, e); + } if (!v.multi.empty()) { return std::move(v.multi); @@ -1332,10 +1339,10 @@ statement_restrictions::statement_restrictions(private_tag, } if (prepared_restriction.op != expr::oper_t::IS_NOT) { - _where = _where.has_value() ? 
make_conjunction(std::move(*_where), prepared_restriction) : prepared_restriction; + _where.push_back(prepared_restriction); } } - if (_where.has_value()) { + if (!_where.empty()) { if (!has_token_restrictions()) { _single_column_partition_key_restrictions = get_single_column_restrictions_map(_partition_key_restrictions); } @@ -1343,8 +1350,8 @@ statement_restrictions::statement_restrictions(private_tag, _single_column_clustering_key_restrictions = get_single_column_restrictions_map(_clustering_columns_restrictions); } _single_column_nonprimary_key_restrictions = get_single_column_restrictions_map(_nonprimary_key_restrictions); - _clustering_prefix_restrictions = extract_clustering_prefix_restrictions(*_where, _schema); - _partition_range_restrictions = extract_partition_range(*_where, _schema); + _clustering_prefix_restrictions = extract_clustering_prefix_restrictions(_where, _schema); + _partition_range_restrictions = extract_partition_range(_where, _schema); } _has_multi_column = find_binop(_clustering_columns_restrictions, is_multi_column); if (_check_indexes) { @@ -1582,7 +1589,7 @@ const std::vector& statement_restrictions::index_restrictions( } bool statement_restrictions::is_empty() const { - return !_where.has_value(); + return _where.empty(); } // Current score table: @@ -1620,7 +1627,7 @@ std::pair, expr::expression> statement_res expr::for_each_expression(restriction, [&](const expr::column_value& cval) { auto& cdef = cval.col; expr::expression col_restrictions = expr::conjunction { - .children = extract_single_column_restrictions_for_column(restriction, *cdef) + .children = extract_single_column_restrictions_for_column(std::span(&restriction, 1), *cdef) }; for (const auto& index : sim.list_indexes()) { if (cdef->name_as_text() == index.target_column() && @@ -1642,11 +1649,8 @@ statement_restrictions::find_idx(const secondary_index::secondary_index_manager& } bool statement_restrictions::has_eq_restriction_on_column(const column_definition& column) 
const { - if (!_where.has_value()) { - return false; - } - - return restrictions::has_eq_restriction_on_column(column, *_where); + return std::ranges::any_of(_where, + std::bind_front(restrictions::has_eq_restriction_on_column, std::ref(column))); } std::vector statement_restrictions::get_column_defs_for_filtering(data_dictionary::database db) const { @@ -2932,7 +2936,7 @@ void statement_restrictions::prepare_indexed_local(const schema& idx_tbl_schema) // Find index column restrictions in the WHERE clause std::vector idx_col_restrictions = - extract_single_column_restrictions_for_column(*_where, indexed_column_base_schema); + extract_single_column_restrictions_for_column(_where, indexed_column_base_schema); expr::expression idx_col_restriction_expr = expr::expression(expr::conjunction{std::move(idx_col_restrictions)}); // Translate the restriction to use column from the index schema and add it @@ -3082,7 +3086,7 @@ statement_restrictions::value_for_index_partition_key(const query_options& optio } sstring statement_restrictions::to_string() const { - return _where ? expr::to_string(*_where) : ""; + return !_where.empty() ? expr::to_string(expr::conjunction{.children = _where}) : ""; } static void validate_primary_key_restrictions(const query_options& options, std::ranges::range auto&& restrictions) { diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index 55518a9878..37b96f31ac 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -161,7 +161,7 @@ private: bool _has_queriable_regular_index = false, _has_queriable_pk_index = false, _has_queriable_ck_index = false; bool _has_multi_column; ///< True iff _clustering_columns_restrictions has a multi-column restriction. - std::optional _where; ///< The entire WHERE clause. + std::vector _where; ///< The entire WHERE clause (factorized). /// Parts of _where defining the clustering slice. 
/// @@ -532,7 +532,7 @@ shared_ptr make_trivial_statement_restrictions( // Does not include token() restrictions. // Does not include boolean constant restrictions. // For example "WHERE c = 1 AND (a, c) = (2, 1) AND token(p) < 2 AND FALSE" will return {"c = 1"}. -std::vector extract_single_column_restrictions_for_column(const expr::expression&, const column_definition&); +std::vector extract_single_column_restrictions_for_column(std::span, const column_definition&); // Checks whether this expression is empty - doesn't restrict anything diff --git a/test/boost/statement_restrictions_test.cc b/test/boost/statement_restrictions_test.cc index 0230f504ee..c50cd63dea 100644 --- a/test/boost/statement_restrictions_test.cc +++ b/test/boost/statement_restrictions_test.cc @@ -1211,7 +1211,7 @@ BOOST_AUTO_TEST_CASE(expression_extract_column_restrictions) { column_definition col_r3 = make_column("r3", column_kind::regular_column, 2); // Empty input test - assert_expr_vec_eq(cql3::restrictions::extract_single_column_restrictions_for_column(conjunction{}, col_pk1), {}); + assert_expr_vec_eq(cql3::restrictions::extract_single_column_restrictions_for_column({}, col_pk1), {}); // BIG_WHERE test // big_where contains: @@ -1293,27 +1293,25 @@ BOOST_AUTO_TEST_CASE(expression_extract_column_restrictions) { big_where.push_back(pk2_expr); big_where.push_back(pk1_pk2_expr); - expression big_where_expr = conjunction{std::move(big_where)}; - - assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_pk1), + assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where, col_pk1), {pk1_restriction}); - assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_pk2), + assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where, col_pk2), {pk2_restriction, pk2_restriction2}); - 
assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_ck1), + assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where, col_ck1), {ck1_restriction}); - assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_ck2), + assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where, col_ck2), {ck2_restriction}); - assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_r1), + assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where, col_r1), {r1_restriction, r1_restriction2, r1_restriction3}); - assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_r2), + assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where, col_r2), {r2_restriction}); - assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_r3), + assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where, col_r3), {}); } From 370f3fd2e882a3f428dac5ca7f2f804e2d7f256c Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sat, 29 Mar 2025 00:26:35 +0300 Subject: [PATCH 40/76] cql3: statement_restrictions: convert possible_lhs_values into a solver Convert from an execute-time function to a prepare-time function by returning a solver function instead of directly solving. When not possible to solve, but still possible to evaluate (filter), return nullptr. 
--- cql3/restrictions/statement_restrictions.cc | 140 ++++++++++++-------- 1 file changed, 88 insertions(+), 52 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index c1824057f1..f2ed6f847a 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -281,63 +281,94 @@ require_on_single_column(const predicate& p) { on_internal_error(rlogger, "require_on_single_column: predicate is not on a single column"); } +/// Given an expression and a column definition , builds a function that returns +/// the set of all column values that would satisfy the expression. The _token_values variant finds +/// matching values for the partition token function call instead of the column. +/// +/// An expression restricts possible values of a column or token: +/// - `A>5` restricts A from below +/// - `A>5 AND A>6 AND B<10 AND A=12 AND B>0` restricts A to 12 and B to between 0 and 10 +/// - `A IN (1, 3, 5)` restricts A to 1, 3, or 5 +/// - `A IN (1, 3, 5) AND A>3` restricts A to just 5 +/// - `A=1 AND A<=0` restricts A to an empty list; no value is able to satisfy the expression +/// - `A>=NULL` also restricts A to an empty list; all comparisons to NULL are false +/// - an expression without A "restricts" A to unbounded range +// // When cdef == nullptr it finds possible token values instead of column values. // When finding token values the table_schema_opt argument has to point to a valid schema, // but it isn't used when finding values for column. // The schema is needed to find out whether a call to token() function represents // the partition token. -static value_set possible_lhs_values(const column_definition* cdef, +static +solve_for_t +possible_lhs_values(const column_definition* cdef, const expression& expr, - const schema* table_schema_opt, - const query_options& options) { + const schema* table_schema_opt) { const auto type = cdef ? 
&cdef->type->without_reversed() : long_type.get(); return expr::visit(overloaded_functor{ - [] (const constant& constant_val) { + [] (const constant& constant_val) -> solve_for_t { std::optional bool_val = get_bool_value(constant_val); if (bool_val.has_value()) { - return *bool_val ? unbounded_value_set : empty_value_set; + return *bool_val + ? solve_for_t([] (const query_options&) { return unbounded_value_set; }) + : solve_for_t([] (const query_options&) { return empty_value_set; }); } on_internal_error(expr_logger, "possible_lhs_values: a constant that is not a bool value cannot serve as a restriction by itself"); }, - [&] (const conjunction& conj) { - return std::ranges::fold_left(conj.children, unbounded_value_set, [&](value_set&& acc, const expression& child) { + [&] (const conjunction& conj) -> solve_for_t { + auto children = + conj.children + | std::views::transform([&] (const expression& e) { + return possible_lhs_values(cdef, e, table_schema_opt); + }) + | std::ranges::to(); + return [children, type] (const query_options& options) -> value_set { + return std::ranges::fold_left(children, unbounded_value_set, [&](value_set&& acc, const solve_for_t& child) { return intersection( - std::move(acc), possible_lhs_values(cdef, child, table_schema_opt, options), type); - }); + std::move(acc), child(options), type); + }); + }; }, - [&] (const binary_operator& oper) -> value_set { + [&] (const binary_operator& oper) -> solve_for_t { return expr::visit(overloaded_functor{ - [&] (const column_value& col) -> value_set { + [&] (const column_value& col) -> solve_for_t { if (!cdef || cdef != col.col) { - return unbounded_value_set; + return [] (const query_options&) { return unbounded_value_set; }; } if (is_compare(oper.op)) { + return [oper] (const query_options& options) { managed_bytes_opt val = evaluate(oper.rhs, options).to_managed_bytes_opt(); if (!val) { return empty_value_set; // All NULL comparisons fail; no column values match. } return oper.op == oper_t::EQ ? 
value_set(value_list{*val}) : to_range(oper.op, std::move(*val)); + }; } else if (oper.op == oper_t::IN) { + return [oper, type, cdef] (const query_options& options) { return get_IN_values(oper.rhs, options, type->as_less_comparator(), cdef->name_as_text()); + }; } else if (oper.op == oper_t::CONTAINS || oper.op == oper_t::CONTAINS_KEY) { + return [oper] (const query_options& options) { managed_bytes_opt val = evaluate(oper.rhs, options).to_managed_bytes_opt(); if (!val) { return empty_value_set; // All NULL comparisons fail; no column values match. } return value_set(value_list{*val}); + }; } - throw std::logic_error(format("possible_lhs_values: unhandled operator {}", oper)); + return nullptr; }, - [&] (const subscript& s) -> value_set { + [&] (const subscript& s) -> solve_for_t { const column_value& col = get_subscripted_column(s); if (!cdef || cdef != col.col) { - return unbounded_value_set; + return [] (const query_options&) { return unbounded_value_set; }; } + return [s, oper] (const query_options& options) { managed_bytes_opt sval = evaluate(s.sub, options).to_managed_bytes_opt(); if (!sval) { return empty_value_set; // NULL can't be a map key @@ -353,20 +384,24 @@ static value_set possible_lhs_values(const column_definition* cdef, return value_set(value_list{val}); } throw std::logic_error(format("possible_lhs_values: unhandled operator {}", oper)); + }; }, - [&] (const tuple_constructor& tuple) -> value_set { + [&] (const tuple_constructor& tuple) -> solve_for_t { + return [cdef] (const query_options& options) -> value_set { on_internal_error(rlogger, fmt::format("possible_lhs_values: trying to solve for {} on tuple inequality", cdef ? 
"single column" : "token")); + }; }, - [&] (const function_call& token_fun_call) -> value_set { + [&] (const function_call& token_fun_call) -> solve_for_t { if (!is_partition_token_for_schema(token_fun_call, *table_schema_opt)) { on_internal_error(expr_logger, "possible_lhs_values: function calls are not supported as the LHS of a binary expression"); } if (cdef) { - return unbounded_value_set; + return [] (const query_options&) -> value_set { return unbounded_value_set; }; } + return [oper] (const query_options& options) -> value_set { auto val = evaluate(oper.rhs, options).to_managed_bytes_opt(); if (!val) { return empty_value_set; // All NULL comparisons fail; no token values match. @@ -389,82 +424,83 @@ static value_set possible_lhs_values(const column_definition* cdef, return interval::make_ending_with(interval_bound(std::move(adjusted_val), inclusive)); } throw std::logic_error(format("get_token_interval invalid operator {}", oper.op)); + }; }, - [&] (const binary_operator&) -> value_set { + [&] (const binary_operator&) -> solve_for_t { on_internal_error(expr_logger, "possible_lhs_values: nested binary operators are not supported"); }, - [&] (const conjunction&) -> value_set { + [&] (const conjunction&) -> solve_for_t { on_internal_error(expr_logger, "possible_lhs_values: conjunctions are not supported as the LHS of a binary expression"); }, - [] (const constant&) -> value_set { + [] (const constant&) -> solve_for_t { on_internal_error(expr_logger, "possible_lhs_values: constants are not supported as the LHS of a binary expression"); }, - [] (const unresolved_identifier&) -> value_set { + [] (const unresolved_identifier&) -> solve_for_t { on_internal_error(expr_logger, "possible_lhs_values: unresolved identifiers are not supported as the LHS of a binary expression"); }, - [] (const column_mutation_attribute&) -> value_set { + [] (const column_mutation_attribute&) -> solve_for_t { on_internal_error(expr_logger, "possible_lhs_values: writetime/ttl are not 
supported as the LHS of a binary expression"); }, - [] (const cast&) -> value_set { + [] (const cast&) -> solve_for_t { on_internal_error(expr_logger, "possible_lhs_values: typecasts are not supported as the LHS of a binary expression"); }, - [] (const field_selection&) -> value_set { + [] (const field_selection&) -> solve_for_t { on_internal_error(expr_logger, "possible_lhs_values: field selections are not supported as the LHS of a binary expression"); }, - [] (const bind_variable&) -> value_set { + [] (const bind_variable&) -> solve_for_t { on_internal_error(expr_logger, "possible_lhs_values: bind variables are not supported as the LHS of a binary expression"); }, - [] (const untyped_constant&) -> value_set { + [] (const untyped_constant&) -> solve_for_t { on_internal_error(expr_logger, "possible_lhs_values: untyped constants are not supported as the LHS of a binary expression"); }, - [] (const collection_constructor&) -> value_set { + [] (const collection_constructor&) -> solve_for_t { on_internal_error(expr_logger, "possible_lhs_values: collection constructors are not supported as the LHS of a binary expression"); }, - [] (const usertype_constructor&) -> value_set { + [] (const usertype_constructor&) -> solve_for_t { on_internal_error(expr_logger, "possible_lhs_values: user type constructors are not supported as the LHS of a binary expression"); }, - [] (const temporary&) -> value_set { + [] (const temporary&) -> solve_for_t { on_internal_error(expr_logger, "possible_lhs_values: temporaries are not supported as the LHS of a binary expression"); }, }, oper.lhs); }, - [] (const column_value&) -> value_set { + [] (const column_value&) -> solve_for_t { on_internal_error(expr_logger, "possible_lhs_values: a column cannot serve as a restriction by itself"); }, - [] (const subscript&) -> value_set { + [] (const subscript&) -> solve_for_t { on_internal_error(expr_logger, "possible_lhs_values: a subscript cannot serve as a restriction by itself"); }, - [] (const 
unresolved_identifier&) -> value_set { + [] (const unresolved_identifier&) -> solve_for_t { on_internal_error(expr_logger, "possible_lhs_values: an unresolved identifier cannot serve as a restriction"); }, - [] (const column_mutation_attribute&) -> value_set { + [] (const column_mutation_attribute&) -> solve_for_t { on_internal_error(expr_logger, "possible_lhs_values: the writetime/ttl functions cannot serve as a restriction by itself"); }, - [] (const function_call&) -> value_set { + [] (const function_call&) -> solve_for_t { on_internal_error(expr_logger, "possible_lhs_values: a function call cannot serve as a restriction by itself"); }, - [] (const cast&) -> value_set { + [] (const cast&) -> solve_for_t { on_internal_error(expr_logger, "possible_lhs_values: a typecast cannot serve as a restriction by itself"); }, - [] (const field_selection&) -> value_set { + [] (const field_selection&) -> solve_for_t { on_internal_error(expr_logger, "possible_lhs_values: a field selection cannot serve as a restriction by itself"); }, - [] (const bind_variable&) -> value_set { + [] (const bind_variable&) -> solve_for_t { on_internal_error(expr_logger, "possible_lhs_values: a bind variable cannot serve as a restriction by itself"); }, - [] (const untyped_constant&) -> value_set { + [] (const untyped_constant&) -> solve_for_t { on_internal_error(expr_logger, "possible_lhs_values: an untyped constant cannot serve as a restriction by itself"); }, - [] (const tuple_constructor&) -> value_set { + [] (const tuple_constructor&) -> solve_for_t { on_internal_error(expr_logger, "possible_lhs_values: an tuple constructor cannot serve as a restriction by itself"); }, - [] (const collection_constructor&) -> value_set { + [] (const collection_constructor&) -> solve_for_t { on_internal_error(expr_logger, "possible_lhs_values: a collection constructor cannot serve as a restriction by itself"); }, - [] (const usertype_constructor&) -> value_set { + [] (const usertype_constructor&) -> solve_for_t 
{ on_internal_error(expr_logger, "possible_lhs_values: a user type constructor cannot serve as a restriction by itself"); }, - [] (const temporary&) -> value_set { + [] (const temporary&) -> solve_for_t { on_internal_error(expr_logger, "possible_lhs_values: a temporary cannot serve as a restriction by itself"); }, }, expr); @@ -806,7 +842,7 @@ static std::function build_value_for_fn(const column_definition& cdef, const expression& e, const schema& s) { auto ac = predicate{ - .solve_for = std::bind_front(possible_lhs_values, &cdef, e, &s), + .solve_for = possible_lhs_values(&cdef, e, &s), .filter = e, .on = on_column{&cdef}, .is_singleton = false, // Code below assumes 0 or 1 results. @@ -910,7 +946,7 @@ static partition_range_restrictions extract_partition_range( with_current_binary_operator(*this, [&] (const binary_operator& b) { if (s->col->is_partition_key() && (b.op == oper_t::EQ || b.op == oper_t::IN)) { auto a = predicate{ - .solve_for = std::bind_front(possible_lhs_values, s->col, b, table_schema.get()), + .solve_for = possible_lhs_values(s->col, b, table_schema.get()), .filter = b, .on = on_column{s->col}, .is_singleton = b.op == oper_t::EQ, @@ -985,7 +1021,7 @@ static partition_range_restrictions extract_partition_range( return token_range_restrictions{ .token_restrictions = predicate{ // It's not really a column, but... 
- .solve_for = std::bind_front(possible_lhs_values, /* col */ nullptr, *v.tokens, schema.get()), + .solve_for = possible_lhs_values(/* col */ nullptr, *v.tokens, schema.get()), .filter = *v.tokens, .on = on_partition_key_token{schema.get()}, .is_singleton = false, // It could return a single token, but it's not important to track it @@ -1041,7 +1077,7 @@ static std::vector extract_clustering_prefix_restrictions( } with_current_binary_operator(*this, [&] (const binary_operator& b) { multi.push_back(predicate{ - .solve_for = std::bind_front(possible_lhs_values, /* col */ nullptr, b, table_schema.get()), + .solve_for = possible_lhs_values(/* col */ nullptr, b, table_schema.get()), .filter = b, .on = on_clustering_key_prefix{prefix}, .is_singleton = false, @@ -1055,7 +1091,7 @@ static std::vector extract_clustering_prefix_restrictions( with_current_binary_operator(*this, [&] (const binary_operator& b) { if (s->col->is_clustering_key()) { auto a = predicate{ - .solve_for = std::bind_front(possible_lhs_values, s->col, b, table_schema.get()), + .solve_for = possible_lhs_values(s->col, b, table_schema.get()), .filter = b, .on = on_column{s->col}, .is_singleton = b.op == oper_t::EQ, @@ -1074,7 +1110,7 @@ static std::vector extract_clustering_prefix_restrictions( with_current_binary_operator(*this, [&] (const binary_operator& b) { if (cval.col->is_clustering_key()) { auto a = predicate{ - .solve_for = std::bind_front(possible_lhs_values, cval.col, b, table_schema.get()), + .solve_for = possible_lhs_values(cval.col, b, table_schema.get()), .filter = b, .on = on_column{cval.col}, .is_singleton = b.op == oper_t::EQ, @@ -2838,7 +2874,7 @@ void statement_restrictions::prepare_indexed_global(const schema& idx_tbl_schema // Clustering prefix ends after token_restriction, all further restrictions have to be filtered. 
expr::expression token_restriction = replace_partition_token(_partition_key_restrictions, token_column, *_schema); _idx_tbl_ck_prefix = std::vector{predicate{ - .solve_for = std::bind_front(possible_lhs_values, token_column, token_restriction, _schema.get()), + .solve_for = possible_lhs_values(token_column, token_restriction, _schema.get()), .filter = token_restriction, .on = on_column{token_column}, .is_singleton = false, // FIXME: could be a singleton token. Not very important. @@ -2942,7 +2978,7 @@ void statement_restrictions::prepare_indexed_local(const schema& idx_tbl_schema) // Translate the restriction to use column from the index schema and add it expr::expression replaced_idx_restriction = replace_column_def(idx_col_restriction_expr, &indexed_column); _idx_tbl_ck_prefix->push_back(predicate{ - .solve_for = std::bind_front(possible_lhs_values, &indexed_column, replaced_idx_restriction, _schema.get()), + .solve_for = possible_lhs_values(&indexed_column, replaced_idx_restriction, _schema.get()), .filter = replaced_idx_restriction, .on = on_column{&indexed_column}, .is_singleton = false, // Could be true, but not important. @@ -2966,7 +3002,7 @@ void statement_restrictions::add_clustering_restrictions_to_idx_ck_prefix(const auto col_in_index = idx_tbl_schema.get_column_definition(col->name()); auto replaced = replace_column_def(e.filter, col_in_index); auto a = predicate{ - .solve_for = std::bind_front(possible_lhs_values, col_in_index, replaced, &idx_tbl_schema), + .solve_for = possible_lhs_values(col_in_index, replaced, &idx_tbl_schema), .filter = replaced, .on = on_column{col_in_index}, .is_singleton = false, // FIXME: could be a singleton token. Not very important. 
From a28689a99a42f5725ec220ed26bbe4b143a06e5c Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sat, 29 Mar 2025 01:12:48 +0300 Subject: [PATCH 41/76] cql3: statement_restrictions: return nullptr from possible_lhs_values instead of on_internal_error Since we're a first-resort call now, and there's a last-resort (evaluate). Logically should be part of previous patch, but the rest of the code is still careful enough not to call here when not expecting a solution, so the split is not breaking bisectability. --- cql3/restrictions/statement_restrictions.cc | 53 ++++++++++----------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index f2ed6f847a..185aa6df45 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -314,8 +314,7 @@ possible_lhs_values(const column_definition* cdef, : solve_for_t([] (const query_options&) { return empty_value_set; }); } - on_internal_error(expr_logger, - "possible_lhs_values: a constant that is not a bool value cannot serve as a restriction by itself"); + return nullptr; }, [&] (const conjunction& conj) -> solve_for_t { auto children = @@ -427,81 +426,81 @@ possible_lhs_values(const column_definition* cdef, }; }, [&] (const binary_operator&) -> solve_for_t { - on_internal_error(expr_logger, "possible_lhs_values: nested binary operators are not supported"); + return nullptr; }, [&] (const conjunction&) -> solve_for_t { - on_internal_error(expr_logger, "possible_lhs_values: conjunctions are not supported as the LHS of a binary expression"); + return nullptr; }, [] (const constant&) -> solve_for_t { - on_internal_error(expr_logger, "possible_lhs_values: constants are not supported as the LHS of a binary expression"); + return nullptr; }, [] (const unresolved_identifier&) -> solve_for_t { - on_internal_error(expr_logger, "possible_lhs_values: unresolved identifiers are not supported as the 
LHS of a binary expression"); + return nullptr; }, [] (const column_mutation_attribute&) -> solve_for_t { - on_internal_error(expr_logger, "possible_lhs_values: writetime/ttl are not supported as the LHS of a binary expression"); + return nullptr; }, [] (const cast&) -> solve_for_t { - on_internal_error(expr_logger, "possible_lhs_values: typecasts are not supported as the LHS of a binary expression"); + return nullptr; }, [] (const field_selection&) -> solve_for_t { - on_internal_error(expr_logger, "possible_lhs_values: field selections are not supported as the LHS of a binary expression"); + return nullptr; }, [] (const bind_variable&) -> solve_for_t { - on_internal_error(expr_logger, "possible_lhs_values: bind variables are not supported as the LHS of a binary expression"); + return nullptr; }, [] (const untyped_constant&) -> solve_for_t { - on_internal_error(expr_logger, "possible_lhs_values: untyped constants are not supported as the LHS of a binary expression"); + return nullptr; }, [] (const collection_constructor&) -> solve_for_t { - on_internal_error(expr_logger, "possible_lhs_values: collection constructors are not supported as the LHS of a binary expression"); + return nullptr; }, [] (const usertype_constructor&) -> solve_for_t { - on_internal_error(expr_logger, "possible_lhs_values: user type constructors are not supported as the LHS of a binary expression"); + return nullptr; }, [] (const temporary&) -> solve_for_t { - on_internal_error(expr_logger, "possible_lhs_values: temporaries are not supported as the LHS of a binary expression"); + return nullptr; }, }, oper.lhs); }, [] (const column_value&) -> solve_for_t { - on_internal_error(expr_logger, "possible_lhs_values: a column cannot serve as a restriction by itself"); + return nullptr; }, [] (const subscript&) -> solve_for_t { - on_internal_error(expr_logger, "possible_lhs_values: a subscript cannot serve as a restriction by itself"); + return nullptr; }, [] (const unresolved_identifier&) -> 
solve_for_t { - on_internal_error(expr_logger, "possible_lhs_values: an unresolved identifier cannot serve as a restriction"); + return nullptr; }, [] (const column_mutation_attribute&) -> solve_for_t { - on_internal_error(expr_logger, "possible_lhs_values: the writetime/ttl functions cannot serve as a restriction by itself"); + return nullptr; }, [] (const function_call&) -> solve_for_t { - on_internal_error(expr_logger, "possible_lhs_values: a function call cannot serve as a restriction by itself"); + return nullptr; }, [] (const cast&) -> solve_for_t { - on_internal_error(expr_logger, "possible_lhs_values: a typecast cannot serve as a restriction by itself"); + return nullptr; }, [] (const field_selection&) -> solve_for_t { - on_internal_error(expr_logger, "possible_lhs_values: a field selection cannot serve as a restriction by itself"); + return nullptr; }, [] (const bind_variable&) -> solve_for_t { - on_internal_error(expr_logger, "possible_lhs_values: a bind variable cannot serve as a restriction by itself"); + return nullptr; }, [] (const untyped_constant&) -> solve_for_t { - on_internal_error(expr_logger, "possible_lhs_values: an untyped constant cannot serve as a restriction by itself"); + return nullptr; }, [] (const tuple_constructor&) -> solve_for_t { - on_internal_error(expr_logger, "possible_lhs_values: an tuple constructor cannot serve as a restriction by itself"); + return nullptr; }, [] (const collection_constructor&) -> solve_for_t { - on_internal_error(expr_logger, "possible_lhs_values: a collection constructor cannot serve as a restriction by itself"); + return nullptr; }, [] (const usertype_constructor&) -> solve_for_t { - on_internal_error(expr_logger, "possible_lhs_values: a user type constructor cannot serve as a restriction by itself"); + return nullptr; }, [] (const temporary&) -> solve_for_t { - on_internal_error(expr_logger, "possible_lhs_values: a temporary cannot serve as a restriction by itself"); + return nullptr; }, }, expr); } From 
8faf62a1aa95281776917375aa679fb92c7567ec Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sat, 29 Mar 2025 01:32:16 +0300 Subject: [PATCH 42/76] cql3: statement_restrictions: refine possible_lhs_values() subscript solving Do more work at prepare time. --- cql3/restrictions/statement_restrictions.cc | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 185aa6df45..7c84b403a5 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -367,13 +367,13 @@ possible_lhs_values(const column_definition* cdef, return [] (const query_options&) { return unbounded_value_set; }; } - return [s, oper] (const query_options& options) { - managed_bytes_opt sval = evaluate(s.sub, options).to_managed_bytes_opt(); - if (!sval) { - return empty_value_set; // NULL can't be a map key - } + if (oper.op == oper_t::EQ) { + return [s, oper] (const query_options& options) { + managed_bytes_opt sval = evaluate(s.sub, options).to_managed_bytes_opt(); + if (!sval) { + return empty_value_set; // NULL can't be a map key + } - if (oper.op == oper_t::EQ) { managed_bytes_opt rval = evaluate(oper.rhs, options).to_managed_bytes_opt(); if (!rval) { return empty_value_set; // All NULL comparisons fail; no column values match. 
@@ -381,9 +381,9 @@ possible_lhs_values(const column_definition* cdef, managed_bytes_opt elements[] = {sval, rval}; managed_bytes val = tuple_type_impl::build_value_fragmented(elements); return value_set(value_list{val}); - } - throw std::logic_error(format("possible_lhs_values: unhandled operator {}", oper)); - }; + }; + } + return nullptr; }, [&] (const tuple_constructor& tuple) -> solve_for_t { return [cdef] (const query_options& options) -> value_set { From 736011b663781d92dc7baee209db907870249f1f Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sat, 29 Mar 2025 01:35:15 +0300 Subject: [PATCH 43/76] cql3: statement_restrictions: return nullptr for function solver if not token Currently, possible_lhs_values() for a function call expression will only be called when we're sure it's the token() function. But soon this will no longer be the case. Return nullptr for non-token functions to indicate we can't solve for a column value instead of an internal error. --- cql3/restrictions/statement_restrictions.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 7c84b403a5..569dd30c81 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -394,7 +394,7 @@ possible_lhs_values(const column_definition* cdef, }, [&] (const function_call& token_fun_call) -> solve_for_t { if (!is_partition_token_for_schema(token_fun_call, *table_schema_opt)) { - on_internal_error(expr_logger, "possible_lhs_values: function calls are not supported as the LHS of a binary expression"); + return nullptr; } if (cdef) { From bfd13023117300ed2e5f83032dcac7567886eead Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sat, 29 Mar 2025 01:43:08 +0300 Subject: [PATCH 44/76] cql3: statement_restrictions: refine possible_lhs_values() function_call processing Currently, we are careful to call possible_lhs_values() for a token function only when 
slice/equality operators are used. We wish to relax this, so return nullptr (must filter) for the other cases instead of raising an internal error. --- cql3/restrictions/statement_restrictions.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 569dd30c81..d0027cd37e 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -400,6 +400,10 @@ possible_lhs_values(const column_definition* cdef, if (cdef) { return [] (const query_options&) -> value_set { return unbounded_value_set; }; } + + if (!(oper.op == oper_t::EQ || is_slice(oper.op))) { + return nullptr; + } return [oper] (const query_options& options) -> value_set { auto val = evaluate(oper.rhs, options).to_managed_bytes_opt(); if (!val) { @@ -422,7 +426,7 @@ possible_lhs_values(const column_definition* cdef, } else if (oper.op == oper_t::LTE) { return interval::make_ending_with(interval_bound(std::move(adjusted_val), inclusive)); } - throw std::logic_error(format("get_token_interval invalid operator {}", oper.op)); + throw std::logic_error(format("get_token_interval unexpected operator {}", oper.op)); }; }, [&] (const binary_operator&) -> solve_for_t { From ed5dd645e86f536a7335cffa9fc4f2f57afc2023 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 1 May 2025 20:27:24 +0300 Subject: [PATCH 45/76] cql3: statement_restrictions: convert expressions to predicates without being directed at a specific column Currently, possible_lhs_values accepts a column_definition parameter that tells it which column we are interested in. This works because callers pre-analyze the expression and only pass a subexpression that contains the specified columns. We wish to convert expressions to predicates early, and so won't have the benefit of knowing which columns we're interested in. 
Generally, this is simple: a binary operator contains a column on the left-hand side, so use that. If the expression is on a token, use that. When the expression is a boolean constant (not expressible by the grammar, but somehow found its way into the code), we invent a new `on_row` designator meaning it's not about a specific column. It will be useful one day when we allow things like `WHERE some_boolean_function(c1, c2)` that aren't specific to any single column. Finally, we introduce helpers that, given such an expression decomposed into predicates and a column_definition, extract the predicate related to the given column. This mimics the possible_lhs_values API and allows us to make minimal changes to callers, deferring that until later. possible_lhs_values() is renamed to to_predicates() and loses the column_definition parameter to indicate its new role. --- cql3/restrictions/statement_restrictions.cc | 410 ++++++++++++-------- cql3/restrictions/statement_restrictions.hh | 5 + 2 files changed, 251 insertions(+), 164 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index d0027cd37e..de82680651 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -232,6 +232,7 @@ data_type type(const predicate& p) { return std::visit( overloaded_functor{ + [] (const on_row&) { return boolean_type; }, // Not true, but the type won't be used. 
[] (const on_column& oc) { return oc.column->type->without_reversed().shared_from_this(); }, [] (const on_partition_key_token&) { return long_type; }, [] (const on_clustering_key_prefix&) -> data_type { on_internal_error(rlogger, "type: asked for clustering key prefix type"); }, @@ -281,9 +282,10 @@ require_on_single_column(const predicate& p) { on_internal_error(rlogger, "require_on_single_column: predicate is not on a single column"); } -/// Given an expression and a column definition , builds a function that returns -/// the set of all column values that would satisfy the expression. The _token_values variant finds -/// matching values for the partition token function call instead of the column. +/// Given an expression, decompose it into a set of predicates, on individual columns, +/// the table's tokens, or multiple columns. A predicate may know how to solve for +/// the set of all column values that would satisfy the expression, treated a a boolean +/// predicate on the column. If it does, the .solve_for member is set. /// /// An expression restricts possible values of a column or token: /// - `A>5` restricts A from below @@ -300,44 +302,62 @@ require_on_single_column(const predicate& p) { // The schema is needed to find out whether a call to token() function represents // the partition token. static -solve_for_t -possible_lhs_values(const column_definition* cdef, - const expression& expr, - const schema* table_schema_opt) { - const auto type = cdef ? 
&cdef->type->without_reversed() : long_type.get(); +std::vector +to_predicates( + const expression& expr, + const schema* table_schema_opt) { + static auto to_vector = [] (predicate p) -> std::vector { + return {std::move(p)}; + }; + static auto cannot_solve = [] (const expression& e) -> std::vector { + return to_vector(predicate{ + .solve_for = nullptr, + .filter = e, + .on = on_row{}, + }); + }; + static auto cannot_solve_on_column = [] (const expression& e, const column_definition* cdef) -> std::vector { + return to_vector(predicate{ + .solve_for = nullptr, + .filter = e, + .on = on_column{cdef}, + }); + }; return expr::visit(overloaded_functor{ - [] (const constant& constant_val) -> solve_for_t { + [] (const constant& constant_val) -> std::vector { std::optional bool_val = get_bool_value(constant_val); if (bool_val.has_value()) { - return *bool_val + auto solve = *bool_val ? solve_for_t([] (const query_options&) { return unbounded_value_set; }) : solve_for_t([] (const query_options&) { return empty_value_set; }); + return to_vector(predicate{ + .solve_for = std::move(solve), + .filter = constant_val, + .on = on_row{}, + }); } - return nullptr; - }, - [&] (const conjunction& conj) -> solve_for_t { - auto children = - conj.children - | std::views::transform([&] (const expression& e) { - return possible_lhs_values(cdef, e, table_schema_opt); - }) - | std::ranges::to(); - return [children, type] (const query_options& options) -> value_set { - return std::ranges::fold_left(children, unbounded_value_set, [&](value_set&& acc, const solve_for_t& child) { - return intersection( - std::move(acc), child(options), type); + return to_vector(predicate{ + .solve_for = [] (const query_options&) { return unbounded_value_set; }, + .filter = constant_val, + .on = on_row{}, }); - }; }, - [&] (const binary_operator& oper) -> solve_for_t { + [&] (const conjunction& conj) -> std::vector { + std::vector ret; + for (auto& pa : conj.children) { + auto p = to_predicates(pa, 
table_schema_opt); + ret.insert(ret.end(), p.begin(), p.end()); + } + return ret; + }, + [&] (const binary_operator& oper) -> std::vector { return expr::visit(overloaded_functor{ - [&] (const column_value& col) -> solve_for_t { - if (!cdef || cdef != col.col) { - return [] (const query_options&) { return unbounded_value_set; }; - } + [&] (const column_value& col) -> std::vector { + auto cdef = col.col; + auto type = &cdef->type->without_reversed(); if (is_compare(oper.op)) { - return [oper] (const query_options& options) { + auto solve = [oper] (const query_options& options) { managed_bytes_opt val = evaluate(oper.rhs, options).to_managed_bytes_opt(); if (!val) { return empty_value_set; // All NULL comparisons fail; no column values match. @@ -345,30 +365,44 @@ possible_lhs_values(const column_definition* cdef, return oper.op == oper_t::EQ ? value_set(value_list{*val}) : to_range(oper.op, std::move(*val)); }; + return to_vector(predicate{ + .solve_for = std::move(solve), + .filter = oper, + .on = on_column{col.col}, + .is_singleton = (oper.op == oper_t::EQ), + }); } else if (oper.op == oper_t::IN) { - return [oper, type, cdef] (const query_options& options) { + auto solve = [oper, type, cdef] (const query_options& options) { return get_IN_values(oper.rhs, options, type->as_less_comparator(), cdef->name_as_text()); }; + return to_vector(predicate{ + .solve_for = std::move(solve), + .filter = oper, + .on = on_column{col.col}, + .is_singleton = false, + }); } else if (oper.op == oper_t::CONTAINS || oper.op == oper_t::CONTAINS_KEY) { - return [oper] (const query_options& options) { + auto solve = [oper] (const query_options& options) { managed_bytes_opt val = evaluate(oper.rhs, options).to_managed_bytes_opt(); if (!val) { return empty_value_set; // All NULL comparisons fail; no column values match. 
} return value_set(value_list{*val}); }; + return to_vector(predicate{ + .solve_for = std::move(solve), + .filter = oper, + .on = on_column{col.col}, + .is_singleton = false, + }); } - return nullptr; + return cannot_solve_on_column(oper, col.col); }, - [&] (const subscript& s) -> solve_for_t { + [&] (const subscript& s) -> std::vector { const column_value& col = get_subscripted_column(s); - if (!cdef || cdef != col.col) { - return [] (const query_options&) { return unbounded_value_set; }; - } - if (oper.op == oper_t::EQ) { - return [s, oper] (const query_options& options) { + auto solve = [s, oper] (const query_options& options) { managed_bytes_opt sval = evaluate(s.sub, options).to_managed_bytes_opt(); if (!sval) { return empty_value_set; // NULL can't be a map key @@ -382,29 +416,54 @@ possible_lhs_values(const column_definition* cdef, managed_bytes val = tuple_type_impl::build_value_fragmented(elements); return value_set(value_list{val}); }; + return to_vector(predicate{ + .solve_for = std::move(solve), + .filter = oper, + .on = on_column{col.col}, + .is_singleton = true, + }); } - return nullptr; + return cannot_solve_on_column(oper, col.col); }, - [&] (const tuple_constructor& tuple) -> solve_for_t { - return [cdef] (const query_options& options) -> value_set { - on_internal_error(rlogger, - fmt::format("possible_lhs_values: trying to solve for {} on tuple inequality", - cdef ? 
"single column" : "token")); - }; - }, - [&] (const function_call& token_fun_call) -> solve_for_t { - if (!is_partition_token_for_schema(token_fun_call, *table_schema_opt)) { - return nullptr; + [&] (const tuple_constructor& tuple) -> std::vector { + auto columns = tuple.elements + | std::views::transform([] (const expression& e) { return as(e).col; }) + | std::ranges::to(); + for (unsigned i = 0; i < columns.size(); ++i) { + if (!columns[i]->is_clustering_key() || columns[i]->position() != i) { + on_internal_error(rlogger, "to_predicates: multi-column relation not on a clustering key prefix"); + } } - - if (cdef) { - return [] (const query_options&) -> value_set { return unbounded_value_set; }; + // The solve_for lambda is only correct for EQ; other operators + // (IN, slices) are handled directly by + // build_get_multi_column_clustering_bounds_fn() which bypasses + // solve_for and evaluates the binary_operator's RHS itself. + solve_for_t solve = nullptr; + if (oper.op == oper_t::EQ) { + solve = [oper] (const query_options& options) { + managed_bytes_opt val = evaluate(oper.rhs, options).to_managed_bytes_opt(); + if (!val) { + return empty_value_set; // All NULL comparisons fail; no column values match. 
+ } + return value_set(value_list{*val}); + }; + } + return to_vector(predicate{ + .solve_for = std::move(solve), + .filter = oper, + .on = on_clustering_key_prefix{std::move(columns)}, + .is_singleton = oper.op == oper_t::EQ, + }); + }, + [&] (const function_call& token_fun_call) -> std::vector { + if (!is_partition_token_for_schema(token_fun_call, *table_schema_opt)) { + return cannot_solve(oper); } if (!(oper.op == oper_t::EQ || is_slice(oper.op))) { - return nullptr; + return cannot_solve(oper); } - return [oper] (const query_options& options) -> value_set { + auto solve = [oper] (const query_options& options) -> value_set { auto val = evaluate(oper.rhs, options).to_managed_bytes_opt(); if (!val) { return empty_value_set; // All NULL comparisons fail; no token values match. @@ -428,87 +487,157 @@ possible_lhs_values(const column_definition* cdef, } throw std::logic_error(format("get_token_interval unexpected operator {}", oper.op)); }; + return to_vector(predicate{ + .solve_for = std::move(solve), + .filter = oper, + .on = on_partition_key_token{table_schema_opt}, + .is_singleton = (oper.op == oper_t::EQ), + }); }, - [&] (const binary_operator&) -> solve_for_t { - return nullptr; + [&] (const binary_operator&) -> std::vector { + return cannot_solve(oper); }, - [&] (const conjunction&) -> solve_for_t { - return nullptr; + [&] (const conjunction&) -> std::vector { + return cannot_solve(oper); }, - [] (const constant&) -> solve_for_t { - return nullptr; + [&] (const constant&) -> std::vector { + return cannot_solve(oper); }, - [] (const unresolved_identifier&) -> solve_for_t { - return nullptr; + [&] (const unresolved_identifier&) -> std::vector { + return cannot_solve(oper); }, - [] (const column_mutation_attribute&) -> solve_for_t { - return nullptr; + [&] (const column_mutation_attribute&) -> std::vector { + return cannot_solve(oper); }, - [] (const cast&) -> solve_for_t { - return nullptr; + [&] (const cast&) -> std::vector { + return cannot_solve(oper); }, - 
[] (const field_selection&) -> solve_for_t { - return nullptr; + [&] (const field_selection&) -> std::vector { + return cannot_solve(oper); }, - [] (const bind_variable&) -> solve_for_t { - return nullptr; + [&] (const bind_variable&) -> std::vector { + return cannot_solve(oper); }, - [] (const untyped_constant&) -> solve_for_t { - return nullptr; + [&] (const untyped_constant&) -> std::vector { + return cannot_solve(oper); }, - [] (const collection_constructor&) -> solve_for_t { - return nullptr; + [&] (const collection_constructor&) -> std::vector { + return cannot_solve(oper); }, - [] (const usertype_constructor&) -> solve_for_t { - return nullptr; + [&] (const usertype_constructor&) -> std::vector { + return cannot_solve(oper); }, - [] (const temporary&) -> solve_for_t { - return nullptr; + [&] (const temporary&) -> std::vector { + return cannot_solve(oper); }, }, oper.lhs); }, - [] (const column_value&) -> solve_for_t { - return nullptr; + [] (const column_value& cv) -> std::vector { + return cannot_solve(cv); }, - [] (const subscript&) -> solve_for_t { - return nullptr; + [] (const subscript& s) -> std::vector { + return cannot_solve(s); }, - [] (const unresolved_identifier&) -> solve_for_t { - return nullptr; + [] (const unresolved_identifier& ui) -> std::vector { + return cannot_solve(ui); }, - [] (const column_mutation_attribute&) -> solve_for_t { - return nullptr; + [] (const column_mutation_attribute& cma) -> std::vector { + return cannot_solve(cma); }, - [] (const function_call&) -> solve_for_t { - return nullptr; + [] (const function_call& fc) -> std::vector { + return cannot_solve(fc); }, - [] (const cast&) -> solve_for_t { - return nullptr; + [] (const cast& c) -> std::vector { + return cannot_solve(c); }, - [] (const field_selection&) -> solve_for_t { - return nullptr; + [] (const field_selection& fs) -> std::vector { + return cannot_solve(fs); }, - [] (const bind_variable&) -> solve_for_t { - return nullptr; + [] (const bind_variable& bv) -> 
std::vector { + return cannot_solve(bv); }, - [] (const untyped_constant&) -> solve_for_t { - return nullptr; + [] (const untyped_constant& uc) -> std::vector { + return cannot_solve(uc); }, - [] (const tuple_constructor&) -> solve_for_t { - return nullptr; + [] (const tuple_constructor& tc) -> std::vector { + return cannot_solve(tc); }, - [] (const collection_constructor&) -> solve_for_t { - return nullptr; + [] (const collection_constructor& cc) -> std::vector { + return cannot_solve(cc); }, - [] (const usertype_constructor&) -> solve_for_t { - return nullptr; + [] (const usertype_constructor& uc) -> std::vector { + return cannot_solve(uc); }, - [] (const temporary&) -> solve_for_t { - return nullptr; + [] (const temporary& t) -> std::vector { + return cannot_solve(t); }, }, expr); } +// Convert an expression to a predicate on a column. If cdef is nullptr, the predicate +// is on the partition key token. +static +predicate +to_predicate_on_column( + const expression& expr, + const column_definition* cdef, + const schema* table_schema_opt) { + auto predicates = to_predicates(expr, table_schema_opt); + using on_t = std::variant< + on_row, // cannot determine, so predicate is on entire row + on_column, // solving for a single column: e.g. c1 = 3 + on_partition_key_token, // solving for the token, e.g. token(pk1, pk2) >= :var + on_clustering_key_prefix // solving for a clustering key prefix: e.g. (ck1, ck2) >= (3, 4) + >; + auto target = cdef ? 
on_t(on_column{cdef}) : on_t(on_partition_key_token{table_schema_opt}); + auto collected = std::vector{}; + for (auto& predicate : predicates) { + if (predicate.on == target) { + collected.push_back(std::move(predicate)); + continue; + } + } + if (collected.empty()) { + on_internal_error(rlogger, "to_predicate_on_column: no predicates found"); + } + auto ret = std::ranges::fold_left_first( + collected | std::views::as_rvalue, + make_conjunction + ); + if (!ret) { + on_internal_error(rlogger, "to_predicate_on_column: no predicates found"); + } + return std::move(*ret); +} + +// Convert an expression to a predicate on a column. If cdef is nullptr, the predicate +// is on the partition key token. +static +predicate +to_predicate_on_clustering_key_prefix( + const expression& expr, + const schema* table_schema_opt) { + auto predicates = to_predicates(expr, table_schema_opt); + std::vector collected; + for (auto& predicate : predicates) { + if (std::holds_alternative(predicate.on)) { + collected.push_back(std::move(predicate)); + continue; + } + } + if (collected.empty()) { + on_internal_error(rlogger, "to_predicate_on_clustering_key_prefix: no predicates found"); + } + auto ret = std::ranges::fold_left_first( + collected | std::views::as_rvalue, + make_conjunction + ); + if (!ret) { + on_internal_error(rlogger, "to_predicate_on_clustering_key_prefix: no predicates found"); + } + return std::move(*ret); +} + interval to_range(const value_set& s) { return std::visit(overloaded_functor{ [] (const interval& r) { return r; }, @@ -844,12 +973,7 @@ bool is_empty_restriction(const expression& e) { static std::function build_value_for_fn(const column_definition& cdef, const expression& e, const schema& s) { - auto ac = predicate{ - .solve_for = possible_lhs_values(&cdef, e, &s), - .filter = e, - .on = on_column{&cdef}, - .is_singleton = false, // Code below assumes 0 or 1 results. 
- }; + auto ac = to_predicate_on_column(e, &cdef, &s); return [ac] (const query_options& options) -> bytes_opt { value_set possible_vals = solve(ac, options); return std::visit(overloaded_functor { @@ -948,12 +1072,7 @@ static partition_range_restrictions extract_partition_range( auto s = &cv; with_current_binary_operator(*this, [&] (const binary_operator& b) { if (s->col->is_partition_key() && (b.op == oper_t::EQ || b.op == oper_t::IN)) { - auto a = predicate{ - .solve_for = possible_lhs_values(s->col, b, table_schema.get()), - .filter = b, - .on = on_column{s->col}, - .is_singleton = b.op == oper_t::EQ, - }; + auto a = to_predicate_on_column(b, s->col, table_schema.get()); const auto [it, inserted] = single_column.try_emplace(s->col, std::move(a)); if (!inserted) { it->second = make_conjunction(std::move(it->second), std::move(a)); @@ -1022,13 +1141,7 @@ static partition_range_restrictions extract_partition_range( if (v.tokens) { return token_range_restrictions{ - .token_restrictions = predicate{ - // It's not really a column, but... 
- .solve_for = possible_lhs_values(/* col */ nullptr, *v.tokens, schema.get()), - .filter = *v.tokens, - .on = on_partition_key_token{schema.get()}, - .is_singleton = false, // It could return a single token, but it's not important to track it - }, + .token_restrictions = to_predicate_on_column(*v.tokens, nullptr, schema.get()), }; } if (v.single_column.size() == schema->partition_key_size()) { @@ -1079,13 +1192,7 @@ static std::vector extract_clustering_prefix_restrictions( } } with_current_binary_operator(*this, [&] (const binary_operator& b) { - multi.push_back(predicate{ - .solve_for = possible_lhs_values(/* col */ nullptr, b, table_schema.get()), - .filter = b, - .on = on_clustering_key_prefix{prefix}, - .is_singleton = false, - .comparable = false, - }); + multi.push_back(to_predicate_on_clustering_key_prefix(b, table_schema.get())); }); } @@ -1093,12 +1200,7 @@ static std::vector extract_clustering_prefix_restrictions( auto s = &cv; with_current_binary_operator(*this, [&] (const binary_operator& b) { if (s->col->is_clustering_key()) { - auto a = predicate{ - .solve_for = possible_lhs_values(s->col, b, table_schema.get()), - .filter = b, - .on = on_column{s->col}, - .is_singleton = b.op == oper_t::EQ, - }; + auto a = to_predicate_on_column(b, s->col, table_schema.get()); const auto [it, inserted] = single.try_emplace(s->col, std::move(a)); if (!inserted) { it->second = make_conjunction(std::move(it->second), std::move(a)); @@ -1112,12 +1214,7 @@ static std::vector extract_clustering_prefix_restrictions( with_current_binary_operator(*this, [&] (const binary_operator& b) { if (cval.col->is_clustering_key()) { - auto a = predicate{ - .solve_for = possible_lhs_values(cval.col, b, table_schema.get()), - .filter = b, - .on = on_column{cval.col}, - .is_singleton = b.op == oper_t::EQ, - }; + auto a = to_predicate_on_column(b, cval.col, table_schema.get()); const auto [it, inserted] = single.try_emplace(cval.col, std::move(a)); if (!inserted) { it->second = 
make_conjunction(std::move(it->second), std::move(a)); @@ -2497,10 +2594,10 @@ static std::vector get_index_v1_token_range_clustering_ const column_definition& token_column, const predicate& token_restriction) { - // A workaround in order to make possible_lhs_values work properly. - // possible_lhs_values looks at the column type and uses this type's comparator. + // A workaround in order to make to_predicate work properly. + // to_predicate looks at the column type and uses this type's comparator. // This is a problem because when using blob's comparator, -4 is greater than 4. - // This makes possible_lhs_values think that an expression like token(p) > -4 and token(p) < 4 + // This makes to_predicate think that an expression like token(p) > -4 and token(p) < 4 // is impossible to fulfill. // Create a fake token column with the type set to bigint, translate the restriction to use this column // and use this restriction to calculate possible lhs values. @@ -2876,12 +2973,7 @@ void statement_restrictions::prepare_indexed_global(const schema& idx_tbl_schema // This means that p1 and p2 can have many different values (token is a hash, can have collisions). // Clustering prefix ends after token_restriction, all further restrictions have to be filtered. expr::expression token_restriction = replace_partition_token(_partition_key_restrictions, token_column, *_schema); - _idx_tbl_ck_prefix = std::vector{predicate{ - .solve_for = possible_lhs_values(token_column, token_restriction, _schema.get()), - .filter = token_restriction, - .on = on_column{token_column}, - .is_singleton = false, // FIXME: could be a singleton token. Not very important. 
- }}; + _idx_tbl_ck_prefix = std::vector{to_predicate_on_column(token_restriction, token_column, _schema.get())}; return; } @@ -2980,12 +3072,7 @@ void statement_restrictions::prepare_indexed_local(const schema& idx_tbl_schema) // Translate the restriction to use column from the index schema and add it expr::expression replaced_idx_restriction = replace_column_def(idx_col_restriction_expr, &indexed_column); - _idx_tbl_ck_prefix->push_back(predicate{ - .solve_for = possible_lhs_values(&indexed_column, replaced_idx_restriction, _schema.get()), - .filter = replaced_idx_restriction, - .on = on_column{&indexed_column}, - .is_singleton = false, // Could be true, but not important. - }); + _idx_tbl_ck_prefix->push_back(to_predicate_on_column(replaced_idx_restriction, &indexed_column, _schema.get())); // Add restrictions for the clustering key add_clustering_restrictions_to_idx_ck_prefix(idx_tbl_schema); @@ -3004,12 +3091,7 @@ void statement_restrictions::add_clustering_restrictions_to_idx_ck_prefix(const const auto col = expr::as(any_binop->lhs).col; auto col_in_index = idx_tbl_schema.get_column_definition(col->name()); auto replaced = replace_column_def(e.filter, col_in_index); - auto a = predicate{ - .solve_for = possible_lhs_values(col_in_index, replaced, &idx_tbl_schema), - .filter = replaced, - .on = on_column{col_in_index}, - .is_singleton = false, // FIXME: could be a singleton token. Not very important. - }; + auto a = to_predicate_on_column(replaced, col_in_index, &idx_tbl_schema); _idx_tbl_ck_prefix->push_back(std::move(a)); } } diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index 37b96f31ac..74f08ffc1f 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -35,6 +35,10 @@ using value_set = std::variant>; // clause to TRUE. 
using solve_for_t = std::function; +struct on_row { + bool operator==(const on_row&) const = default; +}; + struct on_column { const column_definition* column; @@ -65,6 +69,7 @@ struct predicate { expr::expression filter; // What column the predicate can be solved for std::variant< + on_row, // cannot determine, so predicate is on entire row on_column, // solving for a single column: e.g. c1 = 3 on_partition_key_token, // solving for the token, e.g. token(pk1, pk2) >= :var on_clustering_key_prefix // solving for a clustering key prefix: e.g. (ck1, ck2) >= (3, 4) From 689264217636d1192450bfe0dfdbdc6f19a912a7 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 5 May 2025 17:52:54 +0300 Subject: [PATCH 46/76] cql3: statement_restrictions: complete preparation early We want to move away from the unprepared domain to the prepared domain to avoid confusion. Ideally we'd receive prepared expressions via the constructor, but that is left for later. --- cql3/restrictions/statement_restrictions.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index de82680651..36484699df 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1331,6 +1331,7 @@ statement_restrictions::statement_restrictions(private_tag, : statement_restrictions(private_tag{}, schema, allow_filtering) { _check_indexes = do_check_indexes; + std::vector prepared_where_clause; for (auto&& relation_expr : boolean_factors(where_clause)) { const expr::binary_operator* relation_binop = expr::as_if(&relation_expr); @@ -1339,7 +1340,10 @@ statement_restrictions::statement_restrictions(private_tag, } expr::binary_operator prepared_restriction = expr::validate_and_prepare_new_restriction(*relation_binop, db, schema, ctx); + prepared_where_clause.push_back(std::move(prepared_restriction)); + } + for (auto& prepared_restriction : prepared_where_clause) { auto& restr = 
prepared_restriction; if (restr.op == expr::oper_t::IS_NOT) { // Handle IS NOT NULL restrictions separately From e0eb3bde8dc7a8e153c94724bae15f181cba7853 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sat, 10 May 2025 19:55:58 +0300 Subject: [PATCH 47/76] cql3: statement_restrictions: annotate predicates with is_not_null and is_multi_column To avoid having to dig deep into the expression, compute is_not_null_single_column and is_multi_column early and store them in the predicate. --- cql3/restrictions/statement_restrictions.cc | 20 ++++++++++++++++++++ cql3/restrictions/statement_restrictions.hh | 2 ++ 2 files changed, 22 insertions(+) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 36484699df..aa31b9458c 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -270,6 +270,8 @@ make_conjunction(predicate a, predicate b) { .on = a.on, .is_singleton = false, // Even if both columns are singletons, the conjunction of them can return zero values. .comparable = a.comparable && b.comparable, // Result is only comparable if both inputs follow CQL comparison semantics. + .is_multi_column = a.is_multi_column, // Both predicates are on the same target, so they agree on multi-column-ness. + .is_not_null_single_column = false, // A conjunction is not a pure IS NOT NULL check. }; } @@ -282,6 +284,15 @@ require_on_single_column(const predicate& p) { on_internal_error(rlogger, "require_on_single_column: predicate is not on a single column"); } +static +bool +is_null_constant(const expression& e) { + if (auto* c = as_if(&e)) { + return c->value.is_null(); + } + return false; +} + /// Given an expression, decompose it into a set of predicates, on individual columns, /// the table's tokens, or multiple columns. 
A predicate may know how to solve for /// the set of all column values that would satisfy the expression, treated a a boolean @@ -356,6 +367,14 @@ to_predicates( [&] (const column_value& col) -> std::vector { auto cdef = col.col; auto type = &cdef->type->without_reversed(); + if (oper.op == oper_t::IS_NOT) { + return to_vector(predicate{ + .solve_for = nullptr, + .filter = oper, + .on = on_column{col.col}, + .is_not_null_single_column = is_null_constant(oper.rhs), + }); + } if (is_compare(oper.op)) { auto solve = [oper] (const query_options& options) { managed_bytes_opt val = evaluate(oper.rhs, options).to_managed_bytes_opt(); @@ -453,6 +472,7 @@ to_predicates( .filter = oper, .on = on_clustering_key_prefix{std::move(columns)}, .is_singleton = oper.op == oper_t::EQ, + .is_multi_column = true, }); }, [&] (const function_call& token_fun_call) -> std::vector { diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index 74f08ffc1f..d4703a3029 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -78,6 +78,8 @@ struct predicate { bool is_singleton = false; // Whether the returned value_set follows CQL comparison semantics bool comparable = true; + bool is_multi_column = false; + bool is_not_null_single_column = false; }; ///In some cases checking if columns have indexes is undesired of even From 440d9f2d821d8f16f3b26649497ad872dbf5653d Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 Mar 2026 00:33:15 +0200 Subject: [PATCH 48/76] cql3: statement_restrictions: annotate predicates with operator properties Add fields to struct predicate that describe the operator: the booleans equality, is_in, is_slice, is_upper_bound, and is_lower_bound, plus order (an expr::comparison_order). Populate them in all to_predicates() return sites. These fields will allow the constructor loop to inspect predicate properties directly instead of re-examining the expression. 
--- cql3/restrictions/statement_restrictions.cc | 31 +++++++++++++++++++++ cql3/restrictions/statement_restrictions.hh | 6 ++++ 2 files changed, 37 insertions(+) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index aa31b9458c..27aeb21e59 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -251,6 +251,10 @@ make_conjunction(predicate a, predicate b) { on_internal_error(rlogger, "make_conjunction: merging non-comparable columns"); } + if (a.order != b.order) { + on_internal_error(rlogger, "make_conjunction: merging predicates with different comparison orders"); + } + auto& sa = a.solve_for; auto& sb = b.solve_for; @@ -272,6 +276,12 @@ make_conjunction(predicate a, predicate b) { .comparable = a.comparable && b.comparable, // Result is only comparable if both inputs follow CQL comparison semantics. .is_multi_column = a.is_multi_column, // Both predicates are on the same target, so they agree on multi-column-ness. .is_not_null_single_column = false, // A conjunction is not a pure IS NOT NULL check. + .equality = false, // A conjunction is not a single EQ. + .is_in = false, // A conjunction is not a single IN. + .is_slice = false, // A conjunction is not a single slice. + .is_upper_bound = false, // A conjunction has no single direction. + .is_lower_bound = false, // A conjunction has no single direction. + .order = a.order, // Both predicates are on the same column, so comparison order must agree. 
}; } @@ -389,6 +399,11 @@ to_predicates( .filter = oper, .on = on_column{col.col}, .is_singleton = (oper.op == oper_t::EQ), + .equality = (oper.op == oper_t::EQ), + .is_slice = expr::is_slice(oper.op), + .is_upper_bound = (oper.op == oper_t::LT || oper.op == oper_t::LTE), + .is_lower_bound = (oper.op == oper_t::GT || oper.op == oper_t::GTE), + .order = oper.order, }); } else if (oper.op == oper_t::IN) { auto solve = [oper, type, cdef] (const query_options& options) { @@ -399,6 +414,8 @@ to_predicates( .filter = oper, .on = on_column{col.col}, .is_singleton = false, + .is_in = true, + .order = oper.order, }); } else if (oper.op == oper_t::CONTAINS || oper.op == oper_t::CONTAINS_KEY) { auto solve = [oper] (const query_options& options) { @@ -413,6 +430,7 @@ to_predicates( .filter = oper, .on = on_column{col.col}, .is_singleton = false, + .order = oper.order, }); } return cannot_solve_on_column(oper, col.col); @@ -440,6 +458,8 @@ to_predicates( .filter = oper, .on = on_column{col.col}, .is_singleton = true, + .equality = true, + .order = oper.order, }); } return cannot_solve_on_column(oper, col.col); @@ -473,6 +493,12 @@ to_predicates( .on = on_clustering_key_prefix{std::move(columns)}, .is_singleton = oper.op == oper_t::EQ, .is_multi_column = true, + .equality = (oper.op == oper_t::EQ), + .is_in = (oper.op == oper_t::IN), + .is_slice = expr::is_slice(oper.op), + .is_upper_bound = (oper.op == oper_t::LT || oper.op == oper_t::LTE), + .is_lower_bound = (oper.op == oper_t::GT || oper.op == oper_t::GTE), + .order = oper.order, }); }, [&] (const function_call& token_fun_call) -> std::vector { @@ -512,6 +538,11 @@ to_predicates( .filter = oper, .on = on_partition_key_token{table_schema_opt}, .is_singleton = (oper.op == oper_t::EQ), + .equality = (oper.op == oper_t::EQ), + .is_slice = expr::is_slice(oper.op), + .is_upper_bound = (oper.op == oper_t::LT || oper.op == oper_t::LTE), + .is_lower_bound = (oper.op == oper_t::GT || oper.op == oper_t::GTE), + .order = oper.order, }); 
}, [&] (const binary_operator&) -> std::vector { diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index d4703a3029..a88675266c 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -80,6 +80,12 @@ struct predicate { bool comparable = true; bool is_multi_column = false; bool is_not_null_single_column = false; + bool equality = false; // operator is EQ + bool is_in = false; // operator is IN + bool is_slice = false; // operator is LT/LTE/GT/GTE + bool is_upper_bound = false; // operator is LT/LTE + bool is_lower_bound = false; // operator is GT/GTE + expr::comparison_order order = expr::comparison_order::cql; }; ///In some cases checking if columns have indexes is undesired of even From afd68187ea1bdba0b0ecac7a90f047d4bd86d384 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 Mar 2026 00:37:09 +0200 Subject: [PATCH 49/76] cql3: statement_restrictions: convert constructor loop to iterate over predicates Convert the constructor loop to first build predicates from the prepared where clause, then iterate over the predicates. The IS_NOT branch now uses pred.is_not_null_single_column and pred.on instead of inspecting the expression directly. The branch conditions for multi-column (pred.is_multi_column), token (on_partition_key_token), and single-column (on_column) now use predicate properties instead of expression helpers. Remove extract_column_from_is_not_null_restriction() which is no longer needed. 
--- cql3/restrictions/statement_restrictions.cc | 48 ++++++++------------- 1 file changed, 18 insertions(+), 30 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 27aeb21e59..0443af8e25 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1352,23 +1352,6 @@ static std::vector extract_clustering_prefix_restrictions( return prefix; } -static -const column_definition* -extract_column_from_is_not_null_restriction(const expr::binary_operator& restr) { - const expr::column_value* lhs_col_def = expr::as_if(&restr.lhs); - // The "IS NOT NULL" restriction is only supported (and - // mandatory) for materialized view creation: - if (lhs_col_def == nullptr) { - throw exceptions::invalid_request_exception("IS NOT only supports single column"); - } - // currently, the grammar only allows the NULL argument to be - // "IS NOT", so this assertion should not be able to fail - if (!expr::is(restr.rhs) || !expr::as(restr.rhs).is_null()) { - throw exceptions::invalid_request_exception("Only IS NOT NULL is supported"); - } - return lhs_col_def->col; -} - statement_restrictions::statement_restrictions(private_tag, data_dictionary::database db, schema_ptr schema, @@ -1394,17 +1377,22 @@ statement_restrictions::statement_restrictions(private_tag, prepared_where_clause.push_back(std::move(prepared_restriction)); } + std::vector predicates; for (auto& prepared_restriction : prepared_where_clause) { - auto& restr = prepared_restriction; - if (restr.op == expr::oper_t::IS_NOT) { - // Handle IS NOT NULL restrictions separately - auto col = extract_column_from_is_not_null_restriction(restr); + auto preds = to_predicates(prepared_restriction, _schema.get()); + predicates.insert(predicates.end(), std::make_move_iterator(preds.begin()), std::make_move_iterator(preds.end())); + } + + for (auto& pred : predicates) { + auto& restr = expr::as(pred.filter); + if 
(pred.is_not_null_single_column) { + auto* col = require_on_single_column(pred); _not_null_columns.insert(col); if (!for_view) { - throw exceptions::invalid_request_exception(format("restriction '{}' is only supported in materialized view creation", restr)); + throw exceptions::invalid_request_exception(format("restriction '{}' is only supported in materialized view creation", pred.filter)); } - } else if (is_multi_column(restr)) { + } else if (pred.is_multi_column) { // Multi column restrictions are only allowed on clustering columns if (is_empty_restriction(_clustering_columns_restrictions)) { _clustering_columns_restrictions = restr; @@ -1462,7 +1450,7 @@ statement_restrictions::statement_restrictions(private_tag, throw exceptions::invalid_request_exception(format("Unsupported multi-column relation: ", restr)); } } - } else if (has_partition_token(restr, *_schema)) { + } else if (std::holds_alternative(pred.on)) { // Token always restricts the partition key if (!partition_key_restrictions_is_empty() && !has_token_restrictions()) { throw exceptions::invalid_request_exception( @@ -1475,8 +1463,8 @@ statement_restrictions::statement_restrictions(private_tag, } _partition_key_restrictions = expr::make_conjunction(_partition_key_restrictions, restr); - } else if (is_single_column_restriction(restr)) { - const column_definition* def = get_the_only_column(restr).col; + } else if (std::holds_alternative(pred.on)) { + const column_definition* def = std::get(pred.on).column; if (def->is_partition_key()) { // View definition allows PK slices, because it's not a performance problem. 
if (restr.op != expr::oper_t::EQ && restr.op != expr::oper_t::IN && !allow_filtering && !for_view) { @@ -1504,7 +1492,7 @@ statement_restrictions::statement_restrictions(private_tag, "Mixing single column relations and multi column relations on clustering columns is not allowed"); } - const column_definition* new_column = get_the_only_column(restr).col; + const column_definition* new_column = std::get(pred.on).column; const column_definition* last_column = expr::get_last_column_def(_clustering_columns_restrictions); if (last_column != nullptr && !allow_filtering) { @@ -1526,11 +1514,11 @@ statement_restrictions::statement_restrictions(private_tag, _nonprimary_key_restrictions = expr::make_conjunction(_nonprimary_key_restrictions, restr); } } else { - throw exceptions::invalid_request_exception(format("Unhandled restriction: {}", restr)); + throw exceptions::invalid_request_exception(format("Unhandled restriction: {}", pred.filter)); } - if (prepared_restriction.op != expr::oper_t::IS_NOT) { - _where.push_back(prepared_restriction); + if (!pred.is_not_null_single_column) { + _where.push_back(restr); } } if (!_where.empty()) { From b0c5eed3848b1469a24a0372a9abd1becb0bf6c3 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 Mar 2026 00:38:21 +0200 Subject: [PATCH 50/76] cql3: statement_restrictions: convert multi-column branch to use predicate properties Replace direct operator comparisons with predicate boolean fields: pred.equality, pred.is_in, pred.is_slice, pred.is_lower_bound, pred.is_upper_bound, and pred.order. 
--- cql3/restrictions/statement_restrictions.cc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 0443af8e25..12d84ffc75 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1404,13 +1404,13 @@ statement_restrictions::statement_restrictions(private_tag, throw exceptions::invalid_request_exception("Mixing single column relations and multi column relations on clustering columns is not allowed"); } - if (restr.op == expr::oper_t::EQ) { + if (pred.equality) { throw exceptions::invalid_request_exception(format("{} cannot be restricted by more than one relation if it includes an Equal", expr::get_columns_in_commons(_clustering_columns_restrictions, restr))); - } else if (restr.op == expr::oper_t::IN) { + } else if (pred.is_in) { throw exceptions::invalid_request_exception(format("{} cannot be restricted by more than one relation if it includes a IN", expr::get_columns_in_commons(_clustering_columns_restrictions, restr))); - } else if (is_slice(restr.op)) { + } else if (pred.is_slice) { if (!expr::has_slice(_clustering_columns_restrictions)) { throw exceptions::invalid_request_exception(format("Column \"{}\" cannot be restricted by both an equality and an inequality relation", expr::get_columns_in_commons(_clustering_columns_restrictions, restr))); @@ -1422,7 +1422,7 @@ statement_restrictions::statement_restrictions(private_tag, } // Don't allow to mix plain and SCYLLA_CLUSTERING_BOUND bounds - if (other_slice->order != restr.order) { + if (other_slice->order != pred.order) { static auto order2str = [](auto o) { return o == expr::comparison_order::cql ? 
"plain" : "SCYLLA_CLUSTERING_BOUND"; }; throw exceptions::invalid_request_exception( format("Invalid combination of restrictions ({} / {})", @@ -1430,16 +1430,16 @@ statement_restrictions::statement_restrictions(private_tag, } // Here check that there aren't two < <= or two > and >= - auto is_greater = [](expr::oper_t op) {return op == expr::oper_t::GT || op == expr::oper_t::GTE; }; - auto is_less = [](expr::oper_t op) {return op == expr::oper_t::LT || op == expr::oper_t::LTE; }; + auto is_lower_bound = [](const expr::binary_operator& b) { return b.op == expr::oper_t::GT || b.op == expr::oper_t::GTE; }; + auto is_upper_bound = [](const expr::binary_operator& b) { return b.op == expr::oper_t::LT || b.op == expr::oper_t::LTE; }; - if (is_greater(restr.op) && is_greater(other_slice->op)) { + if (pred.is_lower_bound && is_lower_bound(*other_slice)) { throw exceptions::invalid_request_exception(format( "More than one restriction was found for the start bound on {}", expr::get_columns_in_commons(restr, *other_slice))); } - if (is_less(restr.op) && is_less(other_slice->op)) { + if (pred.is_upper_bound && is_upper_bound(*other_slice)) { throw exceptions::invalid_request_exception(format( "More than one restriction was found for the end bound on {}", expr::get_columns_in_commons(restr, *other_slice))); From 44b18f3399b22cf05abbff0122fa84319d3e45d3 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 Mar 2026 12:20:06 +0200 Subject: [PATCH 51/76] cql3: statement_restrictions: convert single-column branch to use predicate properties In the single-column partition-key and clustering-key sub-branches, replace direct binary_operator field inspections with pre-computed predicate booleans: !pred.equality && !pred.is_in instead of restr.op != EQ && restr.op != IN, pred.is_in instead of find(restr, IN), and pred.is_slice instead of has_slice(restr). Also fix a leftover restr.order in the multi-column branch error message. 
--- cql3/restrictions/statement_restrictions.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 12d84ffc75..60d85a624e 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1426,7 +1426,7 @@ statement_restrictions::statement_restrictions(private_tag, static auto order2str = [](auto o) { return o == expr::comparison_order::cql ? "plain" : "SCYLLA_CLUSTERING_BOUND"; }; throw exceptions::invalid_request_exception( format("Invalid combination of restrictions ({} / {})", - order2str(other_slice->order), order2str(restr.order))); + order2str(other_slice->order), order2str(pred.order))); } // Here check that there aren't two < <= or two > and >= @@ -1467,7 +1467,7 @@ statement_restrictions::statement_restrictions(private_tag, const column_definition* def = std::get(pred.on).column; if (def->is_partition_key()) { // View definition allows PK slices, because it's not a performance problem. 
- if (restr.op != expr::oper_t::EQ && restr.op != expr::oper_t::IN && !allow_filtering && !for_view) { + if (!pred.equality && !pred.is_in && !allow_filtering && !for_view) { throw exceptions::invalid_request_exception( "Only EQ and IN relation are supported on the partition key " "(unless you use the token() function or ALLOW FILTERING)"); @@ -1483,7 +1483,7 @@ statement_restrictions::statement_restrictions(private_tag, } _partition_key_restrictions = expr::make_conjunction(_partition_key_restrictions, restr); - _partition_range_is_simple &= !find(restr, expr::oper_t::IN); + _partition_range_is_simple &= !pred.is_in; } else if (def->is_clustering_key()) { if (find_binop(_clustering_columns_restrictions, [] (const expr::binary_operator& b) { return expr::is(b.lhs); @@ -1502,7 +1502,7 @@ statement_restrictions::statement_restrictions(private_tag, } if (schema->position(*new_column) < schema->position(*last_column)) { - if (has_slice(restr)) { + if (pred.is_slice) { throw exceptions::invalid_request_exception(format("PRIMARY KEY column \"{}\" cannot be restricted (preceding column \"{}\" is restricted by a non-EQ relation)", last_column->name_as_text(), new_column->name_as_text())); } From d4ff613c0a790a9dfaaf220a6acbb0d047a2df32 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 Mar 2026 12:26:26 +0200 Subject: [PATCH 52/76] cql3: statement_restrictions: replace restr bridge variable with pred.filter The constructor loop no longer needs to extract a binary_operator reference from each predicate. All remaining uses (make_conjunction, get_columns_in_commons, assignment to accumulated restriction members, _where.push_back, and error formatting) accept expression directly, which is what pred.filter already is. This eliminates the unnecessary expr::as cast to binary_operator at the top of the loop. 
--- cql3/restrictions/statement_restrictions.cc | 27 ++++++++++----------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 60d85a624e..8d9d443a9c 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1384,7 +1384,6 @@ statement_restrictions::statement_restrictions(private_tag, } for (auto& pred : predicates) { - auto& restr = expr::as(pred.filter); if (pred.is_not_null_single_column) { auto* col = require_on_single_column(pred); _not_null_columns.insert(col); @@ -1395,7 +1394,7 @@ statement_restrictions::statement_restrictions(private_tag, } else if (pred.is_multi_column) { // Multi column restrictions are only allowed on clustering columns if (is_empty_restriction(_clustering_columns_restrictions)) { - _clustering_columns_restrictions = restr; + _clustering_columns_restrictions = pred.filter; } else { if (!find_binop(_clustering_columns_restrictions, [] (const expr::binary_operator& b) { @@ -1406,14 +1405,14 @@ statement_restrictions::statement_restrictions(private_tag, if (pred.equality) { throw exceptions::invalid_request_exception(format("{} cannot be restricted by more than one relation if it includes an Equal", - expr::get_columns_in_commons(_clustering_columns_restrictions, restr))); + expr::get_columns_in_commons(_clustering_columns_restrictions, pred.filter))); } else if (pred.is_in) { throw exceptions::invalid_request_exception(format("{} cannot be restricted by more than one relation if it includes a IN", - expr::get_columns_in_commons(_clustering_columns_restrictions, restr))); + expr::get_columns_in_commons(_clustering_columns_restrictions, pred.filter))); } else if (pred.is_slice) { if (!expr::has_slice(_clustering_columns_restrictions)) { throw exceptions::invalid_request_exception(format("Column \"{}\" cannot be restricted by both an equality and an inequality relation", - 
expr::get_columns_in_commons(_clustering_columns_restrictions, restr))); + expr::get_columns_in_commons(_clustering_columns_restrictions, pred.filter))); } const expr::binary_operator* other_slice = expr::find_in_expression(_clustering_columns_restrictions, [](const expr::binary_operator){return true;}); @@ -1436,18 +1435,18 @@ statement_restrictions::statement_restrictions(private_tag, if (pred.is_lower_bound && is_lower_bound(*other_slice)) { throw exceptions::invalid_request_exception(format( "More than one restriction was found for the start bound on {}", - expr::get_columns_in_commons(restr, *other_slice))); + expr::get_columns_in_commons(pred.filter, *other_slice))); } if (pred.is_upper_bound && is_upper_bound(*other_slice)) { throw exceptions::invalid_request_exception(format( "More than one restriction was found for the end bound on {}", - expr::get_columns_in_commons(restr, *other_slice))); + expr::get_columns_in_commons(pred.filter, *other_slice))); } - _clustering_columns_restrictions = expr::make_conjunction(_clustering_columns_restrictions, restr); + _clustering_columns_restrictions = expr::make_conjunction(_clustering_columns_restrictions, pred.filter); } else { - throw exceptions::invalid_request_exception(format("Unsupported multi-column relation: ", restr)); + throw exceptions::invalid_request_exception(format("Unsupported multi-column relation: ", pred.filter)); } } } else if (std::holds_alternative(pred.on)) { @@ -1462,7 +1461,7 @@ statement_restrictions::statement_restrictions(private_tag, ", "))); } - _partition_key_restrictions = expr::make_conjunction(_partition_key_restrictions, restr); + _partition_key_restrictions = expr::make_conjunction(_partition_key_restrictions, pred.filter); } else if (std::holds_alternative(pred.on)) { const column_definition* def = std::get(pred.on).column; if (def->is_partition_key()) { @@ -1482,7 +1481,7 @@ statement_restrictions::statement_restrictions(private_tag, ", "))); } - _partition_key_restrictions = 
expr::make_conjunction(_partition_key_restrictions, restr); + _partition_key_restrictions = expr::make_conjunction(_partition_key_restrictions, pred.filter); _partition_range_is_simple &= !pred.is_in; } else if (def->is_clustering_key()) { if (find_binop(_clustering_columns_restrictions, [] (const expr::binary_operator& b) { @@ -1509,16 +1508,16 @@ statement_restrictions::statement_restrictions(private_tag, } } - _clustering_columns_restrictions = expr::make_conjunction(_clustering_columns_restrictions, restr); + _clustering_columns_restrictions = expr::make_conjunction(_clustering_columns_restrictions, pred.filter); } else { - _nonprimary_key_restrictions = expr::make_conjunction(_nonprimary_key_restrictions, restr); + _nonprimary_key_restrictions = expr::make_conjunction(_nonprimary_key_restrictions, pred.filter); } } else { throw exceptions::invalid_request_exception(format("Unhandled restriction: {}", pred.filter)); } if (!pred.is_not_null_single_column) { - _where.push_back(restr); + _where.push_back(pred.filter); } } if (!_where.empty()) { From aa6a0ad326298c4acc6c27fffe8b029e35cd7849 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 Mar 2026 12:42:23 +0200 Subject: [PATCH 53/76] cql3: statement_restrictions: track clustering-empty state incrementally Replace is_empty_restriction(_clustering_columns_restrictions), which recursively walks the accumulated expression tree, with a local bool ck_is_empty that is set to false when a clustering restriction is first added. Updated at both insertion points: multi-column first assignment and single-column make_conjunction. 
--- cql3/restrictions/statement_restrictions.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 8d9d443a9c..da8b534eea 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1383,6 +1383,7 @@ statement_restrictions::statement_restrictions(private_tag, predicates.insert(predicates.end(), std::make_move_iterator(preds.begin()), std::make_move_iterator(preds.end())); } + bool ck_is_empty = true; for (auto& pred : predicates) { if (pred.is_not_null_single_column) { auto* col = require_on_single_column(pred); @@ -1393,8 +1394,9 @@ statement_restrictions::statement_restrictions(private_tag, } } else if (pred.is_multi_column) { // Multi column restrictions are only allowed on clustering columns - if (is_empty_restriction(_clustering_columns_restrictions)) { + if (ck_is_empty) { _clustering_columns_restrictions = pred.filter; + ck_is_empty = false; } else { if (!find_binop(_clustering_columns_restrictions, [] (const expr::binary_operator& b) { @@ -1509,6 +1511,7 @@ statement_restrictions::statement_restrictions(private_tag, } _clustering_columns_restrictions = expr::make_conjunction(_clustering_columns_restrictions, pred.filter); + ck_is_empty = false; } else { _nonprimary_key_restrictions = expr::make_conjunction(_nonprimary_key_restrictions, pred.filter); } From 1071c39f17bcb631c3bfd534beb32f1b8d971872 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 Mar 2026 12:43:25 +0200 Subject: [PATCH 54/76] cql3: statement_restrictions: track has-multi-column-clustering incrementally Replace find_binop(_clustering_columns_restrictions, is_tuple_constructor), which walks the accumulated expression tree looking for multi-column restrictions, with a local bool has_mc_clustering set when a multi-column predicate is first added. 
This serves both the multi-column branch (checking existing restrictions are also multi-column) and the single-column branch (checking no multi-column restrictions exist). --- cql3/restrictions/statement_restrictions.cc | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index da8b534eea..ee51b466a0 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1384,6 +1384,7 @@ statement_restrictions::statement_restrictions(private_tag, } bool ck_is_empty = true; + bool has_mc_clustering = false; for (auto& pred : predicates) { if (pred.is_not_null_single_column) { auto* col = require_on_single_column(pred); @@ -1397,11 +1398,10 @@ statement_restrictions::statement_restrictions(private_tag, if (ck_is_empty) { _clustering_columns_restrictions = pred.filter; ck_is_empty = false; + has_mc_clustering = true; } else { - if (!find_binop(_clustering_columns_restrictions, [] (const expr::binary_operator& b) { - return expr::is(b.lhs); - })) { + if (!has_mc_clustering) { throw exceptions::invalid_request_exception("Mixing single column relations and multi column relations on clustering columns is not allowed"); } @@ -1486,9 +1486,7 @@ statement_restrictions::statement_restrictions(private_tag, _partition_key_restrictions = expr::make_conjunction(_partition_key_restrictions, pred.filter); _partition_range_is_simple &= !pred.is_in; } else if (def->is_clustering_key()) { - if (find_binop(_clustering_columns_restrictions, [] (const expr::binary_operator& b) { - return expr::is(b.lhs); - })) { + if (has_mc_clustering) { throw exceptions::invalid_request_exception( "Mixing single column relations and multi column relations on clustering columns is not allowed"); } From 88bd5ea1b77ca72c77264ebc31f970a52fd5b150 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 Mar 2026 12:44:44 +0200 Subject: [PATCH 55/76] cql3: 
statement_restrictions: track clustering-has-slice incrementally Replace has_slice(_clustering_columns_restrictions), which walks the accumulated expression tree looking for slice operators, with a local bool ck_has_slice set when any clustering predicate with is_slice is added. Updated at all three clustering insertion points: multi-column first assignment, multi-column slice conjunction, and single-column conjunction. --- cql3/restrictions/statement_restrictions.cc | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index ee51b466a0..2b03356dfd 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1385,6 +1385,7 @@ statement_restrictions::statement_restrictions(private_tag, bool ck_is_empty = true; bool has_mc_clustering = false; + bool ck_has_slice = false; for (auto& pred : predicates) { if (pred.is_not_null_single_column) { auto* col = require_on_single_column(pred); @@ -1399,6 +1400,9 @@ statement_restrictions::statement_restrictions(private_tag, _clustering_columns_restrictions = pred.filter; ck_is_empty = false; has_mc_clustering = true; + if (pred.is_slice) { + ck_has_slice = true; + } } else { if (!has_mc_clustering) { @@ -1412,7 +1416,7 @@ statement_restrictions::statement_restrictions(private_tag, throw exceptions::invalid_request_exception(format("{} cannot be restricted by more than one relation if it includes a IN", expr::get_columns_in_commons(_clustering_columns_restrictions, pred.filter))); } else if (pred.is_slice) { - if (!expr::has_slice(_clustering_columns_restrictions)) { + if (!ck_has_slice) { throw exceptions::invalid_request_exception(format("Column \"{}\" cannot be restricted by both an equality and an inequality relation", expr::get_columns_in_commons(_clustering_columns_restrictions, pred.filter))); } @@ -1447,6 +1451,7 @@ 
statement_restrictions::statement_restrictions(private_tag, } _clustering_columns_restrictions = expr::make_conjunction(_clustering_columns_restrictions, pred.filter); + ck_has_slice = true; } else { throw exceptions::invalid_request_exception(format("Unsupported multi-column relation: ", pred.filter)); } @@ -1495,7 +1500,7 @@ statement_restrictions::statement_restrictions(private_tag, const column_definition* last_column = expr::get_last_column_def(_clustering_columns_restrictions); if (last_column != nullptr && !allow_filtering) { - if (has_slice(_clustering_columns_restrictions) && schema->position(*new_column) > schema->position(*last_column)) { + if (ck_has_slice && schema->position(*new_column) > schema->position(*last_column)) { throw exceptions::invalid_request_exception(format("Clustering column \"{}\" cannot be restricted (preceding column \"{}\" is restricted by a non-EQ relation)", new_column->name_as_text(), last_column->name_as_text())); } @@ -1510,6 +1515,9 @@ statement_restrictions::statement_restrictions(private_tag, _clustering_columns_restrictions = expr::make_conjunction(_clustering_columns_restrictions, pred.filter); ck_is_empty = false; + if (pred.is_slice) { + ck_has_slice = true; + } } else { _nonprimary_key_restrictions = expr::make_conjunction(_nonprimary_key_restrictions, pred.filter); } From ef005c10bab4b6e83d1052e5740ca7a001631a15 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 Mar 2026 12:46:02 +0200 Subject: [PATCH 56/76] cql3: statement_restrictions: track last clustering column incrementally Replace get_last_column_def(_clustering_columns_restrictions), which walks the entire accumulated expression tree to collect and sort all column definitions, with a local pointer ck_last_column that tracks the column with the highest schema position as single-column clustering restrictions are added. 
--- cql3/restrictions/statement_restrictions.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 2b03356dfd..7d7400eee2 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1386,6 +1386,7 @@ statement_restrictions::statement_restrictions(private_tag, bool ck_is_empty = true; bool has_mc_clustering = false; bool ck_has_slice = false; + const column_definition* ck_last_column = nullptr; for (auto& pred : predicates) { if (pred.is_not_null_single_column) { auto* col = require_on_single_column(pred); @@ -1497,7 +1498,7 @@ statement_restrictions::statement_restrictions(private_tag, } const column_definition* new_column = std::get(pred.on).column; - const column_definition* last_column = expr::get_last_column_def(_clustering_columns_restrictions); + const column_definition* last_column = ck_last_column; if (last_column != nullptr && !allow_filtering) { if (ck_has_slice && schema->position(*new_column) > schema->position(*last_column)) { @@ -1518,6 +1519,9 @@ statement_restrictions::statement_restrictions(private_tag, if (pred.is_slice) { ck_has_slice = true; } + if (ck_last_column == nullptr || schema->position(*new_column) > schema->position(*ck_last_column)) { + ck_last_column = new_column; + } } else { _nonprimary_key_restrictions = expr::make_conjunction(_nonprimary_key_restrictions, pred.filter); } From 14812ea1e010e5a18ff8904d778e7b49d965f1a4 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 Mar 2026 12:47:16 +0200 Subject: [PATCH 57/76] cql3: statement_restrictions: track first multi-column predicate incrementally Replace find_in_expression(_clustering_columns_restrictions, always_true), which walks the accumulated expression tree to find the first binary_operator, with a tracked pointer first_mc_pred set when the first multi-column predicate is added. 
This eliminates the tree scan, the null check, and the is_lower_bound/is_upper_bound lambdas, replacing them with direct predicate field accesses: first_mc_pred->order, first_mc_pred->is_lower_bound, first_mc_pred->is_upper_bound, and first_mc_pred->filter for error messages. --- cql3/restrictions/statement_restrictions.cc | 22 ++++++++------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 7d7400eee2..c3bfdc900d 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1387,6 +1387,7 @@ statement_restrictions::statement_restrictions(private_tag, bool has_mc_clustering = false; bool ck_has_slice = false; const column_definition* ck_last_column = nullptr; + const predicate* first_mc_pred = nullptr; for (auto& pred : predicates) { if (pred.is_not_null_single_column) { auto* col = require_on_single_column(pred); @@ -1401,6 +1402,7 @@ statement_restrictions::statement_restrictions(private_tag, _clustering_columns_restrictions = pred.filter; ck_is_empty = false; has_mc_clustering = true; + first_mc_pred = &pred; if (pred.is_slice) { ck_has_slice = true; } @@ -1422,33 +1424,25 @@ statement_restrictions::statement_restrictions(private_tag, expr::get_columns_in_commons(_clustering_columns_restrictions, pred.filter))); } - const expr::binary_operator* other_slice = expr::find_in_expression(_clustering_columns_restrictions, [](const expr::binary_operator){return true;}); - if (other_slice == nullptr) { - on_internal_error(rlogger, "add_multi_column_clustering_key_restriction: _clustering_columns_restrictions is empty!"); - } - // Don't allow to mix plain and SCYLLA_CLUSTERING_BOUND bounds - if (other_slice->order != pred.order) { + if (first_mc_pred->order != pred.order) { static auto order2str = [](auto o) { return o == expr::comparison_order::cql ? 
"plain" : "SCYLLA_CLUSTERING_BOUND"; }; throw exceptions::invalid_request_exception( format("Invalid combination of restrictions ({} / {})", - order2str(other_slice->order), order2str(pred.order))); + order2str(first_mc_pred->order), order2str(pred.order))); } // Here check that there aren't two < <= or two > and >= - auto is_lower_bound = [](const expr::binary_operator& b) { return b.op == expr::oper_t::GT || b.op == expr::oper_t::GTE; }; - auto is_upper_bound = [](const expr::binary_operator& b) { return b.op == expr::oper_t::LT || b.op == expr::oper_t::LTE; }; - - if (pred.is_lower_bound && is_lower_bound(*other_slice)) { + if (pred.is_lower_bound && first_mc_pred->is_lower_bound) { throw exceptions::invalid_request_exception(format( "More than one restriction was found for the start bound on {}", - expr::get_columns_in_commons(pred.filter, *other_slice))); + expr::get_columns_in_commons(pred.filter, first_mc_pred->filter))); } - if (pred.is_upper_bound && is_upper_bound(*other_slice)) { + if (pred.is_upper_bound && first_mc_pred->is_upper_bound) { throw exceptions::invalid_request_exception(format( "More than one restriction was found for the end bound on {}", - expr::get_columns_in_commons(pred.filter, *other_slice))); + expr::get_columns_in_commons(pred.filter, first_mc_pred->filter))); } _clustering_columns_restrictions = expr::make_conjunction(_clustering_columns_restrictions, pred.filter); From 1344278a19255dc839112bc6942e8eafae12054e Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 Mar 2026 13:01:34 +0200 Subject: [PATCH 58/76] cql3: statement_restrictions: track partition-key-empty state incrementally Replace the in-loop call to partition_key_restrictions_is_empty() (which walks the _partition_key_restrictions expression tree via is_empty_restriction()) with a local bool pk_is_empty, set to false at the two sites where partition key restrictions are added. The member function is retained since it's used outside the constructor. 
--- cql3/restrictions/statement_restrictions.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index c3bfdc900d..c21fd17f6f 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1388,6 +1388,7 @@ statement_restrictions::statement_restrictions(private_tag, bool ck_has_slice = false; const column_definition* ck_last_column = nullptr; const predicate* first_mc_pred = nullptr; + bool pk_is_empty = true; for (auto& pred : predicates) { if (pred.is_not_null_single_column) { auto* col = require_on_single_column(pred); @@ -1453,7 +1454,7 @@ statement_restrictions::statement_restrictions(private_tag, } } else if (std::holds_alternative(pred.on)) { // Token always restricts the partition key - if (!partition_key_restrictions_is_empty() && !has_token_restrictions()) { + if (!pk_is_empty && !has_token_restrictions()) { throw exceptions::invalid_request_exception( seastar::format("Columns \"{}\" cannot be restricted by both a normal relation and a token relation", fmt::join(expr::get_sorted_column_defs(_partition_key_restrictions) | @@ -1464,6 +1465,7 @@ statement_restrictions::statement_restrictions(private_tag, } _partition_key_restrictions = expr::make_conjunction(_partition_key_restrictions, pred.filter); + pk_is_empty = false; } else if (std::holds_alternative(pred.on)) { const column_definition* def = std::get(pred.on).column; if (def->is_partition_key()) { @@ -1484,6 +1486,7 @@ statement_restrictions::statement_restrictions(private_tag, } _partition_key_restrictions = expr::make_conjunction(_partition_key_restrictions, pred.filter); + pk_is_empty = false; _partition_range_is_simple &= !pred.is_in; } else if (def->is_clustering_key()) { if (has_mc_clustering) { From da438507d06e350e0c2f045437d3af5ae2655ae4 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 Mar 2026 13:03:35 +0200 Subject: [PATCH 
59/76] cql3: statement_restrictions: track has-token state incrementally Replace the two in-loop calls to has_token_restrictions() (which walks the _partition_key_restrictions expression tree looking for token function calls) with a local bool has_token, set to true when a token predicate is processed. The member function is retained since it's used outside the constructor. With this change, the constructor loop's non-error control flow performs zero expression tree scanning. The only remaining tree walks are on error paths (get_sorted_column_defs, get_columns_in_commons for formatting exception messages) and structural (make_conjunction for building accumulated expressions). --- cql3/restrictions/statement_restrictions.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index c21fd17f6f..01533df889 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1389,6 +1389,7 @@ statement_restrictions::statement_restrictions(private_tag, const column_definition* ck_last_column = nullptr; const predicate* first_mc_pred = nullptr; bool pk_is_empty = true; + bool has_token = false; for (auto& pred : predicates) { if (pred.is_not_null_single_column) { auto* col = require_on_single_column(pred); @@ -1454,7 +1455,7 @@ statement_restrictions::statement_restrictions(private_tag, } } else if (std::holds_alternative(pred.on)) { // Token always restricts the partition key - if (!pk_is_empty && !has_token_restrictions()) { + if (!pk_is_empty && !has_token) { throw exceptions::invalid_request_exception( seastar::format("Columns \"{}\" cannot be restricted by both a normal relation and a token relation", fmt::join(expr::get_sorted_column_defs(_partition_key_restrictions) | @@ -1466,6 +1467,7 @@ statement_restrictions::statement_restrictions(private_tag, _partition_key_restrictions = 
expr::make_conjunction(_partition_key_restrictions, pred.filter); pk_is_empty = false; + has_token = true; } else if (std::holds_alternative(pred.on)) { const column_definition* def = std::get(pred.on).column; if (def->is_partition_key()) { @@ -1475,7 +1477,7 @@ statement_restrictions::statement_restrictions(private_tag, "Only EQ and IN relation are supported on the partition key " "(unless you use the token() function or ALLOW FILTERING)"); } - if (has_token_restrictions()) { + if (has_token) { throw exceptions::invalid_request_exception( seastar::format("Columns \"{}\" cannot be restricted by both a normal relation and a token relation", fmt::join(expr::get_sorted_column_defs(_partition_key_restrictions) | From 701366a8d17858aad6b5b9d52faeed01152e68bc Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 Mar 2026 13:51:31 +0200 Subject: [PATCH 60/76] cql3: statement_restrictions: use tracked has_mc_clustering for _has_multi_column Replace the two post-loop find_binop(_clustering_columns_restrictions, is_multi_column) tree walks and the contains_multi_column_restriction() tree walk with the already-tracked local has_mc_clustering. The redundant second assignment inside the _check_indexes block is removed entirely. 
--- cql3/restrictions/statement_restrictions.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 01533df889..23f515b407 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1536,21 +1536,20 @@ statement_restrictions::statement_restrictions(private_tag, if (!has_token_restrictions()) { _single_column_partition_key_restrictions = get_single_column_restrictions_map(_partition_key_restrictions); } - if (!contains_multi_column_restriction(_clustering_columns_restrictions)) { + if (!has_mc_clustering) { _single_column_clustering_key_restrictions = get_single_column_restrictions_map(_clustering_columns_restrictions); } _single_column_nonprimary_key_restrictions = get_single_column_restrictions_map(_nonprimary_key_restrictions); _clustering_prefix_restrictions = extract_clustering_prefix_restrictions(_where, _schema); _partition_range_restrictions = extract_partition_range(_where, _schema); } - _has_multi_column = find_binop(_clustering_columns_restrictions, is_multi_column); + _has_multi_column = has_mc_clustering; if (_check_indexes) { auto cf = db.find_column_family(schema); auto& sim = cf.get_index_manager(); const expr::allow_local_index allow_local( !has_partition_key_unrestricted_components() && partition_key_restrictions_is_all_eq()); - _has_multi_column = find_binop(_clustering_columns_restrictions, is_multi_column); _has_queriable_ck_index = clustering_columns_restrictions_have_supporting_index(sim, allow_local) && !type.is_delete(); _has_queriable_pk_index = parition_key_restrictions_have_supporting_index(sim, allow_local) From e9b16a11ba1426247cad1a9434d2a447b3bf9fdf Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 Mar 2026 13:52:40 +0200 Subject: [PATCH 61/76] cql3: statement_restrictions: build non-primary-key single-column restrictions map incrementally Instead of accumulating 
all non-primary-key restrictions into a conjunction tree and then decomposing it by column via get_single_column_restrictions_map() post-loop, build the per-column map incrementally as each non-primary-key predicate is processed. This eliminates a post-loop tree walk over _nonprimary_key_restrictions. --- cql3/restrictions/statement_restrictions.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 23f515b407..a96095e78c 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1523,6 +1523,10 @@ statement_restrictions::statement_restrictions(private_tag, } } else { _nonprimary_key_restrictions = expr::make_conjunction(_nonprimary_key_restrictions, pred.filter); + { + auto [it, inserted] = _single_column_nonprimary_key_restrictions.try_emplace(def, expr::conjunction{}); + it->second = expr::make_conjunction(std::move(it->second), pred.filter); + } } } else { throw exceptions::invalid_request_exception(format("Unhandled restriction: {}", pred.filter)); @@ -1539,7 +1543,6 @@ statement_restrictions::statement_restrictions(private_tag, if (!has_mc_clustering) { _single_column_clustering_key_restrictions = get_single_column_restrictions_map(_clustering_columns_restrictions); } - _single_column_nonprimary_key_restrictions = get_single_column_restrictions_map(_nonprimary_key_restrictions); _clustering_prefix_restrictions = extract_clustering_prefix_restrictions(_where, _schema); _partition_range_restrictions = extract_partition_range(_where, _schema); } From a4608804d8c5137f9640b0bccd3c5ec55b179862 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 Mar 2026 13:53:44 +0200 Subject: [PATCH 62/76] cql3: statement_restrictions: build partition-key single-column restrictions map incrementally Instead of accumulating all partition-key restrictions into a conjunction tree and then decomposing it by column via 
get_single_column_restrictions_map() post-loop, build the per-column map incrementally as each single-column partition-key predicate is processed. The post-loop guard (!has_token_restrictions()) is no longer needed: token predicates go through the on_partition_key_token branch and never insert into this map, and mixing token with non-token is rejected with an exception. This eliminates a post-loop tree walk over _partition_key_restrictions. --- cql3/restrictions/statement_restrictions.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index a96095e78c..25bb2495e7 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1489,6 +1489,10 @@ statement_restrictions::statement_restrictions(private_tag, _partition_key_restrictions = expr::make_conjunction(_partition_key_restrictions, pred.filter); pk_is_empty = false; + { + auto [it, inserted] = _single_column_partition_key_restrictions.try_emplace(def, expr::conjunction{}); + it->second = expr::make_conjunction(std::move(it->second), pred.filter); + } _partition_range_is_simple &= !pred.is_in; } else if (def->is_clustering_key()) { if (has_mc_clustering) { @@ -1537,9 +1541,6 @@ statement_restrictions::statement_restrictions(private_tag, } } if (!_where.empty()) { - if (!has_token_restrictions()) { - _single_column_partition_key_restrictions = get_single_column_restrictions_map(_partition_key_restrictions); - } if (!has_mc_clustering) { _single_column_clustering_key_restrictions = get_single_column_restrictions_map(_clustering_columns_restrictions); } From db28411548aa22d4a81cdf549f656bf46d9ae432 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 Mar 2026 13:54:56 +0200 Subject: [PATCH 63/76] cql3: statement_restrictions: build clustering-key single-column restrictions map incrementally Instead of accumulating all clustering-key restrictions into a 
conjunction tree and then decomposing it by column via get_single_column_restrictions_map() post-loop, build the per-column map incrementally as each single-column clustering-key predicate is processed. The post-loop guard (!has_mc_clustering) is no longer needed: multi-column predicates go through the is_multi_column branch and never insert into this map, and mixing multi with single-column is rejected with an exception. This eliminates a post-loop tree walk over _clustering_columns_restrictions. --- cql3/restrictions/statement_restrictions.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 25bb2495e7..e34e6ca5b0 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1519,6 +1519,10 @@ statement_restrictions::statement_restrictions(private_tag, _clustering_columns_restrictions = expr::make_conjunction(_clustering_columns_restrictions, pred.filter); ck_is_empty = false; + { + auto [it, inserted] = _single_column_clustering_key_restrictions.try_emplace(def, expr::conjunction{}); + it->second = expr::make_conjunction(std::move(it->second), pred.filter); + } if (pred.is_slice) { ck_has_slice = true; } @@ -1541,9 +1545,6 @@ statement_restrictions::statement_restrictions(private_tag, } } if (!_where.empty()) { - if (!has_mc_clustering) { - _single_column_clustering_key_restrictions = get_single_column_restrictions_map(_clustering_columns_restrictions); - } _clustering_prefix_restrictions = extract_clustering_prefix_restrictions(_where, _schema); _partition_range_restrictions = extract_partition_range(_where, _schema); } From 3bd308986a815f0ddce60a2f13864bf9700eeca4 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 Mar 2026 13:59:53 +0200 Subject: [PATCH 64/76] cql3: statement_restrictions: build partition-range restrictions incrementally Replace the extract_partition_range() tree walk with 
incremental collection during the main loop. Two new locals before the loop -- token_pred and pk_range_preds -- accumulate token and single-column EQ/IN partition key predicates respectively. A short post-loop block materializes _partition_range_restrictions from these locals, replacing the removed function. This removes the last tree walk over partition-key restrictions. --- cql3/restrictions/statement_restrictions.cc | 143 +++----------------- 1 file changed, 22 insertions(+), 121 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index e34e6ca5b0..c48c02afe1 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1082,126 +1082,6 @@ void with_current_binary_operator( func(*visitor.current_binary_operator); } -/// Every token, or if no tokens, an EQ/IN of every single PK column. -static partition_range_restrictions extract_partition_range( - std::span where_clause, schema_ptr schema) { - using namespace expr; - struct extract_partition_range_visitor { - schema_ptr table_schema; - std::optional tokens; - std::unordered_map single_column; - const binary_operator* current_binary_operator = nullptr; - - void operator()(const conjunction& c) { - std::ranges::for_each(c.children, [this] (const expression& child) { expr::visit(*this, child); }); - } - - void operator()(const binary_operator& b) { - if (current_binary_operator) { - throw std::logic_error("Nested binary operators are not supported"); - } - current_binary_operator = &b; - expr::visit(*this, b.lhs); - current_binary_operator = nullptr; - } - - void operator()(const function_call& token_fun_call) { - if (!is_partition_token_for_schema(token_fun_call, *table_schema)) { - on_internal_error(rlogger, "extract_partition_range(function_call)"); - } - - with_current_binary_operator(*this, [&] (const binary_operator& b) { - if (tokens) { - tokens = make_conjunction(std::move(*tokens), b); - } else 
{ - tokens = b; - } - }); - } - - void operator()(const column_value& cv) { - auto s = &cv; - with_current_binary_operator(*this, [&] (const binary_operator& b) { - if (s->col->is_partition_key() && (b.op == oper_t::EQ || b.op == oper_t::IN)) { - auto a = to_predicate_on_column(b, s->col, table_schema.get()); - const auto [it, inserted] = single_column.try_emplace(s->col, std::move(a)); - if (!inserted) { - it->second = make_conjunction(std::move(it->second), std::move(a)); - } - } - }); - } - - void operator()(const tuple_constructor& s) { - // Partition key columns are not legal in tuples, so ignore tuples. - } - - void operator()(const subscript& sub) { - const column_value& cval = get_subscripted_column(sub.val); - if (cval.col->is_partition_key()) { - on_internal_error(rlogger, "extract_partition_range(subscript)"); - } - } - - void operator()(const constant&) {} - - void operator()(const unresolved_identifier&) { - on_internal_error(rlogger, "extract_partition_range(unresolved_identifier)"); - } - - void operator()(const column_mutation_attribute&) { - on_internal_error(rlogger, "extract_partition_range(column_mutation_attribute)"); - } - - void operator()(const cast&) { - on_internal_error(rlogger, "extract_partition_range(cast)"); - } - - void operator()(const field_selection&) { - on_internal_error(rlogger, "extract_partition_range(field_selection)"); - } - - void operator()(const bind_variable&) { - on_internal_error(rlogger, "extract_partition_range(bind_variable)"); - } - - void operator()(const untyped_constant&) { - on_internal_error(rlogger, "extract_partition_range(untyped_constant)"); - } - - void operator()(const collection_constructor&) { - on_internal_error(rlogger, "extract_partition_range(collection_constructor)"); - } - - void operator()(const usertype_constructor&) { - on_internal_error(rlogger, "extract_partition_range(usertype_constructor)"); - } - - void operator()(const temporary&) { - on_internal_error(rlogger, 
"extract_partition_range(temporary)"); - } - }; - - extract_partition_range_visitor v { - .table_schema = schema - }; - - for (auto& e : where_clause) { - expr::visit(v, e); - } - - if (v.tokens) { - return token_range_restrictions{ - .token_restrictions = to_predicate_on_column(*v.tokens, nullptr, schema.get()), - }; - } - if (v.single_column.size() == schema->partition_key_size()) { - return single_column_partition_range_restrictions{ - .per_column_restrictions = v.single_column | std::views::values | std::ranges::to(), - }; - } - return no_partition_range_restrictions{}; -} /// Extracts where_clause atoms with clustering-column LHS and copies them to a vector. These elements define the /// boundaries of any clustering slice that can possibly meet where_clause. This vector can be calculated before @@ -1390,6 +1270,8 @@ statement_restrictions::statement_restrictions(private_tag, const predicate* first_mc_pred = nullptr; bool pk_is_empty = true; bool has_token = false; + std::optional token_pred; + std::unordered_map pk_range_preds; for (auto& pred : predicates) { if (pred.is_not_null_single_column) { auto* col = require_on_single_column(pred); @@ -1468,6 +1350,11 @@ statement_restrictions::statement_restrictions(private_tag, _partition_key_restrictions = expr::make_conjunction(_partition_key_restrictions, pred.filter); pk_is_empty = false; has_token = true; + if (token_pred) { + token_pred = make_conjunction(std::move(*token_pred), pred); + } else { + token_pred = pred; + } } else if (std::holds_alternative(pred.on)) { const column_definition* def = std::get(pred.on).column; if (def->is_partition_key()) { @@ -1493,6 +1380,12 @@ statement_restrictions::statement_restrictions(private_tag, auto [it, inserted] = _single_column_partition_key_restrictions.try_emplace(def, expr::conjunction{}); it->second = expr::make_conjunction(std::move(it->second), pred.filter); } + if (pred.equality || pred.is_in) { + auto [it, inserted] = pk_range_preds.try_emplace(def, pred); + if 
(!inserted) { + it->second = make_conjunction(std::move(it->second), pred); + } + } _partition_range_is_simple &= !pred.is_in; } else if (def->is_clustering_key()) { if (has_mc_clustering) { @@ -1546,7 +1439,15 @@ statement_restrictions::statement_restrictions(private_tag, } if (!_where.empty()) { _clustering_prefix_restrictions = extract_clustering_prefix_restrictions(_where, _schema); - _partition_range_restrictions = extract_partition_range(_where, _schema); + if (token_pred) { + _partition_range_restrictions = token_range_restrictions{ + .token_restrictions = std::move(*token_pred), + }; + } else if (pk_range_preds.size() == _schema->partition_key_size()) { + _partition_range_restrictions = single_column_partition_range_restrictions{ + .per_column_restrictions = std::move(pk_range_preds) | std::views::values | std::ranges::to(), + }; + } } _has_multi_column = has_mc_clustering; if (_check_indexes) { From fab90224b32a3640467414f5a8b47694b96ec19a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 Mar 2026 14:04:06 +0200 Subject: [PATCH 65/76] cql3: statement_restrictions: build clustering-prefix restrictions incrementally Replace the extract_clustering_prefix_restrictions() tree walk with incremental collection during the main loop. Two new locals -- mc_ck_preds and sc_ck_preds -- accumulate multi-column and single-column clustering key predicates respectively. A short post-loop block computes the longest contiguous prefix from sc_ck_preds (or uses mc_ck_preds directly for multi-column), replacing the removed function. Also remove the now-unused to_predicate_on_clustering_key_prefix(), with_current_binary_operator() helper, and the visitor_with_binary_operator_context concept. 
--- cql3/restrictions/statement_restrictions.cc | 222 +++----------------- 1 file changed, 29 insertions(+), 193 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index c48c02afe1..62c26ef7f4 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -661,34 +661,6 @@ to_predicate_on_column( return std::move(*ret); } -// Convert an expression to a predicate on a column. If cdef is nullptr, the predicate -// is on the partition key token. -static -predicate -to_predicate_on_clustering_key_prefix( - const expression& expr, - const schema* table_schema_opt) { - auto predicates = to_predicates(expr, table_schema_opt); - std::vector collected; - for (auto& predicate : predicates) { - if (std::holds_alternative(predicate.on)) { - collected.push_back(std::move(predicate)); - continue; - } - } - if (collected.empty()) { - on_internal_error(rlogger, "to_predicate_on_clustering_key_prefix: no predicates found"); - } - auto ret = std::ranges::fold_left_first( - collected | std::views::as_rvalue, - make_conjunction - ); - if (!ret) { - on_internal_error(rlogger, "to_predicate_on_clustering_key_prefix: no predicates found"); - } - return std::move(*ret); -} - interval to_range(const value_set& s) { return std::visit(overloaded_functor{ [] (const interval& r) { return r; }, @@ -1068,170 +1040,6 @@ statement_restrictions::statement_restrictions(private_tag, schema_ptr schema, b , _partition_range_is_simple(true) { } -template -concept visitor_with_binary_operator_context = requires (Visitor v) { - { v.current_binary_operator } -> std::convertible_to; -}; - -void with_current_binary_operator( - visitor_with_binary_operator_context auto& visitor, - std::invocable auto func) { - if (!visitor.current_binary_operator) { - throw std::logic_error("Evaluation expected within binary operator"); - } - func(*visitor.current_binary_operator); -} - - -/// Extracts where_clause 
atoms with clustering-column LHS and copies them to a vector. These elements define the -/// boundaries of any clustering slice that can possibly meet where_clause. This vector can be calculated before -/// binding expression markers, since LHS and operator are always known. -static std::vector extract_clustering_prefix_restrictions( - std::span where_clause, schema_ptr schema) { - using namespace expr; - - /// Collects all clustering-column restrictions from an expression. Presumes the expression only uses - /// conjunction to combine subexpressions. - struct visitor { - schema_ptr table_schema; - std::vector multi; ///< All multi-column restrictions. - /// All single-clustering-column restrictions, grouped by column. Each value is either an atom or a - /// conjunction of atoms. - std::unordered_map single; - const binary_operator* current_binary_operator = nullptr; - - void operator()(const conjunction& c) { - std::ranges::for_each(c.children, [this] (const expression& child) { expr::visit(*this, child); }); - } - - void operator()(const binary_operator& b) { - if (current_binary_operator) { - throw std::logic_error("Nested binary operators are not supported"); - } - current_binary_operator = &b; - expr::visit(*this, b.lhs); - current_binary_operator = nullptr; - } - - void operator()(const tuple_constructor& tc) { - std::vector prefix; - for (auto& e : tc.elements) { - if (auto cv = expr::as_if(&e)) { - prefix.push_back(cv->col); - } else { - on_internal_error(rlogger, fmt::format("extract_clustering_prefix_restrictions: tuple of non-column_value: {}", tc)); - } - } - with_current_binary_operator(*this, [&] (const binary_operator& b) { - multi.push_back(to_predicate_on_clustering_key_prefix(b, table_schema.get())); - }); - } - - void operator()(const column_value& cv) { - auto s = &cv; - with_current_binary_operator(*this, [&] (const binary_operator& b) { - if (s->col->is_clustering_key()) { - auto a = to_predicate_on_column(b, s->col, table_schema.get()); - 
const auto [it, inserted] = single.try_emplace(s->col, std::move(a)); - if (!inserted) { - it->second = make_conjunction(std::move(it->second), std::move(a)); - } - } - }); - } - - void operator()(const subscript& sub) { - const column_value& cval = get_subscripted_column(sub.val); - - with_current_binary_operator(*this, [&] (const binary_operator& b) { - if (cval.col->is_clustering_key()) { - auto a = to_predicate_on_column(b, cval.col, table_schema.get()); - const auto [it, inserted] = single.try_emplace(cval.col, std::move(a)); - if (!inserted) { - it->second = make_conjunction(std::move(it->second), std::move(a)); - } - } - }); - } - - void operator()(const function_call& fun_call) { - if (is_partition_token_for_schema(fun_call, *table_schema)) { - // A token cannot be a clustering prefix restriction - return; - } - - on_internal_error(rlogger, "extract_clustering_prefix_restrictions(function_call)"); - } - - void operator()(const constant&) {} - - void operator()(const unresolved_identifier&) { - on_internal_error(rlogger, "extract_clustering_prefix_restrictions(unresolved_identifier)"); - } - - void operator()(const column_mutation_attribute&) { - on_internal_error(rlogger, "extract_clustering_prefix_restrictions(column_mutation_attribute)"); - } - - void operator()(const cast&) { - on_internal_error(rlogger, "extract_clustering_prefix_restrictions(cast)"); - } - - void operator()(const field_selection&) { - on_internal_error(rlogger, "extract_clustering_prefix_restrictions(field_selection)"); - } - - void operator()(const bind_variable&) { - on_internal_error(rlogger, "extract_clustering_prefix_restrictions(bind_variable)"); - } - - void operator()(const untyped_constant&) { - on_internal_error(rlogger, "extract_clustering_prefix_restrictions(untyped_constant)"); - } - - void operator()(const collection_constructor&) { - on_internal_error(rlogger, "extract_clustering_prefix_restrictions(collection_constructor)"); - } - - void operator()(const 
usertype_constructor&) { - on_internal_error(rlogger, "extract_clustering_prefix_restrictions(usertype_constructor)"); - } - - void operator()(const temporary&) { - on_internal_error(rlogger, "extract_clustering_prefix_restrictions(temporary)"); - } - }; - visitor v { - .table_schema = schema - }; - - for (auto& e : where_clause) { - expr::visit(v, e); - } - - if (!v.multi.empty()) { - return std::move(v.multi); - } - - std::vector prefix; - for (const auto& col : schema->clustering_key_columns()) { - const auto found = v.single.find(&col); - if (found == v.single.end()) { // Any further restrictions are skipping the CK order. - break; - } - if (find_needs_filtering(found->second.filter)) { // This column's restriction doesn't define a clear bound. - // TODO: if this is a conjunction of filtering and non-filtering atoms, we could split them and add the - // latter to the prefix. - break; - } - prefix.push_back(found->second); - if (has_slice(found->second.filter)) { - break; - } - } - return prefix; -} - statement_restrictions::statement_restrictions(private_tag, data_dictionary::database db, schema_ptr schema, @@ -1272,6 +1080,8 @@ statement_restrictions::statement_restrictions(private_tag, bool has_token = false; std::optional token_pred; std::unordered_map pk_range_preds; + std::vector mc_ck_preds; + std::unordered_map sc_ck_preds; for (auto& pred : predicates) { if (pred.is_not_null_single_column) { auto* col = require_on_single_column(pred); @@ -1287,6 +1097,7 @@ statement_restrictions::statement_restrictions(private_tag, ck_is_empty = false; has_mc_clustering = true; first_mc_pred = &pred; + mc_ck_preds.push_back(pred); if (pred.is_slice) { ck_has_slice = true; } @@ -1330,6 +1141,7 @@ statement_restrictions::statement_restrictions(private_tag, } _clustering_columns_restrictions = expr::make_conjunction(_clustering_columns_restrictions, pred.filter); + mc_ck_preds.push_back(pred); ck_has_slice = true; } else { throw 
exceptions::invalid_request_exception(format("Unsupported multi-column relation: ", pred.filter)); @@ -1416,6 +1228,12 @@ statement_restrictions::statement_restrictions(private_tag, auto [it, inserted] = _single_column_clustering_key_restrictions.try_emplace(def, expr::conjunction{}); it->second = expr::make_conjunction(std::move(it->second), pred.filter); } + { + auto [it, inserted] = sc_ck_preds.try_emplace(def, pred); + if (!inserted) { + it->second = make_conjunction(std::move(it->second), pred); + } + } if (pred.is_slice) { ck_has_slice = true; } @@ -1438,7 +1256,25 @@ statement_restrictions::statement_restrictions(private_tag, } } if (!_where.empty()) { - _clustering_prefix_restrictions = extract_clustering_prefix_restrictions(_where, _schema); + if (!mc_ck_preds.empty()) { + _clustering_prefix_restrictions = std::move(mc_ck_preds); + } else { + std::vector prefix; + for (const auto& col : _schema->clustering_key_columns()) { + const auto found = sc_ck_preds.find(&col); + if (found == sc_ck_preds.end()) { + break; + } + if (find_needs_filtering(found->second.filter)) { + break; + } + prefix.push_back(found->second); + if (has_slice(found->second.filter)) { + break; + } + } + _clustering_prefix_restrictions = std::move(prefix); + } if (token_pred) { _partition_range_restrictions = token_range_restrictions{ .token_restrictions = std::move(*token_pred), From 25ba3bd64994d1a8744ea46ade5f5a02266e80bd Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 Mar 2026 14:22:42 +0200 Subject: [PATCH 66/76] cql3: statement_restrictions: use pre-built single-column maps for index support checks Replace index_supports_some_column(expression, ...) with index_supports_some_column(single_column_restrictions_map, ...) to eliminate get_single_column_restrictions_map() tree walks when checking index support. 
The three call sites now use the maps already built incrementally in the constructor loop: _single_column_nonprimary_key_restrictions, _single_column_clustering_key_restrictions, and _single_column_partition_key_restrictions. Also replace contains_multi_column_restriction() tree walk in clustering_columns_restrictions_have_supporting_index() with _has_multi_column. --- cql3/restrictions/statement_restrictions.cc | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 62c26ef7f4..50475485b8 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -71,9 +71,8 @@ extern bool has_supporting_index( const expression&, const secondary_index::secondary_index_manager&, allow_local_index allow_local); // Looks at each column individually and checks whether some index can support restrictions on this single column. -// Expression has to consist only of single column restrictions. 
extern bool index_supports_some_column( - const expression&, + const single_column_restrictions_map&, const secondary_index::secondary_index_manager&, allow_local_index allow_local); @@ -786,11 +785,9 @@ bool has_supporting_index( } bool index_supports_some_column( - const expression& e, + const single_column_restrictions_map& single_col_restrictions, const secondary_index::secondary_index_manager& index_manager, allow_local_index allow_local) { - single_column_restrictions_map single_col_restrictions = get_single_column_restrictions_map(e); - for (auto&& [col, col_restrictions] : single_col_restrictions) { if (has_supporting_index(col_restrictions, index_manager, allow_local)) { return true; @@ -1296,7 +1293,7 @@ statement_restrictions::statement_restrictions(private_tag, && !type.is_delete(); _has_queriable_pk_index = parition_key_restrictions_have_supporting_index(sim, allow_local) && !type.is_delete(); - _has_queriable_regular_index = index_supports_some_column(_nonprimary_key_restrictions, sim, allow_local) + _has_queriable_regular_index = index_supports_some_column(_single_column_nonprimary_key_restrictions, sim, allow_local) && !type.is_delete(); } else { _has_queriable_ck_index = false; @@ -1727,8 +1724,8 @@ bool statement_restrictions::clustering_columns_restrictions_have_supporting_ind const secondary_index::secondary_index_manager& index_manager, expr::allow_local_index allow_local) const { // Single column restrictions can be handled by the existing code - if (!contains_multi_column_restriction(_clustering_columns_restrictions)) { - return index_supports_some_column(_clustering_columns_restrictions, index_manager, allow_local); + if (!_has_multi_column) { + return index_supports_some_column(_single_column_clustering_key_restrictions, index_manager, allow_local); } // Multi column restrictions have to be handled separately @@ -1805,7 +1802,7 @@ bool statement_restrictions::parition_key_restrictions_have_supporting_index(con return false; } - return 
index_supports_some_column(_partition_key_restrictions, index_manager, allow_local); + return index_supports_some_column(_single_column_partition_key_restrictions, index_manager, allow_local); } void statement_restrictions::process_clustering_columns_restrictions(bool for_view, bool allow_filtering) { From fa6f239cc7420fcc26368e40930883cef3422ca2 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 Mar 2026 14:33:49 +0200 Subject: [PATCH 67/76] cql3: statement_restrictions: add predicate-based index support checking Add `op` and `is_subscript` fields to `struct predicate` and populate them in all predicate creation sites in `to_predicates()`. These fields record the binary operator and whether the LHS is a subscript (map element access), which are the two pieces of information needed to query index support. Add `is_predicate_supported_by()` which mirrors `is_supported_by_helper()` but operates on a single predicate's fields instead of walking the expression tree. Add a predicate-vector overload of `index_supports_some_column()` and use it in the constructor to replace expression-based index support checks for single-column partition key, clustering key, and non-primary-key restrictions. The multi-column clustering key case still uses the existing expression-based path. --- cql3/restrictions/statement_restrictions.cc | 91 ++++++++++++++++++++- cql3/restrictions/statement_restrictions.hh | 2 + 2 files changed, 89 insertions(+), 4 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 50475485b8..2ac4a096e8 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -281,6 +281,8 @@ make_conjunction(predicate a, predicate b) { .is_upper_bound = false, // A conjunction has no single direction. .is_lower_bound = false, // A conjunction has no single direction. .order = a.order, // Both predicates are on the same column, so comparison order must agree. 
+ .op = std::nullopt, // A conjunction has no single operator. + .is_subscript = a.is_subscript, // Both predicates are on the same target, so they agree on subscript-ness. }; } @@ -382,6 +384,7 @@ to_predicates( .filter = oper, .on = on_column{col.col}, .is_not_null_single_column = is_null_constant(oper.rhs), + .op = oper.op, }); } if (is_compare(oper.op)) { @@ -403,6 +406,7 @@ to_predicates( .is_upper_bound = (oper.op == oper_t::LT || oper.op == oper_t::LTE), .is_lower_bound = (oper.op == oper_t::GT || oper.op == oper_t::GTE), .order = oper.order, + .op = oper.op, }); } else if (oper.op == oper_t::IN) { auto solve = [oper, type, cdef] (const query_options& options) { @@ -415,6 +419,7 @@ to_predicates( .is_singleton = false, .is_in = true, .order = oper.order, + .op = oper.op, }); } else if (oper.op == oper_t::CONTAINS || oper.op == oper_t::CONTAINS_KEY) { auto solve = [oper] (const query_options& options) { @@ -430,6 +435,7 @@ to_predicates( .on = on_column{col.col}, .is_singleton = false, .order = oper.order, + .op = oper.op, }); } return cannot_solve_on_column(oper, col.col); @@ -459,6 +465,8 @@ to_predicates( .is_singleton = true, .equality = true, .order = oper.order, + .op = oper.op, + .is_subscript = true, }); } return cannot_solve_on_column(oper, col.col); @@ -498,6 +506,7 @@ to_predicates( .is_upper_bound = (oper.op == oper_t::LT || oper.op == oper_t::LTE), .is_lower_bound = (oper.op == oper_t::GT || oper.op == oper_t::GTE), .order = oper.order, + .op = oper.op, }); }, [&] (const function_call& token_fun_call) -> std::vector { @@ -542,6 +551,7 @@ to_predicates( .is_upper_bound = (oper.op == oper_t::LT || oper.op == oper_t::LTE), .is_lower_bound = (oper.op == oper_t::GT || oper.op == oper_t::GTE), .order = oper.order, + .op = oper.op, }); }, [&] (const binary_operator&) -> std::vector { @@ -771,6 +781,67 @@ bool is_supported_by(const expression& expr, const secondary_index::index& idx) return s != 
secondary_index::index::supports_expression_v::from_bool(false); } +// Like is_supported_by_helper, but operates on a single predicate instead of walking +// an expression tree. Returns how an index supports this predicate: UsualYes, CollectionYes, or No. +static secondary_index::index::supports_expression_v +is_predicate_supported_by(const predicate& pred, const secondary_index::index& idx) { + using ret_t = secondary_index::index::supports_expression_v; + if (!pred.op) { + return ret_t::from_bool(false); + } + return std::visit(overloaded_functor{ + [&] (const on_column& oc) -> ret_t { + if (pred.is_subscript) { + return idx.supports_subscript_expression(*oc.column, *pred.op); + } + return idx.supports_expression(*oc.column, *pred.op); + }, + [&] (const on_clustering_key_prefix& ocp) -> ret_t { + // Single-element tuple_constructor: treat like a single column + if (ocp.columns.size() == 1) { + return idx.supports_expression(*ocp.columns[0], *pred.op); + } + // Multi-element tuple: index cannot avoid filtering + return ret_t::from_bool(false); + }, + [&] (const on_partition_key_token&) -> ret_t { + return ret_t::from_bool(false); + }, + [&] (const on_row&) -> ret_t { + return ret_t::from_bool(false); + }, + }, pred.on); +} + +using single_column_predicate_vectors = std::map, schema_pos_column_definition_comparator>; + +// Like index_supports_some_column, but operates on per-column predicate vectors +// instead of walking per-column expression trees. 
+static bool index_supports_some_column( + const single_column_predicate_vectors& per_column_predicates, + const secondary_index::secondary_index_manager& index_manager, + allow_local_index allow_local) { + using namespace secondary_index; + for (auto& [col, preds] : per_column_predicates) { + for (const auto& idx : index_manager.list_indexes()) { + if (!allow_local && idx.metadata().local()) { + continue; + } + // AND all predicate results for this column-index pair, mirroring the + // conjunction logic in is_supported_by_helper. Initialize with the + // first predicate (not from_bool(true)) to preserve CollectionYes. + auto result = is_predicate_supported_by(preds[0], idx); + for (size_t i = 1; i < preds.size(); ++i) { + result = result && is_predicate_supported_by(preds[i], idx); + } + if (result) { + return true; + } + } + } + return false; +} + bool has_supporting_index( const expression& expr, @@ -1079,6 +1150,9 @@ statement_restrictions::statement_restrictions(private_tag, std::unordered_map pk_range_preds; std::vector mc_ck_preds; std::unordered_map sc_ck_preds; + single_column_predicate_vectors sc_pk_pred_vectors; + single_column_predicate_vectors sc_ck_pred_vectors; + single_column_predicate_vectors sc_nonpk_pred_vectors; for (auto& pred : predicates) { if (pred.is_not_null_single_column) { auto* col = require_on_single_column(pred); @@ -1189,6 +1263,7 @@ statement_restrictions::statement_restrictions(private_tag, auto [it, inserted] = _single_column_partition_key_restrictions.try_emplace(def, expr::conjunction{}); it->second = expr::make_conjunction(std::move(it->second), pred.filter); } + sc_pk_pred_vectors[def].push_back(pred); if (pred.equality || pred.is_in) { auto [it, inserted] = pk_range_preds.try_emplace(def, pred); if (!inserted) { @@ -1225,6 +1300,7 @@ statement_restrictions::statement_restrictions(private_tag, auto [it, inserted] = _single_column_clustering_key_restrictions.try_emplace(def, expr::conjunction{}); it->second = 
expr::make_conjunction(std::move(it->second), pred.filter); } + sc_ck_pred_vectors[def].push_back(pred); { auto [it, inserted] = sc_ck_preds.try_emplace(def, pred); if (!inserted) { @@ -1243,6 +1319,7 @@ statement_restrictions::statement_restrictions(private_tag, auto [it, inserted] = _single_column_nonprimary_key_restrictions.try_emplace(def, expr::conjunction{}); it->second = expr::make_conjunction(std::move(it->second), pred.filter); } + sc_nonpk_pred_vectors[def].push_back(pred); } } else { throw exceptions::invalid_request_exception(format("Unhandled restriction: {}", pred.filter)); @@ -1289,11 +1366,17 @@ statement_restrictions::statement_restrictions(private_tag, const expr::allow_local_index allow_local( !has_partition_key_unrestricted_components() && partition_key_restrictions_is_all_eq()); - _has_queriable_ck_index = clustering_columns_restrictions_have_supporting_index(sim, allow_local) + if (!_has_multi_column) { + _has_queriable_ck_index = index_supports_some_column(sc_ck_pred_vectors, sim, allow_local) + && !type.is_delete(); + } else { + _has_queriable_ck_index = clustering_columns_restrictions_have_supporting_index(sim, allow_local) + && !type.is_delete(); + } + _has_queriable_pk_index = !has_token + && index_supports_some_column(sc_pk_pred_vectors, sim, allow_local) && !type.is_delete(); - _has_queriable_pk_index = parition_key_restrictions_have_supporting_index(sim, allow_local) - && !type.is_delete(); - _has_queriable_regular_index = index_supports_some_column(_single_column_nonprimary_key_restrictions, sim, allow_local) + _has_queriable_regular_index = index_supports_some_column(sc_nonpk_pred_vectors, sim, allow_local) && !type.is_delete(); } else { _has_queriable_ck_index = false; diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index a88675266c..93f038b94b 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -86,6 +86,8 @@ struct 
predicate { bool is_upper_bound = false; // operator is LT/LTE bool is_lower_bound = false; // operator is GT/GTE expr::comparison_order order = expr::comparison_order::cql; + std::optional op; // the binary operator, if any + bool is_subscript = false; // whether the LHS is a subscript (map element access) }; ///In some cases checking if columns have indexes is undesired of even From 1aafe0708a127cb7b2b5d7cdf610d783bdb3beae Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 Mar 2026 14:37:31 +0200 Subject: [PATCH 68/76] cql3: statement_restrictions: replace multi-column and PK index support checks with predicate-based versions Replace clustering_columns_restrictions_have_supporting_index(), multi_column_clustering_restrictions_are_supported_by(), get_clustering_slice(), and partition_key_restrictions_have_supporting_index() with predicate-based equivalents that use the already-accumulated mc_ck_preds and sc_pk_pred_vectors locals. The new multi_column_predicates_have_supporting_index() checks each multi-column predicate's columns list directly against indexes, avoiding expression tree walks through find_in_expression and bounds_slice. --- cql3/restrictions/statement_restrictions.cc | 124 ++++++-------------- cql3/restrictions/statement_restrictions.hh | 10 -- 2 files changed, 36 insertions(+), 98 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 2ac4a096e8..ecd6ad70d1 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -827,9 +827,13 @@ static bool index_supports_some_column( if (!allow_local && idx.metadata().local()) { continue; } + if (preds.empty()) { + continue; + } // AND all predicate results for this column-index pair, mirroring the - // conjunction logic in is_supported_by_helper. Initialize with the - // first predicate (not from_bool(true)) to preserve CollectionYes. + // conjunction logic in is_supported_by_helper. 
Seed with the first + // predicate's result instead of from_bool(true) (which is UsualYes) + // so that CollectionYes values are preserved through the chain. auto result = is_predicate_supported_by(preds[0], idx); for (size_t i = 1; i < preds.size(); ++i) { result = result && is_predicate_supported_by(preds[i], idx); @@ -842,6 +846,35 @@ static bool index_supports_some_column( return false; } +// Check if any index supports any multi-column clustering predicate. +// For each predicate in mc_preds, checks if any column in the predicate's +// columns list is supported by the index for that predicate's operator. +static bool multi_column_predicates_have_supporting_index( + const std::vector& mc_preds, + const secondary_index::secondary_index_manager& index_manager, + allow_local_index allow_local) { + for (const auto& idx : index_manager.list_indexes()) { + if (!allow_local && idx.metadata().local()) { + continue; + } + for (const auto& pred : mc_preds) { + if (!pred.op) { + continue; + } + auto* ocp = std::get_if(&pred.on); + if (!ocp) { + continue; + } + for (const auto* col : ocp->columns) { + if (idx.supports_expression(*col, *pred.op)) { + return true; + } + } + } + } + return false; +} + bool has_supporting_index( const expression& expr, @@ -1370,7 +1403,7 @@ statement_restrictions::statement_restrictions(private_tag, _has_queriable_ck_index = index_supports_some_column(sc_ck_pred_vectors, sim, allow_local) && !type.is_delete(); } else { - _has_queriable_ck_index = clustering_columns_restrictions_have_supporting_index(sim, allow_local) + _has_queriable_ck_index = multi_column_predicates_have_supporting_index(mc_ck_preds, sim, allow_local) && !type.is_delete(); } _has_queriable_pk_index = !has_token @@ -1803,91 +1836,6 @@ const column_definition& statement_restrictions::unrestricted_column(column_kind to_sstring(kind), restrictions)); }; -bool statement_restrictions::clustering_columns_restrictions_have_supporting_index( - const 
secondary_index::secondary_index_manager& index_manager, - expr::allow_local_index allow_local) const { - // Single column restrictions can be handled by the existing code - if (!_has_multi_column) { - return index_supports_some_column(_single_column_clustering_key_restrictions, index_manager, allow_local); - } - - // Multi column restrictions have to be handled separately - for (const auto& index : index_manager.list_indexes()) { - if (!allow_local && index.metadata().local()) { - continue; - } - if (multi_column_clustering_restrictions_are_supported_by(index)) { - return true; - } - } - return false; -} - -bool statement_restrictions::multi_column_clustering_restrictions_are_supported_by( - const secondary_index::index& index) const { - // Slice restrictions have to be checked depending on the clustering slice - if (has_slice(_clustering_columns_restrictions)) { - bounds_slice clustering_slice = get_clustering_slice(); - - const expr::column_value* supported_column = - find_in_expression(_clustering_columns_restrictions, - [&](const expr::column_value& cval) -> bool { - return clustering_slice.is_supported_by(*cval.col, index); - } - ); - return supported_column != nullptr; - } - - // Otherwise it has to be a single binary operator with EQ or IN. - // This is checked earlier during add_restriction. 
- const expr::binary_operator* single_binop = - expr::as_if(&_clustering_columns_restrictions); - if (single_binop == nullptr) { - on_internal_error(rlogger, format( - "multi_column_clustering_restrictions_are_supported_by more than one non-slice restriction: {}", - _clustering_columns_restrictions)); - } - - if (single_binop->op != expr::oper_t::IN && single_binop->op != expr::oper_t::EQ) { - on_internal_error(rlogger, format("Disallowed multi column restriction: {}", *single_binop)); - } - - const expr::column_value* supported_column = - find_in_expression(_clustering_columns_restrictions, - [&](const expr::column_value& cval) -> bool { - return index.supports_expression(*cval.col, single_binop->op); - } - ); - return supported_column != nullptr; -} - -bounds_slice statement_restrictions::get_clustering_slice() const { - std::optional result; - - expr::for_each_expression(_clustering_columns_restrictions, - [&](const expr::binary_operator& binop) { - bounds_slice cur_slice = bounds_slice::from_binary_operator(binop); - if (!result.has_value()) { - result = cur_slice; - } else { - result->merge(cur_slice); - } - } - ); - - return *result; -} - -bool statement_restrictions::parition_key_restrictions_have_supporting_index(const secondary_index::secondary_index_manager& index_manager, - expr::allow_local_index allow_local) const { - // Token restrictions can't be supported by an index - if (has_token_restrictions()) { - return false; - } - - return index_supports_some_column(_single_column_partition_key_restrictions, index_manager, allow_local); -} - void statement_restrictions::process_clustering_columns_restrictions(bool for_view, bool allow_filtering) { if (!has_clustering_columns_restriction()) { return; diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index 93f038b94b..fd3724e1d6 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -358,18 +358,8 @@ 
public: size_t partition_key_restrictions_size() const; - bool parition_key_restrictions_have_supporting_index(const secondary_index::secondary_index_manager& index_manager, expr::allow_local_index allow_local) const; - size_t clustering_columns_restrictions_size() const; - bool clustering_columns_restrictions_have_supporting_index( - const secondary_index::secondary_index_manager& index_manager, - expr::allow_local_index allow_local) const; - - bool multi_column_clustering_restrictions_are_supported_by(const secondary_index::index& index) const; - - bounds_slice get_clustering_slice() const; - /** * Checks if the clustering key has some unrestricted components. * @return true if the clustering key has some unrestricted components, false otherwise. From c42397e995f18c32e8d7f6f817e1e017c753850a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 Mar 2026 14:39:27 +0200 Subject: [PATCH 69/76] cql3: statement_restrictions: remove expression-based has_supporting_index and index_supports_some_column These functions are no longer called now that all index support checks in the constructor use predicate-based alternatives. The expression-based is_supported_by and is_supported_by_helper are still needed by choose_idx() and calculate_column_defs_for_filtering_and_erase_restrictions_used_for_index(). --- cql3/restrictions/statement_restrictions.cc | 36 --------------------- 1 file changed, 36 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index ecd6ad70d1..bd886aff5f 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -65,17 +65,6 @@ interval to_range(oper_t op, const clustering_key_prefix& /// True iff the index can support the entire expression. extern bool is_supported_by(const expression&, const secondary_index::index&); -/// True iff any of the indices from the manager can support the entire expression. 
If allow_local, use all -/// indices; otherwise, use only global indices. -extern bool has_supporting_index( - const expression&, const secondary_index::secondary_index_manager&, allow_local_index allow_local); - -// Looks at each column individually and checks whether some index can support restrictions on this single column. -extern bool index_supports_some_column( - const single_column_restrictions_map&, - const secondary_index::secondary_index_manager&, - allow_local_index allow_local); - inline bool needs_filtering(oper_t op) { return (op == oper_t::CONTAINS) || (op == oper_t::CONTAINS_KEY) || (op == oper_t::LIKE) || (op == oper_t::IS_NOT) || (op == oper_t::NEQ) || (op == oper_t::NOT_IN); @@ -876,31 +865,6 @@ static bool multi_column_predicates_have_supporting_index( } -bool has_supporting_index( - const expression& expr, - const secondary_index::secondary_index_manager& index_manager, - allow_local_index allow_local) { - const auto indexes = index_manager.list_indexes(); - const auto support = std::bind(is_supported_by, std::ref(expr), std::placeholders::_1); - return allow_local ? 
std::ranges::any_of(indexes, support) - : std::ranges::any_of( - indexes | std::views::filter([] (const secondary_index::index& i) { return !i.metadata().local(); }), - support); -} - -bool index_supports_some_column( - const single_column_restrictions_map& single_col_restrictions, - const secondary_index::secondary_index_manager& index_manager, - allow_local_index allow_local) { - for (auto&& [col, col_restrictions] : single_col_restrictions) { - if (has_supporting_index(col_restrictions, index_manager, allow_local)) { - return true; - } - } - - return false; -} - bool is_on_collection(const binary_operator& b) { if (b.op == oper_t::CONTAINS || b.op == oper_t::CONTAINS_KEY) { return true; From 68c2e292ac1fba570f9f710a9012bfa08565831b Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 Mar 2026 14:59:58 +0200 Subject: [PATCH 70/76] cql3: statement_restrictions: replace do_find_idx and is_supported_by with predicate-based versions Convert do_find_idx() from a member function that walks expression trees via index_restrictions()/for_each_expression/extract_single_column_restrictions to a static free function that iterates index_search_group spans using are_predicates_supported_by(). Convert calculate_column_defs_for_filtering_and_erase_restrictions_used_for_index() to use predicate vectors instead of expression-based is_supported_by(). Remove now-dead code: is_supported_by(), is_supported_by_helper(), score() member function, and do_find_idx() member function. 
--- cql3/restrictions/statement_restrictions.cc | 215 ++++++++------------ cql3/restrictions/statement_restrictions.hh | 17 +- 2 files changed, 95 insertions(+), 137 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index bd886aff5f..e69cf2091b 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -62,9 +62,6 @@ extern interval to_range(const value_set&); /// A range of all X such that X op val. interval to_range(oper_t op, const clustering_key_prefix& val); -/// True iff the index can support the entire expression. -extern bool is_supported_by(const expression&, const secondary_index::index&); - inline bool needs_filtering(oper_t op) { return (op == oper_t::CONTAINS) || (op == oper_t::CONTAINS_KEY) || (op == oper_t::LIKE) || (op == oper_t::IS_NOT) || (op == oper_t::NEQ) || (op == oper_t::NOT_IN); @@ -689,85 +686,6 @@ constexpr inline secondary_index::index::supports_expression_v operator&&(second return v1 == True && v2 == True ? 
True : index::supports_expression_v::from_bool(false); } -secondary_index::index::supports_expression_v is_supported_by_helper(const expression& expr, const secondary_index::index& idx) { - using ret_t = secondary_index::index::supports_expression_v; - using namespace secondary_index; - return expr::visit(overloaded_functor{ - [&] (const conjunction& conj) -> ret_t { - if (conj.children.empty()) { - return index::supports_expression_v::from_bool(true); - } - auto init = is_supported_by_helper(conj.children[0], idx); - return std::accumulate(std::begin(conj.children) + 1, std::end(conj.children), init, - [&] (ret_t acc, const expression& child) -> ret_t { - return acc && is_supported_by_helper(child, idx); - }); - }, - [&] (const binary_operator& oper) { - return expr::visit(overloaded_functor{ - [&] (const column_value& col) { - return idx.supports_expression(*col.col, oper.op); - }, - [&] (const tuple_constructor& tuple) { - if (tuple.elements.size() == 1) { - if (auto column = expr::as_if(&tuple.elements[0])) { - return idx.supports_expression(*column->col, oper.op); - } - } - // We don't use index table for multi-column restrictions, as it cannot avoid filtering. 
- return index::supports_expression_v::from_bool(false); - }, - [&] (const function_call&) { return index::supports_expression_v::from_bool(false); }, - [&] (const subscript& s) -> ret_t { - const column_value& col = get_subscripted_column(s); - return idx.supports_subscript_expression(*col.col, oper.op); - }, - [&] (const binary_operator&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: nested binary operators are not supported"); - }, - [&] (const conjunction&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: conjunctions are not supported as the LHS of a binary expression"); - }, - [] (const constant&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: constants are not supported as the LHS of a binary expression"); - }, - [] (const unresolved_identifier&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: an unresolved identifier is not supported as the LHS of a binary expression"); - }, - [&] (const column_mutation_attribute&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: writetime/ttl are not supported as the LHS of a binary expression"); - }, - [&] (const cast&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: typecasts are not supported as the LHS of a binary expression"); - }, - [&] (const field_selection&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: field selections are not supported as the LHS of a binary expression"); - }, - [&] (const bind_variable&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: bind variables are not supported as the LHS of a binary expression"); - }, - [&] (const untyped_constant&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: untyped constants are not supported as the LHS of a binary expression"); - }, - [&] (const collection_constructor&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: collection constructors are not supported as the LHS of a binary expression"); - }, - [&] (const 
usertype_constructor&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: user type constructors are not supported as the LHS of a binary expression"); - }, - [&] (const temporary&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: temporaries are not supported as the LHS of a binary expression"); - }, - }, oper.lhs); - }, - [] (const auto& default_case) { return index::supports_expression_v::from_bool(false); } - }, expr); -} -} - -bool is_supported_by(const expression& expr, const secondary_index::index& idx) { - auto s = is_supported_by_helper(expr, idx); - return s != secondary_index::index::supports_expression_v::from_bool(false); } // Like is_supported_by_helper, but operates on a single predicate instead of walking @@ -802,7 +720,10 @@ is_predicate_supported_by(const predicate& pred, const secondary_index::index& i }, pred.on); } -using single_column_predicate_vectors = std::map, schema_pos_column_definition_comparator>; +struct index_search_group { + const single_column_predicate_vectors& pred_vectors; + const expr::expression& restriction_expr; +}; // Like index_supports_some_column, but operates on per-column predicate vectors // instead of walking per-column expression trees. @@ -864,6 +785,27 @@ static bool multi_column_predicates_have_supporting_index( return false; } +// Check if all predicates for a column are supported by an index. +// Mirrors the conjunction logic of is_supported_by_helper: initializes with +// the first predicate's result, then ANDs the rest. 
+static bool are_predicates_supported_by(const std::vector& preds, + const secondary_index::index& idx) { + if (preds.empty()) { + return true; + } + auto result = is_predicate_supported_by(preds[0], idx); + for (size_t i = 1; i < preds.size(); ++i) { + result = result && is_predicate_supported_by(preds[i], idx); + } + return bool(result); +} + +static std::pair, expr::expression> do_find_idx( + bool uses_secondary_indexing, + const secondary_index::secondary_index_manager& sim, + std::span search_groups, + allow_local_index allow_local); + bool is_on_collection(const binary_operator& b) { if (b.op == oper_t::CONTAINS || b.op == oper_t::CONTAINS_KEY) { @@ -1386,8 +1328,10 @@ statement_restrictions::statement_restrictions(private_tag, // Some but not all of the partition key columns have been specified; // hence we need turn these restrictions into index expressions. + std::vector search_groups; if (_uses_secondary_indexing || pk_restrictions_need_filtering()) { _index_restrictions.push_back(_partition_key_restrictions); + search_groups.push_back({sc_pk_pred_vectors, _partition_key_restrictions}); } // If the only updated/deleted columns are static, then we don't need clustering columns. 
@@ -1420,6 +1364,7 @@ statement_restrictions::statement_restrictions(private_tag, if (_uses_secondary_indexing || clustering_key_restrictions_need_filtering()) { _index_restrictions.push_back(_clustering_columns_restrictions); + search_groups.push_back({sc_ck_pred_vectors, _clustering_columns_restrictions}); } else if (find_binop(_clustering_columns_restrictions, is_on_collection)) { fail(unimplemented::cause::INDEXES); } @@ -1433,6 +1378,7 @@ statement_restrictions::statement_restrictions(private_tag, "this query despite the performance unpredictability, use ALLOW FILTERING"); } _index_restrictions.push_back(_nonprimary_key_restrictions); + search_groups.push_back({sc_nonpk_pred_vectors, _nonprimary_key_restrictions}); } if (_uses_secondary_indexing && !(for_view || allow_filtering)) { @@ -1442,10 +1388,14 @@ statement_restrictions::statement_restrictions(private_tag, if (_check_indexes) { auto cf = db.find_column_family(_schema); auto& sim = cf.get_index_manager(); - std::tie(_idx_opt, _idx_restrictions) = do_find_idx(sim); + const expr::allow_local_index allow_local_for_idx( + !has_partition_key_unrestricted_components() + && partition_key_restrictions_is_all_eq()); + std::tie(_idx_opt, _idx_restrictions) = do_find_idx( + _uses_secondary_indexing, sim, search_groups, allow_local_for_idx); } - calculate_column_defs_for_filtering_and_erase_restrictions_used_for_index(db); + calculate_column_defs_for_filtering_and_erase_restrictions_used_for_index(db, sc_pk_pred_vectors, sc_ck_pred_vectors, sc_nonpk_pred_vectors); if (pk_restrictions_need_filtering()) { auto partition_key_filter = expr::conjunction{ @@ -1600,23 +1550,27 @@ bool statement_restrictions::is_empty() const { return _where.empty(); } -// Current score table: -// local and restrictions include full partition key: 2 -// global: 1 -// local and restrictions does not include full partition key: 0 (do not pick) -int statement_restrictions::score(const secondary_index::index& index) const { - if 
(index.metadata().local()) { - const bool allow_local = !has_partition_key_unrestricted_components() && partition_key_restrictions_is_all_eq(); - return allow_local ? 2 : 0; - } - return 1; -} -std::pair, expr::expression> statement_restrictions::do_find_idx(const secondary_index::secondary_index_manager& sim) const { - if (!_uses_secondary_indexing) { +static std::pair, expr::expression> do_find_idx( + bool uses_secondary_indexing, + const secondary_index::secondary_index_manager& sim, + std::span search_groups, + allow_local_index allow_local) { + if (!uses_secondary_indexing) { return {std::nullopt, expr::conjunction({})}; } + // Current score table: + // local and restrictions include full partition key: 2 + // global: 1 + // local and restrictions does not include full partition key: 0 (do not pick) + auto index_score = [&] (const secondary_index::index& index) -> int { + if (index.metadata().local()) { + return allow_local ? 2 : 0; + } + return 1; + }; + std::optional chosen_index; int chosen_index_score = 0; expr::expression chosen_index_restrictions = expr::conjunction({}); @@ -1628,22 +1582,25 @@ std::pair, expr::expression> statement_res // index), but it is critical that two coordinators - or the same // coordinator over time - must choose the same index for the same query. // Otherwise, paging can break (see issue #7969). - for (const expr::expression& restriction : index_restrictions()) { - if (has_partition_token(restriction, *_schema) || contains_multi_column_restriction(restriction)) { - continue; - } - expr::for_each_expression(restriction, [&](const expr::column_value& cval) { - auto& cdef = cval.col; - expr::expression col_restrictions = expr::conjunction { - .children = extract_single_column_restrictions_for_column(std::span(&restriction, 1), *cdef) - }; + for (const auto& group : search_groups) { + // Iterate columns in WHERE-clause order (from the restriction expression) + // rather than schema-position order (from the pred_vectors map). 
When + // scores are tied the first column visited wins (strict >), so the + // iteration order determines which index is chosen for equal-score + // candidates -- matching the old expression-based do_find_idx behaviour. + expr::for_each_expression(group.restriction_expr, [&](const expr::column_value& cval) { + auto it = group.pred_vectors.find(cval.col); + if (it == group.pred_vectors.end()) { + return; + } + const auto& [col, preds] = *it; for (const auto& index : sim.list_indexes()) { - if (cdef->name_as_text() == index.target_column() && - is_supported_by(col_restrictions, index) && - score(index) > chosen_index_score) { + if (col->name_as_text() == index.target_column() && + are_predicates_supported_by(preds, index) && + index_score(index) > chosen_index_score) { chosen_index = index; - chosen_index_score = score(index); - chosen_index_restrictions = restriction; + chosen_index_score = index_score(index); + chosen_index_restrictions = group.restriction_expr; } } }); @@ -1665,26 +1622,32 @@ std::vector statement_restrictions::get_column_defs_fo return _column_defs_for_filtering; } -void statement_restrictions::calculate_column_defs_for_filtering_and_erase_restrictions_used_for_index(data_dictionary::database db) { +void statement_restrictions::calculate_column_defs_for_filtering_and_erase_restrictions_used_for_index( + data_dictionary::database db, + const single_column_predicate_vectors& sc_pk_pred_vectors, + const single_column_predicate_vectors& sc_ck_pred_vectors, + const single_column_predicate_vectors& sc_nonpk_pred_vectors) { std::vector column_defs_for_filtering; if (need_filtering()) { std::optional opt_idx; if (_check_indexes) { opt_idx = _idx_opt; } - auto column_uses_indexing = [&opt_idx] (const column_definition* cdef, const expr::expression* single_col_restr) { - return opt_idx && single_col_restr && is_supported_by(*single_col_restr, *opt_idx); + auto column_uses_indexing = [&opt_idx] (const single_column_predicate_vectors& pred_vectors, + const 
column_definition* cdef) { + if (!opt_idx) { + return false; + } + auto it = pred_vectors.find(cdef); + if (it == pred_vectors.end()) { + return false; + } + return are_predicates_supported_by(it->second, *opt_idx); }; if (pk_restrictions_need_filtering()) { for (auto&& cdef : expr::get_sorted_column_defs(_partition_key_restrictions)) { - const expr::expression* single_col_restr = nullptr; auto it = _single_column_partition_key_restrictions.find(cdef); - if (it != _single_column_partition_key_restrictions.end()) { - if (is_single_column_restriction(it->second)) { - single_col_restr = &it->second; - } - } - if (!column_uses_indexing(cdef, single_col_restr)) { + if (!column_uses_indexing(sc_pk_pred_vectors, cdef)) { column_defs_for_filtering.emplace_back(cdef); } else { _single_column_partition_key_restrictions.erase(it); @@ -1696,12 +1659,8 @@ void statement_restrictions::calculate_column_defs_for_filtering_and_erase_restr column_id first_filtering_id = pk_has_unrestricted_components ? 0 : _schema->clustering_key_columns().begin()->id + num_clustering_prefix_columns_that_need_not_be_filtered(); for (auto&& cdef : expr::get_sorted_column_defs(_clustering_columns_restrictions)) { - const expr::expression* single_col_restr = nullptr; auto it = _single_column_clustering_key_restrictions.find(cdef); - if (it != _single_column_clustering_key_restrictions.end()) { - single_col_restr = &it->second; - } - if (cdef->id >= first_filtering_id && !column_uses_indexing(cdef, single_col_restr)) { + if (cdef->id >= first_filtering_id && !column_uses_indexing(sc_ck_pred_vectors, cdef)) { column_defs_for_filtering.emplace_back(cdef); } else { _single_column_clustering_key_restrictions.erase(it); @@ -1710,7 +1669,7 @@ void statement_restrictions::calculate_column_defs_for_filtering_and_erase_restr } for (auto it = _single_column_nonprimary_key_restrictions.begin(); it != _single_column_nonprimary_key_restrictions.end();) { auto&& [cdef, cur_restr] = *it; - if 
(!column_uses_indexing(cdef, &cur_restr)) { + if (!column_uses_indexing(sc_nonpk_pred_vectors, cdef)) { column_defs_for_filtering.emplace_back(cdef); ++it; } else { diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index fd3724e1d6..c0940bdb1f 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -122,6 +122,9 @@ using partition_range_restrictions = std::variant< token_range_restrictions, single_column_partition_range_restrictions>; +// A map of per-column predicate vectors, ordered by schema position. +using single_column_predicate_vectors = std::map, expr::schema_pos_column_definition_comparator>; + /** * The restrictions corresponding to the relations specified on the where-clause of CQL query. */ @@ -332,12 +335,6 @@ public: */ std::vector get_column_defs_for_filtering(data_dictionary::database db) const; - /** - * Gives a score that the index has - index with the highest score will be chosen - * in find_idx() - */ - int score(const secondary_index::index& index) const; - /** * Determines the index to be used with the restriction. 
* @param db - the data_dictionary::database context (for extracting index manager) @@ -377,8 +374,6 @@ public: schema_ptr get_view_schema() const { return _view_schema; } private: - std::pair, expr::expression> do_find_idx(const secondary_index::secondary_index_manager& sim) const; - void process_partition_key_restrictions(bool for_view, bool allow_filtering, statements::statement_type type); /** @@ -406,7 +401,11 @@ private: void add_clustering_restrictions_to_idx_ck_prefix(const schema& idx_tbl_schema); unsigned int num_clustering_prefix_columns_that_need_not_be_filtered() const; - void calculate_column_defs_for_filtering_and_erase_restrictions_used_for_index(data_dictionary::database db); + void calculate_column_defs_for_filtering_and_erase_restrictions_used_for_index( + data_dictionary::database db, + const single_column_predicate_vectors& sc_pk_pred_vectors, + const single_column_predicate_vectors& sc_ck_pred_vectors, + const single_column_predicate_vectors& sc_nonpk_pred_vectors); get_partition_key_ranges_fn_t build_partition_key_ranges_fn() const; get_clustering_bounds_fn_t build_get_clustering_bounds_fn() const; get_clustering_bounds_fn_t build_get_global_index_clustering_ranges_fn() const; From a725e392189a1db744cb4f0cce7b3b17596894d8 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 Mar 2026 20:43:14 +0200 Subject: [PATCH 71/76] cql3: statement_restrictions: use predicate vector size for clustering prefix length Replace the per-column counting loop in num_clustering_prefix_columns_that_need_not_be_filtered() with a single return of _clustering_prefix_restrictions.size(). The early return of 0 for multi-column restrictions is kept, but it now tests _has_multi_column instead of calling contains_multi_column_restriction(). The old implementation called get_single_column_restrictions_map() to rebuild a per-column map from the clustering expression tree, then iterated it in schema order counting columns until it hit a gap, a needs-filtering predicate, or a slice.
But _clustering_prefix_restrictions is already built with exactly that same logic during the constructor (lines 1234-1248): it iterates CK columns in schema order, appending predicates until it encounters a gap in column_id, a predicate that needs_filtering, or a slice -- at which point it stops. So the vector's size is, by construction, the answer to the same question the old code was re-deriving at query time. This makes four helper functions dead code: - get_single_column_restrictions_map(): walked the expression tree to build a map of per-column restrictions. Was a ~15-line function that called get_sorted_column_defs() and extract_single_column_restrictions_for_column() for each column. - get_the_only_column(): extracted the single column_value from a restriction expression, asserting it was single-column. Called by the old loop body. - is_single_column_restriction(): thin wrapper around get_single_column_restriction_column(). - get_single_column_restriction_column(): ~25-line function that walked an expression tree with for_each_expression to determine whether all column_value nodes refer to the same column. Called by the above two. Remove all four functions and their forward declarations (-95 lines). --- cql3/restrictions/statement_restrictions.cc | 102 ++------------------ 1 file changed, 7 insertions(+), 95 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index e69cf2091b..24bda1bb97 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -88,17 +88,6 @@ extern bool is_on_collection(const binary_operator&); // Uses column_defintion::operator== for comparison, columns with the same name but different schema will not be equal. bool has_eq_restriction_on_column(const column_definition& column, const expression& e); -// Checks whether this expression contains restrictions on one single column. 
-// There might be more than one restriction, but exactly one column. -// The expression must be prepared. -bool is_single_column_restriction(const expression&); - -// Gets the only column from a single_column_restriction expression. -const column_value& get_the_only_column(const expression&); - -// Extracts map of single column restrictions for each column from expression -single_column_restrictions_map get_single_column_restrictions_map(const expression&); - bool contains_multi_column_restriction(const expression&); @@ -930,68 +919,6 @@ std::vector extract_single_column_restrictions_for_column(std::span< return std::move(v.restrictions); } -static std::optional> get_single_column_restriction_column(const expression& e) { - if (find_in_expression(e, [](const auto&) {return true;})) { - on_internal_error(expr_logger, - seastar::format("get_single_column_restriction_column expects a prepared expression, but it's not: {}", e)); - } - - const column_value* the_only_column = nullptr; - bool expression_is_single_column = false; - - for_each_expression(e, - [&](const column_value& cval) { - if (the_only_column == nullptr) { - // It's the first column_value we've encountered - set it as the only column - the_only_column = &cval; - expression_is_single_column = true; - return; - } - - if (cval.col != the_only_column->col) { - // In case any other column is encountered the restriction - // restricts more than one column. 
- expression_is_single_column = false; - } - } - ); - - if (expression_is_single_column) { - return std::cref(*the_only_column); - } else { - return std::nullopt; - } -} - -bool is_single_column_restriction(const expression& e) { - return get_single_column_restriction_column(e).has_value(); -} - -const column_value& get_the_only_column(const expression& e) { - std::optional> result = get_single_column_restriction_column(e); - - if (!result.has_value()) { - on_internal_error(expr_logger, - format("get_the_only_column - bad expression: {}", e)); - } - - return *result; -} - -single_column_restrictions_map get_single_column_restrictions_map(const expression& e) { - single_column_restrictions_map result; - - std::vector sorted_defs = get_sorted_column_defs(e); - for (const column_definition* cdef : sorted_defs) { - expression col_restrictions = conjunction { - .children = extract_single_column_restrictions_for_column(std::span(&e, 1), *cdef) - }; - result.emplace(cdef, std::move(col_restrictions)); - } - - return result; -} - bool is_empty_restriction(const expression& e) { bool contains_non_conjunction = recurse_until(e, [&](const expression& e) -> bool { return !is(e); @@ -2847,31 +2774,16 @@ void statement_restrictions::add_clustering_restrictions_to_idx_ck_prefix(const // read). For example, if we have the filter "c1 < 3 and c2 > 3", c1 does not // need filtering but c2 does so num_prefix_columns_that_need_not_be_filtered // will be 1. +// +// _clustering_prefix_restrictions is already built with exactly this logic +// (iterating CK columns in schema order, stopping at gaps, needs-filtering +// predicates, and after a slice), so its size is the answer. Multi-column +// restrictions are treated as needing filtering. 
unsigned int statement_restrictions::num_clustering_prefix_columns_that_need_not_be_filtered() const { - if (contains_multi_column_restriction(_clustering_columns_restrictions)) { + if (_has_multi_column) { return 0; } - - single_column_restrictions_map column_restrictions = - get_single_column_restrictions_map(_clustering_columns_restrictions); - - // Restrictions currently need filtering in three cases: - // 1. any of them is a CONTAINS restriction - // 2. restrictions do not form a contiguous prefix (i.e. there are gaps in it) - // 3. a SLICE restriction isn't on a last place - column_id position = 0; - unsigned int count = 0; - for (const auto& restriction : column_restrictions | std::views::values) { - if (find_needs_filtering(restriction) - || position != get_the_only_column(restriction).col->id) { - return count; - } - if (!has_slice(restriction)) { - position = get_the_only_column(restriction).col->id + 1; - } - count++; - } - return count; + return _clustering_prefix_restrictions.size(); } get_clustering_bounds_fn_t From b093477cf7e548d0e3192673999018f196c2b000 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 Mar 2026 21:27:03 +0200 Subject: [PATCH 72/76] cql3: statement_restrictions: use predicate vectors in prepare_indexed_local Replace the extract_single_column_restrictions_for_column(_where, ...) call in prepare_indexed_local() with a direct lookup in the pre-built predicate vectors. The old code walked the entire WHERE expression tree to extract binary operators mentioning the indexed column, wrapped them in a conjunction, translated column definitions to the index schema, then called to_predicate_on_column() which walked the expression *again* to convert back to predicates. 
The new code selects the appropriate predicate vector map (PK, CK, or non-PK) based on the indexed column's kind, looks up the column's predicates directly, applies replace_column_def to each, and folds them with make_conjunction -- producing the same result without any expression tree walks. This removes the last production caller of extract_single_column_restrictions_for_column (unit tests in statement_restrictions_test.cc still exercise it). --- cql3/restrictions/statement_restrictions.cc | 34 +++++++++++++++------ cql3/restrictions/statement_restrictions.hh | 5 ++- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 24bda1bb97..bf4911401f 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1389,7 +1389,7 @@ statement_restrictions::statement_restrictions(private_tag, _view_schema = view_schema; if (im.local()) { - prepare_indexed_local(*view_schema); + prepare_indexed_local(*view_schema, sc_pk_pred_vectors, sc_ck_pred_vectors, sc_nonpk_pred_vectors); } else { prepare_indexed_global(*view_schema); } @@ -2726,7 +2726,10 @@ void statement_restrictions::prepare_indexed_global(const schema& idx_tbl_schema }; } -void statement_restrictions::prepare_indexed_local(const schema& idx_tbl_schema) { +void statement_restrictions::prepare_indexed_local(const schema& idx_tbl_schema, + const single_column_predicate_vectors& sc_pk_pred_vectors, + const single_column_predicate_vectors& sc_ck_pred_vectors, + const single_column_predicate_vectors& sc_nonpk_pred_vectors) { if (!_partition_range_is_simple) { return; } @@ -2738,14 +2741,27 @@ void statement_restrictions::prepare_indexed_local(const schema& idx_tbl_schema) const column_definition& indexed_column = idx_tbl_schema.column_at(column_kind::clustering_key, 0); const column_definition& indexed_column_base_schema = 
*_schema->get_column_definition(indexed_column.name()); - // Find index column restrictions in the WHERE clause - std::vector idx_col_restrictions = - extract_single_column_restrictions_for_column(_where, indexed_column_base_schema); - expr::expression idx_col_restriction_expr = expr::expression(expr::conjunction{std::move(idx_col_restrictions)}); + // Find index column restrictions in the pre-built predicate vectors + const single_column_predicate_vectors* pvecs; + switch (indexed_column_base_schema.kind) { + case column_kind::partition_key: pvecs = &sc_pk_pred_vectors; break; + case column_kind::clustering_key: pvecs = &sc_ck_pred_vectors; break; + default: pvecs = &sc_nonpk_pred_vectors; break; + } + auto it = pvecs->find(&indexed_column_base_schema); + if (it == pvecs->end()) { + on_internal_error(rlogger, format("prepare_indexed_local: no predicates found for column {}", indexed_column_base_schema.name_as_text())); + } + const auto& preds = it->second; - // Translate the restriction to use column from the index schema and add it - expr::expression replaced_idx_restriction = replace_column_def(idx_col_restriction_expr, &indexed_column); - _idx_tbl_ck_prefix->push_back(to_predicate_on_column(replaced_idx_restriction, &indexed_column, _schema.get())); + // Translate each predicate to use column from the index schema, then merge + auto folded = std::ranges::fold_left_first( + preds | std::views::transform([&indexed_column](const predicate& p) { + return replace_column_def(p, &indexed_column); + }), + make_conjunction + ); + _idx_tbl_ck_prefix->push_back(std::move(*folded)); // Add restrictions for the clustering key add_clustering_restrictions_to_idx_ck_prefix(idx_tbl_schema); diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index c0940bdb1f..3edb34cf01 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -485,7 +485,10 @@ public: private: /// Prepares 
internal data for evaluating index-table queries. Must be called before /// get_local_index_clustering_ranges(). - void prepare_indexed_local(const schema& idx_tbl_schema); + void prepare_indexed_local(const schema& idx_tbl_schema, + const single_column_predicate_vectors& sc_pk_pred_vectors, + const single_column_predicate_vectors& sc_ck_pred_vectors, + const single_column_predicate_vectors& sc_nonpk_pred_vectors); /// Prepares internal data for evaluating index-table queries. Must be called before /// get_global_index_clustering_ranges() or get_global_index_token_clustering_ranges(). From 72da1207d7096bdec30364d6dc110722419fa264 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 Mar 2026 21:43:14 +0200 Subject: [PATCH 73/76] cql3: statement_restrictions: remove extract_single_column_restrictions_for_column The previous commit made prepare_indexed_local() use the pre-built predicate vectors instead of calling extract_single_column_restrictions_for_column(). That was the last production caller. Remove the function definition (65 lines of expression-walking visitor) and its declaration/doc-comment from the header. Replace the unit test (expression_extract_column_restrictions) which directly called the removed function with synthetic column_definitions, with per_column_restriction_routing which exercises the same routing logic through the public analyze_statement_restrictions() API. The new test verifies not just factor counts but the exact (column_name, oper_t) pairs in each per-column entry, catching misrouted restrictions that a count-only check would miss. 
--- cql3/restrictions/statement_restrictions.cc | 65 ------ cql3/restrictions/statement_restrictions.hh | 8 - test/boost/statement_restrictions_test.cc | 238 ++++++++------------ 3 files changed, 94 insertions(+), 217 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index bf4911401f..6f4c43e7bb 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -854,71 +854,6 @@ bool has_eq_restriction_on_column(const column_definition& column, const express return eq_restriction_search_res != nullptr; } -std::vector extract_single_column_restrictions_for_column(std::span exprs, - const column_definition& column) { - struct visitor { - std::vector restrictions; - const column_definition& column; - const binary_operator* current_binary_operator; - - void operator()(const constant&) {} - - void operator()(const conjunction& conj) { - for (const expression& child : conj.children) { - expr::visit(*this, child); - } - } - - void operator()(const binary_operator& oper) { - if (current_binary_operator != nullptr) { - on_internal_error(expr_logger, - "extract_single_column_restrictions_for_column: nested binary operators are not supported"); - } - - current_binary_operator = &oper; - expr::visit(*this, oper.lhs); - current_binary_operator = nullptr; - } - - void operator()(const column_value& cv) { - if (*cv.col == column && current_binary_operator != nullptr) { - restrictions.emplace_back(*current_binary_operator); - } - } - - void operator()(const subscript& s) { - const column_value& cv = get_subscripted_column(s); - if (*cv.col == column && current_binary_operator != nullptr) { - restrictions.emplace_back(*current_binary_operator); - } - } - - void operator()(const unresolved_identifier&) {} - void operator()(const column_mutation_attribute&) {} - void operator()(const function_call&) {} - void operator()(const cast&) {} - void operator()(const field_selection&) 
{} - void operator()(const bind_variable&) {} - void operator()(const untyped_constant&) {} - void operator()(const tuple_constructor&) {} - void operator()(const collection_constructor&) {} - void operator()(const usertype_constructor&) {} - void operator()(const temporary&) {} - }; - - visitor v { - .restrictions = std::vector(), - .column = column, - .current_binary_operator = nullptr, - }; - - for (auto& e : exprs) { - expr::visit(v, e); - } - - return std::move(v.restrictions); -} - bool is_empty_restriction(const expression& e) { bool contains_non_conjunction = recurse_until(e, [&](const expression& e) -> bool { return !is(e); diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index 3edb34cf01..649ac7d0af 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -533,14 +533,6 @@ shared_ptr make_trivial_statement_restrictions( schema_ptr schema, bool allow_filtering); -// Extracts all binary operators which have the given column on their left hand side. -// Extracts only single-column restrictions. -// Does not include multi-column restrictions. -// Does not include token() restrictions. -// Does not include boolean constant restrictions. -// For example "WHERE c = 1 AND (a, c) = (2, 1) AND token(p) < 2 AND FALSE" will return {"c = 1"}. 
-std::vector extract_single_column_restrictions_for_column(std::span, const column_definition&); - // Checks whether this expression is empty - doesn't restrict anything bool is_empty_restriction(const expr::expression&); diff --git a/test/boost/statement_restrictions_test.cc b/test/boost/statement_restrictions_test.cc index c50cd63dea..3d7efe5bf6 100644 --- a/test/boost/statement_restrictions_test.cc +++ b/test/boost/statement_restrictions_test.cc @@ -103,19 +103,6 @@ auto both_closed(std::vector lb, std::vector ub) { clustering_key_prefix cklb(std::move(lb)), ckub(std::move(ub)); return query::clustering_range({{cklb, inclusive}}, {{ckub, inclusive}}); } - -expr::tuple_constructor -column_definitions_as_tuple_constructor(const std::vector& defs) { - std::vector columns; - std::vector column_types; - columns.reserve(defs.size()); - for (auto& def : defs) { - columns.push_back(expr::column_value{def}); - column_types.push_back(def->type); - } - data_type ttype = tuple_type_impl::get_instance(std::move(column_types)); - return expr::tuple_constructor{std::move(columns), std::move(ttype)}; -} } // anonymous namespace SEASTAR_TEST_CASE(slice_empty_restriction) { @@ -1163,156 +1150,119 @@ SEASTAR_TEST_CASE(combinatorial_restrictions) { }, {}, tattr); } - -// Currently expression doesn't have operator==(). -// Implementing it is ugly, because there are shared pointers and the term base class. -// For testing purposes checking stringified expressions is enough. -static bool expression_eq(const expr::expression& e1, const expr::expression& e2) { - return to_string(e1) == to_string(e2); +/// Helper to get statement_restrictions from a parsed WHERE clause string. +static shared_ptr make_restrictions( + std::string_view where_clause, cql_test_env& env, + const sstring& table_name = "t", const sstring& keyspace_name = "ks") { + prepare_context ctx; + auto factors = where_clause.empty() + ? 
std::vector{} + : boolean_factors(cql3::util::where_clause_to_relations(where_clause, cql3::dialect{})); + return restrictions::analyze_statement_restrictions( + env.data_dictionary(), + env.local_db().find_schema(keyspace_name, table_name), + statements::statement_type::SELECT, + expr::conjunction{std::move(factors)}, + ctx, + /*contains_only_static_columns=*/false, + /*for_view=*/false, + /*allow_filtering=*/true, + restrictions::check_indexes::yes); } -static void assert_expr_vec_eq( - const std::vector& v1, - const std::vector& v2, - const std::source_location& loc = std::source_location::current()) { - - if (std::equal(v1.begin(), v1.end(), v2.begin(), v2.end(), expression_eq)) { - return; +/// Extract (column_name, operator) pairs from each boolean factor of a conjunction expression. +/// Each factor must be a binary_operator whose LHS is a column_value or subscript. +static std::vector> factor_ops(const expr::expression& e) { + std::vector> result; + for (auto& factor : expr::boolean_factors(e)) { + BOOST_REQUIRE_MESSAGE(expr::is(factor), + fmt::format("expected binary_operator, got: {}", factor)); + auto& binop = expr::as(factor); + const auto& cv = expr::get_subscripted_column(binop.lhs); + result.emplace_back(cv.col->name_as_text(), binop.op); } - - std::string error_msg = fmt::format("Location: {}:{}, Expression vectors not equal! [{}] != [{}]", - loc.file_name(), loc.line(), fmt::join(v1, ", "), fmt::join(v2, ", ")); - - BOOST_FAIL(error_msg); + return result; } -// Unit tests for extract_column_restrictions function -BOOST_AUTO_TEST_CASE(expression_extract_column_restrictions) { - using namespace expr; +// Test that restrictions are correctly routed to per-column maps and that each +// per-column entry contains exactly the right boolean factors (verified by column +// name and operator, not just count). This is the higher-level replacement for +// the old extract_single_column_restrictions_for_column test. 
+SEASTAR_TEST_CASE(per_column_restriction_routing) { + return do_with_cql_env_thread([](cql_test_env& e) { + cquery_nofail(e, "create table ks.trc(pk1 int, pk2 int, ck1 int, ck2 int, v1 int, v2 int, v3 int, " + "primary key((pk1, pk2), ck1, ck2))"); - auto make_column = [](const char* name, column_kind kind, int id) -> column_definition { - column_definition definition(name, int32_type, kind, id); + auto schema = e.local_db().find_schema("ks", "trc"); - // column_definition has to have column_specifiction because to_string uses it for column name - ::shared_ptr identifier = ::make_shared(name, true); - column_specification specification("ks", "cf", std::move(identifier), int32_type); - definition.column_specification = make_lw_shared( - std::move(specification)); + using op = expr::oper_t; + using col_op = std::pair; - return definition; - }; + // Multiple single-column restrictions on regular columns are correctly + // accumulated per-column, while unrestricted columns don't appear. + { + auto sr = make_restrictions( + "pk1=1 AND pk2=2 AND ck1=3 AND ck2=4 AND v1=5 AND v1<10 AND v1>0 AND v2=6", + e, "trc"); - column_definition col_pk1 = make_column("pk1", column_kind::partition_key, 0); - column_definition col_pk2 = make_column("pk2", column_kind::partition_key, 1); - column_definition col_ck1 = make_column("ck1", column_kind::clustering_key, 0); - column_definition col_ck2 = make_column("ck2", column_kind::clustering_key, 1); - column_definition col_r1 = make_column("r2", column_kind::regular_column, 0); - column_definition col_r2 = make_column("r2", column_kind::regular_column, 1); - column_definition col_r3 = make_column("r3", column_kind::regular_column, 2); + // --- Non-PK per-column map --- + auto& npk = sr->get_non_pk_restriction(); + BOOST_CHECK_EQUAL(npk.size(), 2u); - // Empty input test - assert_expr_vec_eq(cql3::restrictions::extract_single_column_restrictions_for_column({}, col_pk1), {}); + auto* v1_def = schema->get_column_definition("v1"); + auto* 
v2_def = schema->get_column_definition("v2"); + auto* v3_def = schema->get_column_definition("v3"); - // BIG_WHERE test - // big_where contains: - // WHERE pk1 = 0 AND pk2 = 0 AND ck1 = 0 AND ck2 = 0 AND r1 = 0 AND r2 = 0 - // AND (pk1, pk2) < (0, 0) AND (pk1, ck2, r1) = (0, 0, 0) AND (r1, r2) > 0 - // AND ((c1, c2) < (0, 0) AND r1 < 0) - // AND pk2 > 0 AND r2 > 0 - // AND token(pk1, pk2) > 0 AND token(pk1, pk2) < 0 - // AND TRUE AND FALSE - // AND token(pk1, pk2) - // AND pk1 AND pk2 - // AND (pk1, pk2) - std::vector big_where; - expr::constant zero_value = constant(raw_value::make_value(I(0)), int32_type); + BOOST_REQUIRE(npk.contains(v1_def)); + BOOST_REQUIRE(npk.contains(v2_def)); + BOOST_CHECK(!npk.contains(v3_def)); - expression pk1_restriction(binary_operator(column_value(&col_pk1), oper_t::EQ, zero_value)); - expression pk2_restriction(binary_operator(column_value(&col_pk2), oper_t::EQ, zero_value)); - expression pk2_restriction2(binary_operator(column_value(&col_pk2), oper_t::GT, zero_value)); - expression ck1_restriction(binary_operator(column_value(&col_ck1), oper_t::EQ, zero_value)); - expression ck2_restriction(binary_operator(column_value(&col_ck2), oper_t::EQ, zero_value)); - expression r1_restriction(binary_operator(column_value(&col_r1), oper_t::EQ, zero_value)); - expression r1_restriction2(binary_operator(column_value(&col_r1), oper_t::LT, zero_value)); - expression r1_restriction3(binary_operator(column_value(&col_r1), oper_t::GT, zero_value)); - expression r2_restriction(binary_operator(column_value(&col_r2), oper_t::EQ, zero_value)); + // v1 should have EQ, LT, GT (in WHERE-clause order). + BOOST_CHECK_EQUAL(factor_ops(npk.at(v1_def)), + (std::vector{{"v1", op::EQ}, {"v1", op::LT}, {"v1", op::GT}})); + // v2 should have a single EQ. 
+ BOOST_CHECK_EQUAL(factor_ops(npk.at(v2_def)), + (std::vector{{"v2", op::EQ}})); - auto make_multi_column_restriction = [](std::vector columns, oper_t oper) -> expression { - tuple_constructor column_tuple(column_definitions_as_tuple_constructor(columns)); + // --- PK expression: pk1=1 AND pk2=2 --- + BOOST_CHECK_EQUAL(factor_ops(sr->get_partition_key_restrictions()), + (std::vector{{"pk1", op::EQ}, {"pk2", op::EQ}})); - std::vector zeros_tuple_elems(columns.size(), managed_bytes_opt(I(0))); - data_type tup_type = tuple_type_impl::get_instance(std::vector(columns.size(), int32_type)); - managed_bytes tup_bytes = tuple_type_impl::build_value_fragmented(std::move(zeros_tuple_elems)); - constant zeros_tuple(raw_value::make_value(std::move(tup_bytes)), std::move(tup_type)); + // --- CK expression: ck1=3 AND ck2=4 --- + BOOST_CHECK_EQUAL(factor_ops(sr->get_clustering_columns_restrictions()), + (std::vector{{"ck1", op::EQ}, {"ck2", op::EQ}})); + } - return binary_operator(column_tuple, oper, std::move(zeros_tuple)); - }; + // Multi-column CK restriction doesn't appear in single-column non-PK map. 
+ { + auto sr = make_restrictions( + "pk1=1 AND pk2=2 AND (ck1, ck2) > (0, 0) AND v1=5", + e, "trc"); - expression pk1_pk2_restriction = make_multi_column_restriction({&col_pk1, &col_pk2}, oper_t::LT); - expression pk1_ck2_r1_restriction = make_multi_column_restriction({&col_pk1, &col_ck2, &col_r1}, oper_t::EQ); - expression r1_r2_restriction = make_multi_column_restriction({&col_r1, &col_r2}, oper_t::GT); + auto& npk = sr->get_non_pk_restriction(); + BOOST_CHECK_EQUAL(npk.size(), 1u); - std::vector conjunction_elems; - expression ck1_ck2_restriction = make_multi_column_restriction({&col_ck1, &col_ck2}, oper_t::LT); - expression conjunction_expr = conjunction{std::vector{ck1_ck2_restriction, r1_restriction2}}; + auto* v1_def = schema->get_column_definition("v1"); + BOOST_REQUIRE(npk.contains(v1_def)); + BOOST_CHECK_EQUAL(factor_ops(npk.at(v1_def)), + (std::vector{{"v1", op::EQ}})); - function_call token_expr = function_call { - .func = functions::function_name::native_function("token"), - .args = {column_value(&col_pk1), column_value(&col_pk2)} - }; - expression token_lt_restriction = binary_operator(token_expr, oper_t::LT, zero_value); - expression token_gt_restriction = binary_operator(token_expr, oper_t::GT, zero_value); + // CK expression should have 1 factor: the multi-column (ck1, ck2) > (0, 0). + // The multi-column restriction's LHS is a tuple_constructor, not a single + // column_value, so we verify only the count and operator here. 
+ auto ck_factors = expr::boolean_factors(sr->get_clustering_columns_restrictions()); + BOOST_CHECK_EQUAL(ck_factors.size(), 1u); + BOOST_REQUIRE(expr::is(ck_factors[0])); + BOOST_CHECK(expr::as(ck_factors[0]).op == op::GT); + } - expression true_restriction = constant::make_bool(true); - expression false_restriction = constant::make_bool(false); - expression pk1_expr = column_value(&col_pk1); - expression pk2_expr = column_value(&col_pk1); - data_type ttype = tuple_type_impl::get_instance({int32_type, int32_type}); - expression pk1_pk2_expr = tuple_constructor{{expression{column_value{&col_pk1}}, - expression{column_value{&col_pk2}}}, - std::move(ttype)}; - - big_where.push_back(pk1_restriction); - big_where.push_back(pk2_restriction); - big_where.push_back(ck1_restriction); - big_where.push_back(ck2_restriction); - big_where.push_back(r1_restriction); - big_where.push_back(r2_restriction); - big_where.push_back(pk1_pk2_restriction); - big_where.push_back(pk1_ck2_r1_restriction); - big_where.push_back(r1_r2_restriction); - big_where.push_back(conjunction_expr); - big_where.push_back(pk2_restriction2); - big_where.push_back(r1_restriction3); - big_where.push_back(token_lt_restriction); - big_where.push_back(token_gt_restriction); - big_where.push_back(true_restriction); - big_where.push_back(false_restriction); - big_where.push_back(token_expr); - big_where.push_back(pk1_expr); - big_where.push_back(pk2_expr); - big_where.push_back(pk1_pk2_expr); - - assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where, col_pk1), - {pk1_restriction}); - - assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where, col_pk2), - {pk2_restriction, pk2_restriction2}); - - assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where, col_ck1), - {ck1_restriction}); - - assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where, col_ck2), - {ck2_restriction}); - - 
assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where, col_r1), - {r1_restriction, r1_restriction2, r1_restriction3}); - - assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where, col_r2), - {r2_restriction}); - - assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where, col_r3), - {}); + // Unrestricted table: all maps are empty. + { + auto sr = make_restrictions("", e, "trc"); + BOOST_CHECK(sr->get_non_pk_restriction().empty()); + BOOST_CHECK(restrictions::is_empty_restriction(sr->get_clustering_columns_restrictions())); + } + }); } BOOST_AUTO_TEST_SUITE_END() From ece9af229de04ff7a053675aefc84a89bd20ad89 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 17 Mar 2026 19:18:41 +0200 Subject: [PATCH 74/76] cql3: statement_restrictions: use predicate fields in build_get_clustering_bounds_fn Replace find_binop(..., is_multi_column) with pred.is_multi_column in build_get_clustering_bounds_fn() and add_clustering_restrictions_to_idx_ck_prefix(). Replace is_clustering_order(binop) with pred.order == comparison_order::clustering and iterate predicates directly instead of extracting filter expressions. Remove the now-dead is_multi_column() free function. 
--- cql3/restrictions/statement_restrictions.cc | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 6f4c43e7bb..4f666c5ea8 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -71,10 +71,6 @@ inline auto find_needs_filtering(const expression& e) { return find_binop(e, [] (const binary_operator& bo) { return needs_filtering(bo.op); }); } -inline bool is_multi_column(const binary_operator& op) { - return expr::is(op.lhs); -} - inline bool has_slice_or_needs_filtering(const expression& e) { return find_binop(e, [] (const binary_operator& o) { return is_slice(o.op) || needs_filtering(o.op); }); } @@ -2447,16 +2443,15 @@ statement_restrictions::build_get_clustering_bounds_fn() const { return {query::clustering_range::make_open_ended_both_sides()}; }; } - if (find_binop(_clustering_prefix_restrictions[0].filter, is_multi_column)) { // FIXME: adjust for solve_for + if (_clustering_prefix_restrictions[0].is_multi_column) { bool all_natural = true, all_reverse = true; ///< Whether column types are reversed or natural. - for (auto& r : _clustering_prefix_restrictions | std::views::transform(&predicate::filter)) { // TODO: move to constructor, do only once. 
- using namespace expr; - const auto& binop = expr::as(r); - if (is_clustering_order(binop)) { + for (auto& pred : _clustering_prefix_restrictions) { + if (pred.order == expr::comparison_order::clustering) { return build_range_from_raw_bounds_fn(_clustering_prefix_restrictions, *_schema); } - for (auto& element : expr::as(binop.lhs).elements) { - auto& cv = expr::as(element); + auto& lhs = expr::as(expr::as(pred.filter).lhs); + for (auto& element : lhs.elements) { + auto& cv = expr::as(element); if (cv.col->type->is_reversed()) { all_natural = false; } else { @@ -2704,7 +2699,7 @@ void statement_restrictions::prepare_indexed_local(const schema& idx_tbl_schema, void statement_restrictions::add_clustering_restrictions_to_idx_ck_prefix(const schema& idx_tbl_schema) { for (const auto& e : _clustering_prefix_restrictions) { - if (find_binop(_clustering_prefix_restrictions[0].filter, is_multi_column)) { + if (_clustering_prefix_restrictions[0].is_multi_column) { // TODO: We could handle single-element tuples, eg. `(c)>=(123)`. break; } From b7f86eaabc82b4d58bbd05558814257f94c2e572 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 17 Mar 2026 19:21:13 +0200 Subject: [PATCH 75/76] cql3: statement_restrictions: replace multi_column_range_accumulator_builder with direct predicate iteration build_get_multi_column_clustering_bounds_fn() used expr::visit() to dispatch each restriction through a 15-handler visitor struct. Only the binary_operator handler did real work; the conjunction handler just recursed, and the remaining 13 handlers were dead-code on_internal_error calls (the filter expression of each predicate is always a binary_operator). Replace the visitor with a loop over predicates that does as(pred.filter) directly, building the same query-time lambda inline. Promote intersect_all() and process_in_values() from static methods of the deleted struct to free functions in the anonymous namespace -- they are still called from the query-time lambda. 
--- cql3/restrictions/statement_restrictions.cc | 203 +++++++------------- 1 file changed, 66 insertions(+), 137 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 4f666c5ea8..e4186f8ed8 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1932,146 +1932,40 @@ struct multi_column_range_accumulator { std::vector ranges{query::clustering_range::make_open_ended_both_sides()}; }; -/// An expression visitor that translates multi-column atoms into functions that accumulate -/// clustering ranges into multi_column_range_accumulator. -struct multi_column_range_accumulator_builder { - const schema_ptr schema; - std::vector> builders; - const clustering_key_prefix::prefix_equal_tri_compare prefix3cmp = get_unreversed_tri_compare(*schema); - - void operator()(const binary_operator& binop) { - builders.emplace_back([binop, schema = schema, prefix3cmp = prefix3cmp] (multi_column_range_accumulator& acc, const query_options& options) { - auto& lhs = expr::as(binop.lhs); - if (is_compare(binop.op)) { - auto opt_values = expr::get_tuple_elements(expr::evaluate(binop.rhs, options), *type_of(binop.rhs)); - std::vector values(lhs.elements.size()); - for (size_t i = 0; i < lhs.elements.size(); ++i) { - auto& col = expr::as(lhs.elements.at(i)); - values[i] = *statements::request_validations::check_not_null( - opt_values[i], - "Invalid null value in condition for column {}", col.col->name_as_text()); - } - intersect_all(acc, prefix3cmp, to_range(binop.op, clustering_key_prefix(std::move(values)))); - } else if (binop.op == oper_t::IN) { - const cql3::raw_value tup = expr::evaluate(binop.rhs, options); - utils::chunked_vector> tuple_elems; - if (tup.is_value()) { - tuple_elems = expr::get_list_of_tuples_elements(tup, *type_of(binop.rhs)); - } - for(size_t i = 0; i < tuple_elems.size(); ++i) { - if(tuple_elems[i].size() != lhs.elements.size()) { - throw 
exceptions::invalid_request_exception(format("Expected {} elements in value tuple, but got {}", - lhs.elements.size(), tuple_elems[i].size())); - } - for(size_t j = 0; j < lhs.elements.size(); ++j) { - auto& col = expr::as(lhs.elements.at(j)); - statements::request_validations::check_not_null( - tuple_elems[i][j], - "Invalid null value in condition for column {}", col.col->name_as_text()); - } - } - process_in_values(acc, prefix3cmp, schema, std::move(tuple_elems)); - } else { - on_internal_error(rlogger, format("multi_column_range_accumulator: unexpected atom {}", binop)); +/// Intersects each range with v. If any intersection is empty, clears ranges. +void intersect_all(multi_column_range_accumulator& acc, const clustering_key_prefix::prefix_equal_tri_compare& prefix3cmp, const query::clustering_range& v) { + auto& ranges = acc.ranges; + for (auto& r : ranges) { + auto intrs = intersection(r, v, prefix3cmp); + if (!intrs) { + ranges.clear(); + break; } - }); + r = *intrs; } +} - void operator()(const conjunction& c) { - std::ranges::for_each(c.children, [this] (const expression& child) { expr::visit(*this, child); }); +template +requires std::convertible_to +void process_in_values(multi_column_range_accumulator& acc, const clustering_key_prefix::prefix_equal_tri_compare& prefix3cmp, const schema_ptr& schema, Range in_values) { + auto& ranges = acc.ranges; + if (ranges.empty()) { + return; // Shortcircuit an easy case. 
} - - void operator()(const constant& v) { - on_internal_error(rlogger, "constant encountered outside binary operator"); - } - - void operator()(const column_value&) { - on_internal_error(rlogger, "Column encountered outside binary operator"); - } - - void operator()(const subscript&) { - on_internal_error(rlogger, "Subscript encountered outside binary operator"); - } - - void operator()(const unresolved_identifier&) { - on_internal_error(rlogger, "Unresolved identifier encountered outside binary operator"); - } - - void operator()(const column_mutation_attribute&) { - on_internal_error(rlogger, "writetime/ttl encountered outside binary operator"); - } - - void operator()(const function_call&) { - on_internal_error(rlogger, "function call encountered outside binary operator"); - } - - void operator()(const cast&) { - on_internal_error(rlogger, "typecast encountered outside binary operator"); - } - - void operator()(const field_selection&) { - on_internal_error(rlogger, "field selection encountered outside binary operator"); - } - - void operator()(const bind_variable&) { - on_internal_error(rlogger, "bind variable encountered outside binary operator"); - } - - void operator()(const untyped_constant&) { - on_internal_error(rlogger, "untyped constant encountered outside binary operator"); - } - - void operator()(const tuple_constructor&) { - on_internal_error(rlogger, "tuple constructor encountered outside binary operator"); - } - - void operator()(const collection_constructor&) { - on_internal_error(rlogger, "collection constructor encountered outside binary operator"); - } - - void operator()(const usertype_constructor&) { - on_internal_error(rlogger, "collection constructor encountered outside binary operator"); - } - - void operator()(const temporary&) { - on_internal_error(rlogger, "temporary encountered outside binary operator"); - } - - /// Intersects each range with v. If any intersection is empty, clears ranges. 
- static void intersect_all(multi_column_range_accumulator& acc, const clustering_key_prefix::prefix_equal_tri_compare& prefix3cmp, const query::clustering_range& v) { - auto& ranges = acc.ranges; - for (auto& r : ranges) { - auto intrs = intersection(r, v, prefix3cmp); - if (!intrs) { - ranges.clear(); - break; - } - r = *intrs; - } - } - - template - requires std::convertible_to - static void process_in_values(multi_column_range_accumulator& acc, const clustering_key_prefix::prefix_equal_tri_compare& prefix3cmp, const schema_ptr& schema, Range in_values) { - auto& ranges = acc.ranges; - if (ranges.empty()) { - return; // Shortcircuit an easy case. - } - std::set new_ranges(range_less{*schema}); - for (const auto& current_tuple : in_values) { - // Each IN value is like a separate EQ restriction ANDed to the existing state. - auto current_range = to_range( - oper_t::EQ, clustering_key_prefix::from_optional_exploded(*schema, current_tuple)); - for (const auto& r : ranges) { - auto intrs = intersection(r, current_range, prefix3cmp); - if (intrs) { - new_ranges.insert(*intrs); - } + std::set new_ranges(range_less{*schema}); + for (const auto& current_tuple : in_values) { + // Each IN value is like a separate EQ restriction ANDed to the existing state. 
+ auto current_range = to_range( + oper_t::EQ, clustering_key_prefix::from_optional_exploded(*schema, current_tuple)); + for (const auto& r : ranges) { + auto intrs = intersection(r, current_range, prefix3cmp); + if (intrs) { + new_ranges.insert(*intrs); } } - ranges.assign(new_ranges.cbegin(), new_ranges.cend()); } -}; + ranges.assign(new_ranges.cbegin(), new_ranges.cend()); +} std::vector get_equivalent_ranges( const query::clustering_range& cql_order_range, const schema& schema); @@ -2082,11 +1976,46 @@ build_get_multi_column_clustering_bounds_fn( schema_ptr schema, const std::vector& multi_column_restrictions, bool all_natural, bool all_reverse) { - multi_column_range_accumulator_builder acc_builder{schema}; - for (const auto& restr : multi_column_restrictions | std::views::transform(&predicate::filter)) { - expr::visit(acc_builder, restr); + const auto prefix3cmp = get_unreversed_tri_compare(*schema); + std::vector> range_builders; + for (const auto& pred : multi_column_restrictions) { + const auto& binop = expr::as(pred.filter); + range_builders.emplace_back([binop, schema, prefix3cmp] (multi_column_range_accumulator& acc, const query_options& options) { + auto& lhs = expr::as(binop.lhs); + if (is_compare(binop.op)) { + auto opt_values = expr::get_tuple_elements(expr::evaluate(binop.rhs, options), *type_of(binop.rhs)); + std::vector values(lhs.elements.size()); + for (size_t i = 0; i < lhs.elements.size(); ++i) { + auto& col = expr::as(lhs.elements.at(i)); + values[i] = *statements::request_validations::check_not_null( + opt_values[i], + "Invalid null value in condition for column {}", col.col->name_as_text()); + } + intersect_all(acc, prefix3cmp, to_range(binop.op, clustering_key_prefix(std::move(values)))); + } else if (binop.op == oper_t::IN) { + const cql3::raw_value tup = expr::evaluate(binop.rhs, options); + utils::chunked_vector> tuple_elems; + if (tup.is_value()) { + tuple_elems = expr::get_list_of_tuples_elements(tup, *type_of(binop.rhs)); + } + for 
(size_t i = 0; i < tuple_elems.size(); ++i) { + if (tuple_elems[i].size() != lhs.elements.size()) { + throw exceptions::invalid_request_exception(format("Expected {} elements in value tuple, but got {}", + lhs.elements.size(), tuple_elems[i].size())); + } + for (size_t j = 0; j < lhs.elements.size(); ++j) { + auto& col = expr::as(lhs.elements.at(j)); + statements::request_validations::check_not_null( + tuple_elems[i][j], + "Invalid null value in condition for column {}", col.col->name_as_text()); + } + } + process_in_values(acc, prefix3cmp, schema, std::move(tuple_elems)); + } else { + on_internal_error(rlogger, format("multi_column_range_accumulator: unexpected atom {}", binop)); + } + }); } - auto range_builders = std::move(acc_builder.builders); return [schema, range_builders, all_natural, all_reverse] (const query_options& options) -> std::vector { multi_column_range_accumulator acc; for (auto& builder : range_builders) { From d584bd7358b0ffc4e7332fb8055cc5ccc7bf53e6 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 17 Mar 2026 19:59:57 +0200 Subject: [PATCH 76/76] cql3: statement_restrictions: replace has_eq_restriction_on_column with precomputed set has_eq_restriction_on_column() walked expression trees at prepare time to find binary_operators with op==EQ that mention a given column on the LHS. Its only caller is ORDER BY validation in select_statement, which checks that clustering columns without an explicit ordering have an EQ restriction. Replace the 50-line expression-walking free function with a precomputed unordered_set (_columns_with_eq) populated during the main predicate loop in analyze_statement_restrictions. For single-column EQ predicates the column is taken from on_column; for multi-column EQ like (ck1, ck2) = (1, 2), all columns in on_clustering_key_prefix are included. The member function becomes a single set::contains() call. 
--- cql3/restrictions/statement_restrictions.cc | 68 ++++----------------- cql3/restrictions/statement_restrictions.hh | 8 +-- 2 files changed, 15 insertions(+), 61 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index e4186f8ed8..c5976a23fd 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -78,13 +78,6 @@ inline bool has_slice_or_needs_filtering(const expression& e) { /// True iff binary_operator involves a collection. extern bool is_on_collection(const binary_operator&); -// Checks whether the given column has an EQ restriction in the expression. -// EQ restriction is `col = ...` or `(col, col2) = ...` -// IN restriction is NOT an EQ restriction, this function will not look for IN restrictions. -// Uses column_defintion::operator== for comparison, columns with the same name but different schema will not be equal. -bool has_eq_restriction_on_column(const column_definition& column, const expression& e); - - bool contains_multi_column_restriction(const expression&); bool has_only_eq_binops(const expression&); @@ -802,54 +795,6 @@ bool is_on_collection(const binary_operator& b) { return false; } -bool has_eq_restriction_on_column(const column_definition& column, const expression& e) { - std::function column_in_lhs = [&](const expression& e) -> bool { - return visit(overloaded_functor { - [&](const column_value& cv) { - // Use column_defintion::operator== for comparison, - // columns with the same name but different schema will not be equal. 
- return *cv.col == column; - }, - [&](const tuple_constructor& tc) { - for (const expression& elem : tc.elements) { - if (column_in_lhs(elem)) { - return true; - } - } - - return false; - }, - [&](const auto&) {return false;} - }, e); - }; - - // Look for binary operator describing eq relation with this column on lhs - const binary_operator* eq_restriction_search_res = find_binop(e, [&](const binary_operator& b) { - if (b.op != oper_t::EQ) { - return false; - } - - if (!column_in_lhs(b.lhs)) { - return false; - } - - // These conditions are not allowed to occur in the current code, - // but they might be allowed in the future. - // They are added now to avoid surprises later. - // - // These conditions detect cases like: - // WHERE column1 = column2 - // WHERE column1 = row_number() - if (contains_column(column, b.rhs) || contains_nonpure_function(b.rhs)) { - return false; - } - - return true; - }); - - return eq_restriction_search_res != nullptr; -} - bool is_empty_restriction(const expression& e) { bool contains_non_conjunction = recurse_until(e, [&](const expression& e) -> bool { return !is(e); @@ -1125,6 +1070,16 @@ statement_restrictions::statement_restrictions(private_tag, if (!pred.is_not_null_single_column) { _where.push_back(pred.filter); } + // Subscript EQ (e.g. m[1] = 'a') is not considered an EQ on the column + // itself, matching the behavior of the old expression-walking code which + // only recognized column_value and tuple_constructor in the LHS. 
+ if (pred.equality && !pred.is_subscript) { + if (auto* sc = std::get_if(&pred.on)) { + _columns_with_eq.insert(sc->column); + } else if (auto* mc = std::get_if(&pred.on)) { + _columns_with_eq.insert(mc->columns.begin(), mc->columns.end()); + } + } } if (!_where.empty()) { if (!mc_ck_preds.empty()) { @@ -1472,8 +1427,7 @@ statement_restrictions::find_idx(const secondary_index::secondary_index_manager& } bool statement_restrictions::has_eq_restriction_on_column(const column_definition& column) const { - return std::ranges::any_of(_where, - std::bind_front(restrictions::has_eq_restriction_on_column, std::ref(column))); + return _columns_with_eq.contains(&column); } std::vector statement_restrictions::get_column_defs_for_filtering(data_dictionary::database db) const { diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index 649ac7d0af..f3a5c7dfbc 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -217,6 +217,9 @@ private: check_indexes _check_indexes = check_indexes::yes; + /// Columns that appear on the LHS of an EQ restriction (not IN). + /// For multi-column EQ like (ck1, ck2) = (1, 2), all columns in the tuple are included. + std::unordered_set _columns_with_eq; std::vector _column_defs_for_filtering; schema_ptr _view_schema; std::optional _idx_opt; @@ -322,10 +325,7 @@ public: bool has_token_restrictions() const; - // Checks whether the given column has an EQ restriction. - // EQ restriction is `col = ...` or `(col, col2) = ...` - // IN restriction is NOT an EQ restriction, this function will not look for IN restrictions. - // Uses column_defintion::operator== for comparison, columns with the same name but different schema will not be equal. + // Checks whether the given column has an EQ restriction (not IN). bool has_eq_restriction_on_column(const column_definition&) const; /**