test/cql-pytest: tests for single-element multi-column restrictions

It turns out that Cassandra handles a single-element restriction like `(c2) = (1)` just like `c2 = 1`, and does not subject it to the limitations of multi-column restrictions. In particular, such a query works even though it has no restriction on the preceding clustering key column "c1", and it may also use an index if c2 is indexed. But currently in Scylla, `(c2) = (1)` is handled like a multi-column restriction, so Scylla complains if c2 is not the first clustering key column, and cannot use an index. This patch adds several tests demonstrating this difference between Scylla and Cassandra (#13250). The xfailing tests pass on Cassandra but fail on Scylla.
Refs #13250
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Closes #13252
2023-03-20 16:07:00 +02:00
parent 26bb36cdf5
commit 511308bccf
2 changed files with 45 additions and 0 deletions
--- a/test/cql-pytest/test_allow_filtering.py
+++ b/test/cql-pytest/test_allow_filtering.py
@@ -418,3 +418,23 @@ def test_allow_filtering_multi_column_and_index(cql, test_keyspace):
        check_af_optional(cql, (table, everything),
            "p=1 AND (c1,c2)<(2,0) AND r = 0",
            lambda r : r.p == 1 and r.r == 0 and (r.c1 < 2 or (r.c1 == 2 and r.c2 < 0)))
+
+# In test_allow_filtering_clustering_key above we checked that a scan of the
+# whole table looking for one particular *clustering* column value requires
+# filtering: Such a query may return just a few or even no matches, but still
+# needs to go over all the partitions. Here we do exactly the same but
+# instead of the restriction c=2 we use multi-column syntax (c)=(2) - which
+# should be the same (see discussion in issue #13250).
+def test_allow_filtering_clustering_key_multicolumn_syntax(cql, table1):
+    check_af_mandatory(cql, table1, '(c)=(2)', lambda row: row.c==2)
+
+# Moreover, if we have multiple clustering key columns, c1 and c2,
+# (c2)=(10) should be allowed just like c2=10 (and require filtering
+# just like it) - we shouldn't complain that c1 is missing. Reproduces #13250.
+@pytest.mark.xfail(reason="issue #13250")
+def test_allow_filtering_compound_clustering_key_multicolumn_syntax(cql, table3):
+    check_af_mandatory(cql, table3, 'c1=10', lambda row: row.c1==10)
+    check_af_mandatory(cql, table3, '(c1)=(10)', lambda row: row.c1==10)
+    check_af_mandatory(cql, table3, 'c2=10', lambda row: row.c2==10)
+    # Reproduces #13250:
+    check_af_mandatory(cql, table3, '(c2)=(10)', lambda row: row.c2==10)
--- a/test/cql-pytest/test_secondary_index.py
+++ b/test/cql-pytest/test_secondary_index.py
@@ -345,6 +345,31 @@ def test_filter_cluster_key(cql, test_keyspace):
        rows = cql.execute(stmt)
        assert_rows(rows, [1, 1])

+# Querying by *only* an indexed clustering key column does not require
+# filtering: it's a full-index scan (the amount of output is proportional to
+# the amount of data read). With unnecessary parentheses the query should work
+# too, and not be handled like a multi-column restriction (reproduces #13250).
+@pytest.mark.xfail(reason="issue #13250")
+def test_index_scan_multicolumn_syntax(cql, test_keyspace):
+    schema = 'p int, c1 int, c2 int, primary key (p, c1, c2)'
+    with new_test_table(cql, test_keyspace, schema) as table:
+        cql.execute(f"CREATE INDEX ON {table}(c1)")
+        cql.execute(f"CREATE INDEX ON {table}(c2)")
+        cql.execute(f"INSERT INTO {table} (p, c1, c2) VALUES (0, 1, 1)")
+        cql.execute(f"INSERT INTO {table} (p, c1, c2) VALUES (0, 0, 1)")
+        cql.execute(f"INSERT INTO {table} (p, c1, c2) VALUES (0, 1, 0)")
+        assert [(0,), (1,)] == list(cql.execute(f'SELECT c2 FROM {table} WHERE c1 = 1'))
+        assert [(0,), (1,)] == list(cql.execute(f'SELECT c1 FROM {table} WHERE c2 = 1'))
+        # The query (c1) = (1) isn't a real multi-column restriction (it
+        # should mean the same as c1=1) so it can use the index - and work
+        # without ALLOW FILTERING. Reproduces #13250:
+        assert [(0,), (1,)] == list(cql.execute(f'SELECT c2 FROM {table} WHERE (c1) = (1)'))
+        # The query (c2) = (1) isn't a real multi-column restriction (it
+        # should mean the same as c2=1) so it should be allowed despite
+        # missing a restriction on c1. In our case c2=1 is allowed because
+        # c2 is indexed. Reproduces #13250:
+        assert [(0,), (1,)] == list(cql.execute(f'SELECT c1 FROM {table} WHERE (c2) = (1)'))
+
 def test_multi_column_with_regular_index(cql, test_keyspace):
    """Reproduces #9085."""
    with new_test_table(cql, test_keyspace, 'p int, c1 int, c2 int, r int, primary key(p,c1,c2)') as tbl: