From 854d2917a1e4b61a3521ae0e3cc4fb36cacba917 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Zakrzewski?= Date: Thu, 20 Feb 2025 01:04:56 +0100 Subject: [PATCH] cql3/select_statement: reject PER PARTITION LIMIT with SELECT DISTINCT Before this patch we silently allowed and ignored PER PARTITION LIMIT. SELECT DISTINCT requires all the partition key columns, which means that setting PER PARTITION LIMIT is redundant - only one result will be returned from every partition anyway. Cassandra behaves the same way, so this patch also ensures compatibility. Fixes scylladb/scylladb#15109 Closes scylladb/scylladb#22950 --- cql3/statements/raw/select_statement.hh | 4 ++-- cql3/statements/select_statement.cc | 6 +++++- .../validation/operations/select_limit_test.py | 4 ++-- test/cqlpy/test_distinct.py | 8 +++++++- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/cql3/statements/raw/select_statement.hh b/cql3/statements/raw/select_statement.hh index b3dff7ac3f..da67dbaaa0 100644 --- a/cql3/statements/raw/select_statement.hh +++ b/cql3/statements/raw/select_statement.hh @@ -135,9 +135,9 @@ private: selection::selection& selection, const restrictions::statement_restrictions& restrictions); - static void validate_distinct_selection(const schema& schema, + void validate_distinct_selection(const schema& schema, const selection::selection& selection, - const restrictions::statement_restrictions& restrictions); + const restrictions::statement_restrictions& restrictions) const; /** If ALLOW FILTERING was not specified, this verifies that it is not needed */ void check_needs_filtering( diff --git a/cql3/statements/select_statement.cc b/cql3/statements/select_statement.cc index cfe5a78204..ed7b1b4790 100644 --- a/cql3/statements/select_statement.cc +++ b/cql3/statements/select_statement.cc @@ -2383,8 +2383,12 @@ select_statement::ordering_comparator_type select_statement::get_ordering_compar void select_statement::validate_distinct_selection(const schema& schema, const selection::selection& selection, - const restrictions::statement_restrictions& restrictions) + const restrictions::statement_restrictions& restrictions) const { + if (_per_partition_limit) { + throw exceptions::invalid_request_exception("PER PARTITION LIMIT is not allowed with SELECT DISTINCT queries"); + } + if (restrictions.has_non_primary_key_restriction() || restrictions.has_clustering_columns_restriction()) { throw exceptions::invalid_request_exception( "SELECT DISTINCT with WHERE clause only supports restriction by partition key."); diff --git a/test/cqlpy/cassandra_tests/validation/operations/select_limit_test.py b/test/cqlpy/cassandra_tests/validation/operations/select_limit_test.py index 0bdda04e89..8d99418476 100644 --- a/test/cqlpy/cassandra_tests/validation/operations/select_limit_test.py +++ b/test/cqlpy/cassandra_tests/validation/operations/select_limit_test.py @@ -16,7 +16,7 @@ def testSparseTable(cql, test_keyspace): execute(cql, table, "INSERT INTO %s (userid, url, day, month, year) VALUES (?, ?, 1, 'jan', 2012)", i, f"http://foo.{tld}") assertRowCount(execute(cql, table, "SELECT * FROM %s LIMIT 4"), 4) -@pytest.mark.xfail(reason="issues #9879, #15099, #15109") +@pytest.mark.xfail(reason="issues #9879, #15099") def testPerPartitionLimit(cql, test_keyspace): with create_table(cql, test_keyspace, "(a int, b int, c int, PRIMARY KEY (a, b))") as table: for i in range(5): @@ -110,7 +110,7 @@ def testPerPartitionLimit(cql, test_keyspace): assertInvalidMessage(cql, table, "PER PARTITION LIMIT is not allowed with aggregate queries.", "SELECT COUNT(*) FROM %s PER PARTITION LIMIT ?", 3) -@pytest.mark.xfail(reason="issues #9879, #15109") +@pytest.mark.xfail(reason="issues #9879") def testPerPartitionLimitWithStaticDataAndPaging(cql, test_keyspace): with create_table(cql, test_keyspace, "(a int, b int, s int static, c int, PRIMARY KEY (a, b))") as table: for i in range(5): diff --git a/test/cqlpy/test_distinct.py b/test/cqlpy/test_distinct.py index ea07e3c835..9ea2cb094d 100644 --- a/test/cqlpy/test_distinct.py +++ b/test/cqlpy/test_distinct.py @@ -74,7 +74,13 @@ def test_distinct_limit(cql, test_keyspace): assert n == len(results) assert set(results).issubset(set(all)) -# Test combination of SELECT DISTINCT, COUNT, GROUP BY and LIMIT. +# Test that SELECT DISTINCT and PER PARTITION LIMIT is not allowed in the same query +# reproduces #15109 +def test_distinct_and_per_partition_limit(cql, table1): + with pytest.raises(InvalidRequest, match='PER PARTITION LIMIT is not allowed with SELECT DISTINCT queries'): + cql.execute(f"select distinct p from {table1} per partition limit 1") + +# Test combination of SELECT DISTINCT, COUNT, GROUP BY and LIMIT. # COUNT + GROUP BY means generate one count per partition, and the # SELECT DISTINCT simply hands the counter just one row per partition # to count, so all the counts come up 1. Adding a LIMIT to all of