From f6f974cdeb11a257d9318763a8e8a8077f569bc0 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 26 Jun 2023 21:28:41 +0300 Subject: [PATCH] cql3: selection: fix GROUP BY, empty groups, and aggregations A GROUP BY combined with aggregation should produce a single row per group, except for empty groups. This is in contrast to an aggregation without GROUP BY, which produces a single row no matter what. The existing code only considered the case of no grouping and forced a row into the result, but this caused an unwanted row if grouping was used. Fix by refining the check to also consider GROUP BY. XFAIL tests are relaxed. Fixes #12477. Note, forward_service requires that aggregation produce exactly one row, but since it can't work with grouping, it isn't affected. Closes #14399 --- cql3/selection/selection.cc | 2 +- .../validation/operations/compact_storage_test.py | 2 +- .../validation/operations/select_group_by_test.py | 2 +- test/cql-pytest/test_aggregate.py | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cql3/selection/selection.cc b/cql3/selection/selection.cc index 0cccf12ab2..37e6cdc327 100644 --- a/cql3/selection/selection.cc +++ b/cql3/selection/selection.cc @@ -427,7 +427,7 @@ void result_set_builder::new_row() { std::unique_ptr result_set_builder::build() { process_current_row(/*more_rows_coming=*/false); - if (_result_set->empty() && _selectors->is_aggregate()) { + if (_result_set->empty() && _selectors->is_aggregate() && _group_by_cell_indices.empty()) { _result_set->add_row(_selectors->get_output_row()); } return std::move(_result_set); diff --git a/test/cql-pytest/cassandra_tests/validation/operations/compact_storage_test.py b/test/cql-pytest/cassandra_tests/validation/operations/compact_storage_test.py index 7a0a37c1db..a53e51c1ed 100644 --- a/test/cql-pytest/cassandra_tests/validation/operations/compact_storage_test.py +++ b/test/cql-pytest/cassandra_tests/validation/operations/compact_storage_test.py @@ -1247,7 +1247,7 @@ def testCompactStorage(cql, test_keyspace): assert_rows2(execute(cql, table, "INSERT INTO %s (partition, key, owner) VALUES ('a', 'c', 'x') IF NOT EXISTS"), [row(True)], [row(True,None,None,None)]) # SelectGroupByTest -@pytest.mark.xfail(reason="issue #4244, #5361, #5362, #5363, #12477, #12479") +@pytest.mark.xfail(reason="issue #4244, #5361, #5362, #5363, #12479") def testGroupByWithoutPaging(cql, test_keyspace): with create_table(cql, test_keyspace, "(a int, b int, c int, d int, e int, PRIMARY KEY (a, b, c, d)) WITH COMPACT STORAGE") as table: execute(cql, table, "INSERT INTO %s (a, b, c, d, e) VALUES (1, 2, 1, 3, 6)") diff --git a/test/cql-pytest/cassandra_tests/validation/operations/select_group_by_test.py b/test/cql-pytest/cassandra_tests/validation/operations/select_group_by_test.py index 9d9f0b54b5..534efe2b39 100644 --- a/test/cql-pytest/cassandra_tests/validation/operations/select_group_by_test.py +++ b/test/cql-pytest/cassandra_tests/validation/operations/select_group_by_test.py @@ -7,7 +7,7 @@ from cassandra_tests.porting import * -@pytest.mark.xfail(reason="Issue #2060, #5361, #5362, #5363, #12477, #12479, #13109") +@pytest.mark.xfail(reason="Issue #2060, #5361, #5362, #5363, #12479, #13109") def testGroupByWithoutPaging(cql, test_keyspace): with create_table(cql, test_keyspace, "(a int, b int, c int, d int, e int, primary key (a, b, c, d))") as table: diff --git a/test/cql-pytest/test_aggregate.py b/test/cql-pytest/test_aggregate.py index da6b1e29f5..341ac50f24 100644 --- a/test/cql-pytest/test_aggregate.py +++ b/test/cql-pytest/test_aggregate.py @@ -124,12 +124,12 @@ def test_count_and_group_by_partition(cql, table1): # In the above tests we looked for per-row or per-partition counts and got # back more than one count. But if our query matches no row, we should get # back no count. -@pytest.mark.xfail(reason="issue #12477") +# +# Reproduces #12477 def test_count_and_group_by_row_none(cql, table1): p = unique_key_int() assert [] == list(cql.execute(f"select p, c, count(v) from {table1} where p = {p} group by p,c")) -@pytest.mark.xfail(reason="issue #12477") def test_count_and_group_by_partition_none(cql, table1): p = unique_key_int() assert [] == list(cql.execute(f"select p, count(v) from {table1} where p = {p} group by p"))