view: apply existing range tombstones after exhausting the update reader

When view_update_builder::on_results() hits the path where the update fragment reader is already exhausted, it still needs to keep tracking existing range tombstones and apply them to encountered rows. Otherwise a row covered by an existing range tombstone can appear alive while generating the view update and create a spurious view row. Update the existing tombstone state even on the exhausted-reader path and apply the effective tombstone to clustering rows before generating the row tombstone update. Add a cqlpy regression test covering the partition-delete-after-range-tombstone case. Fixes: SCYLLADB-1554 Closes scylladb/scylladb#29481
2026-04-15 10:47:42 +02:00
parent 40740104ab
commit 073710a661
2 changed files with 50 additions and 2 deletions
--- a/db/view/view.cc
+++ b/db/view/view.cc
@@ -1584,9 +1584,11 @@ future<stop_iteration> view_update_builder::on_results() {

    auto tombstone = std::max(_update_partition_tombstone, _update_current_tombstone);
    if (tombstone && _existing && !_existing->is_end_of_partition()) {
-        // We don't care if it's a range tombstone, as we're only looking for existing entries that get deleted
-        if (_existing->is_clustering_row()) {
+        if (_existing->is_range_tombstone_change()) {
+            _existing_current_tombstone = _existing->as_range_tombstone_change().tombstone();
+        } else if (_existing->is_clustering_row()) {
            auto existing = clustering_row(*_schema, _existing->as_clustering_row());
+            existing.apply(std::max(_existing_partition_tombstone, _existing_current_tombstone));
            auto update = clustering_row(existing.key(), row_tombstone(std::move(tombstone)), row_marker(), ::row());
            generate_update(std::move(update), { std::move(existing) });
        } else if (_existing->is_static_row()) {
--- a/test/cqlpy/test_materialized_view.py
+++ b/test/cqlpy/test_materialized_view.py
@@ -1510,6 +1510,52 @@ def test_views_with_future_tombstones(cql, test_keyspace):
            assert [] == list(cql.execute(f'select * from {table}'))
            assert [] == list(cql.execute(f'select * from {mv}'))

+# Test that a partition tombstone arriving after a range tombstone doesn't
+# create a spurious view row for a base row that was never alive.
+# The bug was in view_update_builder::on_results() which failed to
+# track existing range tombstones when the update reader was exhausted,
+# causing the existing row to appear alive (ignoring that it was covered
+# by a range tombstone) and generating an incorrect view update with a
+# row marker that created a view row for a dead base row.
+# Reproduces SCYLLADB-1554
+def test_mv_partition_tombstone_does_not_resurrect_range_deleted_row(cql, test_keyspace):
+    with new_test_table(cql, test_keyspace,
+            'p int, c int, v int, w int, primary key (p, c)') as table:
+        with new_materialized_view(cql, table, '*', 'v, p, c',
+                'v is not null and p is not null and c is not null') as mv:
+            # First, write a range tombstone at ts=150. This will cover any
+            # future inserts with lower timestamps and we won't generate
+            # any view updates for them.
+            cql.execute(f"DELETE FROM {table} USING TIMESTAMP 150 "
+                        f"WHERE p = 1 AND c >= 1 AND c <= 3")
+
+            # Insert a row with v at ts=100 and w at ts=30. Both timestamps
+            # are below the range tombstone (150), so the row is dead in the
+            # base table. No view entry should be created.
+            cql.execute(f"INSERT INTO {table} (p, c, v) VALUES (1, 2, 3) "
+                        f"USING TIMESTAMP 100")
+            cql.execute(f"UPDATE {table} USING TIMESTAMP 30 SET w = 4 "
+                        f"WHERE p = 1 AND c = 2")
+            assert [] == list(cql.execute(f"SELECT * FROM {table}"))
+            assert [] == list(cql.execute(f"SELECT * FROM {mv}"))
+
+            # Partition delete at ts=50. This is weaker than the range
+            # tombstone and should be a complete no-op. The different
+            # timestamps of v (100) and w (30) relative to the partition
+            # tombstone (50) are critical: the partition tombstone kills w
+            # but not v, making cells differ between the update and existing
+            # in generate_updates(). This prevents can_skip_view_updates()
+            # from masking the bug.
+            #
+            # Without the fix, generate_updates() fails to apply the
+            # existing range tombstone to the existing row, making it appear
+            # alive. This causes update_entry() to write a row marker at
+            # ts=100 (based on the view key column v's timestamp) to the
+            # view, incorrectly creating a view row for a dead base row.
+            cql.execute(f"DELETE FROM {table} USING TIMESTAMP 50 WHERE p = 1")
+            assert [] == list(cql.execute(f"SELECT * FROM {table}"))
+            assert [] == list(cql.execute(f"SELECT * FROM {mv}"))
+
 # Test view representation in system.* tables
 def test_view_in_system_tables(cql, test_keyspace):
    with new_test_table(cql, test_keyspace, "p int PRIMARY KEY, v int") as base: