view: apply existing range tombstones after exhausting the update reader

When view_update_builder::on_results() hits the path where the update
fragment reader is already exhausted, it still needs to keep tracking
existing range tombstones and apply them to encountered rows.
Otherwise a row covered by an existing range tombstone can appear
alive while generating the view update and create a spurious view row.

Update the existing tombstone state even on the exhausted-reader path
and apply the effective tombstone to clustering rows before generating
the row tombstone update. Add a cqlpy regression test covering the
partition-delete-after-range-tombstone case.

Fixes: SCYLLADB-1554

Closes scylladb/scylladb#29481
This commit is contained in:
Wojciech Mitros
2026-04-15 10:47:42 +02:00
committed by Piotr Dulikowski
parent 40740104ab
commit 073710a661
2 changed files with 50 additions and 2 deletions

View File

@@ -1584,9 +1584,11 @@ future<stop_iteration> view_update_builder::on_results() {
auto tombstone = std::max(_update_partition_tombstone, _update_current_tombstone);
if (tombstone && _existing && !_existing->is_end_of_partition()) {
// We don't care if it's a range tombstone, as we're only looking for existing entries that get deleted
if (_existing->is_clustering_row()) {
if (_existing->is_range_tombstone_change()) {
_existing_current_tombstone = _existing->as_range_tombstone_change().tombstone();
} else if (_existing->is_clustering_row()) {
auto existing = clustering_row(*_schema, _existing->as_clustering_row());
existing.apply(std::max(_existing_partition_tombstone, _existing_current_tombstone));
auto update = clustering_row(existing.key(), row_tombstone(std::move(tombstone)), row_marker(), ::row());
generate_update(std::move(update), { std::move(existing) });
} else if (_existing->is_static_row()) {

View File

@@ -1510,6 +1510,52 @@ def test_views_with_future_tombstones(cql, test_keyspace):
assert [] == list(cql.execute(f'select * from {table}'))
assert [] == list(cql.execute(f'select * from {mv}'))
# Test that a partition tombstone arriving after a range tombstone doesn't
# create a spurious view row for a base row that was never alive.
# The bug was in view_update_builder::on_results() which failed to
# track existing range tombstones when the update reader was exhausted,
# causing the existing row to appear alive (ignoring that it was covered
# by a range tombstone) and generating an incorrect view update with a
# row marker that created a view row for a dead base row.
# Reproduces SCYLLADB-1554
def test_mv_partition_tombstone_does_not_resurrect_range_deleted_row(cql, test_keyspace):
with new_test_table(cql, test_keyspace,
'p int, c int, v int, w int, primary key (p, c)') as table:
with new_materialized_view(cql, table, '*', 'v, p, c',
'v is not null and p is not null and c is not null') as mv:
# First, write a range tombstone at ts=150. This will cover any
# future inserts with lower timestamps and we won't generate
# any view updates for them.
cql.execute(f"DELETE FROM {table} USING TIMESTAMP 150 "
f"WHERE p = 1 AND c >= 1 AND c <= 3")
# Insert a row with v at ts=100 and w at ts=30. Both timestamps
# are below the range tombstone (150), so the row is dead in the
# base table. No view entry should be created.
cql.execute(f"INSERT INTO {table} (p, c, v) VALUES (1, 2, 3) "
f"USING TIMESTAMP 100")
cql.execute(f"UPDATE {table} USING TIMESTAMP 30 SET w = 4 "
f"WHERE p = 1 AND c = 2")
assert [] == list(cql.execute(f"SELECT * FROM {table}"))
assert [] == list(cql.execute(f"SELECT * FROM {mv}"))
# Partition delete at ts=50. This is weaker than the range
# tombstone and should be a complete no-op. The different
# timestamps of v (100) and w (30) relative to the partition
# tombstone (50) are critical: the partition tombstone kills w
# but not v, making cells differ between the update and existing
# in generate_updates(). This prevents can_skip_view_updates()
# from masking the bug.
#
# Without the fix, generate_updates() fails to apply the
# existing range tombstone to the existing row, making it appear
# alive. This causes update_entry() to write a row marker at
# ts=100 (based on the view key column v's timestamp) to the
# view, incorrectly creating a view row for a dead base row.
cql.execute(f"DELETE FROM {table} USING TIMESTAMP 50 WHERE p = 1")
assert [] == list(cql.execute(f"SELECT * FROM {table}"))
assert [] == list(cql.execute(f"SELECT * FROM {mv}"))
# Test view representation in system.* tables
def test_view_in_system_tables(cql, test_keyspace):
with new_test_table(cql, test_keyspace, "p int PRIMARY KEY, v int") as base: