db,view: fix generating view updates for partition tombstones

The update generation path must track and apply all tombstones,
both from the existing base row (if read-before-write was needed)
and for the new row. One such path contained an error, because
it assumed that if the existing row is empty, then the update
can be simply generated from the new row. However, lack of the
existing row can also be the result of a partition/range tombstone.
If that's the case, it needs to be applied, because it's entirely
possible that this partition row also hides the new row.
Without taking the partition tombstone into account, creating
a future tombstone and inserting an out-of-order write before it
in the base table can result in ghost rows in the view table.
This patch comes with a test which was proven to fail before the
changes.

Branches 3.1,3.2,3.3
Fixes #5793

Tests: unit(dev)
Message-Id: <8d3b2abad31572668693ab585f37f4af5bb7577a.1581525398.git.sarna@scylladb.com>
This commit is contained in:
Piotr Sarna
2020-02-12 17:37:22 +01:00
committed by Nadav Har'El
parent 3252068588
commit e93c54e837
2 changed files with 47 additions and 1 deletions

View File

@@ -910,7 +910,11 @@ future<stop_iteration> view_update_builder::on_results() {
if (_update && !_update->is_end_of_partition()) {
if (_update->is_clustering_row()) {
apply_tracked_tombstones(_update_tombstone_tracker, _update->as_mutable_clustering_row());
generate_update(std::move(*_update).as_clustering_row(), { });
auto existing_tombstone = _existing_tombstone_tracker.current_tombstone();
auto existing = existing_tombstone
? std::optional<clustering_row>(std::in_place, _update->as_clustering_row().key(), row_tombstone(std::move(existing_tombstone)), row_marker(), ::row())
: std::nullopt;
generate_update(std::move(*_update).as_clustering_row(), std::move(existing));
}
return advance_updates();
}

View File

@@ -4996,3 +4996,45 @@ SEASTAR_TEST_CASE(test_time_uuid_fcts_result) {
require_timestamp_timeuuid_or_date("tounixtimestamp");
});
}
// Test that tombstones with future timestamps work correctly
// when a write with lower timestamp arrives - in such case,
// if the base row is covered by such a tombstone, a view update
// needs to take it into account. Refs #5793
SEASTAR_TEST_CASE(test_views_with_future_tombstones) {
return do_with_cql_env_thread([] (auto& e) {
cquery_nofail(e, "CREATE TABLE t (a int, b int, c int, d int, e int, PRIMARY KEY (a,b,c));");
cquery_nofail(e, "CREATE MATERIALIZED VIEW tv AS SELECT * FROM t"
" WHERE a IS NOT NULL AND b IS NOT NULL AND c IS NOT NULL PRIMARY KEY (b,a,c);");
// Partition tombstone
cquery_nofail(e, "delete from t using timestamp 10 where a=1;");
auto msg = cquery_nofail(e, "select * from t;");
assert_that(msg).is_rows().with_size(0);
cquery_nofail(e, "insert into t (a,b,c,d,e) values (1,2,3,4,5) using timestamp 8;");
msg = cquery_nofail(e, "select * from t;");
assert_that(msg).is_rows().with_size(0);
msg = cquery_nofail(e, "select * from tv;");
assert_that(msg).is_rows().with_size(0);
// Range tombstone
cquery_nofail(e, "delete from t using timestamp 16 where a=2 and b > 1 and b < 4;");
msg = cquery_nofail(e, "select * from t;");
assert_that(msg).is_rows().with_size(0);
cquery_nofail(e, "insert into t (a,b,c,d,e) values (2,3,4,5,6) using timestamp 12;");
msg = cquery_nofail(e, "select * from t;");
assert_that(msg).is_rows().with_size(0);
msg = cquery_nofail(e, "select * from tv;");
assert_that(msg).is_rows().with_size(0);
// Row tombstone
cquery_nofail(e, "delete from t using timestamp 24 where a=3 and b=4 and c=5;");
msg = cquery_nofail(e, "select * from t;");
assert_that(msg).is_rows().with_size(0);
cquery_nofail(e, "insert into t (a,b,c,d,e) values (3,4,5,6,7) using timestamp 18;");
msg = cquery_nofail(e, "select * from t;");
assert_that(msg).is_rows().with_size(0);
msg = cquery_nofail(e, "select * from tv;");
assert_that(msg).is_rows().with_size(0);
});
}