db,view: fix generating view updates for partition tombstones
The update generation path must track and apply all tombstones, both from the existing base row (if read-before-write was needed) and for the new row. One such path contained an error, because it assumed that if the existing row is empty, then the update can be simply generated from the new row. However, lack of the existing row can also be the result of a partition/range tombstone. If that's the case, it needs to be applied, because it's entirely possible that this partition row also hides the new row. Without taking the partition tombstone into account, creating a future tombstone and inserting an out-of-order write before it in the base table can result in ghost rows in the view table. This patch comes with a test which was proven to fail before the changes. Branches 3.1,3.2,3.3 Fixes #5793 Tests: unit(dev) Message-Id: <8d3b2abad31572668693ab585f37f4af5bb7577a.1581525398.git.sarna@scylladb.com>
This commit is contained in:
committed by
Nadav Har'El
parent
3252068588
commit
e93c54e837
@@ -910,7 +910,11 @@ future<stop_iteration> view_update_builder::on_results() {
|
||||
if (_update && !_update->is_end_of_partition()) {
|
||||
if (_update->is_clustering_row()) {
|
||||
apply_tracked_tombstones(_update_tombstone_tracker, _update->as_mutable_clustering_row());
|
||||
generate_update(std::move(*_update).as_clustering_row(), { });
|
||||
auto existing_tombstone = _existing_tombstone_tracker.current_tombstone();
|
||||
auto existing = existing_tombstone
|
||||
? std::optional<clustering_row>(std::in_place, _update->as_clustering_row().key(), row_tombstone(std::move(existing_tombstone)), row_marker(), ::row())
|
||||
: std::nullopt;
|
||||
generate_update(std::move(*_update).as_clustering_row(), std::move(existing));
|
||||
}
|
||||
return advance_updates();
|
||||
}
|
||||
|
||||
@@ -4996,3 +4996,45 @@ SEASTAR_TEST_CASE(test_time_uuid_fcts_result) {
|
||||
require_timestamp_timeuuid_or_date("tounixtimestamp");
|
||||
});
|
||||
}
|
||||
|
||||
// Test that tombstones with future timestamps work correctly
|
||||
// when a write with lower timestamp arrives - in such case,
|
||||
// if the base row is covered by such a tombstone, a view update
|
||||
// needs to take it into account. Refs #5793
|
||||
SEASTAR_TEST_CASE(test_views_with_future_tombstones) {
|
||||
return do_with_cql_env_thread([] (auto& e) {
|
||||
cquery_nofail(e, "CREATE TABLE t (a int, b int, c int, d int, e int, PRIMARY KEY (a,b,c));");
|
||||
cquery_nofail(e, "CREATE MATERIALIZED VIEW tv AS SELECT * FROM t"
|
||||
" WHERE a IS NOT NULL AND b IS NOT NULL AND c IS NOT NULL PRIMARY KEY (b,a,c);");
|
||||
|
||||
// Partition tombstone
|
||||
cquery_nofail(e, "delete from t using timestamp 10 where a=1;");
|
||||
auto msg = cquery_nofail(e, "select * from t;");
|
||||
assert_that(msg).is_rows().with_size(0);
|
||||
cquery_nofail(e, "insert into t (a,b,c,d,e) values (1,2,3,4,5) using timestamp 8;");
|
||||
msg = cquery_nofail(e, "select * from t;");
|
||||
assert_that(msg).is_rows().with_size(0);
|
||||
msg = cquery_nofail(e, "select * from tv;");
|
||||
assert_that(msg).is_rows().with_size(0);
|
||||
|
||||
// Range tombstone
|
||||
cquery_nofail(e, "delete from t using timestamp 16 where a=2 and b > 1 and b < 4;");
|
||||
msg = cquery_nofail(e, "select * from t;");
|
||||
assert_that(msg).is_rows().with_size(0);
|
||||
cquery_nofail(e, "insert into t (a,b,c,d,e) values (2,3,4,5,6) using timestamp 12;");
|
||||
msg = cquery_nofail(e, "select * from t;");
|
||||
assert_that(msg).is_rows().with_size(0);
|
||||
msg = cquery_nofail(e, "select * from tv;");
|
||||
assert_that(msg).is_rows().with_size(0);
|
||||
|
||||
// Row tombstone
|
||||
cquery_nofail(e, "delete from t using timestamp 24 where a=3 and b=4 and c=5;");
|
||||
msg = cquery_nofail(e, "select * from t;");
|
||||
assert_that(msg).is_rows().with_size(0);
|
||||
cquery_nofail(e, "insert into t (a,b,c,d,e) values (3,4,5,6,7) using timestamp 18;");
|
||||
msg = cquery_nofail(e, "select * from t;");
|
||||
assert_that(msg).is_rows().with_size(0);
|
||||
msg = cquery_nofail(e, "select * from tv;");
|
||||
assert_that(msg).is_rows().with_size(0);
|
||||
});
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user