sstables: Don't rely on lexicographical prefix comparison

When creating a deletion log for a bunch of sstables the code checks
that all sstables share the same "storage" by lexicographically
comparing their prefixes. That's not correct, as filesystem paths may
refer to the same directory even if not being equal.

So far that's been mostly OK, because paths manipulations were done in
simple forms without producing unequal paths. Patch 8a061bd8 (sstables,
code: Introduce and use change_state() call) triggerred a corner case.

    fs::path foo("/foo");
    sstring sub("");
    foo = foo / sub;

produces a correct path of "/foo/", but the trailing slash breaks the
aforementioned assumption about prefixes comparison. As a result, when
an sstable moves between, say, staging and normal locations it may gain
a trailing slash breaking the deletion log creation code.

The fix is to restrict the deletion log creation not to rely on path
strings comparison completely and trim the trailing slash if it happens.

A test is included.

fixes: #13085

Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>

Closes #13090
This commit is contained in:
Pavel Emelyanov
2023-03-06 18:57:05 +03:00
committed by Avi Kivity
parent beaa5a9117
commit 0cd3a6993b
4 changed files with 44 additions and 6 deletions

View File

@@ -384,27 +384,44 @@ sstable_directory::retrieve_shared_sstables() {
return std::exchange(_shared_sstable_info, {});
}
bool sstable_directory::compare_sstable_storage_prefix(const sstring& prefix_a, const sstring& prefix_b) noexcept {
size_t size_a = prefix_a.size();
if (prefix_a.back() == '/') {
size_a--;
}
size_t size_b = prefix_b.size();
if (prefix_b.back() == '/') {
size_b--;
}
return size_a == size_b && sstring::traits_type::compare(prefix_a.begin(), prefix_b.begin(), size_a) == 0;
}
future<> sstable_directory::delete_atomically(std::vector<shared_sstable> ssts) {
if (ssts.empty()) {
return make_ready_future<>();
}
return seastar::async([ssts = std::move(ssts)] {
sstring sstdir;
shared_sstable first = nullptr;
min_max_tracker<generation_type> gen_tracker;
for (const auto& sst : ssts) {
gen_tracker.update(sst->generation());
if (sstdir.empty()) {
sstdir = sst->_storage.prefix();
if (first == nullptr) {
first = sst;
} else {
// All sstables are assumed to be in the same column_family, hence
// sharing their base directory.
assert (sstdir == sst->_storage.prefix());
// sharing their base directory. Since lexicographical comparison of
// paths is not the same as their actualy equivalence, this should
// rather check for fs::equivalent call on _storage.prefix()-s. But
// since we know that the worst thing filesystem storage driver can
// do is to prepend/drop the trailing slash, it should be enough to
// compare prefixes of both ... prefixes
assert(compare_sstable_storage_prefix(first->_storage.prefix(), sst->_storage.prefix()));
}
}
sstring pending_delete_dir = sstdir + "/" + sstable::pending_delete_dir_basename();
sstring pending_delete_dir = first->_storage.prefix() + "/" + sstable::pending_delete_dir_basename();
sstring pending_delete_log = format("{}/sstables-{}-{}.log", pending_delete_dir, gen_tracker.min(), gen_tracker.max());
sstring tmp_pending_delete_log = pending_delete_log + ".tmp";
sstlog.trace("Writing {}", tmp_pending_delete_log);

View File

@@ -218,6 +218,8 @@ public:
// This function only solves the second problem for now.
static future<> delete_atomically(std::vector<shared_sstable> ssts);
static future<> replay_pending_delete_log(std::filesystem::path log_file);
static bool compare_sstable_storage_prefix(const sstring& a, const sstring& b) noexcept;
};
}

View File

@@ -53,6 +53,21 @@ SEASTAR_THREAD_TEST_CASE(test_sstable_move) {
sst = env.reusable_sst(uncompressed_schema(), cur_dir, gen).get0();
}
SEASTAR_THREAD_TEST_CASE(test_sstable_move_idempotent) {
tmpdir tmp;
auto env = test_env();
auto stop_env = defer([&env] { env.stop().get(); });
auto [ sst, cur_dir ] = copy_sst_to_tmpdir(tmp.path(), env, uncompressed_schema(), fs::path(uncompressed_dir()), 1);
sstring old_path = test(sst).storage_prefix();
touch_directory(format("{}/{}", old_path, sstables::staging_dir)).get();
sst->change_state(sstables::staging_dir).get();
sst->change_state(sstables::normal_dir).get();
BOOST_REQUIRE(sstable_directory::compare_sstable_storage_prefix(old_path, test(sst).storage_prefix()));
sst->close_files().get();
}
// Simulate a crashed create_links.
// This implementation simulates create_links;
// ideally, we should have error injection points in create_links

View File

@@ -223,6 +223,10 @@ public:
static fs::path filename(const sstable& sst, component_type c) {
return fs::path(sst.filename(c));
}
sstring storage_prefix() const {
return _sst->_storage.prefix();
}
};
inline auto replacer_fn_no_op() {