sstables: Don't rely on lexicographical prefix comparison
When creating a deletion log for a bunch of sstables the code checks
that all sstables share the same "storage" by lexicographically
comparing their prefixes. That's not correct, as filesystem paths may
refer to the same directory even if not being equal.
So far that's been mostly OK, because paths manipulations were done in
simple forms without producing unequal paths. Patch 8a061bd8 (sstables,
code: Introduce and use change_state() call) triggerred a corner case.
fs::path foo("/foo");
sstring sub("");
foo = foo / sub;
produces a correct path of "/foo/", but the trailing slash breaks the
aforementioned assumption about prefixes comparison. As a result, when
an sstable moves between, say, staging and normal locations it may gain
a trailing slash breaking the deletion log creation code.
The fix is to restrict the deletion log creation not to rely on path
strings comparison completely and trim the trailing slash if it happens.
A test is included.
fixes: #13085
Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
Closes #13090
This commit is contained in:
committed by
Avi Kivity
parent
beaa5a9117
commit
0cd3a6993b
@@ -384,27 +384,44 @@ sstable_directory::retrieve_shared_sstables() {
|
||||
return std::exchange(_shared_sstable_info, {});
|
||||
}
|
||||
|
||||
bool sstable_directory::compare_sstable_storage_prefix(const sstring& prefix_a, const sstring& prefix_b) noexcept {
|
||||
size_t size_a = prefix_a.size();
|
||||
if (prefix_a.back() == '/') {
|
||||
size_a--;
|
||||
}
|
||||
size_t size_b = prefix_b.size();
|
||||
if (prefix_b.back() == '/') {
|
||||
size_b--;
|
||||
}
|
||||
return size_a == size_b && sstring::traits_type::compare(prefix_a.begin(), prefix_b.begin(), size_a) == 0;
|
||||
}
|
||||
|
||||
future<> sstable_directory::delete_atomically(std::vector<shared_sstable> ssts) {
|
||||
if (ssts.empty()) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
return seastar::async([ssts = std::move(ssts)] {
|
||||
sstring sstdir;
|
||||
shared_sstable first = nullptr;
|
||||
min_max_tracker<generation_type> gen_tracker;
|
||||
|
||||
for (const auto& sst : ssts) {
|
||||
gen_tracker.update(sst->generation());
|
||||
|
||||
if (sstdir.empty()) {
|
||||
sstdir = sst->_storage.prefix();
|
||||
if (first == nullptr) {
|
||||
first = sst;
|
||||
} else {
|
||||
// All sstables are assumed to be in the same column_family, hence
|
||||
// sharing their base directory.
|
||||
assert (sstdir == sst->_storage.prefix());
|
||||
// sharing their base directory. Since lexicographical comparison of
|
||||
// paths is not the same as their actualy equivalence, this should
|
||||
// rather check for fs::equivalent call on _storage.prefix()-s. But
|
||||
// since we know that the worst thing filesystem storage driver can
|
||||
// do is to prepend/drop the trailing slash, it should be enough to
|
||||
// compare prefixes of both ... prefixes
|
||||
assert(compare_sstable_storage_prefix(first->_storage.prefix(), sst->_storage.prefix()));
|
||||
}
|
||||
}
|
||||
|
||||
sstring pending_delete_dir = sstdir + "/" + sstable::pending_delete_dir_basename();
|
||||
sstring pending_delete_dir = first->_storage.prefix() + "/" + sstable::pending_delete_dir_basename();
|
||||
sstring pending_delete_log = format("{}/sstables-{}-{}.log", pending_delete_dir, gen_tracker.min(), gen_tracker.max());
|
||||
sstring tmp_pending_delete_log = pending_delete_log + ".tmp";
|
||||
sstlog.trace("Writing {}", tmp_pending_delete_log);
|
||||
|
||||
@@ -218,6 +218,8 @@ public:
|
||||
// This function only solves the second problem for now.
|
||||
static future<> delete_atomically(std::vector<shared_sstable> ssts);
|
||||
static future<> replay_pending_delete_log(std::filesystem::path log_file);
|
||||
|
||||
static bool compare_sstable_storage_prefix(const sstring& a, const sstring& b) noexcept;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -53,6 +53,21 @@ SEASTAR_THREAD_TEST_CASE(test_sstable_move) {
|
||||
sst = env.reusable_sst(uncompressed_schema(), cur_dir, gen).get0();
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_sstable_move_idempotent) {
|
||||
tmpdir tmp;
|
||||
auto env = test_env();
|
||||
auto stop_env = defer([&env] { env.stop().get(); });
|
||||
|
||||
auto [ sst, cur_dir ] = copy_sst_to_tmpdir(tmp.path(), env, uncompressed_schema(), fs::path(uncompressed_dir()), 1);
|
||||
sstring old_path = test(sst).storage_prefix();
|
||||
touch_directory(format("{}/{}", old_path, sstables::staging_dir)).get();
|
||||
sst->change_state(sstables::staging_dir).get();
|
||||
sst->change_state(sstables::normal_dir).get();
|
||||
BOOST_REQUIRE(sstable_directory::compare_sstable_storage_prefix(old_path, test(sst).storage_prefix()));
|
||||
|
||||
sst->close_files().get();
|
||||
}
|
||||
|
||||
// Simulate a crashed create_links.
|
||||
// This implementation simulates create_links;
|
||||
// ideally, we should have error injection points in create_links
|
||||
|
||||
@@ -223,6 +223,10 @@ public:
|
||||
static fs::path filename(const sstable& sst, component_type c) {
|
||||
return fs::path(sst.filename(c));
|
||||
}
|
||||
|
||||
sstring storage_prefix() const {
|
||||
return _sst->_storage.prefix();
|
||||
}
|
||||
};
|
||||
|
||||
inline auto replacer_fn_no_op() {
|
||||
|
||||
Reference in New Issue
Block a user