From de931702ec5fcca013e62a864587833945c39766 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 30 Nov 2023 16:36:46 +0300 Subject: [PATCH] sstable_directory: Enlight deletion log replay Garbage collection of sstables is scattered between two stages -- g.c. per se and the regular processing. The former stage collects deletion logs and for each log found goes ahead and deletes the full sstable with the standard sequence: - move TOC -> TOC.tmp - remove components - remove TOC.tmp The latter stage picks up partially unlinked sstables that didn't go via atomic deletion with the log. This comes as - collect all components - keep TOC's and TOC.tmp's in separate lists - attach other components to TOC/TOC.tmp by generation value - for all TOC.tmp's get all attached components and remove them - continue loading TOC's with attached components That said, replaying a deletion log can be as light as just the first step out of the above sequence -- just move TOC to TOC.tmp. After that the regular processing would pick up the remaining components and clean them up. Signed-off-by: Pavel Emelyanov --- sstables/sstable_directory.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sstables/sstable_directory.cc b/sstables/sstable_directory.cc index f9dce37e30..5b04b45950 100644 --- a/sstables/sstable_directory.cc +++ b/sstables/sstable_directory.cc @@ -639,7 +639,8 @@ future<> sstable_directory::filesystem_components_lister::replay_pending_delete_ boost::split(basenames, all, boost::is_any_of("\n"), boost::token_compress_on); auto tocs = boost::copy_range>(basenames | boost::adaptors::filtered([] (auto&& basename) { return !basename.empty(); })); co_await parallel_for_each(tocs, [&sstdir] (const sstring& name) { - return remove_by_toc_name(sstdir + "/" + name); + // Only move TOC to TOC.tmp, the rest will be finished by regular process + return make_toc_temporary(sstdir + "/" + name).discard_result(); }); sstlog.debug("Replayed {}, removing", pending_delete_log); 
co_await remove_file(pending_delete_log.native());