commitlog: make it possible to provide base segment ID
Adds a configuration option to the commitlog: base_segment_id. When provided, the commitlog uses this ID as the base of its segment IDs instead of calculating it from the number of milliseconds between the epoch and boot time. This is needed for the feature that allows waiting for hints to be replayed to work, as it relies on replay positions increasing monotonically. The endpoint manager periodically re-creates its commitlog instance; if the instance is re-created when there are no segments on disk, it currently chooses the number of milliseconds between the epoch and boot time, which might result in segments being generated with the same IDs as segments previously created and deleted during the same runtime.
This commit is contained in:
@@ -1198,6 +1198,10 @@ db::commitlog::segment_manager::segment_manager(config c)
|
||||
}
|
||||
cfg.max_active_flushes = std::max(uint64_t(1), cfg.max_active_flushes / smp::count);
|
||||
|
||||
if (!cfg.base_segment_id) {
|
||||
cfg.base_segment_id = std::chrono::duration_cast<std::chrono::milliseconds>(runtime::get_boot_time().time_since_epoch()).count() + 1;
|
||||
}
|
||||
|
||||
return cfg;
|
||||
}())
|
||||
, max_size(std::min<size_t>(std::numeric_limits<position_type>::max() / (1024 * 1024), std::max<size_t>(cfg.commitlog_segment_size_in_mb, 1)) * 1024 * 1024)
|
||||
@@ -1299,7 +1303,7 @@ future<> db::commitlog::segment_manager::init() {
|
||||
auto descs = co_await list_descriptors(cfg.commit_log_location);
|
||||
|
||||
assert(_reserve_segments.empty()); // _segments_to_replay must not pick them up
|
||||
segment_id_type id = std::chrono::duration_cast<std::chrono::milliseconds>(runtime::get_boot_time().time_since_epoch()).count() + 1;
|
||||
segment_id_type id = *cfg.base_segment_id;
|
||||
for (auto& d : descs) {
|
||||
id = std::max(id, replay_position(d.id).base_id());
|
||||
_segments_to_replay.push_back(cfg.commit_log_location + "/" + d.filename());
|
||||
|
||||
@@ -142,6 +142,16 @@ public:
|
||||
bool warn_about_segments_left_on_disk_after_shutdown = true;
|
||||
bool allow_going_over_size_limit = true;
|
||||
|
||||
// The base segment ID to use.
|
||||
// The segment IDs of newly allocated segments will be issued sequentially
|
||||
// and will start _right after_ this parameter.
|
||||
// If not set, it will be calculated based on the number of milliseconds
|
||||
// between the epoch and boot time.
|
||||
// If there are segments to replay which have base IDs greater than
|
||||
// this parameter, new segment IDs will start after the largest one
|
||||
// of them.
|
||||
std::optional<segment_id_type> base_segment_id;
|
||||
|
||||
const db::extensions * extensions = nullptr;
|
||||
};
|
||||
|
||||
|
||||
@@ -417,6 +417,27 @@ future<db::commitlog> manager::end_point_hints_manager::add_store() noexcept {
|
||||
// in resource_manager, so it's redundant for the commitlog to apply
|
||||
// a hard limit.
|
||||
cfg.allow_going_over_size_limit = true;
|
||||
// The API for waiting for hint replay relies on replay positions
|
||||
// monotonically increasing. When there are no segments on disk,
|
||||
// by default the commitlog will calculate the first segment ID
|
||||
// based on the boot time. This may cause the following sequence
|
||||
// of events to occur:
|
||||
//
|
||||
// 1. Node starts with empty hints queue
|
||||
// 2. Some hints are written and some segments are created
|
||||
// 3. All hints are replayed
|
||||
// 4. Hint sync point is created
|
||||
// 5. Commitlog instance gets re-created and resets its segment ID counter
|
||||
// 6. New hint segment has the same ID as the first (deleted by now) segment
|
||||
// 7. Waiting for the sync point commences but resolves immediately
|
||||
// before new hints are replayed - since point 5., `_last_written_rp`
|
||||
// and `_sent_upper_bound_rp` are not updated because RPs of new
|
||||
// hints are much lower than both of those marks.
|
||||
//
|
||||
// In order to prevent this situation, we override the base segment ID
|
||||
// of the newly created commitlog instance - it should start with an ID
|
||||
// which is larger than the segment ID of the RP of the last written hint.
|
||||
cfg.base_segment_id = _last_written_rp.base_id();
|
||||
|
||||
return commitlog::create_commitlog(std::move(cfg)).then([this] (commitlog l) {
|
||||
// add_store() is triggered every time hint files are forcefully flushed to I/O (every hints_flush_period).
|
||||
|
||||
Reference in New Issue
Block a user