tasks: do not use binary progress for task manager tasks

Currently, progress of a parent task depends on expected_total_workload,
expected_children_number, and children progresses. Basically, if total
workload is known or all children have already been created, progresses
of children are summed up. Otherwise binary progress is returned.

As a result, two tasks of the same type may return progress in different
units. If they are children of the same task and this parent gathers the
progress - it becomes meaningless.

Drop expected_children_number as we can't assume that children are able
to show their progresses.

Modify get_progress method - progress is calculated based on children
progresses. If expected_total_workload isn't specified, the total
progress of a task may grow. If expected_total_workload isn't specified
and no children are created, empty progress (0/0) is returned.

Fixes: https://github.com/scylladb/scylladb/issues/24650.

Closes scylladb/scylladb#25113

(cherry picked from commit a7ee2bbbd8)

Closes scylladb/scylladb#25197
This commit is contained in:
Aleksandra Martyniuk
2025-07-21 15:20:35 +02:00
committed by Pavel Emelyanov
parent 3f93cdc61b
commit 23479939b9
4 changed files with 3 additions and 33 deletions

View File

@@ -1440,10 +1440,6 @@ future<std::optional<double>> repair::user_requested_repair_task_impl::expected_
co_return _ranges.size() * _cfs.size() * smp::count;
}
std::optional<double> repair::user_requested_repair_task_impl::expected_children_number() const {
return smp::count;
}
future<int> repair_start(seastar::sharded<repair_service>& repair, sharded<gms::gossip_address_map>& am,
sstring keyspace, std::unordered_map<sstring, sstring> options) {
return repair.invoke_on(0, [keyspace = std::move(keyspace), options = std::move(options), &am] (repair_service& local_repair) {
@@ -1612,10 +1608,6 @@ future<std::optional<double>> repair::data_sync_repair_task_impl::expected_total
co_return _cfs_size ? std::make_optional<double>(_ranges.size() * _cfs_size * smp::count) : std::nullopt;
}
std::optional<double> repair::data_sync_repair_task_impl::expected_children_number() const {
return smp::count;
}
future<> repair_service::bootstrap_with_repair(locator::token_metadata_ptr tmptr, std::unordered_set<dht::token> bootstrap_tokens) {
SCYLLA_ASSERT(this_shard_id() == 0);
return seastar::async([this, tmptr = std::move(tmptr), tokens = std::move(bootstrap_tokens)] () mutable {
@@ -2665,10 +2657,6 @@ future<std::optional<double>> repair::tablet_repair_task_impl::expected_total_wo
co_return sz ? std::make_optional<double>(sz) : std::nullopt;
}
std::optional<double> repair::tablet_repair_task_impl::expected_children_number() const {
return get_metas_size();
}
node_ops_cmd_category categorize_node_ops_cmd(node_ops_cmd cmd) noexcept {
switch (cmd) {
case node_ops_cmd::removenode_prepare:

View File

@@ -74,7 +74,6 @@ protected:
future<> run() override;
virtual future<std::optional<double>> expected_total_workload() const override;
virtual std::optional<double> expected_children_number() const override;
};
class data_sync_repair_task_impl : public repair_task_impl {
@@ -103,7 +102,6 @@ protected:
future<> run() override;
virtual future<std::optional<double>> expected_total_workload() const override;
virtual std::optional<double> expected_children_number() const override;
};
class tablet_repair_task_impl : public repair_task_impl {
@@ -145,7 +143,6 @@ protected:
future<> run() override;
virtual future<std::optional<double>> expected_total_workload() const override;
virtual std::optional<double> expected_children_number() const override;
};
class shard_repair_task_impl : public repair_task_impl {

View File

@@ -120,10 +120,6 @@ future<std::optional<double>> task_manager::task::impl::expected_total_workload(
return make_ready_future<std::optional<double>>(std::nullopt);
}
std::optional<double> task_manager::task::impl::expected_children_number() const {
return std::nullopt;
}
task_manager::task::progress task_manager::task::impl::get_binary_progress() const {
return tasks::task_manager::task::progress{
.completed = is_complete(),
@@ -132,20 +128,10 @@ task_manager::task::progress task_manager::task::impl::get_binary_progress() con
}
future<task_manager::task::progress> task_manager::task::impl::get_progress() const {
auto children_num = _children.size();
if (children_num == 0) {
co_return get_binary_progress();
std::optional<double> expected_workload = co_await expected_total_workload();
if (!expected_workload && _children.size() == 0) {
co_return task_manager::task::progress{};
}
std::optional<double> expected_workload = std::nullopt;
auto expected_children_num = expected_children_number();
// When get_progress is called, the task can have some of its children unregistered yet.
// Then if total workload is not known, progress obtained from children may be deceiving.
// In such a situation it's safer to return binary progress value.
if (expected_children_num.value_or(0) != children_num && !(expected_workload = co_await expected_total_workload())) {
co_return get_binary_progress();
}
auto progress = co_await _children.get_progress(_status.progress_units);
progress.total = expected_workload.value_or(progress.total);
co_return progress;

View File

@@ -226,7 +226,6 @@ public:
future<> finish_failed(std::exception_ptr ex, std::string error) noexcept;
future<> finish_failed(std::exception_ptr ex) noexcept;
virtual future<std::optional<double>> expected_total_workload() const;
virtual std::optional<double> expected_children_number() const;
task_manager::task::progress get_binary_progress() const;
friend task;