replica: iterate safely over table-related maps
Loops over _column_families and _ks_cf_to_uuid that may preempt are now protected by the reader side of an rwlock, so that their iterators are not invalidated.
This commit is contained in:
@@ -135,9 +135,9 @@ static future<json::json_return_type> get_cf_histogram(http_context& ctx, const
|
|||||||
static future<json::json_return_type> get_cf_histogram(http_context& ctx, utils::timed_rate_moving_average_summary_and_histogram replica::column_family_stats::*f) {
|
static future<json::json_return_type> get_cf_histogram(http_context& ctx, utils::timed_rate_moving_average_summary_and_histogram replica::column_family_stats::*f) {
|
||||||
std::function<utils::ihistogram(const replica::database&)> fun = [f] (const replica::database& db) {
|
std::function<utils::ihistogram(const replica::database&)> fun = [f] (const replica::database& db) {
|
||||||
utils::ihistogram res;
|
utils::ihistogram res;
|
||||||
for (auto i : db.get_tables_metadata()._column_families) {
|
db.get_tables_metadata().for_each_table([&] (table_id, lw_shared_ptr<replica::table> table) mutable {
|
||||||
res += (i.second->get_stats().*f).hist;
|
res += (table->get_stats().*f).hist;
|
||||||
}
|
});
|
||||||
return res;
|
return res;
|
||||||
};
|
};
|
||||||
return ctx.db.map(fun).then([](const std::vector<utils::ihistogram> &res) {
|
return ctx.db.map(fun).then([](const std::vector<utils::ihistogram> &res) {
|
||||||
@@ -162,9 +162,9 @@ static future<json::json_return_type> get_cf_rate_and_histogram(http_context& c
|
|||||||
static future<json::json_return_type> get_cf_rate_and_histogram(http_context& ctx, utils::timed_rate_moving_average_summary_and_histogram replica::column_family_stats::*f) {
|
static future<json::json_return_type> get_cf_rate_and_histogram(http_context& ctx, utils::timed_rate_moving_average_summary_and_histogram replica::column_family_stats::*f) {
|
||||||
std::function<utils::rate_moving_average_and_histogram(const replica::database&)> fun = [f] (const replica::database& db) {
|
std::function<utils::rate_moving_average_and_histogram(const replica::database&)> fun = [f] (const replica::database& db) {
|
||||||
utils::rate_moving_average_and_histogram res;
|
utils::rate_moving_average_and_histogram res;
|
||||||
for (auto i : db.get_tables_metadata()._column_families) {
|
db.get_tables_metadata().for_each_table([&] (table_id, lw_shared_ptr<replica::table> table) {
|
||||||
res += (i.second->get_stats().*f).rate();
|
res += (table->get_stats().*f).rate();
|
||||||
}
|
});
|
||||||
return res;
|
return res;
|
||||||
};
|
};
|
||||||
return ctx.db.map(fun).then([](const std::vector<utils::rate_moving_average_and_histogram> &res) {
|
return ctx.db.map(fun).then([](const std::vector<utils::rate_moving_average_and_histogram> &res) {
|
||||||
@@ -306,21 +306,21 @@ ratio_holder filter_recent_false_positive_as_ratio_holder(const sstables::shared
|
|||||||
void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace>& sys_ks) {
|
void set_column_family(http_context& ctx, routes& r, sharded<db::system_keyspace>& sys_ks) {
|
||||||
cf::get_column_family_name.set(r, [&ctx] (const_req req){
|
cf::get_column_family_name.set(r, [&ctx] (const_req req){
|
||||||
std::vector<sstring> res;
|
std::vector<sstring> res;
|
||||||
for (auto i: ctx.db.local().get_tables_metadata()._ks_cf_to_uuid) {
|
ctx.db.local().get_tables_metadata().for_each_table_id([&] (const std::pair<sstring, sstring>& kscf, table_id) {
|
||||||
res.push_back(i.first.first + ":" + i.first.second);
|
res.push_back(kscf.first + ":" + kscf.second);
|
||||||
}
|
});
|
||||||
return res;
|
return res;
|
||||||
});
|
});
|
||||||
|
|
||||||
cf::get_column_family.set(r, [&ctx] (std::unique_ptr<http::request> req){
|
cf::get_column_family.set(r, [&ctx] (std::unique_ptr<http::request> req){
|
||||||
std::list<cf::column_family_info> res;
|
std::list<cf::column_family_info> res;
|
||||||
for (auto i: ctx.db.local().get_tables_metadata()._ks_cf_to_uuid) {
|
ctx.db.local().get_tables_metadata().for_each_table_id([&] (const std::pair<sstring, sstring>& kscf, table_id) {
|
||||||
cf::column_family_info info;
|
cf::column_family_info info;
|
||||||
info.ks = i.first.first;
|
info.ks = kscf.first;
|
||||||
info.cf = i.first.second;
|
info.cf = kscf.second;
|
||||||
info.type = "ColumnFamilies";
|
info.type = "ColumnFamilies";
|
||||||
res.push_back(info);
|
res.push_back(info);
|
||||||
}
|
});
|
||||||
return make_ready_future<json::json_return_type>(json::stream_range_as_array(std::move(res), std::identity()));
|
return make_ready_future<json::json_return_type>(json::stream_range_as_array(std::move(res), std::identity()));
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -68,9 +68,10 @@ struct map_reduce_column_families_locally {
|
|||||||
std::function<std::unique_ptr<std::any>(std::unique_ptr<std::any>, std::unique_ptr<std::any>)> reducer;
|
std::function<std::unique_ptr<std::any>(std::unique_ptr<std::any>, std::unique_ptr<std::any>)> reducer;
|
||||||
future<std::unique_ptr<std::any>> operator()(replica::database& db) const {
|
future<std::unique_ptr<std::any>> operator()(replica::database& db) const {
|
||||||
auto res = seastar::make_lw_shared<std::unique_ptr<std::any>>(std::make_unique<std::any>(init));
|
auto res = seastar::make_lw_shared<std::unique_ptr<std::any>>(std::make_unique<std::any>(init));
|
||||||
return do_for_each(db.get_tables_metadata()._column_families, [res, this](const std::pair<table_id, seastar::lw_shared_ptr<replica::table>>& i) {
|
return db.get_tables_metadata().for_each_table_gently([res, this] (table_id, seastar::lw_shared_ptr<replica::table> table) {
|
||||||
*res = reducer(std::move(*res), mapper(*i.second.get()));
|
*res = reducer(std::move(*res), mapper(*table.get()));
|
||||||
}).then([res] {
|
return make_ready_future();
|
||||||
|
}).then([res] () {
|
||||||
return std::move(*res);
|
return std::move(*res);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -68,8 +68,8 @@ void set_compaction_manager(http_context& ctx, routes& r) {
|
|||||||
cm::get_pending_tasks_by_table.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
cm::get_pending_tasks_by_table.set(r, [&ctx] (std::unique_ptr<http::request> req) {
|
||||||
return ctx.db.map_reduce0([](replica::database& db) {
|
return ctx.db.map_reduce0([](replica::database& db) {
|
||||||
return do_with(std::unordered_map<std::pair<sstring, sstring>, uint64_t, utils::tuple_hash>(), [&db](std::unordered_map<std::pair<sstring, sstring>, uint64_t, utils::tuple_hash>& tasks) {
|
return do_with(std::unordered_map<std::pair<sstring, sstring>, uint64_t, utils::tuple_hash>(), [&db](std::unordered_map<std::pair<sstring, sstring>, uint64_t, utils::tuple_hash>& tasks) {
|
||||||
return do_for_each(db.get_tables_metadata()._column_families, [&tasks](const std::pair<table_id, seastar::lw_shared_ptr<replica::table>>& i) -> future<> {
|
return db.get_tables_metadata().for_each_table_gently([&tasks] (table_id, lw_shared_ptr<replica::table> table) {
|
||||||
replica::table& cf = *i.second.get();
|
replica::table& cf = *table.get();
|
||||||
tasks[std::make_pair(cf.schema()->ks_name(), cf.schema()->cf_name())] = cf.estimate_pending_compactions();
|
tasks[std::make_pair(cf.schema()->ks_name(), cf.schema()->cf_name())] = cf.estimate_pending_compactions();
|
||||||
return make_ready_future<>();
|
return make_ready_future<>();
|
||||||
}).then([&tasks] {
|
}).then([&tasks] {
|
||||||
|
|||||||
@@ -980,10 +980,9 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
|||||||
ks.set_incremental_backups(value);
|
ks.set_incremental_backups(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (auto& pair: db.get_tables_metadata()._column_families) {
|
db.get_tables_metadata().for_each_table([&] (table_id, lw_shared_ptr<replica::table> table) {
|
||||||
auto cf_ptr = pair.second;
|
table->set_incremental_backups(value);
|
||||||
cf_ptr->set_incremental_backups(value);
|
});
|
||||||
}
|
|
||||||
}).then([] {
|
}).then([] {
|
||||||
return make_ready_future<json::json_return_type>(json_void());
|
return make_ready_future<json::json_return_type>(json_void());
|
||||||
});
|
});
|
||||||
@@ -1258,7 +1257,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
|||||||
|
|
||||||
auto& ext = db.get_config().extensions();
|
auto& ext = db.get_config().extensions();
|
||||||
|
|
||||||
for (auto& t : db.get_tables_metadata()._column_families | boost::adaptors::map_values) {
|
db.get_tables_metadata().for_each_table([&] (table_id, lw_shared_ptr<replica::table> t) {
|
||||||
auto& schema = t->schema();
|
auto& schema = t->schema();
|
||||||
if ((ks.empty() || ks == schema->ks_name()) && (cf.empty() || cf == schema->cf_name())) {
|
if ((ks.empty() || ks == schema->ks_name()) && (cf.empty() || cf == schema->cf_name())) {
|
||||||
// at most Nsstables long
|
// at most Nsstables long
|
||||||
@@ -1339,7 +1338,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded<service::storage_
|
|||||||
}
|
}
|
||||||
res.emplace_back(std::move(tst));
|
res.emplace_back(std::move(tst));
|
||||||
}
|
}
|
||||||
}
|
});
|
||||||
std::sort(res.begin(), res.end(), [](const ss::table_sstables& t1, const ss::table_sstables& t2) {
|
std::sort(res.begin(), res.end(), [](const ss::table_sstables& t1, const ss::table_sstables& t2) {
|
||||||
return t1.keyspace() < t2.keyspace() || (t1.keyspace() == t2.keyspace() && t1.table() < t2.table());
|
return t1.keyspace() < t2.keyspace() || (t1.keyspace() == t2.keyspace() && t1.table() < t2.table());
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -641,21 +641,21 @@ future<> generation_service::maybe_rewrite_streams_descriptions() {
|
|||||||
|
|
||||||
// For each CDC log table get the TTL setting (from CDC options) and the table's creation time
|
// For each CDC log table get the TTL setting (from CDC options) and the table's creation time
|
||||||
std::vector<time_and_ttl> times_and_ttls;
|
std::vector<time_and_ttl> times_and_ttls;
|
||||||
for (auto& [_, cf] : _db.get_tables_metadata()._column_families) {
|
_db.get_tables_metadata().for_each_table([&] (table_id, lw_shared_ptr<replica::table> t) {
|
||||||
auto& s = *cf->schema();
|
auto& s = *t->schema();
|
||||||
auto base = cdc::get_base_table(_db, s.ks_name(), s.cf_name());
|
auto base = cdc::get_base_table(_db, s.ks_name(), s.cf_name());
|
||||||
if (!base) {
|
if (!base) {
|
||||||
// Not a CDC log table.
|
// Not a CDC log table.
|
||||||
continue;
|
return;
|
||||||
}
|
}
|
||||||
auto& cdc_opts = base->cdc_options();
|
auto& cdc_opts = base->cdc_options();
|
||||||
if (!cdc_opts.enabled()) {
|
if (!cdc_opts.enabled()) {
|
||||||
// This table is named like a CDC log table but it's not one.
|
// This table is named like a CDC log table but it's not one.
|
||||||
continue;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
times_and_ttls.push_back(time_and_ttl{as_timepoint(s.id().uuid()), cdc_opts.ttl()});
|
times_and_ttls.push_back(time_and_ttl{as_timepoint(s.id().uuid()), cdc_opts.ttl()});
|
||||||
}
|
});
|
||||||
|
|
||||||
if (times_and_ttls.empty()) {
|
if (times_and_ttls.empty()) {
|
||||||
// There's no point in rewriting old generations' streams (they don't contain any data).
|
// There's no point in rewriting old generations' streams (they don't contain any data).
|
||||||
|
|||||||
@@ -126,8 +126,7 @@ future<> db::commitlog_replayer::impl::init() {
|
|||||||
}
|
}
|
||||||
}, [this](replica::database& db) {
|
}, [this](replica::database& db) {
|
||||||
return do_with(shard_rpm_map{}, [this, &db](shard_rpm_map& map) {
|
return do_with(shard_rpm_map{}, [this, &db](shard_rpm_map& map) {
|
||||||
return parallel_for_each(db.get_tables_metadata()._column_families, [this, &map](auto& cfp) {
|
return db.get_tables_metadata().parallel_for_each_table([this, &map] (table_id uuid, lw_shared_ptr<replica::table>) {
|
||||||
auto uuid = cfp.first;
|
|
||||||
// We do this on each cpu, for each CF, which technically is a little wasteful, but the values are
|
// We do this on each cpu, for each CF, which technically is a little wasteful, but the values are
|
||||||
// cached, this is only startup, and it makes the code easier.
|
// cached, this is only startup, and it makes the code easier.
|
||||||
// Get all truncation records for the CF and initialize max rps if
|
// Get all truncation records for the CF and initialize max rps if
|
||||||
@@ -156,13 +155,13 @@ future<> db::commitlog_replayer::impl::init() {
|
|||||||
// existing sstables-per-shard.
|
// existing sstables-per-shard.
|
||||||
// So, go through all CF:s and check, if a shard mapping does not
|
// So, go through all CF:s and check, if a shard mapping does not
|
||||||
// have data for it, assume we must set global pos to zero.
|
// have data for it, assume we must set global pos to zero.
|
||||||
for (auto&p : _db.local().get_tables_metadata()._column_families) {
|
_db.local().get_tables_metadata().for_each_table([&] (table_id id, lw_shared_ptr<replica::table>) {
|
||||||
for (auto&p1 : _rpm) { // for each shard
|
for (auto&p1 : _rpm) { // for each shard
|
||||||
if (!p1.second.contains(p.first)) {
|
if (!p1.second.contains(id)) {
|
||||||
_min_pos[p1.first] = replay_position();
|
_min_pos[p1.first] = replay_position();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
});
|
||||||
for (auto&p : _min_pos) {
|
for (auto&p : _min_pos) {
|
||||||
rlogger.debug("minimum position for shard {}: {}", p.first, p.second);
|
rlogger.debug("minimum position for shard {}: {}", p.first, p.second);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -265,8 +265,8 @@ void view_update_generator::setup_metrics() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void view_update_generator::discover_staging_sstables() {
|
void view_update_generator::discover_staging_sstables() {
|
||||||
for (auto& x : _db.get_tables_metadata()._column_families) {
|
_db.get_tables_metadata().for_each_table([&] (table_id, lw_shared_ptr<replica::table> table) {
|
||||||
auto t = x.second->shared_from_this();
|
auto t = table->shared_from_this();
|
||||||
for (auto sstables = t->get_sstables(); sstables::shared_sstable sst : *sstables) {
|
for (auto sstables = t->get_sstables(); sstables::shared_sstable sst : *sstables) {
|
||||||
if (sst->requires_view_building()) {
|
if (sst->requires_view_building()) {
|
||||||
_progress_tracker->on_sstable_registration(sst);
|
_progress_tracker->on_sstable_registration(sst);
|
||||||
@@ -276,7 +276,7 @@ void view_update_generator::discover_staging_sstables() {
|
|||||||
_registration_sem.consume(1);
|
_registration_sem.consume(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -283,13 +283,13 @@ public:
|
|||||||
const auto snapshots_by_tables = co_await _db.map_reduce(snapshot_reducer(), [ks_name_ = ks_data.name] (replica::database& db) mutable -> future<snapshots_by_tables_map> {
|
const auto snapshots_by_tables = co_await _db.map_reduce(snapshot_reducer(), [ks_name_ = ks_data.name] (replica::database& db) mutable -> future<snapshots_by_tables_map> {
|
||||||
auto ks_name = std::move(ks_name_);
|
auto ks_name = std::move(ks_name_);
|
||||||
snapshots_by_tables_map snapshots_by_tables;
|
snapshots_by_tables_map snapshots_by_tables;
|
||||||
for (auto& [_, table] : db.get_tables_metadata()._column_families) {
|
co_await db.get_tables_metadata().for_each_table_gently(coroutine::lambda([&] (table_id, lw_shared_ptr<replica::table> table) -> future<> {
|
||||||
if (table->schema()->ks_name() != ks_name) {
|
if (table->schema()->ks_name() != ks_name) {
|
||||||
continue;
|
co_return;
|
||||||
}
|
}
|
||||||
const auto unordered_snapshots = co_await table->get_snapshot_details();
|
const auto unordered_snapshots = co_await table->get_snapshot_details();
|
||||||
snapshots_by_tables.emplace(table->schema()->cf_name(), std::map<sstring, replica::table::snapshot_details>(unordered_snapshots.begin(), unordered_snapshots.end()));
|
snapshots_by_tables.emplace(table->schema()->cf_name(), std::map<sstring, replica::table::snapshot_details>(unordered_snapshots.begin(), unordered_snapshots.end()));
|
||||||
}
|
}));
|
||||||
co_return snapshots_by_tables;
|
co_return snapshots_by_tables;
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -433,9 +433,9 @@ private:
|
|||||||
};
|
};
|
||||||
co_return co_await _db.map_reduce(shard_reducer(reduce), [map, reduce] (replica::database& db) {
|
co_return co_await _db.map_reduce(shard_reducer(reduce), [map, reduce] (replica::database& db) {
|
||||||
T val = {};
|
T val = {};
|
||||||
for (auto& [_, table] : db.get_tables_metadata()._column_families) {
|
db.get_tables_metadata().for_each_table([&] (table_id, lw_shared_ptr<replica::table> table) {
|
||||||
val = reduce(val, map(*table));
|
val = reduce(val, map(*table));
|
||||||
}
|
});
|
||||||
return val;
|
return val;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -560,13 +560,13 @@ public:
|
|||||||
res.total = occupancy.total_space();
|
res.total = occupancy.total_space();
|
||||||
res.free = occupancy.free_space();
|
res.free = occupancy.free_space();
|
||||||
res.entries = db.row_cache_tracker().partitions();
|
res.entries = db.row_cache_tracker().partitions();
|
||||||
for (const auto& [_, t] : db.get_tables_metadata()._column_families) {
|
db.get_tables_metadata().for_each_table([&] (table_id id, lw_shared_ptr<replica::table> t) {
|
||||||
auto& cache_stats = t->get_row_cache().stats();
|
auto& cache_stats = t->get_row_cache().stats();
|
||||||
res.hits += cache_stats.hits.count();
|
res.hits += cache_stats.hits.count();
|
||||||
res.misses += cache_stats.misses.count();
|
res.misses += cache_stats.misses.count();
|
||||||
res.hits_moving_average += cache_stats.hits.rate();
|
res.hits_moving_average += cache_stats.hits.rate();
|
||||||
res.requests_moving_average += (cache_stats.hits.rate() + cache_stats.misses.rate());
|
res.requests_moving_average += (cache_stats.hits.rate() + cache_stats.misses.rate());
|
||||||
}
|
});
|
||||||
return res;
|
return res;
|
||||||
}, stats{}, stats::reduce).then([] (stats s) {
|
}, stats{}, stats::reduce).then([] (stats s) {
|
||||||
return std::vector<std::pair<sstring, sstring>>{
|
return std::vector<std::pair<sstring, sstring>>{
|
||||||
|
|||||||
17
main.cc
17
main.cc
@@ -1346,8 +1346,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
|
|||||||
// Needs to happen before replaying the schema commitlog, which interprets
|
// Needs to happen before replaying the schema commitlog, which interprets
|
||||||
// replay position in the truncation record.
|
// replay position in the truncation record.
|
||||||
// Needs to happen before system_keyspace::setup(), which reads truncation records.
|
// Needs to happen before system_keyspace::setup(), which reads truncation records.
|
||||||
for (auto&& e : db.local().get_tables_metadata()._column_families) {
|
db.local().get_tables_metadata().for_each_table([] (table_id, lw_shared_ptr<replica::table> table_ptr) {
|
||||||
auto table_ptr = e.second;
|
|
||||||
if (table_ptr->schema()->ks_name() == db::schema_tables::NAME) {
|
if (table_ptr->schema()->ks_name() == db::schema_tables::NAME) {
|
||||||
if (table_ptr->get_truncation_record() != db_clock::time_point::min()) {
|
if (table_ptr->get_truncation_record() != db_clock::time_point::min()) {
|
||||||
// replay_position stored in the truncation record may belong to
|
// replay_position stored in the truncation record may belong to
|
||||||
@@ -1360,7 +1359,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
|
|||||||
table_ptr->schema()->ks_name(), table_ptr->schema()->cf_name()));
|
table_ptr->schema()->ks_name(), table_ptr->schema()->cf_name()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
});
|
||||||
|
|
||||||
auto sch_cl = db.local().schema_commitlog();
|
auto sch_cl = db.local().schema_commitlog();
|
||||||
if (sch_cl != nullptr) {
|
if (sch_cl != nullptr) {
|
||||||
@@ -1405,10 +1404,10 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
|
|||||||
}
|
}
|
||||||
|
|
||||||
db.invoke_on_all([] (replica::database& db) {
|
db.invoke_on_all([] (replica::database& db) {
|
||||||
for (auto& x : db.get_tables_metadata()._column_families) {
|
db.get_tables_metadata().for_each_table([] (table_id, lw_shared_ptr<replica::table> table) {
|
||||||
replica::table& t = *(x.second);
|
replica::table& t = *table;
|
||||||
t.enable_auto_compaction();
|
t.enable_auto_compaction();
|
||||||
}
|
});
|
||||||
}).get();
|
}).get();
|
||||||
|
|
||||||
// If the same sstable is shared by several shards, it cannot be
|
// If the same sstable is shared by several shards, it cannot be
|
||||||
@@ -1423,10 +1422,10 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
|
|||||||
// streaming
|
// streaming
|
||||||
|
|
||||||
db.invoke_on_all([] (replica::database& db) {
|
db.invoke_on_all([] (replica::database& db) {
|
||||||
for (auto& x : db.get_tables_metadata()._column_families) {
|
db.get_tables_metadata().for_each_table([] (table_id, lw_shared_ptr<replica::table> table) {
|
||||||
replica::column_family& cf = *(x.second);
|
replica::column_family& cf = *table;
|
||||||
cf.trigger_compaction();
|
cf.trigger_compaction();
|
||||||
}
|
});
|
||||||
}).get();
|
}).get();
|
||||||
api::set_server_gossip(ctx, gossiper).get();
|
api::set_server_gossip(ctx, gossiper).get();
|
||||||
api::set_server_snitch(ctx, snitch).get();
|
api::set_server_snitch(ctx, snitch).get();
|
||||||
|
|||||||
@@ -127,19 +127,20 @@ std::ostream& operator<<(std::ostream& out, row_level_diff_detect_algorithm algo
|
|||||||
}
|
}
|
||||||
|
|
||||||
static size_t get_nr_tables(const replica::database& db, const sstring& keyspace) {
|
static size_t get_nr_tables(const replica::database& db, const sstring& keyspace) {
|
||||||
auto& m = db.get_tables_metadata()._ks_cf_to_uuid;
|
size_t tables = 0;
|
||||||
return std::count_if(m.begin(), m.end(), [&keyspace] (auto& e) {
|
db.get_tables_metadata().for_each_table_id([&keyspace, &tables] (const std::pair<sstring, sstring>& kscf, table_id) {
|
||||||
return e.first.first == keyspace;
|
tables += kscf.first == keyspace;
|
||||||
});
|
});
|
||||||
|
return tables;
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::vector<sstring> list_column_families(const replica::database& db, const sstring& keyspace) {
|
static std::vector<sstring> list_column_families(const replica::database& db, const sstring& keyspace) {
|
||||||
std::vector<sstring> ret;
|
std::vector<sstring> ret;
|
||||||
for (auto &&e : db.get_tables_metadata()._ks_cf_to_uuid) {
|
db.get_tables_metadata().for_each_table_id([&] (const std::pair<sstring, sstring>& kscf, table_id) {
|
||||||
if (e.first.first == keyspace) {
|
if (kscf.first == keyspace) {
|
||||||
ret.push_back(e.first.second);
|
ret.push_back(kscf.second);
|
||||||
}
|
}
|
||||||
}
|
});
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -3050,13 +3050,10 @@ future<> repair_service::cleanup_history(tasks::task_id repair_id) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
future<> repair_service::load_history() {
|
future<> repair_service::load_history() {
|
||||||
auto tables = get_db().local().get_tables_metadata()._column_families;
|
co_await get_db().local().get_tables_metadata().for_each_table_gently(coroutine::lambda([&] (table_id table_uuid, lw_shared_ptr<replica::table> table) -> future<> {
|
||||||
for (const auto& x : tables) {
|
|
||||||
auto& table_uuid = x.first;
|
|
||||||
auto& table = x.second;
|
|
||||||
auto shard = unsigned(table_uuid.uuid().get_most_significant_bits()) % smp::count;
|
auto shard = unsigned(table_uuid.uuid().get_most_significant_bits()) % smp::count;
|
||||||
if (shard != this_shard_id()) {
|
if (shard != this_shard_id()) {
|
||||||
continue;
|
co_return;
|
||||||
}
|
}
|
||||||
rlogger.info("Loading repair history for keyspace={}, table={}, table_uuid={}",
|
rlogger.info("Loading repair history for keyspace={}, table={}, table_uuid={}",
|
||||||
table->schema()->ks_name(), table->schema()->cf_name(), table_uuid);
|
table->schema()->ks_name(), table->schema()->cf_name(), table_uuid);
|
||||||
@@ -3077,8 +3074,7 @@ future<> repair_service::load_history() {
|
|||||||
entry.ks, entry.cf, range, repair_time);
|
entry.ks, entry.cf, range, repair_time);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}));
|
||||||
co_return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
repair_meta_ptr repair_service::get_repair_meta(gms::inet_address from, uint32_t repair_meta_id) {
|
repair_meta_ptr repair_service::get_repair_meta(gms::inet_address from, uint32_t repair_meta_id) {
|
||||||
|
|||||||
@@ -66,11 +66,11 @@ public:
|
|||||||
}
|
}
|
||||||
virtual std::vector<data_dictionary::table> get_tables(data_dictionary::database db) const override {
|
virtual std::vector<data_dictionary::table> get_tables(data_dictionary::database db) const override {
|
||||||
std::vector<data_dictionary::table> ret;
|
std::vector<data_dictionary::table> ret;
|
||||||
auto&& tables = unwrap(db).get_tables_metadata()._column_families;
|
auto& tmd = unwrap(db).get_tables_metadata();
|
||||||
ret.reserve(tables.size());
|
ret.reserve(tmd.size());
|
||||||
for (auto&& [uuid, cf] : tables) {
|
tmd.for_each_table([&] (table_id, const lw_shared_ptr<table> table) {
|
||||||
ret.push_back(wrap(*cf));
|
ret.push_back(wrap(*table));
|
||||||
}
|
});
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
virtual std::optional<data_dictionary::table> try_find_table(data_dictionary::database db, std::string_view ks, std::string_view table) const override {
|
virtual std::optional<data_dictionary::table> try_find_table(data_dictionary::database db, std::string_view ks, std::string_view table) const override {
|
||||||
|
|||||||
@@ -154,20 +154,20 @@ phased_barrier_top_10_counts(const database::tables_metadata& tables_metadata, s
|
|||||||
boost::container::static_vector<count_and_tables, 10> res;
|
boost::container::static_vector<count_and_tables, 10> res;
|
||||||
count_and_tables* min_element = nullptr;
|
count_and_tables* min_element = nullptr;
|
||||||
|
|
||||||
for (const auto& [tid, table] : tables_metadata._column_families) {
|
tables_metadata.for_each_table([&] (table_id tid, lw_shared_ptr<table> table) {
|
||||||
const auto count = op_count_getter(*table);
|
const auto count = op_count_getter(*table);
|
||||||
if (!count) {
|
if (!count) {
|
||||||
continue;
|
return;
|
||||||
}
|
}
|
||||||
if (res.size() < res.capacity()) {
|
if (res.size() < res.capacity()) {
|
||||||
auto& elem = res.emplace_back(count, table_list({table.get()}));
|
auto& elem = res.emplace_back(count, table_list({table.get()}));
|
||||||
if (!min_element || min_element->first > count) {
|
if (!min_element || min_element->first > count) {
|
||||||
min_element = &elem;
|
min_element = &elem;
|
||||||
}
|
}
|
||||||
continue;
|
return;
|
||||||
}
|
}
|
||||||
if (min_element->first > count) {
|
if (min_element->first > count) {
|
||||||
continue;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto it = boost::find_if(res, [count] (const count_and_tables& x) {
|
auto it = boost::find_if(res, [count] (const count_and_tables& x) {
|
||||||
@@ -175,13 +175,13 @@ phased_barrier_top_10_counts(const database::tables_metadata& tables_metadata, s
|
|||||||
});
|
});
|
||||||
if (it != res.end()) {
|
if (it != res.end()) {
|
||||||
it->second.push_back(table.get());
|
it->second.push_back(table.get());
|
||||||
continue;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we are here, min_element->first < count
|
// If we are here, min_element->first < count
|
||||||
*min_element = {count, table_list({table.get()})};
|
*min_element = {count, table_list({table.get()})};
|
||||||
min_element = &*boost::min_element(res, less);
|
min_element = &*boost::min_element(res, less);
|
||||||
}
|
});
|
||||||
|
|
||||||
boost::sort(res, less);
|
boost::sort(res, less);
|
||||||
|
|
||||||
@@ -1802,10 +1802,10 @@ std::ostream& operator<<(std::ostream& out, const column_family& cf) {
|
|||||||
|
|
||||||
std::ostream& operator<<(std::ostream& out, const database& db) {
|
std::ostream& operator<<(std::ostream& out, const database& db) {
|
||||||
out << "{\n";
|
out << "{\n";
|
||||||
for (auto&& e : db._tables_metadata._column_families) {
|
db._tables_metadata.for_each_table([&] (table_id id, const lw_shared_ptr<table> tp) {
|
||||||
auto&& cf = *e.second;
|
auto&& cf = *tp;
|
||||||
out << "(" << e.first.to_sstring() << ", " << cf.schema()->cf_name() << ", " << cf.schema()->ks_name() << "): " << cf << "\n";
|
out << "(" << id.to_sstring() << ", " << cf.schema()->cf_name() << ", " << cf.schema()->ks_name() << "): " << cf << "\n";
|
||||||
}
|
});
|
||||||
out << "}";
|
out << "}";
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
@@ -2310,13 +2310,13 @@ schema_ptr database::find_indexed_table(const sstring& ks_name, const sstring& i
|
|||||||
|
|
||||||
future<> database::close_tables(table_kind kind_to_close) {
|
future<> database::close_tables(table_kind kind_to_close) {
|
||||||
auto b = defer([this] { _stop_barrier.abort(); });
|
auto b = defer([this] { _stop_barrier.abort(); });
|
||||||
co_await coroutine::parallel_for_each(_tables_metadata._column_families, [this, kind_to_close](auto& val_pair) -> future<> {
|
co_await _tables_metadata.parallel_for_each_table(coroutine::lambda([this, kind_to_close] (table_id, lw_shared_ptr<table> table) -> future<> {
|
||||||
auto& s = val_pair.second->schema();
|
auto& s = table->schema();
|
||||||
table_kind k = is_system_table(*s) || _cfg.extensions().is_extension_internal_keyspace(s->ks_name()) ? table_kind::system : table_kind::user;
|
table_kind k = is_system_table(*s) || _cfg.extensions().is_extension_internal_keyspace(s->ks_name()) ? table_kind::system : table_kind::user;
|
||||||
if (k == kind_to_close) {
|
if (k == kind_to_close) {
|
||||||
co_await val_pair.second->stop();
|
co_await table->stop();
|
||||||
}
|
}
|
||||||
});
|
}));
|
||||||
co_await _stop_barrier.arrive_and_wait();
|
co_await _stop_barrier.arrive_and_wait();
|
||||||
b.cancel();
|
b.cancel();
|
||||||
}
|
}
|
||||||
@@ -2399,8 +2399,8 @@ future<> database::stop() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
future<> database::flush_all_memtables() {
|
future<> database::flush_all_memtables() {
|
||||||
return parallel_for_each(_tables_metadata._column_families, [] (auto& cfp) {
|
return _tables_metadata.parallel_for_each_table([] (table_id, lw_shared_ptr<table> table) {
|
||||||
return cfp.second->flush();
|
return table->flush();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2855,6 +2855,10 @@ future<> database::drain() {
|
|||||||
b.cancel();
|
b.cancel();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the number of entries currently stored in _column_families.
size_t database::tables_metadata::size() const noexcept {
|
||||||
|
return _column_families.size();
|
||||||
|
}
|
||||||
|
|
||||||
future<> database::tables_metadata::add_table(schema_ptr schema) {
|
future<> database::tables_metadata::add_table(schema_ptr schema) {
|
||||||
auto holder = co_await _cf_lock.hold_write_lock();
|
auto holder = co_await _cf_lock.hold_write_lock();
|
||||||
auto id = schema->id();
|
auto id = schema->id();
|
||||||
@@ -2879,6 +2883,32 @@ future<> database::tables_metadata::remove_table(schema_ptr schema) noexcept {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Synchronously invokes f(id, table) once for every entry of _column_families.
//
// The callback returns void, so this loop contains no suspension point and
// does not acquire _cf_lock. _column_families is an std::unordered_map, so
// the visiting order is unspecified.
void database::tables_metadata::for_each_table(std::function<void(table_id, lw_shared_ptr<table>)> f) const {
|
||||||
|
for (auto& [id, table]: _column_families) {
|
||||||
|
f(id, table);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Synchronously invokes f(kscf, id) once for every entry of _ks_cf_to_uuid.
//
// Callers in this change read kscf.first as the keyspace name and
// kscf.second as the table name. Like for_each_table, this loop does not
// acquire _cf_lock and the callback cannot suspend.
void database::tables_metadata::for_each_table_id(std::function<void(const ks_cf_t&, table_id)> f) const {
|
||||||
|
for (auto& [kscf, id]: _ks_cf_to_uuid) {
|
||||||
|
f(kscf, id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Asynchronously invokes f(id, table) for every entry of _column_families,
// awaiting each returned future before moving on to the next table.
//
// Returns a future that resolves once every callback has completed.
future<> database::tables_metadata::for_each_table_gently(std::function<future<>(table_id, lw_shared_ptr<table>)> f) {
|
||||||
|
// Take the read side of _cf_lock for the whole traversal; add_table()
// takes the write side of the same lock. `holder` stays in scope across
// every co_await below.
// NOTE(review): presumably the lock is released when `holder` is destroyed - confirm.
auto holder = co_await _cf_lock.hold_read_lock();
|
||||||
|
for (auto& [id, table]: _column_families) {
|
||||||
|
co_await f(id, table);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
future<> database::tables_metadata::parallel_for_each_table(std::function<future<>(table_id, lw_shared_ptr<table>)> f) {
|
||||||
|
auto holder = co_await _cf_lock.hold_read_lock();
|
||||||
|
co_await coroutine::parallel_for_each(_column_families, [f = std::move(f)] (auto& table) {
|
||||||
|
return f(table.first, table.second);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
data_dictionary::database
|
data_dictionary::database
|
||||||
database::as_data_dictionary() const {
|
database::as_data_dictionary() const {
|
||||||
static constinit data_dictionary_impl _impl;
|
static constinit data_dictionary_impl _impl;
|
||||||
|
|||||||
@@ -1309,8 +1309,14 @@ public:
|
|||||||
std::unordered_map<table_id, lw_shared_ptr<column_family>> _column_families;
|
std::unordered_map<table_id, lw_shared_ptr<column_family>> _column_families;
|
||||||
ks_cf_to_uuid_t _ks_cf_to_uuid;
|
ks_cf_to_uuid_t _ks_cf_to_uuid;
|
||||||
|
|
||||||
|
size_t size() const noexcept;
|
||||||
|
|
||||||
future<> add_table(schema_ptr schema);
|
future<> add_table(schema_ptr schema);
|
||||||
future<> remove_table(schema_ptr schema) noexcept;
|
future<> remove_table(schema_ptr schema) noexcept;
|
||||||
|
void for_each_table(std::function<void(table_id, lw_shared_ptr<table>)> f) const;
|
||||||
|
void for_each_table_id(std::function<void(const ks_cf_t&, table_id)> f) const;
|
||||||
|
future<> for_each_table_gently(std::function<future<>(table_id, lw_shared_ptr<table>)> f);
|
||||||
|
future<> parallel_for_each_table(std::function<future<>(table_id, lw_shared_ptr<table>)> f);
|
||||||
};
|
};
|
||||||
private:
|
private:
|
||||||
replica::cf_stats _cf_stats;
|
replica::cf_stats _cf_stats;
|
||||||
|
|||||||
@@ -78,9 +78,9 @@ void load_broadcaster::start_broadcasting() {
|
|||||||
llogger.debug("Disseminating load info ...");
|
llogger.debug("Disseminating load info ...");
|
||||||
_done = _db.map_reduce0([](replica::database& db) {
|
_done = _db.map_reduce0([](replica::database& db) {
|
||||||
int64_t res = 0;
|
int64_t res = 0;
|
||||||
for (auto i : db.get_tables_metadata()._column_families) {
|
db.get_tables_metadata().for_each_table([&] (table_id, lw_shared_ptr<replica::table> table) {
|
||||||
res += i.second->get_stats().live_disk_space_used;
|
res += table->get_stats().live_disk_space_used;
|
||||||
}
|
});
|
||||||
return res;
|
return res;
|
||||||
}, int64_t(0), std::plus<int64_t>()).then([this] (int64_t size) {
|
}, int64_t(0), std::plus<int64_t>()).then([this] (int64_t size) {
|
||||||
return _gossiper.add_local_application_state(gms::application_state::LOAD,
|
return _gossiper.add_local_application_state(gms::application_state::LOAD,
|
||||||
|
|||||||
@@ -3093,16 +3093,16 @@ future<> storage_service::replicate_to_all_cores(mutable_token_metadata_ptr tmpt
|
|||||||
co_await container().invoke_on_all([&] (storage_service& ss) {
|
co_await container().invoke_on_all([&] (storage_service& ss) {
|
||||||
auto& db = ss._db.local();
|
auto& db = ss._db.local();
|
||||||
auto tmptr = pending_token_metadata_ptr[this_shard_id()];
|
auto tmptr = pending_token_metadata_ptr[this_shard_id()];
|
||||||
for (auto&& [id, cf] : db.get_tables_metadata()._column_families) { // Safe because we iterate without preemption
|
db.get_tables_metadata().for_each_table([&] (table_id id, lw_shared_ptr<replica::table> table) {
|
||||||
auto rs = db.find_keyspace(cf->schema()->keypace_name()).get_replication_strategy_ptr();
|
auto rs = db.find_keyspace(table->schema()->keypace_name()).get_replication_strategy_ptr();
|
||||||
locator::effective_replication_map_ptr erm;
|
locator::effective_replication_map_ptr erm;
|
||||||
if (auto pt_rs = rs->maybe_as_per_table()) {
|
if (auto pt_rs = rs->maybe_as_per_table()) {
|
||||||
erm = pt_rs->make_replication_map(id, tmptr);
|
erm = pt_rs->make_replication_map(id, tmptr);
|
||||||
} else {
|
} else {
|
||||||
erm = pending_effective_replication_maps[this_shard_id()][cf->schema()->keypace_name()];
|
erm = pending_effective_replication_maps[this_shard_id()][table->schema()->keypace_name()];
|
||||||
}
|
}
|
||||||
pending_table_erms[this_shard_id()].emplace(id, std::move(erm));
|
pending_table_erms[this_shard_id()].emplace(id, std::move(erm));
|
||||||
}
|
});
|
||||||
});
|
});
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
ex = std::current_exception();
|
ex = std::current_exception();
|
||||||
|
|||||||
@@ -462,15 +462,15 @@ std::vector<replica::column_family*> stream_session::get_column_family_stores(co
|
|||||||
std::vector<replica::column_family*> stores;
|
std::vector<replica::column_family*> stores;
|
||||||
auto& db = manager().db();
|
auto& db = manager().db();
|
||||||
if (column_families.empty()) {
|
if (column_families.empty()) {
|
||||||
for (auto& x : db.get_tables_metadata()._column_families) {
|
db.get_tables_metadata().for_each_table([&] (table_id, lw_shared_ptr<replica::table> tp) {
|
||||||
replica::column_family& cf = *(x.second);
|
replica::column_family& cf = *tp;
|
||||||
auto cf_name = cf.schema()->cf_name();
|
auto cf_name = cf.schema()->cf_name();
|
||||||
auto ks_name = cf.schema()->ks_name();
|
auto ks_name = cf.schema()->ks_name();
|
||||||
if (ks_name == keyspace) {
|
if (ks_name == keyspace) {
|
||||||
sslog.debug("Find ks={} cf={}", ks_name, cf_name);
|
sslog.debug("Find ks={} cf={}", ks_name, cf_name);
|
||||||
stores.push_back(&cf);
|
stores.push_back(&cf);
|
||||||
}
|
}
|
||||||
}
|
});
|
||||||
} else {
|
} else {
|
||||||
// TODO: We can move this to database class and use shared_ptr<column_family> instead
|
// TODO: We can move this to database class and use shared_ptr<column_family> instead
|
||||||
for (auto& cf_name : column_families) {
|
for (auto& cf_name : column_families) {
|
||||||
|
|||||||
@@ -116,8 +116,8 @@ SEASTAR_THREAD_TEST_CASE(test_large_data) {
|
|||||||
// and the old sstable is deleted.
|
// and the old sstable is deleted.
|
||||||
flush(e);
|
flush(e);
|
||||||
e.db().invoke_on_all([] (replica::database& dbi) {
|
e.db().invoke_on_all([] (replica::database& dbi) {
|
||||||
return parallel_for_each(dbi.get_tables_metadata()._column_families, [&dbi] (auto& table) {
|
return dbi.get_tables_metadata().parallel_for_each_table([&dbi] (table_id, lw_shared_ptr<replica::table> t) {
|
||||||
return dbi.get_compaction_manager().perform_major_compaction((table.second)->as_table_state());
|
return dbi.get_compaction_manager().perform_major_compaction(t->as_table_state());
|
||||||
});
|
});
|
||||||
}).get();
|
}).get();
|
||||||
|
|
||||||
|
|||||||
@@ -860,10 +860,10 @@ public:
|
|||||||
replica::distributed_loader::init_non_system_keyspaces(db, proxy, sys_ks).get();
|
replica::distributed_loader::init_non_system_keyspaces(db, proxy, sys_ks).get();
|
||||||
|
|
||||||
db.invoke_on_all([] (replica::database& db) {
|
db.invoke_on_all([] (replica::database& db) {
|
||||||
for (auto& x : db.get_tables_metadata()._column_families) {
|
db.get_tables_metadata().for_each_table([] (table_id, lw_shared_ptr<replica::table> table) {
|
||||||
replica::table& t = *(x.second);
|
replica::table& t = *table;
|
||||||
t.enable_auto_compaction();
|
t.enable_auto_compaction();
|
||||||
}
|
});
|
||||||
}).get();
|
}).get();
|
||||||
|
|
||||||
if (raft_gr.local().is_enabled()) {
|
if (raft_gr.local().is_enabled()) {
|
||||||
|
|||||||
Reference in New Issue
Block a user