Merge 'retry automatic announcements of the schema changes on concurrent operation' from Patryk Jędrzejczak
The follow-up to #15594. We retry every automatic `migration_manager::announce` if `group0_concurrent_modification` occurs. Concurrent operations can happen during concurrent bootstrap in Raft-based topology, so we need this change to enable support for concurrent bootstrap. This PR adds retry loops in 4 places: - `service::create_keyspace_if_missing`, - `system_distributed_keyspace::start`, - `redis::create_keyspace_if_not_exists_impl`, - `table_helper::setup_keyspace` (used for creating the `system_traces` keyspace). Fixes #15435 Closes scylladb/scylladb#15613 * github.com:scylladb/scylladb: table_helper: fix indentation table_helper: retry in setup_keyspace on concurrent operation table_helper: add logger redis/keyspace_utils: fix indentation redis: retry creating defualt databases on concurrent operation db/system_distributed_keyspace: fix indentation db/system_distributed_keyspace: retry start on concurrent operation auth/service: retry creating system_auth on concurrent operation
This commit is contained in:
@@ -165,7 +165,7 @@ future<> service::create_keyspace_if_missing(::service::migration_manager& mm) c
|
||||
assert(this_shard_id() == 0); // once_among_shards makes sure a function is executed on shard 0 only
|
||||
auto db = _qp.db();
|
||||
|
||||
if (!db.has_keyspace(meta::AUTH_KS)) {
|
||||
while (!db.has_keyspace(meta::AUTH_KS)) {
|
||||
auto group0_guard = co_await mm.start_group0_operation();
|
||||
auto ts = group0_guard.write_timestamp();
|
||||
|
||||
@@ -178,8 +178,12 @@ future<> service::create_keyspace_if_missing(::service::migration_manager& mm) c
|
||||
opts,
|
||||
true);
|
||||
|
||||
co_return co_await mm.announce(::service::prepare_new_keyspace_announcement(db.real_database(), ksm, ts),
|
||||
std::move(group0_guard), format("auth_service: create {} keyspace", meta::AUTH_KS));
|
||||
try {
|
||||
co_return co_await mm.announce(::service::prepare_new_keyspace_announcement(db.real_database(), ksm, ts),
|
||||
std::move(group0_guard), format("auth_service: create {} keyspace", meta::AUTH_KS));
|
||||
} catch (::service::group0_concurrent_modification&) {
|
||||
log.info("Concurrent operation is detected while creating {} keyspace, retrying.", meta::AUTH_KS);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -245,78 +245,86 @@ future<> system_distributed_keyspace::start() {
|
||||
auto db = _sp.data_dictionary();
|
||||
auto tables = ensured_tables();
|
||||
|
||||
// Check if there is any work to do before taking the group 0 guard.
|
||||
bool keyspaces_setup = db.has_keyspace(NAME) && db.has_keyspace(NAME_EVERYWHERE);
|
||||
bool tables_setup = std::all_of(tables.begin(), tables.end(), [db] (schema_ptr t) { return db.has_schema(t->ks_name(), t->cf_name()); } );
|
||||
bool service_levels_up_to_date = get_current_service_levels(db)->equal_columns(*get_updated_service_levels(db));
|
||||
if (keyspaces_setup && tables_setup && service_levels_up_to_date) {
|
||||
dlogger.info("system_distributed(_everywhere) keyspaces and tables are up-to-date. Not creating");
|
||||
while (true) {
|
||||
// Check if there is any work to do before taking the group 0 guard.
|
||||
bool keyspaces_setup = db.has_keyspace(NAME) && db.has_keyspace(NAME_EVERYWHERE);
|
||||
bool tables_setup = std::all_of(tables.begin(), tables.end(), [db] (schema_ptr t) { return db.has_schema(t->ks_name(), t->cf_name()); } );
|
||||
bool service_levels_up_to_date = get_current_service_levels(db)->equal_columns(*get_updated_service_levels(db));
|
||||
if (keyspaces_setup && tables_setup && service_levels_up_to_date) {
|
||||
dlogger.info("system_distributed(_everywhere) keyspaces and tables are up-to-date. Not creating");
|
||||
_started = true;
|
||||
co_return;
|
||||
}
|
||||
|
||||
auto group0_guard = co_await _mm.start_group0_operation();
|
||||
auto ts = group0_guard.write_timestamp();
|
||||
std::vector<mutation> mutations;
|
||||
sstring description;
|
||||
|
||||
auto sd_ksm = keyspace_metadata::new_keyspace(
|
||||
NAME,
|
||||
"org.apache.cassandra.locator.SimpleStrategy",
|
||||
{{"replication_factor", "3"}},
|
||||
true /* durable_writes */);
|
||||
if (!db.has_keyspace(NAME)) {
|
||||
mutations = service::prepare_new_keyspace_announcement(db.real_database(), sd_ksm, ts);
|
||||
description += format(" create {} keyspace;", NAME);
|
||||
} else {
|
||||
dlogger.info("{} keyspace is already present. Not creating", NAME);
|
||||
}
|
||||
|
||||
auto sde_ksm = keyspace_metadata::new_keyspace(
|
||||
NAME_EVERYWHERE,
|
||||
"org.apache.cassandra.locator.EverywhereStrategy",
|
||||
{},
|
||||
true /* durable_writes */);
|
||||
if (!db.has_keyspace(NAME_EVERYWHERE)) {
|
||||
auto sde_mutations = service::prepare_new_keyspace_announcement(db.real_database(), sde_ksm, ts);
|
||||
std::move(sde_mutations.begin(), sde_mutations.end(), std::back_inserter(mutations));
|
||||
description += format(" create {} keyspace;", NAME_EVERYWHERE);
|
||||
} else {
|
||||
dlogger.info("{} keyspace is already present. Not creating", NAME_EVERYWHERE);
|
||||
}
|
||||
|
||||
// Get mutations for creating and updating tables.
|
||||
auto num_keyspace_mutations = mutations.size();
|
||||
co_await coroutine::parallel_for_each(ensured_tables(),
|
||||
[this, &mutations, db, ts, sd_ksm, sde_ksm] (auto&& table) -> future<> {
|
||||
auto ksm = table->ks_name() == NAME ? sd_ksm : sde_ksm;
|
||||
|
||||
// Ensure that the service_levels table contains new columns.
|
||||
if (table->cf_name() == SERVICE_LEVELS) {
|
||||
table = get_updated_service_levels(db);
|
||||
}
|
||||
|
||||
if (!db.has_schema(table->ks_name(), table->cf_name())) {
|
||||
co_return co_await service::prepare_new_column_family_announcement(mutations, _sp, *ksm, std::move(table), ts);
|
||||
}
|
||||
|
||||
// The service_levels table exists. Update it if it lacks new columns.
|
||||
if (table->cf_name() == SERVICE_LEVELS && !get_current_service_levels(db)->equal_columns(*table)) {
|
||||
auto update_mutations = co_await service::prepare_column_family_update_announcement(_sp, table, false, std::vector<view_ptr>(), ts);
|
||||
std::move(update_mutations.begin(), update_mutations.end(), std::back_inserter(mutations));
|
||||
}
|
||||
});
|
||||
if (mutations.size() > num_keyspace_mutations) {
|
||||
description += " create and update system_distributed(_everywhere) tables";
|
||||
} else {
|
||||
dlogger.info("All tables are present and up-to-date on start");
|
||||
}
|
||||
|
||||
if (!mutations.empty()) {
|
||||
try {
|
||||
co_await _mm.announce(std::move(mutations), std::move(group0_guard), description);
|
||||
} catch (service::group0_concurrent_modification&) {
|
||||
dlogger.info("Concurrent operation is detected while starting, retrying.");
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
_started = true;
|
||||
co_return;
|
||||
}
|
||||
|
||||
auto group0_guard = co_await _mm.start_group0_operation();
|
||||
auto ts = group0_guard.write_timestamp();
|
||||
std::vector<mutation> mutations;
|
||||
sstring description;
|
||||
|
||||
auto sd_ksm = keyspace_metadata::new_keyspace(
|
||||
NAME,
|
||||
"org.apache.cassandra.locator.SimpleStrategy",
|
||||
{{"replication_factor", "3"}},
|
||||
true /* durable_writes */);
|
||||
if (!db.has_keyspace(NAME)) {
|
||||
mutations = service::prepare_new_keyspace_announcement(db.real_database(), sd_ksm, ts);
|
||||
description += format(" create {} keyspace;", NAME);
|
||||
} else {
|
||||
dlogger.info("{} keyspace is already present. Not creating", NAME);
|
||||
}
|
||||
|
||||
auto sde_ksm = keyspace_metadata::new_keyspace(
|
||||
NAME_EVERYWHERE,
|
||||
"org.apache.cassandra.locator.EverywhereStrategy",
|
||||
{},
|
||||
true /* durable_writes */);
|
||||
if (!db.has_keyspace(NAME_EVERYWHERE)) {
|
||||
auto sde_mutations = service::prepare_new_keyspace_announcement(db.real_database(), sde_ksm, ts);
|
||||
std::move(sde_mutations.begin(), sde_mutations.end(), std::back_inserter(mutations));
|
||||
description += format(" create {} keyspace;", NAME_EVERYWHERE);
|
||||
} else {
|
||||
dlogger.info("{} keyspace is already present. Not creating", NAME_EVERYWHERE);
|
||||
}
|
||||
|
||||
// Get mutations for creating and updating tables.
|
||||
auto num_keyspace_mutations = mutations.size();
|
||||
co_await coroutine::parallel_for_each(ensured_tables(),
|
||||
[this, &mutations, db, ts, sd_ksm, sde_ksm] (auto&& table) -> future<> {
|
||||
auto ksm = table->ks_name() == NAME ? sd_ksm : sde_ksm;
|
||||
|
||||
// Ensure that the service_levels table contains new columns.
|
||||
if (table->cf_name() == SERVICE_LEVELS) {
|
||||
table = get_updated_service_levels(db);
|
||||
}
|
||||
|
||||
if (!db.has_schema(table->ks_name(), table->cf_name())) {
|
||||
co_return co_await service::prepare_new_column_family_announcement(mutations, _sp, *ksm, std::move(table), ts);
|
||||
}
|
||||
|
||||
// The service_levels table exists. Update it if it lacks new columns.
|
||||
if (table->cf_name() == SERVICE_LEVELS && !get_current_service_levels(db)->equal_columns(*table)) {
|
||||
auto update_mutations = co_await service::prepare_column_family_update_announcement(_sp, table, false, std::vector<view_ptr>(), ts);
|
||||
std::move(update_mutations.begin(), update_mutations.end(), std::back_inserter(mutations));
|
||||
}
|
||||
});
|
||||
if (mutations.size() > num_keyspace_mutations) {
|
||||
description += " create and update system_distributed(_everywhere) tables";
|
||||
} else {
|
||||
dlogger.info("All tables are present and up-to-date on start");
|
||||
}
|
||||
|
||||
if (!mutations.empty()) {
|
||||
co_await _mm.announce(std::move(mutations), std::move(group0_guard), description);
|
||||
}
|
||||
|
||||
_started = true;
|
||||
}
|
||||
|
||||
future<> system_distributed_keyspace::stop() {
|
||||
|
||||
@@ -162,67 +162,76 @@ future<> create_keyspace_if_not_exists_impl(seastar::sharded<service::storage_pr
|
||||
boost::irange<unsigned>(0, config.redis_database_count()) |
|
||||
boost::adaptors::transformed([] (unsigned i) { return fmt::format("REDIS_{}", i); }));
|
||||
|
||||
bool schema_ok = boost::algorithm::all_of(ks_names, [&] (auto& ks_name) {
|
||||
auto check = [&] (table t) {
|
||||
return db.has_schema(ks_name, t.name);
|
||||
};
|
||||
return db.has_keyspace(ks_name) && boost::algorithm::all_of(tables, check);
|
||||
});
|
||||
while (true) {
|
||||
bool schema_ok = boost::algorithm::all_of(ks_names, [&] (auto& ks_name) {
|
||||
auto check = [&] (table t) {
|
||||
return db.has_schema(ks_name, t.name);
|
||||
};
|
||||
return db.has_keyspace(ks_name) && boost::algorithm::all_of(tables, check);
|
||||
});
|
||||
|
||||
if (schema_ok) {
|
||||
logger.info("Redis schema is already up-to-date");
|
||||
co_return; // if schema is created already do nothing
|
||||
}
|
||||
|
||||
auto& mml = mm.local();
|
||||
auto tm = proxy.local().get_token_metadata_ptr();
|
||||
|
||||
std::vector<lw_shared_ptr<keyspace_metadata>> ksms;
|
||||
for (auto& ks_name: ks_names) {
|
||||
cql3::statements::ks_prop_defs attrs;
|
||||
attrs.add_property(cql3::statements::ks_prop_defs::KW_DURABLE_WRITES, "true");
|
||||
std::map<sstring, sstring> replication_properties;
|
||||
for (auto&& option : keyspace_replication_strategy_options) {
|
||||
replication_properties.emplace(option.first, option.second);
|
||||
}
|
||||
attrs.add_property(cql3::statements::ks_prop_defs::KW_REPLICATION, replication_properties);
|
||||
attrs.validate();
|
||||
|
||||
ksms.push_back(attrs.as_ks_metadata(ks_name, *tm));
|
||||
}
|
||||
|
||||
auto group0_guard = co_await mml.start_group0_operation();
|
||||
auto ts = group0_guard.write_timestamp();
|
||||
std::vector<mutation> mutations;
|
||||
|
||||
for (auto ksm: ksms) {
|
||||
if (db.has_keyspace(ksm->name())) {
|
||||
continue;
|
||||
if (schema_ok) {
|
||||
logger.info("Redis schema is already up-to-date");
|
||||
co_return; // if schema is created already do nothing
|
||||
}
|
||||
|
||||
auto muts = service::prepare_new_keyspace_announcement(db.real_database(), ksm, ts);
|
||||
std::move(muts.begin(), muts.end(), std::back_inserter(mutations));
|
||||
}
|
||||
auto& mml = mm.local();
|
||||
auto tm = proxy.local().get_token_metadata_ptr();
|
||||
|
||||
auto table_gen = std::bind_front(
|
||||
[] (data_dictionary::database db, service::storage_proxy& sp, std::vector<mutation>& mutations,
|
||||
api::timestamp_type ts, const keyspace_metadata& ksm, sstring cf_name, schema_ptr schema) -> future<> {
|
||||
if (db.has_schema(ksm.name(), cf_name)) {
|
||||
std::vector<lw_shared_ptr<keyspace_metadata>> ksms;
|
||||
for (auto& ks_name: ks_names) {
|
||||
cql3::statements::ks_prop_defs attrs;
|
||||
attrs.add_property(cql3::statements::ks_prop_defs::KW_DURABLE_WRITES, "true");
|
||||
std::map<sstring, sstring> replication_properties;
|
||||
for (auto&& option : keyspace_replication_strategy_options) {
|
||||
replication_properties.emplace(option.first, option.second);
|
||||
}
|
||||
attrs.add_property(cql3::statements::ks_prop_defs::KW_REPLICATION, replication_properties);
|
||||
attrs.validate();
|
||||
|
||||
ksms.push_back(attrs.as_ks_metadata(ks_name, *tm));
|
||||
}
|
||||
|
||||
auto group0_guard = co_await mml.start_group0_operation();
|
||||
auto ts = group0_guard.write_timestamp();
|
||||
std::vector<mutation> mutations;
|
||||
|
||||
for (auto ksm: ksms) {
|
||||
if (db.has_keyspace(ksm->name())) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto muts = service::prepare_new_keyspace_announcement(db.real_database(), ksm, ts);
|
||||
std::move(muts.begin(), muts.end(), std::back_inserter(mutations));
|
||||
}
|
||||
|
||||
auto table_gen = std::bind_front(
|
||||
[] (data_dictionary::database db, service::storage_proxy& sp, std::vector<mutation>& mutations,
|
||||
api::timestamp_type ts, const keyspace_metadata& ksm, sstring cf_name, schema_ptr schema) -> future<> {
|
||||
if (db.has_schema(ksm.name(), cf_name)) {
|
||||
co_return;
|
||||
}
|
||||
|
||||
logger.info("Create keyspace: {}, table: {} for redis.", ksm.name(), cf_name);
|
||||
co_await service::prepare_new_column_family_announcement(mutations, sp, ksm, schema, ts);
|
||||
}, db, std::ref(proxy.local()), std::ref(mutations), ts);
|
||||
|
||||
co_await coroutine::parallel_for_each(ksms, [table_gen = std::move(table_gen)] (const lw_shared_ptr<keyspace_metadata> ksm) mutable {
|
||||
return parallel_for_each(tables, [ksm, table_gen = std::move(table_gen)] (table t) {
|
||||
return table_gen(*ksm, t.name, t.schema(ksm->name()));
|
||||
}).discard_result();
|
||||
});
|
||||
|
||||
if (mutations.empty()) {
|
||||
co_return;
|
||||
}
|
||||
|
||||
logger.info("Create keyspace: {}, table: {} for redis.", ksm.name(), cf_name);
|
||||
co_await service::prepare_new_column_family_announcement(mutations, sp, ksm, schema, ts);
|
||||
}, db, std::ref(proxy.local()), std::ref(mutations), ts);
|
||||
|
||||
co_await coroutine::parallel_for_each(ksms, [table_gen = std::move(table_gen)] (const lw_shared_ptr<keyspace_metadata> ksm) mutable {
|
||||
return parallel_for_each(tables, [ksm, table_gen = std::move(table_gen)] (table t) {
|
||||
return table_gen(*ksm, t.name, t.schema(ksm->name()));
|
||||
}).discard_result();
|
||||
});
|
||||
|
||||
if (!mutations.empty()) {
|
||||
co_await mml.announce(std::move(mutations), std::move(group0_guard), "keyspace-utils: create default keyspaces and databases for redis");
|
||||
try {
|
||||
co_return co_await mml.announce(std::move(mutations), std::move(group0_guard),
|
||||
"keyspace-utils: create default keyspaces and databases for redis");
|
||||
} catch (service::group0_concurrent_modification&) {
|
||||
logger.info("Concurrent operation is detected while creating default databases for redis, retrying.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -16,6 +16,8 @@
|
||||
#include "replica/database.hh"
|
||||
#include "service/migration_manager.hh"
|
||||
|
||||
static logging::logger tlogger("table_helper");
|
||||
|
||||
static schema_ptr parse_new_cf_statement(cql3::query_processor& qp, const sstring& create_cql) {
|
||||
auto db = qp.db();
|
||||
|
||||
@@ -145,35 +147,43 @@ future<> table_helper::setup_keyspace(cql3::query_processor& qp, service::migrat
|
||||
opts["replication_factor"] = replication_factor;
|
||||
auto ksm = keyspace_metadata::new_keyspace(keyspace_name, "org.apache.cassandra.locator.SimpleStrategy", std::move(opts), true);
|
||||
|
||||
if (!db.has_keyspace(keyspace_name)) {
|
||||
while (!db.has_keyspace(keyspace_name)) {
|
||||
auto group0_guard = co_await mm.start_group0_operation();
|
||||
auto ts = group0_guard.write_timestamp();
|
||||
|
||||
if (!db.has_keyspace(keyspace_name)) {
|
||||
co_await mm.announce(service::prepare_new_keyspace_announcement(db.real_database(), ksm, ts),
|
||||
std::move(group0_guard), format("table_helper: create {} keyspace", keyspace_name));
|
||||
try {
|
||||
co_await mm.announce(service::prepare_new_keyspace_announcement(db.real_database(), ksm, ts),
|
||||
std::move(group0_guard), format("table_helper: create {} keyspace", keyspace_name));
|
||||
} catch (service::group0_concurrent_modification&) {
|
||||
tlogger.info("Concurrent operation is detected while creating {} keyspace, retrying.", keyspace_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
qs.get_client_state().set_keyspace(db.real_database(), keyspace_name);
|
||||
|
||||
if (std::all_of(tables.begin(), tables.end(), [db] (table_helper* t) { return db.has_schema(t->_keyspace, t->_name); })) {
|
||||
co_return;
|
||||
}
|
||||
while (std::any_of(tables.begin(), tables.end(), [db] (table_helper* t) { return !db.has_schema(t->_keyspace, t->_name); })) {
|
||||
auto group0_guard = co_await mm.start_group0_operation();
|
||||
auto ts = group0_guard.write_timestamp();
|
||||
std::vector<mutation> table_mutations;
|
||||
|
||||
auto group0_guard = co_await mm.start_group0_operation();
|
||||
auto ts = group0_guard.write_timestamp();
|
||||
std::vector<mutation> table_mutations;
|
||||
co_await coroutine::parallel_for_each(tables, [&] (auto&& table) -> future<> {
|
||||
auto schema = parse_new_cf_statement(qp, table->_create_cql);
|
||||
if (!db.has_schema(schema->ks_name(), schema->cf_name())) {
|
||||
co_return co_await service::prepare_new_column_family_announcement(table_mutations, qp.proxy(), *ksm, schema, ts);
|
||||
}
|
||||
});
|
||||
|
||||
co_await coroutine::parallel_for_each(tables, [&] (auto&& table) -> future<> {
|
||||
auto schema = parse_new_cf_statement(qp, table->_create_cql);
|
||||
if (!db.has_schema(schema->ks_name(), schema->cf_name())) {
|
||||
co_return co_await service::prepare_new_column_family_announcement(table_mutations, qp.proxy(), *ksm, schema, ts);
|
||||
if (table_mutations.empty()) {
|
||||
co_return;
|
||||
}
|
||||
});
|
||||
|
||||
if (!table_mutations.empty()) {
|
||||
co_await mm.announce(std::move(table_mutations), std::move(group0_guard),
|
||||
format("table_helper: create tables for {} keyspace", keyspace_name));
|
||||
try {
|
||||
co_return co_await mm.announce(std::move(table_mutations), std::move(group0_guard),
|
||||
format("table_helper: create tables for {} keyspace", keyspace_name));
|
||||
} catch (service::group0_concurrent_modification&) {
|
||||
tlogger.info("Concurrent operation is detected while creating tables for {} keyspace, retrying.", keyspace_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user