Ignore seed name resolution errors on restart.

Host name resolution failures for gossiper seeds are ignored during restart if
the node is already bootstrapped (i.e. it has successfully joined the cluster).

Fixes scylladb/scylladb#14945
This commit is contained in:
Sergey Zolotukhin
2024-08-23 17:25:54 +02:00
parent fc5e683d02
commit 65f37f3ba6
4 changed files with 19 additions and 6 deletions

13
init.cc
View File

@@ -16,7 +16,9 @@
logging::logger startlog("init");
std::set<gms::inet_address> get_seeds_from_db_config(const db::config& cfg, gms::inet_address broadcast_address) {
std::set<gms::inet_address> get_seeds_from_db_config(const db::config& cfg,
const gms::inet_address broadcast_address,
const bool fail_on_lookup_error) {
auto preferred = cfg.listen_interface_prefer_ipv6() ? std::make_optional(net::inet_address::family::INET6) : std::nullopt;
auto family = cfg.enable_ipv6_dns_lookup() || preferred ? std::nullopt : std::make_optional(net::inet_address::family::INET);
const auto listen = gms::inet_address::lookup(cfg.listen_address(), family).get();
@@ -32,8 +34,13 @@ std::set<gms::inet_address> get_seeds_from_db_config(const db::config& cfg, gms:
try {
seeds.emplace(gms::inet_address::lookup(seed, family, preferred).get());
} catch (...) {
startlog.error("Bad configuration: invalid value in 'seeds': '{}': {}", seed, std::current_exception());
throw bad_configuration_error();
if (fail_on_lookup_error) {
startlog.error("Bad configuration: invalid value in 'seeds': '{}': {}", seed, std::current_exception());
throw bad_configuration_error();
}
startlog.warn("Bad configuration: invalid value in 'seeds': '{}': {}. Node will continue booting since already bootstrapped.",
seed,
std::current_exception());
}
begin = next+1;
}

View File

@@ -35,7 +35,9 @@ extern logging::logger startlog;
class bad_configuration_error : public std::exception {};
std::set<gms::inet_address> get_seeds_from_db_config(const db::config& cfg, gms::inet_address broadcast_address);
[[nodiscard]] std::set<gms::inet_address> get_seeds_from_db_config(const db::config& cfg,
gms::inet_address broadcast_address,
bool fail_on_lookup_error);
class service_set {
public:

View File

@@ -1422,7 +1422,11 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
startlog.warn("Using default cluster name is not recommended. Using a unique cluster name will reduce the chance of adding nodes to the wrong cluster by mistake");
}
auto group0_id = sys_ks.local().get_raft_group0_id().get();
auto gossiper_seeds = get_seeds_from_db_config(*cfg, broadcast_addr);
// Fail on a gossiper seeds lookup error only if the node is not bootstrapped.
const bool fail_on_lookup_error = !sys_ks.local().bootstrap_complete();
auto gossiper_seeds = get_seeds_from_db_config(*cfg, broadcast_addr, fail_on_lookup_error);
auto get_gossiper_cfg = sharded_parameter([&] {
gms::gossip_config gcfg;

View File

@@ -13,8 +13,8 @@ from test.pylib.manager_client import ManagerClient
logger = logging.getLogger(__name__)
@pytest.mark.asyncio
@pytest.mark.xfail(reason="issue #14945")
async def test_start_bootstrapped_with_invalid_seed(manager: ManagerClient):
"""
Issue https://github.com/scylladb/scylladb/issues/14945.