test: test rf-rack validity when creating keyspace during node ops

Add tests that attempt to create a keyspace during different stages of
a node join or node removal, and verify that the RF-rack-validity
condition cannot be broken: depending on the stage, either the keyspace
creation or the node operation should fail.
This commit is contained in:
Michael Litvak
2025-10-12 12:19:04 +02:00
parent a738905a4b
commit d40d06c7ad
2 changed files with 181 additions and 0 deletions

View File

@@ -3126,6 +3126,7 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
.del("topology_request");
auto reason = ::format("bootstrap: accept node");
co_await update_topology_state(std::move(node.guard), {builder.build(), rtbuilder.build()}, reason);
co_await utils::get_local_injector().inject("topology_coordinator_pause_after_accept_node", utils::wait_for_message(5min));
break;
}
case topology_request::leave: {
@@ -3152,6 +3153,7 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
.del("topology_request");
co_await update_topology_state(take_guard(std::move(node)), {builder.build(), rtbuilder.build()},
"start decommission");
co_await utils::get_local_injector().inject("topology_coordinator_pause_after_start_decommission", utils::wait_for_message(5min));
break;
}
case topology_request::remove: {
@@ -3175,6 +3177,7 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
.del("topology_request");
co_await update_topology_state(take_guard(std::move(node)), {builder.build(), rtbuilder.build()},
"start removenode");
co_await utils::get_local_injector().inject("topology_coordinator_pause_after_start_removenode", utils::wait_for_message(5min));
break;
}
case topology_request::replace: {

View File

@@ -3,6 +3,7 @@
#
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
#
from test.cluster.conftest import skip_mode
from test.pylib.manager_client import ManagerClient
@@ -117,3 +118,180 @@ async def test_remove_node_violating_rf_rack(manager: ManagerClient, enforce: bo
matches = [log.grep('makes some existing keyspaces RF-rack-invalid') for log in logs]
assert any(matches)
@pytest.mark.parametrize("injection", ["before_bootstrap", "after_bootstrap"])
@pytest.mark.asyncio
@skip_mode('release', 'error injections are not supported in release mode')
async def test_keyspace_creation_during_node_join(manager: ManagerClient, injection: str):
    """
    Check that creating a keyspace while a node joins cannot break RF-rack validity.

    A 3-node cluster with one node per rack (r1, r2, r3) is started with
    `rf_rack_valid_keyspaces` enabled. A fourth node is then added in a new
    rack r4, and the join is paused on an error injection selected by `injection`.

    injection="before_bootstrap" (paused after the node is accepted, before bootstrap):
      - CREATE KEYSPACE with RF=3 must succeed, as the topology has not changed yet.
      - The node join must then fail with an RF-rack-validity error.

    injection="after_bootstrap" (paused after bootstrap, before the final join steps):
      - CREATE KEYSPACE with RF=3 must fail (4 racks would make it RF-rack-invalid).
      - CREATE KEYSPACE with RF=4 must fail as well, since the join may still roll back.
      - After the join is released and completes, RF=4 must be accepted.
      - While a second node joins the already existing rack r4, creating an
        RF=4 keyspace must succeed because the number of racks does not change.
    """
    cfg = {
        'rf_rack_valid_keyspaces': True,
        'error_injections_at_startup': [{'name': 'suppress_features', 'value': 'RACK_LIST_RF'}],
    }
    cmdline = ['--logger-log-level', 'tablets=debug', '--logger-log-level', 'raft_topology=debug']

    # Statements and the error pattern that are used more than once below.
    create_rf3 = "CREATE KEYSPACE test_rf3 WITH replication = {'class': 'NetworkTopologyStrategy', 'dc1': 3} AND tablets = {'enabled': true}"
    create_rf4 = "CREATE KEYSPACE test_rf4 WITH replication = {'class': 'NetworkTopologyStrategy', 'dc1': 4} AND tablets = {'enabled': true}"
    rf_rack_error = "required to be RF-rack-valid"

    # Initial cluster: three nodes, one per rack.
    initial_racks = [{"dc": "dc1", "rack": rack} for rack in ("r1", "r2", "r3")]
    servers = await manager.servers_add(3, config=cfg, cmdline=cmdline, property_file=initial_racks)
    cql = manager.get_cql()

    pause_before_bootstrap = injection == "before_bootstrap"
    inj = (
        "topology_coordinator_pause_after_accept_node"
        if pause_before_bootstrap
        else "topology_coordinator_pause_after_updating_cdc_generation"
    )
    # Only the early pause is expected to end with a failed join.
    server_add_expected_error = (
        "would make some existing keyspace RF-rack-invalid" if pause_before_bootstrap else None
    )

    async def arm_injection():
        # The injection is one-shot, so it has to be armed again before every join
        # that should pause. It is enabled on every known server -- presumably
        # because any of them may be the topology coordinator (TODO confirm).
        await asyncio.gather(*(manager.api.enable_injection(s.ip_addr, inj, one_shot=True) for s in servers))

    async def release_injection():
        await asyncio.gather(*(manager.api.message_injection(s.ip_addr, inj) for s in servers))

    await arm_injection()

    # Start adding a node in a new rack; the join stops at the armed injection.
    logger.info(f"Starting to add new node in rack r4 (will pause {injection})")
    log = await manager.server_open_log(servers[0].server_id)
    mark = await log.mark()
    add_node_task = asyncio.create_task(
        manager.server_add(
            config=cfg,
            cmdline=cmdline,
            property_file={"dc": "dc1", "rack": "r4"},
            expected_error=server_add_expected_error,
        )
    )
    await log.wait_for(f"{inj}: waiting for message", from_mark=mark, timeout=60)

    if pause_before_bootstrap:
        # The node was accepted but has not bootstrapped: RF=3 is still valid.
        logger.info("Creating keyspace with RF=3 (should succeed - topology not changed yet)")
        await cql.run_async(create_rf3)

        # Let the join continue; it has to fail because of the new RF=3 keyspace.
        await release_injection()
        logger.info("Waiting for node join to fail")
        await add_node_task
        return

    # The node has bootstrapped but the join is not finished:
    # neither the old nor the new rack count may be relied upon.
    logger.info("Attempting to create keyspace with RF=3 (should fail - 4 racks detected)")
    with pytest.raises(Exception, match=rf_rack_error):
        await cql.run_async(create_rf3)

    # RF=4 is rejected too, because the join could still fail and roll back.
    logger.info("Attempting to create keyspace with RF=4 (should fail - number of racks undetermined)")
    with pytest.raises(Exception, match=rf_rack_error):
        await cql.run_async(create_rf4)

    # Let the join run to completion and track the new server.
    await release_injection()
    logger.info("Waiting for node join to complete")
    servers.append(await add_node_task)

    # With four racks in place, RF=4 is valid.
    logger.info("Creating keyspace with RF=4 (should succeed - 4 racks now present)")
    await cql.run_async(create_rf4)

    # Second join into the existing rack r4: the rack count stays at 4,
    # so keyspace creation must not be blocked while the join is paused.
    logger.info("Testing second node addition to existing rack r4")
    await arm_injection()

    mark = await log.mark()
    add_node2_task = asyncio.create_task(
        manager.server_add(config=cfg, cmdline=cmdline, property_file={"dc": "dc1", "rack": "r4"})
    )
    await log.wait_for(f"{inj}: waiting for message", from_mark=mark, timeout=60)

    logger.info("Creating keyspace with RF=4 while second node joins r4 (should succeed - rack count unchanged)")
    await cql.run_async("CREATE KEYSPACE test_rf4_second WITH replication = {'class': 'NetworkTopologyStrategy', 'dc1': 4} AND tablets = {'enabled': true}")

    # Finish the second join.
    await release_injection()
    logger.info("Waiting for second node join to complete")
    await add_node2_task
@pytest.mark.parametrize("op", ["remove", "decommission"])
@pytest.mark.asyncio
@skip_mode('release', 'error injections are not supported in release mode')
async def test_keyspace_creation_during_node_remove(manager: ManagerClient, op: str):
    """
    Check that creating a keyspace while a node is leaving cannot break RF-rack validity.

    A 3-node cluster with one node per rack (r1, r2, r3) is started with
    `rf_rack_valid_keyspaces` enabled. The node in r3 is then removed
    (op="remove") or decommissioned (op="decommission"), and the operation is
    paused on an error injection right after it has started.

    While the operation is paused:
      - CREATE KEYSPACE with RF=3 must fail: it is valid for the current 3 racks,
        but only 2 racks remain once the operation completes.
      - CREATE KEYSPACE with RF=2 must fail as well: the operation may still
        roll back, leaving 3 racks.

    Finally the operation is released and awaited.
    """
    cfg = {
        'rf_rack_valid_keyspaces': True,
        'error_injections_at_startup': [{'name': 'suppress_features', 'value': 'RACK_LIST_RF'}],
    }
    cmdline = ['--logger-log-level', 'tablets=debug', '--logger-log-level', 'raft_topology=debug']

    create_rf3 = "CREATE KEYSPACE test_rf3 WITH replication = {'class': 'NetworkTopologyStrategy', 'dc1': 3} AND tablets = {'enabled': true}"
    create_rf2 = "CREATE KEYSPACE test_rf2 WITH replication = {'class': 'NetworkTopologyStrategy', 'dc1': 2} AND tablets = {'enabled': true}"
    rf_rack_error = "required to be RF-rack-valid"

    # Initial cluster: three nodes, one per rack.
    initial_racks = [{"dc": "dc1", "rack": rack} for rack in ("r1", "r2", "r3")]
    servers = await manager.servers_add(3, config=cfg, cmdline=cmdline, property_file=initial_racks)
    cql = manager.get_cql()

    # The node in r3 leaves; the other two stay and receive the injection.
    staying = servers[:2]
    leaving = servers[2]

    if op == "remove":
        inj = "topology_coordinator_pause_after_start_removenode"
    elif op == "decommission":
        inj = "topology_coordinator_pause_after_start_decommission"

    async def arm_injection():
        await asyncio.gather(*(manager.api.enable_injection(s.ip_addr, inj, one_shot=True) for s in staying))

    async def release_injection():
        await asyncio.gather(*(manager.api.message_injection(s.ip_addr, inj) for s in staying))

    await arm_injection()

    # Kick off the node operation; it stops at the armed injection.
    logger.info(f"Starting to {op} node from rack r3 (will pause during {op})")
    log = await manager.server_open_log(servers[0].server_id)
    mark = await log.mark()

    if op == "remove":
        # The node is stopped first and then removed via the first server.
        await manager.server_stop_gracefully(leaving.server_id)
        node_op_task = asyncio.create_task(
            manager.remove_node(servers[0].server_id, leaving.server_id)
        )
    elif op == "decommission":
        node_op_task = asyncio.create_task(
            manager.decommission_node(leaving.server_id)
        )

    await log.wait_for(f"{inj}: waiting for message", from_mark=mark, timeout=60)

    # RF=3 would become invalid once only two racks are left.
    logger.info(f"Attempting to create keyspace with RF=3 (should fail - {op} would leave 2 racks)")
    with pytest.raises(Exception, match=rf_rack_error):
        await cql.run_async(create_rf3)

    # RF=2 would become invalid if the operation rolls back to three racks.
    logger.info(f"Attempting to create keyspace with RF=2 (should fail - {op} is in progress)")
    with pytest.raises(Exception, match=rf_rack_error):
        await cql.run_async(create_rf2)

    # Release the paused operation and wait until it finishes.
    await release_injection()
    logger.info(f"Waiting for node {op} to complete")
    await node_op_task