From f89235517d113e4e82b22e72a3f535bb8f085e3f Mon Sep 17 00:00:00 2001 From: Nadav Har'El Date: Fri, 7 Feb 2025 19:27:18 +0100 Subject: [PATCH] test/topology_custom: fix very slow test test_localnodes_broadcast_rpc_address The test topology_custom/test_alternator::test_localnodes_broadcast_rpc_address sets up nodes with a silly "broadcast rpc address" and checks that Alternator's "/localnodes" request returns it correctly. The problem is that although we don't use CQL in this test, the test framework does open a CQL connection when the test starts, and closes it when it ends. It turns out that when we set a silly "broadcast RPC address", the driver tends to try to connect to it when shutting down, I'm not even sure why. But the choice of 1.2.3.4 as the silly address is unfortunate, because this IP address is actually routable - and the driver hangs until it times out (in practice, in a bit over two minutes). This trivial patch changes 1.2.3.4 to 127.0.0.0 - an equally silly address but one to which connections fail immediately. Before this patch, the test often takes more than 2 minutes to finish on my laptop; after this patch, it always finishes in 4-5 seconds. Fixes #22744 Signed-off-by: Nadav Har'El Closes scylladb/scylladb#22746 --- test/topology_custom/test_alternator.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/test/topology_custom/test_alternator.py b/test/topology_custom/test_alternator.py index 05080477ed..f868dbcef0 100644 --- a/test/topology_custom/test_alternator.py +++ b/test/topology_custom/test_alternator.py @@ -187,19 +187,22 @@ async def test_localnodes_broadcast_rpc_address(manager: ManagerClient): tested separately, in test/alternator/test_scylla.py. Reproduces issue #18711. 
""" - # Run two Scylla nodes telling both their broadcast_rpc_address is 1.2.3.4 + # Run two Scylla nodes telling both their broadcast_rpc_address is 127.0.0.0 # (this is silly, but servers_add() doesn't let us use a different config # per server). We need to run two nodes to check that the node to which # we send the /localnodes request knows not only its own modified # address, but also the other node's (which it learnt by gossip). # This address isn't used for any communication, but it will be # produced by "/localnodes" and this is what we want to check + # The address "127.0.0.0" is a silly non-existing address which connecting + # to fails immediately (this is useful in the test shutdown - we don't want + # it to hang trying to reach this node, as happened in issue #22744). config = alternator_config | { - 'broadcast_rpc_address': '1.2.3.4' + 'broadcast_rpc_address': '127.0.0.0' } servers = await manager.servers_add(2, config=config) for server in servers: - # We expect /localnodes to return ["1.2.3.4", "1.2.3.4"] + # We expect /localnodes to return ["127.0.0.0", "127.0.0.0"] # (since we configured both nodes with the same broadcast_rpc_address). # We need the retry loop below because the second node might take a # bit of time to bootstrap after coming up, and only then will it @@ -210,7 +213,7 @@ async def test_localnodes_broadcast_rpc_address(manager: ManagerClient): assert time.time() < timeout response = requests.get(url, verify=False) j = json.loads(response.content.decode('utf-8')) - if j == ['1.2.3.4', '1.2.3.4']: + if j == ['127.0.0.0', '127.0.0.0']: break # done await asyncio.sleep(0.1)