test/topology_experimental_raft: Add tablet split test

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
This commit is contained in:
Raphael S. Carvalho
2024-01-23 21:29:40 -03:00
parent 90c9a5d7af
commit 3b14c5b84a

View File

@@ -551,3 +551,73 @@ async def test_tablet_cleanup(manager: ManagerClient):
# Bonus: check that commitlog_cleanups doesn't have any garbage after restart.
assert 0 == (await cql.run_async("SELECT COUNT(*) FROM system.commitlog_cleanups", host=hosts[0]))[0].count
async def get_tablet_count(manager: ManagerClient, server: ServerInfo, keyspace_name: str, table_name: str):
    """Return the tablet count of a table as seen by ``server``.

    Looks up the table id for ``keyspace_name``.``table_name``, queries
    ``system.tablets`` on the given server for that id and returns the
    ``tablet_count`` column of the first row.
    """
    cql = manager.cql
    target_host = cql.cluster.metadata.get_host(server.ip_addr)

    # A read barrier is required so that the local tablet metadata on the
    # queried node reflects the finalized tablet movement.
    await read_barrier(cql, target_host)

    table_id = await manager.get_table_id(keyspace_name, table_name)
    query = (f"SELECT tablet_count FROM system.tablets where "
             f"table_id = {table_id}")
    result = await cql.run_async(query, host=target_host)
    return result[0].tablet_count
@pytest.mark.asyncio
@skip_mode('release', 'error injections are not supported in release mode')
async def test_tablet_split(manager: ManagerClient):
    """Check that a table's tablets get split while data stays intact.

    Scenario:
      1. Start one server with a 1024-byte target tablet size and tablet
         balancing disabled, then create a keyspace with a single initial
         tablet and populate a table in it.
      2. Flush the keyspace and verify the table still has exactly one tablet.
      3. Add a second server, enable error injections and re-enable tablet
         balancing so that split and migration may run concurrently.
      4. Wait for the split to be logged by the first server, then verify the
         data is still fully readable and the tablet count grew above one.
    """
    logger.info("Bootstrapping cluster")
    cmdline = [
        '--logger-log-level', 'storage_service=debug',
        '--target-tablet-size-in-bytes', '1024',
    ]
    servers = [await manager.server_add(cmdline=cmdline)]

    # Keep the balancer off until the data set and the second node are ready.
    await manager.api.disable_tablet_balancing(servers[0].ip_addr)

    cql = manager.get_cql()
    await cql.run_async("CREATE KEYSPACE test WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 1} AND tablets = {'initial': 1};")
    await cql.run_async("CREATE TABLE test.test (pk int PRIMARY KEY, c int);")

    # enough to trigger multiple splits with max size of 1024 bytes.
    keys = range(256)
    await asyncio.gather(*[cql.run_async(f"INSERT INTO test.test (pk, c) VALUES ({k}, {k});") for k in keys])

    async def check():
        """Assert that every inserted row is present and has c equal to pk."""
        logger.info("Checking table")
        cql = manager.get_cql()
        rows = await cql.run_async("SELECT * FROM test.test;")
        assert len(rows) == len(keys)
        for r in rows:
            assert r.c == r.pk

    await check()

    await manager.api.flush_keyspace(servers[0].ip_addr, "test")

    tablet_count = await get_tablet_count(manager, servers[0], 'test', 'test')
    assert tablet_count == 1

    logger.info("Adding new server")
    servers.append(await manager.server_add(cmdline=cmdline))

    # Increases the chance of tablet migration concurrent with split
    await inject_error_one_shot_on(manager, "tablet_allocator_shuffle", servers)
    await inject_error_on(manager, "tablet_load_stats_refresh_before_rebalancing", servers)

    # Mark the log position before enabling balancing so that only split
    # messages emitted from this point on are matched below.
    s1_log = await manager.server_open_log(servers[0].server_id)
    s1_mark = await s1_log.mark()

    # Now there's a split and migration need, so they'll potentially run concurrently.
    await manager.api.enable_tablet_balancing(servers[0].ip_addr)

    await check()
    # Give load balancer some time to do work. Use the asyncio sleep rather
    # than time.sleep() so the event loop is not blocked while waiting.
    await asyncio.sleep(5)

    await s1_log.wait_for('Detected tablet split for table', from_mark=s1_mark)

    await check()

    tablet_count = await get_tablet_count(manager, servers[0], 'test', 'test')
    assert tablet_count > 1