Merge 'scylla-nodetool: implement the scrub command' from Botond Dénes
On top of the capabilities of the java-nodetool command, the following additional functionalit is implemented: * Expose quarantine-mode option of the scrub_keyspace REST API * Exit with error and print a message, when scrub finishes with abort or validation_errors return code The command comes with tests and all tests pass with both the new and the current nodetool implementations. Refs: #15588 Refs: #16208 Closes scylladb/scylladb#16391 * github.com:scylladb/scylladb: tools/scylla-nodetool: implement the scrub command test/nodetool: rest_api_mock.py: add missing "f" to error message f string api: extract scrub_status into its own header
This commit is contained in:
18
api/scrub_status.hh
Normal file
18
api/scrub_status.hh
Normal file
@@ -0,0 +1,18 @@
|
||||
/*
|
||||
* Copyright (C) 2023-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
*/
|
||||
|
||||
namespace api {
|
||||
|
||||
enum class scrub_status {
|
||||
successful = 0,
|
||||
aborted,
|
||||
unable_to_cancel, // Not used in Scylla, included to ensure compatibility with nodetool api.
|
||||
validation_errors,
|
||||
};
|
||||
|
||||
} // namespace api
|
||||
@@ -9,6 +9,7 @@
|
||||
#include "storage_service.hh"
|
||||
#include "api/api-doc/storage_service.json.hh"
|
||||
#include "api/api-doc/storage_proxy.json.hh"
|
||||
#include "api/scrub_status.hh"
|
||||
#include "db/config.hh"
|
||||
#include "db/schema_tables.hh"
|
||||
#include "utils/hash.hh"
|
||||
@@ -1557,13 +1558,6 @@ void unset_storage_service(http_context& ctx, routes& r) {
|
||||
sp::get_schema_versions.unset(r);
|
||||
}
|
||||
|
||||
enum class scrub_status {
|
||||
successful = 0,
|
||||
aborted,
|
||||
unable_to_cancel, // Not used in Scylla, included to ensure compatibility with nodetool api.
|
||||
validation_errors,
|
||||
};
|
||||
|
||||
void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_ctl) {
|
||||
ss::get_snapshot_details.set(r, [&snap_ctl](std::unique_ptr<http::request> req) {
|
||||
return snap_ctl.local().get_snapshot_details().then([] (std::unordered_map<sstring, std::vector<db::snapshot_ctl::snapshot_details>>&& result) {
|
||||
|
||||
@@ -178,7 +178,7 @@ class rest_server(aiohttp.abc.AbstractRouter):
|
||||
continue
|
||||
|
||||
logger.error(f"unexpected request\nexpected {expected_req}\ngot {this_req}")
|
||||
return aiohttp.web.Response(status=500, text="Expected {expected_req}, got {this_req}")
|
||||
return aiohttp.web.Response(status=500, text=f"Expected {expected_req}, got {this_req}")
|
||||
|
||||
if expected_req.multiple == expected_request.ONE:
|
||||
del self.expected_requests[0]
|
||||
|
||||
169
test/nodetool/test_scrub.py
Normal file
169
test/nodetool/test_scrub.py
Normal file
@@ -0,0 +1,169 @@
|
||||
#
|
||||
# Copyright 2023-present ScyllaDB
|
||||
#
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
#
|
||||
|
||||
import enum
|
||||
import pytest
|
||||
from rest_api_mock import expected_request
|
||||
import utils
|
||||
|
||||
|
||||
class scrub_status(enum.Enum):
|
||||
successful = 0
|
||||
aborted = 1
|
||||
unable_to_cancel = 2 # not used by ScyllaDB
|
||||
validation_errors = 3
|
||||
|
||||
|
||||
def test_scrub_keyspace(nodetool):
|
||||
nodetool("scrub", "ks", expected_requests=[
|
||||
expected_request("GET", "/storage_service/keyspaces", response=["ks"]),
|
||||
expected_request("GET", "/storage_service/keyspace_scrub/ks", response=scrub_status.successful.value)])
|
||||
|
||||
|
||||
def test_scrub_one_table(nodetool):
|
||||
nodetool("scrub", "ks", "tbl1", expected_requests=[
|
||||
expected_request("GET", "/storage_service/keyspaces", response=["ks"]),
|
||||
expected_request("GET", "/storage_service/keyspace_scrub/ks", params={"cf": "tbl1"},
|
||||
response=scrub_status.successful.value)])
|
||||
|
||||
|
||||
def test_scrub_two_tables(nodetool):
|
||||
nodetool("scrub", "ks", "tbl1", "tbl2", expected_requests=[
|
||||
expected_request("GET", "/storage_service/keyspaces", response=["ks"]),
|
||||
expected_request("GET", "/storage_service/keyspace_scrub/ks", params={"cf": "tbl1,tbl2"},
|
||||
response=scrub_status.successful.value)])
|
||||
|
||||
|
||||
# Cassandra parser completely borks when positional args are missing
|
||||
def test_scrub_nokeyspace(nodetool, scylla_only):
|
||||
utils.check_nodetool_fails_with(
|
||||
nodetool,
|
||||
("scrub",),
|
||||
{},
|
||||
["error processing arguments: missing mandatory positional argument: keyspace"])
|
||||
|
||||
|
||||
def test_scrub_non_existent_keyspace(nodetool):
|
||||
utils.check_nodetool_fails_with(
|
||||
nodetool,
|
||||
("scrub", "non_existent_ks"),
|
||||
{"expected_requests": [expected_request("GET", "/storage_service/keyspaces", response=["ks"])]},
|
||||
["nodetool: Keyspace [non_existent_ks] does not exist.",
|
||||
"error processing arguments: keyspace non_existent_ks does not exist"])
|
||||
|
||||
|
||||
# We don't test all values for --mode and --qurantine-mode, they are passed as-is
|
||||
# to the REST API, so their value make no difference when testing nodetool itself.
|
||||
@pytest.mark.parametrize("table", [[], ["tbl1"], ["tbl1", "tbl2"]])
|
||||
@pytest.mark.parametrize("mode", [None, ("-m", "ABORT"), ("--mode", "ABORT"), "ABORT"])
|
||||
@pytest.mark.parametrize("quarantine_mode", [None, ("--quarantine-mode", "INCLUDE"), ("-q", "ONLY")])
|
||||
@pytest.mark.parametrize("disable_snapshot", [None, "--no-snapshot", "-ns"])
|
||||
def test_scrub_options(request, nodetool, table, mode, quarantine_mode, disable_snapshot):
|
||||
args = ["scrub", "ks"] + table
|
||||
expected_params = {}
|
||||
|
||||
if table:
|
||||
expected_params["cf"] = ",".join(table)
|
||||
|
||||
if mode is not None:
|
||||
if type(mode) is tuple:
|
||||
args += list(mode)
|
||||
expected_params["scrub_mode"] = mode[1]
|
||||
else:
|
||||
args += ["--mode", mode]
|
||||
expected_params["scrub_mode"] = mode
|
||||
|
||||
if quarantine_mode is not None:
|
||||
if request.config.getoption("nodetool") == "scylla":
|
||||
args += list(quarantine_mode)
|
||||
expected_params["quarantine_mode"] = quarantine_mode[1]
|
||||
else:
|
||||
pytest.skip("--quarantine-mode only supported by scylla-nodetool")
|
||||
|
||||
if disable_snapshot:
|
||||
args.append(disable_snapshot)
|
||||
expected_params["disable_snapshot"] = "true"
|
||||
|
||||
nodetool(*args, expected_requests=[
|
||||
expected_request("GET", "/storage_service/keyspaces", response=["ks"]),
|
||||
expected_request("GET", "/storage_service/keyspace_scrub/ks", params=expected_params,
|
||||
response=scrub_status.successful.value)])
|
||||
|
||||
|
||||
def test_scrub_skip_corrupted(nodetool):
|
||||
nodetool("scrub", "ks", "tbl1", "tbl2", "--skip-corrupted", expected_requests=[
|
||||
expected_request("GET", "/storage_service/keyspaces", response=["ks"]),
|
||||
expected_request("GET", "/storage_service/keyspace_scrub/ks", params={"cf": "tbl1,tbl2", "scrub_mode": "SKIP"},
|
||||
response=scrub_status.successful.value)])
|
||||
|
||||
nodetool("scrub", "ks", "tbl1", "tbl2", "-s", expected_requests=[
|
||||
expected_request("GET", "/storage_service/keyspaces", response=["ks"]),
|
||||
expected_request("GET", "/storage_service/keyspace_scrub/ks", params={"cf": "tbl1,tbl2", "scrub_mode": "SKIP"},
|
||||
response=scrub_status.successful.value)])
|
||||
|
||||
|
||||
def test_scrub_skip_corrupted_with_mode(nodetool):
|
||||
utils.check_nodetool_fails_with(
|
||||
nodetool,
|
||||
("scrub", "ks", "--skip-corrupted", "--mode", "ABORT"),
|
||||
{"expected_requests": [expected_request("GET", "/storage_service/keyspaces", response=["ks"])]},
|
||||
["nodetool: skipCorrupted and scrubMode must not be specified together",
|
||||
"error processing arguments: cannot use --skip-corrupted when --mode is used"])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ignored_opt", ["--reinsert-overflowed-ttl", "-r", ("--jobs", "2"), "-j2", "--no-validate",
|
||||
"-n"])
|
||||
def test_scrub_ignored_options(nodetool, ignored_opt):
|
||||
args = ["scrub", "ks"]
|
||||
if type(ignored_opt) is tuple:
|
||||
args += list(ignored_opt)
|
||||
else:
|
||||
args.append(ignored_opt)
|
||||
|
||||
nodetool(*args, expected_requests=[
|
||||
expected_request("GET", "/storage_service/keyspaces", response=["ks"]),
|
||||
expected_request("GET", "/storage_service/keyspace_scrub/ks", response=scrub_status.successful.value)])
|
||||
|
||||
|
||||
# Cassandra nodetool ignores the returned status
|
||||
@pytest.mark.parametrize("status", [scrub_status.successful, scrub_status.aborted, scrub_status.validation_errors])
|
||||
def test_scrub_return_status(nodetool, status, cassandra_only):
|
||||
nodetool("scrub", "ks", "--mode=VALIDATE", expected_requests=[
|
||||
expected_request("GET", "/storage_service/keyspaces", response=["ks"]),
|
||||
expected_request("GET", "/storage_service/keyspace_scrub/ks", params={"scrub_mode": "VALIDATE"},
|
||||
response=status.value)])
|
||||
|
||||
|
||||
def test_scrub_validation_errors_exit_code(nodetool, scylla_only):
|
||||
nodetool("scrub", "ks", "--mode=VALIDATE", expected_requests=[
|
||||
expected_request("GET", "/storage_service/keyspaces", response=["ks"]),
|
||||
expected_request("GET", "/storage_service/keyspace_scrub/ks", params={"scrub_mode": "VALIDATE"},
|
||||
response=scrub_status.successful.value)])
|
||||
|
||||
utils.check_nodetool_fails_with(
|
||||
nodetool,
|
||||
("scrub", "ks", "--mode=VALIDATE"),
|
||||
{"expected_requests": [
|
||||
expected_request("GET", "/storage_service/keyspaces", response=["ks"]),
|
||||
expected_request("GET", "/storage_service/keyspace_scrub/ks", params={"scrub_mode": "VALIDATE"},
|
||||
response=scrub_status.validation_errors.value)]},
|
||||
["scrub failed: there are invalid sstables"])
|
||||
|
||||
|
||||
def test_scrub_abort_exit_code(nodetool, scylla_only):
|
||||
nodetool("scrub", "ks", "--mode=ABORT", expected_requests=[
|
||||
expected_request("GET", "/storage_service/keyspaces", response=["ks"]),
|
||||
expected_request("GET", "/storage_service/keyspace_scrub/ks", params={"scrub_mode": "ABORT"},
|
||||
response=scrub_status.successful.value)])
|
||||
|
||||
utils.check_nodetool_fails_with(
|
||||
nodetool,
|
||||
("scrub", "ks", "--mode=ABORT"),
|
||||
{"expected_requests": [
|
||||
expected_request("GET", "/storage_service/keyspaces", response=["ks"]),
|
||||
expected_request("GET", "/storage_service/keyspace_scrub/ks", params={"scrub_mode": "ABORT"},
|
||||
response=scrub_status.aborted.value)]},
|
||||
["scrub failed: aborted"])
|
||||
@@ -18,6 +18,7 @@
|
||||
#include <seastar/util/short_streams.hh>
|
||||
#include <yaml-cpp/yaml.h>
|
||||
|
||||
#include "api/scrub_status.hh"
|
||||
#include "db_clock.hh"
|
||||
#include "log.hh"
|
||||
#include "tools/utils.hh"
|
||||
@@ -36,6 +37,10 @@ const auto app_name = "scylla-nodetool";
|
||||
|
||||
logging::logger nlog(app_name);
|
||||
|
||||
struct operation_failed_on_scylladb : public std::runtime_error {
|
||||
using std::runtime_error::runtime_error;
|
||||
};
|
||||
|
||||
class scylla_rest_client {
|
||||
sstring _host;
|
||||
uint16_t _port;
|
||||
@@ -606,6 +611,48 @@ void removenode_operation(scylla_rest_client& client, const bpo::variables_map&
|
||||
}
|
||||
}
|
||||
|
||||
void scrub_operation(scylla_rest_client& client, const bpo::variables_map& vm) {
|
||||
const auto [keyspace, tables] = parse_keyspace_and_tables(client, vm, false);
|
||||
if (keyspace.empty()) {
|
||||
throw std::invalid_argument("missing mandatory positional argument: keyspace");
|
||||
}
|
||||
if (vm.count("skip-corrupted") && vm.count("mode")) {
|
||||
throw std::invalid_argument("cannot use --skip-corrupted when --mode is used");
|
||||
}
|
||||
|
||||
std::unordered_map<sstring, sstring> params;
|
||||
|
||||
if (!tables.empty()) {
|
||||
params["cf"] = fmt::to_string(fmt::join(tables.begin(), tables.end(), ","));
|
||||
}
|
||||
|
||||
if (vm.count("mode")) {
|
||||
params["scrub_mode"] = vm["mode"].as<sstring>();
|
||||
} else if (vm.count("skip-corrupted")) {
|
||||
params["scrub_mode"] = "SKIP";
|
||||
}
|
||||
|
||||
if (vm.count("quarantine-mode")) {
|
||||
params["quarantine_mode"] = vm["quarantine-mode"].as<sstring>();
|
||||
}
|
||||
|
||||
if (vm.count("no-snapshot")) {
|
||||
params["disable_snapshot"] = "true";
|
||||
}
|
||||
|
||||
auto res = client.get(format("/storage_service/keyspace_scrub/{}", keyspace), std::move(params)).GetInt();
|
||||
|
||||
switch (api::scrub_status(res)) {
|
||||
case api::scrub_status::successful:
|
||||
case api::scrub_status::unable_to_cancel:
|
||||
return;
|
||||
case api::scrub_status::aborted:
|
||||
throw operation_failed_on_scylladb("scrub failed: aborted");
|
||||
case api::scrub_status::validation_errors:
|
||||
throw operation_failed_on_scylladb("scrub failed: there are invalid sstables");
|
||||
}
|
||||
}
|
||||
|
||||
void setlogginglevel_operation(scylla_rest_client& client, const bpo::variables_map& vm) {
|
||||
if (!vm.count("logger") || !vm.count("level")) {
|
||||
throw std::invalid_argument("resetting logger(s) is not supported yet, the logger and level parameters are required");
|
||||
@@ -764,6 +811,8 @@ const std::map<std::string_view, std::string_view> option_substitutions{
|
||||
{"--kc.list", "--keyspace-table-list"},
|
||||
{"-las", "--load-and-stream"},
|
||||
{"-pro", "--primary-replica-only"},
|
||||
{"-ns", "--no-snapshot"},
|
||||
{"-m", "--mode"}, // FIXME: this clashes with seastar option for memory
|
||||
};
|
||||
|
||||
std::map<operation, operation_func> get_operations_with_func() {
|
||||
@@ -1120,6 +1169,29 @@ Fore more information, see: https://opensource.docs.scylladb.com/stable/operatin
|
||||
},
|
||||
removenode_operation
|
||||
},
|
||||
{
|
||||
{
|
||||
"scrub",
|
||||
"Scrub the SSTable files in the specified keyspace or table(s)",
|
||||
R"(
|
||||
Fore more information, see: https://opensource.docs.scylladb.com/stable/operating-scylla/nodetool-commands/scrub.html
|
||||
)",
|
||||
{
|
||||
typed_option<>("no-snapshot", "Do not take a snapshot of scrubbed tables before starting scrub (default false)"),
|
||||
typed_option<>("skip-corrupted,s", "Skip corrupted rows or partitions, even when scrubbing counter tables (deprecated, use ‘–-mode’ instead, default false)"),
|
||||
typed_option<sstring>("mode,m", "How to handle corrupt data (one of: ABORT|SKIP|SEGREGATE|VALIDATE, default ABORT; overrides ‘–-skip-corrupted’)"),
|
||||
typed_option<sstring>("quarantine-mode,q", "How to handle quarantined sstables (one of: INCLUDE|EXCLUDE|ONLY, default INCLUDE)"),
|
||||
typed_option<>("no-validate,n", "Do not validate columns using column validator (unused)"),
|
||||
typed_option<>("reinsert-overflowed-ttl,r", "Rewrites rows with overflowed expiration date (unused)"),
|
||||
typed_option<int64_t>("jobs,j", "The number of sstables to be scrubbed concurrently (unused)"),
|
||||
},
|
||||
{
|
||||
typed_option<sstring>("keyspace", "The keyspace to scrub", 1),
|
||||
typed_option<std::vector<sstring>>("table", "The table(s) to scrub (if unspecified, all tables in the keyspace are scrubbed)", -1),
|
||||
},
|
||||
},
|
||||
scrub_operation
|
||||
},
|
||||
{
|
||||
{
|
||||
"setlogginglevel",
|
||||
@@ -1353,6 +1425,9 @@ For more information, see: https://opensource.docs.scylladb.com/stable/operating
|
||||
} catch (std::invalid_argument& e) {
|
||||
fmt::print(std::cerr, "error processing arguments: {}\n", e.what());
|
||||
return 1;
|
||||
} catch (operation_failed_on_scylladb& e) {
|
||||
fmt::print(std::cerr, "{}\n", e.what());
|
||||
return 3;
|
||||
} catch (...) {
|
||||
fmt::print(std::cerr, "error running operation: {}\n", std::current_exception());
|
||||
return 2;
|
||||
|
||||
Reference in New Issue
Block a user