Compare commits
105 Commits
copilot/ad
...
copilot/pr
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
28c4812edc | ||
|
|
73db5c94de | ||
|
|
85f05fbe1b | ||
|
|
83f46fa7f5 | ||
|
|
ea6f2a21c6 | ||
|
|
30c4bc3f96 | ||
|
|
77fa936edc | ||
|
|
0ec485845b | ||
|
|
dace39fd6c | ||
|
|
5f8f724d78 | ||
|
|
df93ea626b | ||
|
|
74347625f9 | ||
|
|
f1fc5cc808 | ||
|
|
61bbea51ad | ||
|
|
c2b1b10ca0 | ||
|
|
ec87b92ba1 | ||
|
|
9c9371511f | ||
|
|
2e80997630 | ||
|
|
1143acaf5b | ||
|
|
e153cc434f | ||
|
|
844545bb74 | ||
|
|
ccacea621f | ||
|
|
f4a4671ad6 | ||
|
|
70a0418102 | ||
|
|
6fcc1ecf94 | ||
|
|
8dde70d04c | ||
|
|
2e7070d3b7 | ||
|
|
3f7ee3ce5d | ||
|
|
1e14c08eee | ||
|
|
b9ec1180f5 | ||
|
|
12483d8c3c | ||
|
|
d5641398f5 | ||
|
|
c06e63daed | ||
|
|
c1c3b2c5bb | ||
|
|
5e7456936e | ||
|
|
e6f5f2537e | ||
|
|
76aacc00f2 | ||
|
|
7e7e378a4b | ||
|
|
77ee7f3417 | ||
|
|
0ff89a58be | ||
|
|
f7ffa395a8 | ||
|
|
3fa3b920de | ||
|
|
e7ca52ee79 | ||
|
|
730eca5dac | ||
|
|
c8cff94a5a | ||
|
|
5fae4cdf80 | ||
|
|
8bbcaacba1 | ||
|
|
3dfa5ebd7f | ||
|
|
24264e24bb | ||
|
|
0c64e3be9a | ||
|
|
b3b0860e7c | ||
|
|
db15c212a6 | ||
|
|
3595941020 | ||
|
|
102516a787 | ||
|
|
d5b63df46e | ||
|
|
f545ed37bc | ||
|
|
5f13880a91 | ||
|
|
8c4ac457af | ||
|
|
e48170ca8e | ||
|
|
11ad32c85e | ||
|
|
4c8c9cd548 | ||
|
|
98f431dd81 | ||
|
|
4ffdb0721f | ||
|
|
775906d749 | ||
|
|
11eca621b0 | ||
|
|
d7818b56df | ||
|
|
033fed5734 | ||
|
|
c6c30b7d0a | ||
|
|
5afcec4a3d | ||
|
|
9b5f3d12a3 | ||
|
|
0e51a1f812 | ||
|
|
8b807b299e | ||
|
|
07ff659849 | ||
|
|
be9992cfb3 | ||
|
|
daf00a7f24 | ||
|
|
62962f33bb | ||
|
|
060c2f7c0d | ||
|
|
64149b57c3 | ||
|
|
4b004fcdfc | ||
|
|
5e38b3071b | ||
|
|
225b3351fc | ||
|
|
e762027943 | ||
|
|
8edd5b80ab | ||
|
|
fb84b30f88 | ||
|
|
8545f7eedd | ||
|
|
e52e1f842e | ||
|
|
0a7df4b8ac | ||
|
|
9bb8156f02 | ||
|
|
d1b796bc43 | ||
|
|
1ad64731bc | ||
|
|
abadb8ebfb | ||
|
|
54f16f9019 | ||
|
|
b584e1e18e | ||
|
|
aa1d3f1170 | ||
|
|
e309b5dbe1 | ||
|
|
846b656610 | ||
|
|
ee851266be | ||
|
|
9434ec2fd1 | ||
|
|
f54602daf0 | ||
|
|
097c2cd676 | ||
|
|
4f30807f01 | ||
|
|
55704908a0 | ||
|
|
337f417b13 | ||
|
|
705af2bc16 | ||
|
|
5b5f9120d0 |
8
.github/CODEOWNERS
vendored
8
.github/CODEOWNERS
vendored
@@ -1,5 +1,5 @@
|
||||
# AUTH
|
||||
auth/* @nuivall @ptrsmrn
|
||||
auth/* @nuivall
|
||||
|
||||
# CACHE
|
||||
row_cache* @tgrabiec
|
||||
@@ -25,11 +25,11 @@ compaction/* @raphaelsc
|
||||
transport/*
|
||||
|
||||
# CQL QUERY LANGUAGE
|
||||
cql3/* @tgrabiec @nuivall @ptrsmrn
|
||||
cql3/* @tgrabiec @nuivall
|
||||
|
||||
# COUNTERS
|
||||
counters* @nuivall @ptrsmrn
|
||||
tests/counter_test* @nuivall @ptrsmrn
|
||||
counters* @nuivall
|
||||
tests/counter_test* @nuivall
|
||||
|
||||
# DOCS
|
||||
docs/* @annastuchlik @tzach
|
||||
|
||||
@@ -18,7 +18,7 @@ jobs:
|
||||
|
||||
// Regular expression pattern to check for "Fixes" prefix
|
||||
// Adjusted to dynamically insert the repository full name
|
||||
const pattern = `Fixes:? (?:#|${repo.replace('/', '\\/')}#|https://github\\.com/${repo.replace('/', '\\/')}/issues/)(\\d+)`;
|
||||
const pattern = `Fixes:? ((?:#|${repo.replace('/', '\\/')}#|https://github\\.com/${repo.replace('/', '\\/')}/issues/)(\\d+)|([A-Z]+-\\d+))`;
|
||||
const regex = new RegExp(pattern);
|
||||
|
||||
if (!regex.test(body)) {
|
||||
|
||||
10
.github/workflows/docs-validate-metrics.yml
vendored
10
.github/workflows/docs-validate-metrics.yml
vendored
@@ -7,7 +7,7 @@ on:
|
||||
- enterprise
|
||||
paths:
|
||||
- '**/*.cc'
|
||||
- 'scripts/metrics-config.yml'
|
||||
- 'scripts/metrics-config.yml'
|
||||
- 'scripts/get_description.py'
|
||||
- 'docs/_ext/scylladb_metrics.py'
|
||||
|
||||
@@ -15,20 +15,20 @@ jobs:
|
||||
validate-metrics:
|
||||
runs-on: ubuntu-latest
|
||||
name: Check metrics documentation coverage
|
||||
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v6
|
||||
with:
|
||||
python-version: '3.10'
|
||||
|
||||
|
||||
- name: Install dependencies
|
||||
run: pip install PyYAML
|
||||
|
||||
|
||||
- name: Validate metrics
|
||||
run: python3 scripts/get_description.py --validate -c scripts/metrics-config.yml
|
||||
|
||||
5
.github/workflows/trigger-scylla-ci.yaml
vendored
5
.github/workflows/trigger-scylla-ci.yaml
vendored
@@ -3,10 +3,13 @@ name: Trigger Scylla CI Route
|
||||
on:
|
||||
issue_comment:
|
||||
types: [created]
|
||||
pull_request_target:
|
||||
types:
|
||||
- unlabeled
|
||||
|
||||
jobs:
|
||||
trigger-jenkins:
|
||||
if: github.event.comment.user.login != 'scylladbbot' && contains(github.event.comment.body, '@scylladbbot') && contains(github.event.comment.body, 'trigger-ci')
|
||||
if: (github.event.comment.user.login != 'scylladbbot' && contains(github.event.comment.body, '@scylladbbot') && contains(github.event.comment.body, 'trigger-ci')) || github.event.label.name == 'conflicts'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Trigger Scylla-CI-Route Jenkins Job
|
||||
|
||||
@@ -42,7 +42,7 @@ comparison_operator_type get_comparison_operator(const rjson::value& comparison_
|
||||
if (!comparison_operator.IsString()) {
|
||||
throw api_error::validation(fmt::format("Invalid comparison operator definition {}", rjson::print(comparison_operator)));
|
||||
}
|
||||
std::string op = comparison_operator.GetString();
|
||||
std::string op = rjson::to_string(comparison_operator);
|
||||
auto it = ops.find(op);
|
||||
if (it == ops.end()) {
|
||||
throw api_error::validation(fmt::format("Unsupported comparison operator {}", op));
|
||||
@@ -377,8 +377,8 @@ bool check_compare(const rjson::value* v1, const rjson::value& v2, const Compara
|
||||
return cmp(unwrap_number(*v1, cmp.diagnostic), unwrap_number(v2, cmp.diagnostic));
|
||||
}
|
||||
if (kv1.name == "S") {
|
||||
return cmp(std::string_view(kv1.value.GetString(), kv1.value.GetStringLength()),
|
||||
std::string_view(kv2.value.GetString(), kv2.value.GetStringLength()));
|
||||
return cmp(rjson::to_string_view(kv1.value),
|
||||
rjson::to_string_view(kv2.value));
|
||||
}
|
||||
if (kv1.name == "B") {
|
||||
auto d_kv1 = unwrap_bytes(kv1.value, v1_from_query);
|
||||
@@ -470,9 +470,9 @@ static bool check_BETWEEN(const rjson::value* v, const rjson::value& lb, const r
|
||||
return check_BETWEEN(unwrap_number(*v, diag), unwrap_number(lb, diag), unwrap_number(ub, diag), bounds_from_query);
|
||||
}
|
||||
if (kv_v.name == "S") {
|
||||
return check_BETWEEN(std::string_view(kv_v.value.GetString(), kv_v.value.GetStringLength()),
|
||||
std::string_view(kv_lb.value.GetString(), kv_lb.value.GetStringLength()),
|
||||
std::string_view(kv_ub.value.GetString(), kv_ub.value.GetStringLength()),
|
||||
return check_BETWEEN(rjson::to_string_view(kv_v.value),
|
||||
rjson::to_string_view(kv_lb.value),
|
||||
rjson::to_string_view(kv_ub.value),
|
||||
bounds_from_query);
|
||||
}
|
||||
if (kv_v.name == "B") {
|
||||
|
||||
@@ -8,6 +8,8 @@
|
||||
|
||||
#include "consumed_capacity.hh"
|
||||
#include "error.hh"
|
||||
#include "utils/rjson.hh"
|
||||
#include <fmt/format.h>
|
||||
|
||||
namespace alternator {
|
||||
|
||||
@@ -32,12 +34,12 @@ bool consumed_capacity_counter::should_add_capacity(const rjson::value& request)
|
||||
if (!return_consumed->IsString()) {
|
||||
throw api_error::validation("Non-string ReturnConsumedCapacity field in request");
|
||||
}
|
||||
std::string consumed = return_consumed->GetString();
|
||||
std::string_view consumed = rjson::to_string_view(*return_consumed);
|
||||
if (consumed == "INDEXES") {
|
||||
throw api_error::validation("INDEXES consumed capacity is not supported");
|
||||
}
|
||||
if (consumed != "TOTAL") {
|
||||
throw api_error::validation("Unknown consumed capacity "+ consumed);
|
||||
throw api_error::validation(fmt::format("Unknown consumed capacity {}", consumed));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -419,7 +419,7 @@ static std::optional<std::string> find_table_name(const rjson::value& request) {
|
||||
if (!table_name_value->IsString()) {
|
||||
throw api_error::validation("Non-string TableName field in request");
|
||||
}
|
||||
std::string table_name = table_name_value->GetString();
|
||||
std::string table_name = rjson::to_string(*table_name_value);
|
||||
return table_name;
|
||||
}
|
||||
|
||||
@@ -546,7 +546,7 @@ get_table_or_view(service::storage_proxy& proxy, const rjson::value& request) {
|
||||
// does exist but the index does not (ValidationException).
|
||||
if (proxy.data_dictionary().has_schema(keyspace_name, orig_table_name)) {
|
||||
throw api_error::validation(
|
||||
fmt::format("Requested resource not found: Index '{}' for table '{}'", index_name->GetString(), orig_table_name));
|
||||
fmt::format("Requested resource not found: Index '{}' for table '{}'", rjson::to_string_view(*index_name), orig_table_name));
|
||||
} else {
|
||||
throw api_error::resource_not_found(
|
||||
fmt::format("Requested resource not found: Table: {} not found", orig_table_name));
|
||||
@@ -587,7 +587,7 @@ static std::string get_string_attribute(const rjson::value& value, std::string_v
|
||||
throw api_error::validation(fmt::format("Expected string value for attribute {}, got: {}",
|
||||
attribute_name, value));
|
||||
}
|
||||
return std::string(attribute_value->GetString(), attribute_value->GetStringLength());
|
||||
return rjson::to_string(*attribute_value);
|
||||
}
|
||||
|
||||
// Convenience function for getting the value of a boolean attribute, or a
|
||||
@@ -1080,8 +1080,8 @@ static void add_column(schema_builder& builder, const std::string& name, const r
|
||||
}
|
||||
for (auto it = attribute_definitions.Begin(); it != attribute_definitions.End(); ++it) {
|
||||
const rjson::value& attribute_info = *it;
|
||||
if (attribute_info["AttributeName"].GetString() == name) {
|
||||
auto type = attribute_info["AttributeType"].GetString();
|
||||
if (rjson::to_string_view(attribute_info["AttributeName"]) == name) {
|
||||
std::string_view type = rjson::to_string_view(attribute_info["AttributeType"]);
|
||||
data_type dt = parse_key_type(type);
|
||||
if (computed_column) {
|
||||
// Computed column for GSI (doesn't choose a real column as-is
|
||||
@@ -1116,7 +1116,7 @@ static std::pair<std::string, std::string> parse_key_schema(const rjson::value&
|
||||
throw api_error::validation("First element of KeySchema must be an object");
|
||||
}
|
||||
const rjson::value *v = rjson::find((*key_schema)[0], "KeyType");
|
||||
if (!v || !v->IsString() || v->GetString() != std::string("HASH")) {
|
||||
if (!v || !v->IsString() || rjson::to_string_view(*v) != "HASH") {
|
||||
throw api_error::validation("First key in KeySchema must be a HASH key");
|
||||
}
|
||||
v = rjson::find((*key_schema)[0], "AttributeName");
|
||||
@@ -1124,14 +1124,14 @@ static std::pair<std::string, std::string> parse_key_schema(const rjson::value&
|
||||
throw api_error::validation("First key in KeySchema must have string AttributeName");
|
||||
}
|
||||
validate_attr_name_length(supplementary_context, v->GetStringLength(), true, "HASH key in KeySchema - ");
|
||||
std::string hash_key = v->GetString();
|
||||
std::string hash_key = rjson::to_string(*v);
|
||||
std::string range_key;
|
||||
if (key_schema->Size() == 2) {
|
||||
if (!(*key_schema)[1].IsObject()) {
|
||||
throw api_error::validation("Second element of KeySchema must be an object");
|
||||
}
|
||||
v = rjson::find((*key_schema)[1], "KeyType");
|
||||
if (!v || !v->IsString() || v->GetString() != std::string("RANGE")) {
|
||||
if (!v || !v->IsString() || rjson::to_string_view(*v) != "RANGE") {
|
||||
throw api_error::validation("Second key in KeySchema must be a RANGE key");
|
||||
}
|
||||
v = rjson::find((*key_schema)[1], "AttributeName");
|
||||
@@ -1799,6 +1799,11 @@ static future<executor::request_return_type> create_table_on_shard0(service::cli
|
||||
}
|
||||
}
|
||||
}
|
||||
// Creating an index in tablets mode requires the rf_rack_valid_keyspaces option to be enabled.
|
||||
// GSI and LSI indexes are based on materialized views which require this option to avoid consistency issues.
|
||||
if (!view_builders.empty() && ksm->uses_tablets() && !sp.data_dictionary().get_config().rf_rack_valid_keyspaces()) {
|
||||
co_return api_error::validation("GlobalSecondaryIndexes and LocalSecondaryIndexes with tablets require the rf_rack_valid_keyspaces option to be enabled.");
|
||||
}
|
||||
try {
|
||||
schema_mutations = service::prepare_new_keyspace_announcement(sp.local_db(), ksm, ts);
|
||||
} catch (exceptions::already_exists_exception&) {
|
||||
@@ -1887,8 +1892,8 @@ future<executor::request_return_type> executor::create_table(client_state& clien
|
||||
std::string def_type = type_to_string(def.type);
|
||||
for (auto it = attribute_definitions.Begin(); it != attribute_definitions.End(); ++it) {
|
||||
const rjson::value& attribute_info = *it;
|
||||
if (attribute_info["AttributeName"].GetString() == def.name_as_text()) {
|
||||
auto type = attribute_info["AttributeType"].GetString();
|
||||
if (rjson::to_string_view(attribute_info["AttributeName"]) == def.name_as_text()) {
|
||||
std::string_view type = rjson::to_string_view(attribute_info["AttributeType"]);
|
||||
if (type != def_type) {
|
||||
throw api_error::validation(fmt::format("AttributeDefinitions redefined {} to {} already a key attribute of type {} in this table", def.name_as_text(), type, def_type));
|
||||
}
|
||||
@@ -2019,6 +2024,10 @@ future<executor::request_return_type> executor::update_table(client_state& clien
|
||||
co_return api_error::validation(fmt::format(
|
||||
"LSI {} already exists in table {}, can't use same name for GSI", index_name, table_name));
|
||||
}
|
||||
if (p.local().local_db().find_keyspace(keyspace_name).get_replication_strategy().uses_tablets() &&
|
||||
!p.local().data_dictionary().get_config().rf_rack_valid_keyspaces()) {
|
||||
co_return api_error::validation("GlobalSecondaryIndexes with tablets require the rf_rack_valid_keyspaces option to be enabled.");
|
||||
}
|
||||
|
||||
elogger.trace("Adding GSI {}", index_name);
|
||||
// FIXME: read and handle "Projection" parameter. This will
|
||||
@@ -2362,7 +2371,7 @@ put_or_delete_item::put_or_delete_item(const rjson::value& item, schema_ptr sche
|
||||
_cells = std::vector<cell>();
|
||||
_cells->reserve(item.MemberCount());
|
||||
for (auto it = item.MemberBegin(); it != item.MemberEnd(); ++it) {
|
||||
bytes column_name = to_bytes(it->name.GetString());
|
||||
bytes column_name = to_bytes(rjson::to_string_view(it->name));
|
||||
validate_value(it->value, "PutItem");
|
||||
const column_definition* cdef = find_attribute(*schema, column_name);
|
||||
validate_attr_name_length("", column_name.size(), cdef && cdef->is_primary_key());
|
||||
@@ -2783,10 +2792,10 @@ static void verify_all_are_used(const rjson::value* field,
|
||||
return;
|
||||
}
|
||||
for (auto it = field->MemberBegin(); it != field->MemberEnd(); ++it) {
|
||||
if (!used.contains(it->name.GetString())) {
|
||||
if (!used.contains(rjson::to_string(it->name))) {
|
||||
throw api_error::validation(
|
||||
format("{} has spurious '{}', not used in {}",
|
||||
field_name, it->name.GetString(), operation));
|
||||
field_name, rjson::to_string_view(it->name), operation));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -3000,7 +3009,7 @@ future<executor::request_return_type> executor::delete_item(client_state& client
|
||||
}
|
||||
|
||||
static schema_ptr get_table_from_batch_request(const service::storage_proxy& proxy, const rjson::value::ConstMemberIterator& batch_request) {
|
||||
sstring table_name = batch_request->name.GetString(); // JSON keys are always strings
|
||||
sstring table_name = rjson::to_sstring(batch_request->name); // JSON keys are always strings
|
||||
try {
|
||||
return proxy.data_dictionary().find_schema(sstring(executor::KEYSPACE_NAME_PREFIX) + table_name, table_name);
|
||||
} catch(data_dictionary::no_such_column_family&) {
|
||||
@@ -3386,7 +3395,7 @@ static bool hierarchy_filter(rjson::value& val, const attribute_path_map_node<T>
|
||||
}
|
||||
rjson::value newv = rjson::empty_object();
|
||||
for (auto it = v.MemberBegin(); it != v.MemberEnd(); ++it) {
|
||||
std::string attr = it->name.GetString();
|
||||
std::string attr = rjson::to_string(it->name);
|
||||
auto x = members.find(attr);
|
||||
if (x != members.end()) {
|
||||
if (x->second) {
|
||||
@@ -3606,7 +3615,7 @@ static std::optional<attrs_to_get> calculate_attrs_to_get(const rjson::value& re
|
||||
const rjson::value& attributes_to_get = req["AttributesToGet"];
|
||||
attrs_to_get ret;
|
||||
for (auto it = attributes_to_get.Begin(); it != attributes_to_get.End(); ++it) {
|
||||
attribute_path_map_add("AttributesToGet", ret, it->GetString());
|
||||
attribute_path_map_add("AttributesToGet", ret, rjson::to_string(*it));
|
||||
validate_attr_name_length("AttributesToGet", it->GetStringLength(), false);
|
||||
}
|
||||
if (ret.empty()) {
|
||||
@@ -4272,12 +4281,12 @@ inline void update_item_operation::apply_attribute_updates(const std::unique_ptr
|
||||
attribute_collector& modified_attrs, bool& any_updates, bool& any_deletes) const {
|
||||
for (auto it = _attribute_updates->MemberBegin(); it != _attribute_updates->MemberEnd(); ++it) {
|
||||
// Note that it.key() is the name of the column, *it is the operation
|
||||
bytes column_name = to_bytes(it->name.GetString());
|
||||
bytes column_name = to_bytes(rjson::to_string_view(it->name));
|
||||
const column_definition* cdef = _schema->get_column_definition(column_name);
|
||||
if (cdef && cdef->is_primary_key()) {
|
||||
throw api_error::validation(format("UpdateItem cannot update key column {}", it->name.GetString()));
|
||||
throw api_error::validation(format("UpdateItem cannot update key column {}", rjson::to_string_view(it->name)));
|
||||
}
|
||||
std::string action = (it->value)["Action"].GetString();
|
||||
std::string action = rjson::to_string((it->value)["Action"]);
|
||||
if (action == "DELETE") {
|
||||
// The DELETE operation can do two unrelated tasks. Without a
|
||||
// "Value" option, it is used to delete an attribute. With a
|
||||
@@ -5474,7 +5483,7 @@ calculate_bounds_conditions(schema_ptr schema, const rjson::value& conditions) {
|
||||
std::vector<query::clustering_range> ck_bounds;
|
||||
|
||||
for (auto it = conditions.MemberBegin(); it != conditions.MemberEnd(); ++it) {
|
||||
std::string key = it->name.GetString();
|
||||
sstring key = rjson::to_sstring(it->name);
|
||||
const rjson::value& condition = it->value;
|
||||
|
||||
const rjson::value& comp_definition = rjson::get(condition, "ComparisonOperator");
|
||||
@@ -5482,13 +5491,13 @@ calculate_bounds_conditions(schema_ptr schema, const rjson::value& conditions) {
|
||||
|
||||
const column_definition& pk_cdef = schema->partition_key_columns().front();
|
||||
const column_definition* ck_cdef = schema->clustering_key_size() > 0 ? &schema->clustering_key_columns().front() : nullptr;
|
||||
if (sstring(key) == pk_cdef.name_as_text()) {
|
||||
if (key == pk_cdef.name_as_text()) {
|
||||
if (!partition_ranges.empty()) {
|
||||
throw api_error::validation("Currently only a single restriction per key is allowed");
|
||||
}
|
||||
partition_ranges.push_back(calculate_pk_bound(schema, pk_cdef, comp_definition, attr_list));
|
||||
}
|
||||
if (ck_cdef && sstring(key) == ck_cdef->name_as_text()) {
|
||||
if (ck_cdef && key == ck_cdef->name_as_text()) {
|
||||
if (!ck_bounds.empty()) {
|
||||
throw api_error::validation("Currently only a single restriction per key is allowed");
|
||||
}
|
||||
@@ -5889,7 +5898,7 @@ future<executor::request_return_type> executor::list_tables(client_state& client
|
||||
|
||||
rjson::value* exclusive_start_json = rjson::find(request, "ExclusiveStartTableName");
|
||||
rjson::value* limit_json = rjson::find(request, "Limit");
|
||||
std::string exclusive_start = exclusive_start_json ? exclusive_start_json->GetString() : "";
|
||||
std::string exclusive_start = exclusive_start_json ? rjson::to_string(*exclusive_start_json) : "";
|
||||
int limit = limit_json ? limit_json->GetInt() : 100;
|
||||
if (limit < 1 || limit > 100) {
|
||||
co_return api_error::validation("Limit must be greater than 0 and no greater than 100");
|
||||
|
||||
@@ -496,7 +496,7 @@ const std::pair<std::string, const rjson::value*> unwrap_set(const rjson::value&
|
||||
return {"", nullptr};
|
||||
}
|
||||
auto it = v.MemberBegin();
|
||||
const std::string it_key = it->name.GetString();
|
||||
const std::string it_key = rjson::to_string(it->name);
|
||||
if (it_key != "SS" && it_key != "BS" && it_key != "NS") {
|
||||
return {std::move(it_key), nullptr};
|
||||
}
|
||||
|
||||
@@ -93,7 +93,7 @@ future<executor::request_return_type> executor::update_time_to_live(client_state
|
||||
if (v->GetStringLength() < 1 || v->GetStringLength() > 255) {
|
||||
co_return api_error::validation("The length of AttributeName must be between 1 and 255");
|
||||
}
|
||||
sstring attribute_name(v->GetString(), v->GetStringLength());
|
||||
sstring attribute_name = rjson::to_sstring(*v);
|
||||
|
||||
co_await verify_permission(_enforce_authorization, _warn_authorization, client_state, schema, auth::permission::ALTER, _stats);
|
||||
co_await db::modify_tags(_mm, schema->ks_name(), schema->cf_name(), [&](std::map<sstring, sstring>& tags_map) {
|
||||
|
||||
@@ -31,6 +31,7 @@ set(swagger_files
|
||||
api-doc/column_family.json
|
||||
api-doc/commitlog.json
|
||||
api-doc/compaction_manager.json
|
||||
api-doc/client_routes.json
|
||||
api-doc/config.json
|
||||
api-doc/cql_server_test.json
|
||||
api-doc/endpoint_snitch_info.json
|
||||
@@ -68,6 +69,7 @@ target_sources(api
|
||||
PRIVATE
|
||||
api.cc
|
||||
cache_service.cc
|
||||
client_routes.cc
|
||||
collectd.cc
|
||||
column_family.cc
|
||||
commitlog.cc
|
||||
|
||||
23
api/api-doc/client_routes.def.json
Normal file
23
api/api-doc/client_routes.def.json
Normal file
@@ -0,0 +1,23 @@
|
||||
, "client_routes_entry": {
|
||||
"id": "client_routes_entry",
|
||||
"summary": "An entry storing client routes",
|
||||
"properties": {
|
||||
"connection_id": {"type": "string"},
|
||||
"host_id": {"type": "string", "format": "uuid"},
|
||||
"address": {"type": "string"},
|
||||
"port": {"type": "integer"},
|
||||
"tls_port": {"type": "integer"},
|
||||
"alternator_port": {"type": "integer"},
|
||||
"alternator_https_port": {"type": "integer"}
|
||||
},
|
||||
"required": ["connection_id", "host_id", "address"]
|
||||
}
|
||||
, "client_routes_key": {
|
||||
"id": "client_routes_key",
|
||||
"summary": "A key of client_routes_entry",
|
||||
"properties": {
|
||||
"connection_id": {"type": "string"},
|
||||
"host_id": {"type": "string", "format": "uuid"}
|
||||
}
|
||||
}
|
||||
|
||||
74
api/api-doc/client_routes.json
Normal file
74
api/api-doc/client_routes.json
Normal file
@@ -0,0 +1,74 @@
|
||||
, "/v2/client-routes":{
|
||||
"get": {
|
||||
"description":"List all client route entries",
|
||||
"operationId":"get_client_routes",
|
||||
"tags":["client_routes"],
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[],
|
||||
"responses":{
|
||||
"200":{
|
||||
"schema":{
|
||||
"type":"array",
|
||||
"items":{ "$ref":"#/definitions/client_routes_entry" }
|
||||
}
|
||||
},
|
||||
"default":{
|
||||
"description":"unexpected error",
|
||||
"schema":{"$ref":"#/definitions/ErrorModel"}
|
||||
}
|
||||
}
|
||||
},
|
||||
"post": {
|
||||
"description":"Upsert one or more client route entries",
|
||||
"operationId":"set_client_routes",
|
||||
"tags":["client_routes"],
|
||||
"parameters":[
|
||||
{
|
||||
"name":"body",
|
||||
"in":"body",
|
||||
"required":true,
|
||||
"schema":{
|
||||
"type":"array",
|
||||
"items":{ "$ref":"#/definitions/client_routes_entry" }
|
||||
}
|
||||
}
|
||||
],
|
||||
"responses":{
|
||||
"200":{ "description": "OK" },
|
||||
"default":{
|
||||
"description":"unexpected error",
|
||||
"schema":{ "$ref":"#/definitions/ErrorModel" }
|
||||
}
|
||||
}
|
||||
},
|
||||
"delete": {
|
||||
"description":"Delete one or more client route entries",
|
||||
"operationId":"delete_client_routes",
|
||||
"tags":["client_routes"],
|
||||
"parameters":[
|
||||
{
|
||||
"name":"body",
|
||||
"in":"body",
|
||||
"required":true,
|
||||
"schema":{
|
||||
"type":"array",
|
||||
"items":{ "$ref":"#/definitions/client_routes_key" }
|
||||
}
|
||||
}
|
||||
],
|
||||
"responses":{
|
||||
"200":{
|
||||
"description": "OK"
|
||||
},
|
||||
"default":{
|
||||
"description":"unexpected error",
|
||||
"schema":{
|
||||
"$ref":"#/definitions/ErrorModel"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -729,14 +729,6 @@
|
||||
"allowMultiple":false,
|
||||
"type":"boolean",
|
||||
"paramType":"query"
|
||||
},
|
||||
{
|
||||
"name":"use_sstable_identifier",
|
||||
"description":"Use the sstable identifier UUID, if available, rather than the sstable generation.",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"boolean",
|
||||
"paramType":"query"
|
||||
}
|
||||
]
|
||||
},
|
||||
@@ -3059,7 +3051,7 @@
|
||||
},
|
||||
{
|
||||
"name":"incremental_mode",
|
||||
"description":"Set the incremental repair mode. Can be 'disabled', 'incremental', or 'full'. 'incremental': The incremental repair logic is enabled. Unrepaired sstables will be included for repair. Repaired sstables will be skipped. The incremental repair states will be updated after repair. 'full': The incremental repair logic is enabled. Both repaired and unrepaired sstables will be included for repair. The incremental repair states will be updated after repair. 'disabled': The incremental repair logic is disabled completely. The incremental repair states, e.g., repaired_at in sstables and sstables_repaired_at in the system.tablets table, will not be updated after repair. When the option is not provided, it defaults to incremental mode.",
|
||||
"description":"Set the incremental repair mode. Can be 'disabled', 'incremental', or 'full'. 'incremental': The incremental repair logic is enabled. Unrepaired sstables will be included for repair. Repaired sstables will be skipped. The incremental repair states will be updated after repair. 'full': The incremental repair logic is enabled. Both repaired and unrepaired sstables will be included for repair. The incremental repair states will be updated after repair. 'disabled': The incremental repair logic is disabled completely. The incremental repair states, e.g., repaired_at in sstables and sstables_repaired_at in the system.tablets table, will not be updated after repair. When the option is not provided, it defaults to 'disabled' mode.",
|
||||
"required":false,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
|
||||
13
api/api.cc
13
api/api.cc
@@ -37,6 +37,7 @@
|
||||
#include "raft.hh"
|
||||
#include "gms/gossip_address_map.hh"
|
||||
#include "service_levels.hh"
|
||||
#include "client_routes.hh"
|
||||
|
||||
logging::logger apilog("api");
|
||||
|
||||
@@ -67,9 +68,11 @@ future<> set_server_init(http_context& ctx) {
|
||||
rb02->set_api_doc(r);
|
||||
rb02->register_api_file(r, "swagger20_header");
|
||||
rb02->register_api_file(r, "metrics");
|
||||
rb02->register_api_file(r, "client_routes");
|
||||
rb->register_function(r, "system",
|
||||
"The system related API");
|
||||
rb02->add_definitions_file(r, "metrics");
|
||||
rb02->add_definitions_file(r, "client_routes");
|
||||
set_system(ctx, r);
|
||||
rb->register_function(r, "error_injection",
|
||||
"The error injection API");
|
||||
@@ -129,6 +132,16 @@ future<> unset_server_storage_service(http_context& ctx) {
|
||||
return ctx.http_server.set_routes([&ctx] (routes& r) { unset_storage_service(ctx, r); });
|
||||
}
|
||||
|
||||
// Installs the client-routes REST handlers on the HTTP server.
//
// The actual registration is delegated to set_client_routes(), which is
// invoked with the server's routes object from inside set_routes().
// Both ctx and cr are captured by reference by the callback.
future<> set_server_client_routes(http_context& ctx, sharded<service::client_routes_service>& cr) {
    auto install_handlers = [&ctx, &cr] (routes& r) {
        set_client_routes(ctx, r, cr);
    };
    return ctx.http_server.set_routes(install_handlers);
}
|
||||
|
||||
// Counterpart of set_server_client_routes(): runs unset_client_routes()
// against the HTTP server's routes object via set_routes().
future<> unset_server_client_routes(http_context& ctx) {
    auto remove_handlers = [&ctx] (routes& r) {
        unset_client_routes(ctx, r);
    };
    return ctx.http_server.set_routes(remove_handlers);
}
|
||||
|
||||
// Registers the load-meter REST handlers by forwarding to the
// routes-taking set_load_meter() overload from inside set_routes().
// ctx and lm are captured by reference by the callback.
future<> set_load_meter(http_context& ctx, service::load_meter& lm) {
    auto install_handlers = [&ctx, &lm] (routes& r) {
        set_load_meter(ctx, r, lm);
    };
    return ctx.http_server.set_routes(install_handlers);
}
|
||||
|
||||
@@ -29,6 +29,7 @@ class storage_proxy;
|
||||
class storage_service;
|
||||
class raft_group0_client;
|
||||
class raft_group_registry;
|
||||
class client_routes_service;
|
||||
|
||||
} // namespace service
|
||||
|
||||
@@ -99,6 +100,8 @@ future<> set_server_snitch(http_context& ctx, sharded<locator::snitch_ptr>& snit
|
||||
future<> unset_server_snitch(http_context& ctx);
|
||||
future<> set_server_storage_service(http_context& ctx, sharded<service::storage_service>& ss, service::raft_group0_client&);
|
||||
future<> unset_server_storage_service(http_context& ctx);
|
||||
future<> set_server_client_routes(http_context& ctx, sharded<service::client_routes_service>& cr);
|
||||
future<> unset_server_client_routes(http_context& ctx);
|
||||
future<> set_server_sstables_loader(http_context& ctx, sharded<sstables_loader>& sst_loader);
|
||||
future<> unset_server_sstables_loader(http_context& ctx);
|
||||
future<> set_server_view_builder(http_context& ctx, sharded<db::view::view_builder>& vb, sharded<gms::gossiper>& g);
|
||||
|
||||
178
api/client_routes.cc
Normal file
178
api/client_routes.cc
Normal file
@@ -0,0 +1,178 @@
|
||||
/*
|
||||
* Copyright (C) 2025-present ScyllaDB
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
*/
|
||||
|
||||
#include <seastar/http/short_streams.hh>
|
||||
|
||||
#include "client_routes.hh"
|
||||
#include "api/api.hh"
|
||||
#include "service/storage_service.hh"
|
||||
#include "service/client_routes.hh"
|
||||
#include "utils/rjson.hh"
|
||||
|
||||
|
||||
#include "api/api-doc/client_routes.json.hh"
|
||||
|
||||
using namespace seastar::httpd;
|
||||
using namespace std::chrono_literals;
|
||||
using namespace json;
|
||||
|
||||
extern logging::logger apilog;
|
||||
|
||||
namespace api {
|
||||
|
||||
// Guards a client-routes REST endpoint behind the client_routes cluster feature.
//
// Reads the `client_routes` flag from the feature service of the local
// client_routes_service instance. When the flag is not set, a warning is
// logged and std::runtime_error is thrown; otherwise the function returns
// without side effects.
//
// cr            - sharded service whose local instance is consulted
// endpoint_name - name used in both the log line and the exception message
static void validate_client_routes_endpoint(sharded<service::client_routes_service>& cr, sstring endpoint_name) {
    const bool feature_missing = !cr.local().get_feature_service().client_routes;
    if (!feature_missing) {
        return;
    }

    apilog.warn("{}: called before the cluster feature was enabled", endpoint_name);
    throw std::runtime_error(fmt::format("{} requires all nodes to support the CLIENT_ROUTES cluster feature", endpoint_name));
}
|
||||
|
||||
// Extracts the mandatory string member `name` from the JSON value `v`.
//
// Returns the member's contents as an sstring built from the string pointer
// and its explicit length.
// Throws bad_param_exception when the member is absent ("Missing '<name>'")
// or present but not a JSON string ("'<name>' must be a string").
static sstring parse_string(const char* name, rapidjson::Value const& v) {
    const auto member = v.FindMember(name);
    const bool present = member != v.MemberEnd();

    if (present && member->value.IsString()) {
        const auto& text = member->value;
        return sstring(text.GetString(), text.GetStringLength());
    }

    // Absence is reported in preference to a type mismatch.
    if (!present) {
        throw bad_param_exception(fmt::format("Missing '{}'", name));
    }
    throw bad_param_exception(fmt::format("'{}' must be a string", name));
}
|
||||
|
||||
static std::optional<uint32_t> parse_port(const char* name, rapidjson::Value const& v) {
|
||||
const auto it = v.FindMember(name);
|
||||
if (it == v.MemberEnd()) {
|
||||
return std::nullopt;
|
||||
}
|
||||
if (!it->value.IsInt()) {
|
||||
throw bad_param_exception(fmt::format("'{}' must be an integer", name));
|
||||
}
|
||||
auto port = it->value.GetInt();
|
||||
if (port < 1 || port > 65535) {
|
||||
throw bad_param_exception(fmt::format("'{}' value={} is outside the allowed port range", name, port));
|
||||
}
|
||||
return port;
|
||||
}
|
||||
|
||||
// Converts the parsed request body of a "set client routes" call into a list
// of client_route_entry objects.
//
// `root` must be a JSON array whose elements are all objects. For every
// element the four optional port members are parsed with parse_port() and at
// least one of them has to be present; connection_id, host_id and address are
// read with parse_string(), with host_id passed to the utils::UUID constructor.
//
// Throws bad_param_exception when the body is not an array, an element is not
// an object, no port member is given, or a member fails
// parse_string()/parse_port() validation.
static std::vector<service::client_routes_service::client_route_entry> parse_set_client_array(const rapidjson::Document& root) {
    if (!root.IsArray()) {
        throw bad_param_exception("Body must be a JSON array");
    }

    const auto elements = root.GetArray();

    std::vector<service::client_routes_service::client_route_entry> entries;
    entries.reserve(elements.Size());

    for (const auto& element : elements) {
        if (!element.IsObject()) {
            throw bad_param_exception("Each element must be object");
        }

        const auto port = parse_port("port", element);
        const auto tls_port = parse_port("tls_port", element);
        const auto alternator_port = parse_port("alternator_port", element);
        const auto alternator_https_port = parse_port("alternator_https_port", element);

        const bool any_port_given = port.has_value()
                || tls_port.has_value()
                || alternator_port.has_value()
                || alternator_https_port.has_value();
        if (!any_port_given) {
            throw bad_param_exception("At least one port field ('port', 'tls_port', 'alternator_port', 'alternator_https_port') must be specified");
        }

        entries.emplace_back(
            parse_string("connection_id", element),
            utils::UUID{parse_string("host_id", element)},
            parse_string("address", element),
            port,
            tls_port,
            alternator_port,
            alternator_https_port
        );
    }

    return entries;
}
|
||||
|
||||
static
|
||||
future<json::json_return_type>
|
||||
rest_set_client_routes(http_context& ctx, sharded<service::client_routes_service>& cr, std::unique_ptr<http::request> req) {
|
||||
validate_client_routes_endpoint(cr, "rest_set_client_routes");
|
||||
|
||||
rapidjson::Document root;
|
||||
auto content = co_await util::read_entire_stream_contiguous(*req->content_stream);
|
||||
root.Parse(content.c_str());
|
||||
const auto route_entries = parse_set_client_array(root);
|
||||
|
||||
co_await cr.local().set_client_routes(route_entries);
|
||||
co_return seastar::json::json_void();
|
||||
}
|
||||
|
||||
// Converts the body of a "delete client routes" request into route keys.
//
// The document must be a JSON array of objects, each carrying the string
// members `connection_id` and `host_id`.
//
// \throws bad_param_exception when the body is not an array, an element is not
//         an object, or a required member is missing or not a string.
static std::vector<service::client_routes_service::client_route_key> parse_delete_client_array(const rapidjson::Document& root) {
    if (!root.IsArray()) {
        throw bad_param_exception("Body must be a JSON array");
    }

    std::vector<service::client_routes_service::client_route_key> v;
    v.reserve(root.GetArray().Size());
    for (const auto& element : root.GetArray()) {
        // parse_string() calls FindMember(), which is only valid on JSON objects;
        // reject scalars and nested arrays up front, as the "set" parser does.
        if (!element.IsObject()) {
            throw bad_param_exception("Each element must be object");
        }

        // Parse in a fixed order; evaluating these inline as call arguments
        // would leave the order, and thus the reported error, unspecified.
        auto connection_id = parse_string("connection_id", element);
        utils::UUID host_id{parse_string("host_id", element)};

        v.emplace_back(std::move(connection_id), host_id);
    }

    return v;
}
|
||||
|
||||
static
|
||||
future<json::json_return_type>
|
||||
rest_delete_client_routes(http_context& ctx, sharded<service::client_routes_service>& cr, std::unique_ptr<http::request> req) {
|
||||
validate_client_routes_endpoint(cr, "delete_client_routes");
|
||||
|
||||
rapidjson::Document root;
|
||||
auto content = co_await util::read_entire_stream_contiguous(*req->content_stream);
|
||||
root.Parse(content.c_str());
|
||||
|
||||
const auto route_keys = parse_delete_client_array(root);
|
||||
co_await cr.local().delete_client_routes(route_keys);
|
||||
co_return seastar::json::json_void();
|
||||
}
|
||||
|
||||
static
|
||||
future<json::json_return_type>
|
||||
rest_get_client_routes(http_context& ctx, sharded<service::client_routes_service>& cr, std::unique_ptr<http::request> req) {
|
||||
validate_client_routes_endpoint(cr, "get_client_routes");
|
||||
|
||||
co_return co_await cr.invoke_on(0, [] (service::client_routes_service& cr) -> future<json::json_return_type> {
|
||||
co_return json::json_return_type(stream_range_as_array(co_await cr.get_client_routes(), [](const service::client_routes_service::client_route_entry & entry) {
|
||||
seastar::httpd::client_routes_json::client_routes_entry obj;
|
||||
obj.connection_id = entry.connection_id;
|
||||
obj.host_id = fmt::to_string(entry.host_id);
|
||||
obj.address = entry.address;
|
||||
if (entry.port.has_value()) { obj.port = entry.port.value(); }
|
||||
if (entry.tls_port.has_value()) { obj.tls_port = entry.tls_port.value(); }
|
||||
if (entry.alternator_port.has_value()) { obj.alternator_port = entry.alternator_port.value(); }
|
||||
if (entry.alternator_https_port.has_value()) { obj.alternator_https_port = entry.alternator_https_port.value(); }
|
||||
return obj;
|
||||
}));
|
||||
});
|
||||
}
|
||||
|
||||
// Registers the set/delete/get client-routes handlers on `r`.
//
// The handlers capture `ctx` and `cr` by reference, so both must stay alive
// for as long as the routes remain registered.
void set_client_routes(http_context& ctx, routes& r, sharded<service::client_routes_service>& cr) {
    namespace cr_json = seastar::httpd::client_routes_json;
    using request_ptr = std::unique_ptr<seastar::http::request>;

    cr_json::set_client_routes.set(r, [&ctx, &cr] (request_ptr request) {
        return rest_set_client_routes(ctx, cr, std::move(request));
    });
    cr_json::delete_client_routes.set(r, [&ctx, &cr] (request_ptr request) {
        return rest_delete_client_routes(ctx, cr, std::move(request));
    });
    cr_json::get_client_routes.set(r, [&ctx, &cr] (request_ptr request) {
        return rest_get_client_routes(ctx, cr, std::move(request));
    });
}
|
||||
|
||||
// Unregisters the set/delete/get client-routes handlers from `r`.
// `ctx` is not used.
void unset_client_routes(http_context& ctx, routes& r) {
    namespace cr_json = seastar::httpd::client_routes_json;

    cr_json::set_client_routes.unset(r);
    cr_json::delete_client_routes.unset(r);
    cr_json::get_client_routes.unset(r);
}
|
||||
|
||||
}
|
||||
20
api/client_routes.hh
Normal file
20
api/client_routes.hh
Normal file
@@ -0,0 +1,20 @@
|
||||
/*
|
||||
* Copyright (C) 2025-present ScyllaDB
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <seastar/core/sharded.hh>
|
||||
#include <seastar/json/json_elements.hh>
|
||||
#include "api/api_init.hh"
|
||||
|
||||
namespace api {
|
||||
|
||||
void set_client_routes(http_context& ctx, httpd::routes& r, sharded<service::client_routes_service>& cr);
|
||||
void unset_client_routes(http_context& ctx, httpd::routes& r);
|
||||
|
||||
}
|
||||
@@ -2020,16 +2020,12 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
|
||||
auto tag = req->get_query_param("tag");
|
||||
auto column_families = split(req->get_query_param("cf"), ",");
|
||||
auto sfopt = req->get_query_param("sf");
|
||||
auto usiopt = req->get_query_param("use_sstable_identifier");
|
||||
db::snapshot_options opts = {
|
||||
.skip_flush = strcasecmp(sfopt.c_str(), "true") == 0,
|
||||
.use_sstable_identifier = strcasecmp(usiopt.c_str(), "true") == 0
|
||||
};
|
||||
auto sf = db::snapshot_ctl::skip_flush(strcasecmp(sfopt.c_str(), "true") == 0);
|
||||
|
||||
std::vector<sstring> keynames = split(req->get_query_param("kn"), ",");
|
||||
try {
|
||||
if (column_families.empty()) {
|
||||
co_await snap_ctl.local().take_snapshot(tag, keynames, opts);
|
||||
co_await snap_ctl.local().take_snapshot(tag, keynames, sf);
|
||||
} else {
|
||||
if (keynames.empty()) {
|
||||
throw httpd::bad_param_exception("The keyspace of column families must be specified");
|
||||
@@ -2037,7 +2033,7 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
|
||||
if (keynames.size() > 1) {
|
||||
throw httpd::bad_param_exception("Only one keyspace allowed when specifying a column family");
|
||||
}
|
||||
co_await snap_ctl.local().take_column_family_snapshot(keynames[0], column_families, tag, opts);
|
||||
co_await snap_ctl.local().take_column_family_snapshot(keynames[0], column_families, tag, sf);
|
||||
}
|
||||
co_return json_void();
|
||||
} catch (...) {
|
||||
@@ -2072,8 +2068,7 @@ void set_snapshot(http_context& ctx, routes& r, sharded<db::snapshot_ctl>& snap_
|
||||
auto info = parse_scrub_options(ctx, std::move(req));
|
||||
|
||||
if (!info.snapshot_tag.empty()) {
|
||||
db::snapshot_options opts = {.skip_flush = false, .use_sstable_identifier = false};
|
||||
co_await snap_ctl.local().take_column_family_snapshot(info.keyspace, info.column_families, info.snapshot_tag, opts);
|
||||
co_await snap_ctl.local().take_column_family_snapshot(info.keyspace, info.column_families, info.snapshot_tag, db::snapshot_ctl::skip_flush::no);
|
||||
}
|
||||
|
||||
compaction::compaction_stats stats;
|
||||
|
||||
@@ -146,8 +146,7 @@ void set_tasks_compaction_module(http_context& ctx, routes& r, sharded<service::
|
||||
auto info = parse_scrub_options(ctx, std::move(req));
|
||||
|
||||
if (!info.snapshot_tag.empty()) {
|
||||
db::snapshot_options opts = {.skip_flush = false, .use_sstable_identifier = false};
|
||||
co_await snap_ctl.local().take_column_family_snapshot(info.keyspace, info.column_families, info.snapshot_tag, opts);
|
||||
co_await snap_ctl.local().take_column_family_snapshot(info.keyspace, info.column_families, info.snapshot_tag, db::snapshot_ctl::skip_flush::no);
|
||||
}
|
||||
|
||||
auto& compaction_module = db.local().get_compaction_manager().get_task_manager_module();
|
||||
|
||||
@@ -9,7 +9,6 @@
|
||||
#include "auth/allow_all_authenticator.hh"
|
||||
|
||||
#include "service/migration_manager.hh"
|
||||
#include "utils/alien_worker.hh"
|
||||
#include "utils/class_registrator.hh"
|
||||
|
||||
namespace auth {
|
||||
@@ -23,7 +22,6 @@ static const class_registrator<
|
||||
cql3::query_processor&,
|
||||
::service::raft_group0_client&,
|
||||
::service::migration_manager&,
|
||||
cache&,
|
||||
utils::alien_worker&> registration("org.apache.cassandra.auth.AllowAllAuthenticator");
|
||||
cache&> registration("org.apache.cassandra.auth.AllowAllAuthenticator");
|
||||
|
||||
}
|
||||
|
||||
@@ -14,7 +14,6 @@
|
||||
#include "auth/authenticator.hh"
|
||||
#include "auth/cache.hh"
|
||||
#include "auth/common.hh"
|
||||
#include "utils/alien_worker.hh"
|
||||
|
||||
namespace cql3 {
|
||||
class query_processor;
|
||||
@@ -30,7 +29,7 @@ extern const std::string_view allow_all_authenticator_name;
|
||||
|
||||
class allow_all_authenticator final : public authenticator {
|
||||
public:
|
||||
allow_all_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&, utils::alien_worker&) {
|
||||
allow_all_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&) {
|
||||
}
|
||||
|
||||
virtual future<> start() override {
|
||||
|
||||
@@ -35,14 +35,13 @@ static const class_registrator<auth::authenticator
|
||||
, cql3::query_processor&
|
||||
, ::service::raft_group0_client&
|
||||
, ::service::migration_manager&
|
||||
, auth::cache&
|
||||
, utils::alien_worker&> cert_auth_reg(CERT_AUTH_NAME);
|
||||
, auth::cache&> cert_auth_reg(CERT_AUTH_NAME);
|
||||
|
||||
enum class auth::certificate_authenticator::query_source {
|
||||
subject, altname
|
||||
};
|
||||
|
||||
auth::certificate_authenticator::certificate_authenticator(cql3::query_processor& qp, ::service::raft_group0_client&, ::service::migration_manager&, auth::cache&, utils::alien_worker&)
|
||||
auth::certificate_authenticator::certificate_authenticator(cql3::query_processor& qp, ::service::raft_group0_client&, ::service::migration_manager&, auth::cache&)
|
||||
: _queries([&] {
|
||||
auto& conf = qp.db().get_config();
|
||||
auto queries = conf.auth_certificate_role_queries();
|
||||
@@ -77,9 +76,9 @@ auth::certificate_authenticator::certificate_authenticator(cql3::query_processor
|
||||
throw std::invalid_argument(fmt::format("Invalid source: {}", map.at(cfg_source_attr)));
|
||||
}
|
||||
continue;
|
||||
} catch (std::out_of_range&) {
|
||||
} catch (const std::out_of_range&) {
|
||||
// just fallthrough
|
||||
} catch (boost::regex_error&) {
|
||||
} catch (const boost::regex_error&) {
|
||||
std::throw_with_nested(std::invalid_argument(fmt::format("Invalid query expression: {}", map.at(cfg_query_attr))));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,7 +10,6 @@
|
||||
#pragma once
|
||||
|
||||
#include "auth/authenticator.hh"
|
||||
#include "utils/alien_worker.hh"
|
||||
#include <boost/regex_fwd.hpp> // IWYU pragma: keep
|
||||
|
||||
namespace cql3 {
|
||||
@@ -34,7 +33,7 @@ class certificate_authenticator : public authenticator {
|
||||
enum class query_source;
|
||||
std::vector<std::pair<query_source, boost::regex>> _queries;
|
||||
public:
|
||||
certificate_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&, utils::alien_worker&);
|
||||
certificate_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&);
|
||||
~certificate_authenticator();
|
||||
|
||||
future<> start() override;
|
||||
|
||||
@@ -94,7 +94,7 @@ static future<> create_legacy_metadata_table_if_missing_impl(
|
||||
try {
|
||||
co_return co_await mm.announce(co_await ::service::prepare_new_column_family_announcement(qp.proxy(), table, ts),
|
||||
std::move(group0_guard), format("auth: create {} metadata table", table->cf_name()));
|
||||
} catch (exceptions::already_exists_exception&) {}
|
||||
} catch (const exceptions::already_exists_exception&) {}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -256,7 +256,7 @@ future<> default_authorizer::revoke_all(std::string_view role_name, ::service::g
|
||||
} else {
|
||||
co_await collect_mutations(_qp, mc, query, {sstring(role_name)});
|
||||
}
|
||||
} catch (exceptions::request_execution_exception& e) {
|
||||
} catch (const exceptions::request_execution_exception& e) {
|
||||
alogger.warn("CassandraAuthorizer failed to revoke all permissions of {}: {}", role_name, e);
|
||||
}
|
||||
}
|
||||
@@ -293,13 +293,13 @@ future<> default_authorizer::revoke_all_legacy(const resource& resource) {
|
||||
[resource](auto ep) {
|
||||
try {
|
||||
std::rethrow_exception(ep);
|
||||
} catch (exceptions::request_execution_exception& e) {
|
||||
} catch (const exceptions::request_execution_exception& e) {
|
||||
alogger.warn("CassandraAuthorizer failed to revoke all permissions on {}: {}", resource, e);
|
||||
}
|
||||
|
||||
});
|
||||
});
|
||||
} catch (exceptions::request_execution_exception& e) {
|
||||
} catch (const exceptions::request_execution_exception& e) {
|
||||
alogger.warn("CassandraAuthorizer failed to revoke all permissions on {}: {}", resource, e);
|
||||
return make_ready_future();
|
||||
}
|
||||
|
||||
@@ -49,8 +49,7 @@ static const class_registrator<
|
||||
cql3::query_processor&,
|
||||
::service::raft_group0_client&,
|
||||
::service::migration_manager&,
|
||||
cache&,
|
||||
utils::alien_worker&> password_auth_reg("org.apache.cassandra.auth.PasswordAuthenticator");
|
||||
cache&> password_auth_reg("org.apache.cassandra.auth.PasswordAuthenticator");
|
||||
|
||||
static thread_local auto rng_for_salt = std::default_random_engine(std::random_device{}());
|
||||
|
||||
@@ -64,14 +63,13 @@ std::string password_authenticator::default_superuser(const db::config& cfg) {
|
||||
password_authenticator::~password_authenticator() {
|
||||
}
|
||||
|
||||
password_authenticator::password_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm, cache& cache, utils::alien_worker& hashing_worker)
|
||||
password_authenticator::password_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm, cache& cache)
|
||||
: _qp(qp)
|
||||
, _group0_client(g0)
|
||||
, _migration_manager(mm)
|
||||
, _cache(cache)
|
||||
, _stopped(make_ready_future<>())
|
||||
, _superuser(default_superuser(qp.db().get_config()))
|
||||
, _hashing_worker(hashing_worker)
|
||||
{}
|
||||
|
||||
static bool has_salted_hash(const cql3::untyped_result_set_row& row) {
|
||||
@@ -330,20 +328,18 @@ future<authenticated_user> password_authenticator::authenticate(
|
||||
}
|
||||
salted_hash = role->salted_hash;
|
||||
}
|
||||
const bool password_match = co_await _hashing_worker.submit<bool>([password = std::move(password), salted_hash] {
|
||||
return passwords::check(password, *salted_hash);
|
||||
});
|
||||
const bool password_match = co_await passwords::check(password, *salted_hash);
|
||||
if (!password_match) {
|
||||
throw exceptions::authentication_exception("Username and/or password are incorrect");
|
||||
}
|
||||
co_return username;
|
||||
} catch (std::system_error &) {
|
||||
} catch (const std::system_error &) {
|
||||
std::throw_with_nested(exceptions::authentication_exception("Could not verify password"));
|
||||
} catch (exceptions::request_execution_exception& e) {
|
||||
} catch (const exceptions::request_execution_exception& e) {
|
||||
std::throw_with_nested(exceptions::authentication_exception(e.what()));
|
||||
} catch (exceptions::authentication_exception& e) {
|
||||
} catch (const exceptions::authentication_exception& e) {
|
||||
std::throw_with_nested(e);
|
||||
} catch (exceptions::unavailable_exception& e) {
|
||||
} catch (const exceptions::unavailable_exception& e) {
|
||||
std::throw_with_nested(exceptions::authentication_exception(e.get_message()));
|
||||
} catch (...) {
|
||||
std::throw_with_nested(exceptions::authentication_exception("authentication failed"));
|
||||
|
||||
@@ -18,7 +18,6 @@
|
||||
#include "auth/passwords.hh"
|
||||
#include "auth/cache.hh"
|
||||
#include "service/raft/raft_group0_client.hh"
|
||||
#include "utils/alien_worker.hh"
|
||||
|
||||
namespace db {
|
||||
class config;
|
||||
@@ -49,13 +48,12 @@ class password_authenticator : public authenticator {
|
||||
shared_promise<> _superuser_created_promise;
|
||||
// We used to also support bcrypt, SHA-256, and MD5 (ref. scylladb#24524).
|
||||
constexpr static auth::passwords::scheme _scheme = passwords::scheme::sha_512;
|
||||
utils::alien_worker& _hashing_worker;
|
||||
|
||||
public:
|
||||
static db::consistency_level consistency_for_user(std::string_view role_name);
|
||||
static std::string default_superuser(const db::config&);
|
||||
|
||||
password_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&, utils::alien_worker&);
|
||||
password_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&);
|
||||
|
||||
~password_authenticator();
|
||||
|
||||
|
||||
@@ -7,6 +7,8 @@
|
||||
*/
|
||||
|
||||
#include "auth/passwords.hh"
|
||||
#include "utils/crypt_sha512.hh"
|
||||
#include <seastar/core/coroutine.hh>
|
||||
|
||||
#include <cerrno>
|
||||
|
||||
@@ -21,27 +23,48 @@ static thread_local crypt_data tlcrypt = {};
|
||||
|
||||
namespace detail {
|
||||
|
||||
// Checks the result pointer of a crypt-style hashing call.
//
// A null pointer or an output starting with '*' is treated as failure.
//
// \throws std::system_error built from the current `errno` on failure.
void verify_hashing_output(const char * res) {
    const bool hashing_succeeded = res != nullptr && res[0] != '*';
    if (hashing_succeeded) {
        return;
    }
    throw std::system_error(errno, std::system_category());
}
|
||||
|
||||
void verify_scheme(scheme scheme) {
|
||||
const sstring random_part_of_salt = "aaaabbbbccccdddd";
|
||||
|
||||
const sstring salt = sstring(prefix_for_scheme(scheme)) + random_part_of_salt;
|
||||
const char* e = crypt_r("fisk", salt.c_str(), &tlcrypt);
|
||||
|
||||
if (e && (e[0] != '*')) {
|
||||
return;
|
||||
try {
|
||||
verify_hashing_output(e);
|
||||
} catch (const std::system_error& ex) {
|
||||
throw no_supported_schemes();
|
||||
}
|
||||
|
||||
throw no_supported_schemes();
|
||||
}
|
||||
|
||||
sstring hash_with_salt(const sstring& pass, const sstring& salt) {
|
||||
auto res = crypt_r(pass.c_str(), salt.c_str(), &tlcrypt);
|
||||
if (!res || (res[0] == '*')) {
|
||||
throw std::system_error(errno, std::system_category());
|
||||
}
|
||||
verify_hashing_output(res);
|
||||
return res;
|
||||
}
|
||||
|
||||
// Asynchronous counterpart of hash_with_salt().
//
// Only SHA-512 hashes of passphrases of at most 255 bytes go through the
// awaitable __crypt_sha512 implementation. Everything else falls back to
// crypt_r from `<crypt.h>`, which can stall.
//
// `pass` and `salt` are taken by reference, so the caller must keep them alive
// until the returned future resolves.
//
// \throws std::system_error (via verify_hashing_output) when hashing fails.
seastar::future<sstring> hash_with_salt_async(const sstring& pass, const sstring& salt) {
    const bool use_awaitable_sha512 = salt.starts_with(prefix_for_scheme(scheme::sha_512)) && pass.size() <= 255;

    if (!use_awaitable_sha512) {
        // Fallback: synchronous libc implementation using the thread-local crypt state.
        const char * hashed = crypt_r(pass.c_str(), salt.c_str(), &tlcrypt);
        verify_hashing_output(hashed);
        co_return sstring(hashed);
    }

    // Output buffer handed to __crypt_sha512; the result is copied into an
    // sstring before this frame goes away.
    char buf[128];
    const char * hashed = co_await __crypt_sha512(pass.c_str(), salt.c_str(), buf);
    verify_hashing_output(hashed);
    co_return sstring(hashed);
}
|
||||
|
||||
std::string_view prefix_for_scheme(scheme c) noexcept {
|
||||
switch (c) {
|
||||
case scheme::bcrypt_y: return "$2y$";
|
||||
@@ -58,8 +81,9 @@ no_supported_schemes::no_supported_schemes()
|
||||
: std::runtime_error("No allowed hashing schemes are supported on this system") {
|
||||
}
|
||||
|
||||
bool check(const sstring& pass, const sstring& salted_hash) {
|
||||
return detail::hash_with_salt(pass, salted_hash) == salted_hash;
|
||||
seastar::future<bool> check(const sstring& pass, const sstring& salted_hash) {
|
||||
const auto pwd_hash = co_await detail::hash_with_salt_async(pass, salted_hash);
|
||||
co_return pwd_hash == salted_hash;
|
||||
}
|
||||
|
||||
} // namespace auth::passwords
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
#include <random>
|
||||
#include <stdexcept>
|
||||
|
||||
#include <seastar/core/future.hh>
|
||||
#include <seastar/core/sstring.hh>
|
||||
|
||||
#include "seastarx.hh"
|
||||
@@ -75,10 +76,19 @@ sstring generate_salt(RandomNumberEngine& g, scheme scheme) {
|
||||
|
||||
///
|
||||
/// Hash a password combined with an implementation-specific salt string.
|
||||
/// Deprecated in favor of `hash_with_salt_async`.
|
||||
///
|
||||
/// \throws \ref std::system_error when an unexpected implementation-specific error occurs.
|
||||
///
|
||||
sstring hash_with_salt(const sstring& pass, const sstring& salt);
|
||||
[[deprecated("Use hash_with_salt_async instead")]] sstring hash_with_salt(const sstring& pass, const sstring& salt);
|
||||
|
||||
///
|
||||
/// Async version of `hash_with_salt` that returns a future.
|
||||
/// If possible, hashing uses `coroutine::maybe_yield` to prevent reactor stalls.
|
||||
///
|
||||
/// \throws \ref std::system_error when an unexpected implementation-specific error occurs.
|
||||
///
|
||||
seastar::future<sstring> hash_with_salt_async(const sstring& pass, const sstring& salt);
|
||||
|
||||
} // namespace detail
|
||||
|
||||
@@ -107,6 +117,6 @@ sstring hash(const sstring& pass, RandomNumberEngine& g, scheme scheme) {
|
||||
///
|
||||
/// \throws \ref std::system_error when an unexpected implementation-specific error occurs.
|
||||
///
|
||||
bool check(const sstring& pass, const sstring& salted_hash);
|
||||
seastar::future<bool> check(const sstring& pass, const sstring& salted_hash);
|
||||
|
||||
} // namespace auth::passwords
|
||||
|
||||
@@ -35,10 +35,9 @@ static const class_registrator<
|
||||
cql3::query_processor&,
|
||||
::service::raft_group0_client&,
|
||||
::service::migration_manager&,
|
||||
cache&,
|
||||
utils::alien_worker&> saslauthd_auth_reg("com.scylladb.auth.SaslauthdAuthenticator");
|
||||
cache&> saslauthd_auth_reg("com.scylladb.auth.SaslauthdAuthenticator");
|
||||
|
||||
saslauthd_authenticator::saslauthd_authenticator(cql3::query_processor& qp, ::service::raft_group0_client&, ::service::migration_manager&, cache&, utils::alien_worker&)
|
||||
saslauthd_authenticator::saslauthd_authenticator(cql3::query_processor& qp, ::service::raft_group0_client&, ::service::migration_manager&, cache&)
|
||||
: _socket_path(qp.db().get_config().saslauthd_socket_path())
|
||||
{}
|
||||
|
||||
|
||||
@@ -12,7 +12,6 @@
|
||||
|
||||
#include "auth/authenticator.hh"
|
||||
#include "auth/cache.hh"
|
||||
#include "utils/alien_worker.hh"
|
||||
|
||||
namespace cql3 {
|
||||
class query_processor;
|
||||
@@ -30,7 +29,7 @@ namespace auth {
|
||||
class saslauthd_authenticator : public authenticator {
|
||||
sstring _socket_path; ///< Path to the domain socket on which saslauthd is listening.
|
||||
public:
|
||||
saslauthd_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&,utils::alien_worker&);
|
||||
saslauthd_authenticator(cql3::query_processor&, ::service::raft_group0_client&, ::service::migration_manager&, cache&);
|
||||
|
||||
future<> start() override;
|
||||
|
||||
|
||||
@@ -191,8 +191,7 @@ service::service(
|
||||
::service::migration_manager& mm,
|
||||
const service_config& sc,
|
||||
maintenance_socket_enabled used_by_maintenance_socket,
|
||||
cache& cache,
|
||||
utils::alien_worker& hashing_worker)
|
||||
cache& cache)
|
||||
: service(
|
||||
std::move(c),
|
||||
cache,
|
||||
@@ -200,7 +199,7 @@ service::service(
|
||||
g0,
|
||||
mn,
|
||||
create_object<authorizer>(sc.authorizer_java_name, qp, g0, mm),
|
||||
create_object<authenticator>(sc.authenticator_java_name, qp, g0, mm, cache, hashing_worker),
|
||||
create_object<authenticator>(sc.authenticator_java_name, qp, g0, mm, cache),
|
||||
create_object<role_manager>(sc.role_manager_java_name, qp, g0, mm, cache),
|
||||
used_by_maintenance_socket) {
|
||||
}
|
||||
@@ -226,7 +225,7 @@ future<> service::create_legacy_keyspace_if_missing(::service::migration_manager
|
||||
try {
|
||||
co_return co_await mm.announce(::service::prepare_new_keyspace_announcement(db.real_database(), ksm, ts),
|
||||
std::move(group0_guard), seastar::format("auth_service: create {} keyspace", meta::legacy::AUTH_KS));
|
||||
} catch (::service::group0_concurrent_modification&) {
|
||||
} catch (const ::service::group0_concurrent_modification&) {
|
||||
log.info("Concurrent operation is detected while creating {} keyspace, retrying.", meta::legacy::AUTH_KS);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -27,7 +27,6 @@
|
||||
#include "cql3/description.hh"
|
||||
#include "seastarx.hh"
|
||||
#include "service/raft/raft_group0_client.hh"
|
||||
#include "utils/alien_worker.hh"
|
||||
#include "utils/observable.hh"
|
||||
#include "utils/serialized_action.hh"
|
||||
#include "service/maintenance_mode.hh"
|
||||
@@ -131,8 +130,7 @@ public:
|
||||
::service::migration_manager&,
|
||||
const service_config&,
|
||||
maintenance_socket_enabled,
|
||||
cache&,
|
||||
utils::alien_worker&);
|
||||
cache&);
|
||||
|
||||
future<> start(::service::migration_manager&, db::system_keyspace&);
|
||||
|
||||
|
||||
@@ -192,7 +192,7 @@ future<> standard_role_manager::legacy_create_default_role_if_missing() {
|
||||
{_superuser},
|
||||
cql3::query_processor::cache_internal::no).discard_result();
|
||||
log.info("Created default superuser role '{}'.", _superuser);
|
||||
} catch(const exceptions::unavailable_exception& e) {
|
||||
} catch (const exceptions::unavailable_exception& e) {
|
||||
log.warn("Skipped default role setup: some nodes were not ready; will retry");
|
||||
throw e;
|
||||
}
|
||||
|
||||
@@ -38,8 +38,8 @@ class transitional_authenticator : public authenticator {
|
||||
public:
|
||||
static const sstring PASSWORD_AUTHENTICATOR_NAME;
|
||||
|
||||
transitional_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm, cache& cache, utils::alien_worker& hashing_worker)
|
||||
: transitional_authenticator(std::make_unique<password_authenticator>(qp, g0, mm, cache, hashing_worker)) {
|
||||
transitional_authenticator(cql3::query_processor& qp, ::service::raft_group0_client& g0, ::service::migration_manager& mm, cache& cache)
|
||||
: transitional_authenticator(std::make_unique<password_authenticator>(qp, g0, mm, cache)) {
|
||||
}
|
||||
transitional_authenticator(std::unique_ptr<authenticator> a)
|
||||
: _authenticator(std::move(a)) {
|
||||
@@ -81,7 +81,7 @@ public:
|
||||
}).handle_exception([](auto ep) {
|
||||
try {
|
||||
std::rethrow_exception(ep);
|
||||
} catch (exceptions::authentication_exception&) {
|
||||
} catch (const exceptions::authentication_exception&) {
|
||||
// return anon user
|
||||
return make_ready_future<authenticated_user>(anonymous_user());
|
||||
}
|
||||
@@ -126,7 +126,7 @@ public:
|
||||
virtual bytes evaluate_response(bytes_view client_response) override {
|
||||
try {
|
||||
return _sasl->evaluate_response(client_response);
|
||||
} catch (exceptions::authentication_exception&) {
|
||||
} catch (const exceptions::authentication_exception&) {
|
||||
_complete = true;
|
||||
return {};
|
||||
}
|
||||
@@ -141,7 +141,7 @@ public:
|
||||
return _sasl->get_authenticated_user().handle_exception([](auto ep) {
|
||||
try {
|
||||
std::rethrow_exception(ep);
|
||||
} catch (exceptions::authentication_exception&) {
|
||||
} catch (const exceptions::authentication_exception&) {
|
||||
// return anon user
|
||||
return make_ready_future<authenticated_user>(anonymous_user());
|
||||
}
|
||||
@@ -241,8 +241,7 @@ static const class_registrator<
|
||||
cql3::query_processor&,
|
||||
::service::raft_group0_client&,
|
||||
::service::migration_manager&,
|
||||
auth::cache&,
|
||||
utils::alien_worker&> transitional_authenticator_reg(auth::PACKAGE_NAME + "TransitionalAuthenticator");
|
||||
auth::cache&> transitional_authenticator_reg(auth::PACKAGE_NAME + "TransitionalAuthenticator");
|
||||
|
||||
static const class_registrator<
|
||||
auth::authorizer,
|
||||
|
||||
@@ -859,6 +859,7 @@ scylla_core = (['message/messaging_service.cc',
|
||||
'utils/alien_worker.cc',
|
||||
'utils/array-search.cc',
|
||||
'utils/base64.cc',
|
||||
'utils/crypt_sha512.cc',
|
||||
'utils/logalloc.cc',
|
||||
'utils/large_bitset.cc',
|
||||
'utils/buffer_input_stream.cc',
|
||||
@@ -1157,6 +1158,7 @@ scylla_core = (['message/messaging_service.cc',
|
||||
'locator/topology.cc',
|
||||
'locator/util.cc',
|
||||
'service/client_state.cc',
|
||||
'service/client_routes.cc',
|
||||
'service/storage_service.cc',
|
||||
'service/session.cc',
|
||||
'service/task_manager_module.cc',
|
||||
@@ -1317,6 +1319,8 @@ api = ['api/api.cc',
|
||||
'api/storage_proxy.cc',
|
||||
Json2Code('api/api-doc/cache_service.json'),
|
||||
'api/cache_service.cc',
|
||||
Json2Code('api/api-doc/client_routes.json'),
|
||||
'api/client_routes.cc',
|
||||
Json2Code('api/api-doc/collectd.json'),
|
||||
'api/collectd.cc',
|
||||
Json2Code('api/api-doc/endpoint_snitch_info.json'),
|
||||
@@ -1479,7 +1483,6 @@ deps = {
|
||||
|
||||
pure_boost_tests = set([
|
||||
'test/boost/anchorless_list_test',
|
||||
'test/boost/auth_passwords_test',
|
||||
'test/boost/auth_resource_test',
|
||||
'test/boost/big_decimal_test',
|
||||
'test/boost/caching_options_test',
|
||||
|
||||
@@ -1322,6 +1322,10 @@ const std::vector<expr::expression>& statement_restrictions::index_restrictions(
|
||||
return _index_restrictions;
|
||||
}
|
||||
|
||||
bool statement_restrictions::is_empty() const {
|
||||
return !_where.has_value();
|
||||
}
|
||||
|
||||
// Current score table:
|
||||
// local and restrictions include full partition key: 2
|
||||
// global: 1
|
||||
|
||||
@@ -408,6 +408,8 @@ public:
|
||||
|
||||
/// Checks that the primary key restrictions don't contain null values, throws invalid_request_exception otherwise.
|
||||
void validate_primary_key(const query_options& options) const;
|
||||
|
||||
bool is_empty() const;
|
||||
};
|
||||
|
||||
statement_restrictions analyze_statement_restrictions(
|
||||
|
||||
@@ -1976,7 +1976,7 @@ mutation_fragments_select_statement::do_execute(query_processor& qp, service::qu
|
||||
if (it == indexes.end()) {
|
||||
throw exceptions::invalid_request_exception("ANN ordering by vector requires the column to be indexed using 'vector_index'");
|
||||
}
|
||||
if (index_opt || parameters->allow_filtering() || restrictions->need_filtering() || check_needs_allow_filtering_anyway(*restrictions)) {
|
||||
if (index_opt || parameters->allow_filtering() || !(restrictions->is_empty()) || check_needs_allow_filtering_anyway(*restrictions)) {
|
||||
throw exceptions::invalid_request_exception("ANN ordering by vector does not support filtering");
|
||||
}
|
||||
index_opt = *it;
|
||||
|
||||
@@ -42,6 +42,11 @@ table::get_index_manager() const {
|
||||
return _ops->get_index_manager(*this);
|
||||
}
|
||||
|
||||
// Returns this table's truncation time by delegating to the backing
// implementation object (`_ops`).
db_clock::time_point
table::get_truncation_time() const {
    const table& self = *this;
    return _ops->get_truncation_time(self);
}
|
||||
|
||||
lw_shared_ptr<keyspace_metadata>
|
||||
keyspace::metadata() const {
|
||||
return _ops->get_keyspace_metadata(*this);
|
||||
|
||||
@@ -77,6 +77,7 @@ public:
|
||||
schema_ptr schema() const;
|
||||
const std::vector<view_ptr>& views() const;
|
||||
const secondary_index::secondary_index_manager& get_index_manager() const;
|
||||
db_clock::time_point get_truncation_time() const;
|
||||
};
|
||||
|
||||
class keyspace {
|
||||
|
||||
@@ -27,6 +27,7 @@ public:
|
||||
virtual std::optional<table> try_find_table(database db, table_id id) const = 0;
|
||||
virtual const secondary_index::secondary_index_manager& get_index_manager(table t) const = 0;
|
||||
virtual schema_ptr get_table_schema(table t) const = 0;
|
||||
virtual db_clock::time_point get_truncation_time(table t) const = 0;
|
||||
virtual lw_shared_ptr<keyspace_metadata> get_keyspace_metadata(keyspace ks) const = 0;
|
||||
virtual bool is_internal(keyspace ks) const = 0;
|
||||
virtual const locator::abstract_replication_strategy& get_replication_strategy(keyspace ks) const = 0;
|
||||
|
||||
20
db/batchlog.hh
Normal file
20
db/batchlog.hh
Normal file
@@ -0,0 +1,20 @@
|
||||
/*
|
||||
* Copyright (C) 2025-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "mutation/mutation.hh"
|
||||
#include "utils/UUID.hh"
|
||||
|
||||
// Helpers for building mutations against the batchlog table.
namespace db {
|
||||
|
||||
// Builds the mutation that stores `mutations` as a batchlog entry identified
// by `id`, written at `now`, using serialization format `version`.
mutation get_batchlog_mutation_for(schema_ptr schema, const utils::chunked_vector<mutation>& mutations, int32_t version, db_clock::time_point now, const utils::UUID& id);
|
||||
|
||||
// Builds the mutation that deletes the batchlog entry identified by `id`
// and written at `now`.
mutation get_batchlog_delete_mutation(schema_ptr schema, int32_t version, db_clock::time_point now, const utils::UUID& id);
|
||||
|
||||
}
|
||||
@@ -10,6 +10,7 @@
|
||||
|
||||
#include <chrono>
|
||||
#include <exception>
|
||||
#include <ranges>
|
||||
#include <seastar/core/future-util.hh>
|
||||
#include <seastar/core/do_with.hh>
|
||||
#include <seastar/core/semaphore.hh>
|
||||
@@ -18,12 +19,14 @@
|
||||
#include <seastar/core/sleep.hh>
|
||||
|
||||
#include "batchlog_manager.hh"
|
||||
#include "batchlog.hh"
|
||||
#include "data_dictionary/data_dictionary.hh"
|
||||
#include "mutation/canonical_mutation.hh"
|
||||
#include "service/storage_proxy.hh"
|
||||
#include "system_keyspace.hh"
|
||||
#include "utils/rate_limiter.hh"
|
||||
#include "utils/log.hh"
|
||||
#include "utils/murmur_hash.hh"
|
||||
#include "db_clock.hh"
|
||||
#include "unimplemented.hh"
|
||||
#include "idl/frozen_schema.dist.hh"
|
||||
@@ -33,17 +36,94 @@
|
||||
#include "cql3/untyped_result_set.hh"
|
||||
#include "service_permit.hh"
|
||||
#include "cql3/query_processor.hh"
|
||||
#include "replica/database.hh"
|
||||
|
||||
static logging::logger blogger("batchlog_manager");
|
||||
|
||||
namespace db {
|
||||
|
||||
// Yields 256 batchlog shards. Even on the largest nodes we currently run on,
|
||||
// this should be enough to give every core a batchlog partition.
|
||||
static constexpr unsigned batchlog_shard_bits = 8;
|
||||
|
||||
// Maps the write time of a batch to a batchlog shard.
//
// The raw bytes of the tick count of `written_at` are hashed with
// murmur3 (x64, 128 bit, seed 0); the two 64-bit halves are xor-ed together
// and the lowest `batchlog_shard_bits` bits of the result are returned.
int32_t batchlog_shard_of(db_clock::time_point written_at) {
    const int64_t ticks = written_at.time_since_epoch().count();
    const bytes_view raw_ticks(reinterpret_cast<const signed char*>(&ticks), sizeof(ticks));

    std::array<uint64_t, 2> digest;
    utils::murmur_hash::hash3_x64_128(raw_ticks, 0, digest);

    const uint64_t shard_mask = (1ULL << batchlog_shard_bits) - 1;
    const uint64_t folded = digest[0] ^ digest[1];
    return folded & shard_mask;
}
|
||||
|
||||
std::pair<partition_key, clustering_key>
|
||||
get_batchlog_key(const schema& schema, int32_t version, db::batchlog_stage stage, int32_t batchlog_shard, db_clock::time_point written_at, std::optional<utils::UUID> id) {
|
||||
auto pkey = partition_key::from_exploded(schema, {serialized(version), serialized(int8_t(stage)), serialized(batchlog_shard)});
|
||||
|
||||
std::vector<bytes> ckey_components;
|
||||
ckey_components.reserve(2);
|
||||
ckey_components.push_back(serialized(written_at));
|
||||
if (id) {
|
||||
ckey_components.push_back(serialized(*id));
|
||||
}
|
||||
auto ckey = clustering_key::from_exploded(schema, ckey_components);
|
||||
|
||||
return {std::move(pkey), std::move(ckey)};
|
||||
}
|
||||
|
||||
std::pair<partition_key, clustering_key>
|
||||
get_batchlog_key(const schema& schema, int32_t version, db::batchlog_stage stage, db_clock::time_point written_at, std::optional<utils::UUID> id) {
|
||||
return get_batchlog_key(schema, version, stage, batchlog_shard_of(written_at), written_at, id);
|
||||
}
|
||||
|
||||
// Builds the mutation that writes an already serialized batch (`data`) into
// the batchlog table described by `schema`.
//
// The row is addressed by get_batchlog_key(version, stage, now, id) and its
// "data" column is set to a live cell holding `data`, stamped with a freshly
// generated write timestamp.
mutation get_batchlog_mutation_for(schema_ptr schema, managed_bytes data, int32_t version, db::batchlog_stage stage, db_clock::time_point now, const utils::UUID& id) {
    auto [pkey, row_key] = get_batchlog_key(*schema, version, stage, now, id);

    const auto write_ts = api::new_timestamp();

    mutation result(schema, pkey);
    // Avoid going through data_value and therefore `bytes`, as it can be large (#24809).
    auto data_column = schema->get_column_definition(to_bytes("data"));
    auto cell = atomic_cell::make_live(*data_column->type, write_ts, std::move(data));
    result.set_cell(row_key, *data_column, std::move(cell));

    return result;
}
|
||||
|
||||
// Builds the batchlog-table mutation that stores `mutations` under the given
// stage.
//
// Every mutation is converted to a canonical_mutation and serialized, in
// order, into one contiguous payload, which is then handed to the overload
// taking the serialized data.
mutation get_batchlog_mutation_for(schema_ptr schema, const utils::chunked_vector<mutation>& mutations, int32_t version, db::batchlog_stage stage, db_clock::time_point now, const utils::UUID& id) {
    bytes_ostream serialized_batch;
    {
        // Scoped so the canonical copies are released as soon as they are serialized.
        utils::chunked_vector<canonical_mutation> canonical(mutations.begin(), mutations.end());
        for (auto& cm : canonical) {
            ser::serialize(serialized_batch, cm);
        }
    }
    auto payload = std::move(serialized_batch).to_managed_bytes();

    return get_batchlog_mutation_for(std::move(schema), std::move(payload), version, stage, now, id);
}
|
||||
|
||||
// Stage-less overload: stores `mutations` as a batchlog entry in the
// `initial` stage.
mutation get_batchlog_mutation_for(schema_ptr schema, const utils::chunked_vector<mutation>& mutations, int32_t version, db_clock::time_point now, const utils::UUID& id) {
    const auto stage = batchlog_stage::initial;
    return get_batchlog_mutation_for(std::move(schema), mutations, version, stage, now, id);
}
|
||||
|
||||
// Builds the mutation that deletes a single batchlog entry.
//
// The entry is addressed by get_batchlog_key(version, stage, now, id); a
// tombstone with a freshly generated timestamp and the current gc_clock time
// is applied to its clustering key.
mutation get_batchlog_delete_mutation(schema_ptr schema, int32_t version, db::batchlog_stage stage, db_clock::time_point now, const utils::UUID& id) {
    auto [pkey, row_key] = get_batchlog_key(*schema, version, stage, now, id);
    mutation result(schema, pkey);

    const auto delete_ts = api::new_timestamp();
    const auto deletion_time = gc_clock::now();
    result.partition().apply_delete(*schema, row_key, tombstone(delete_ts, deletion_time));

    return result;
}
|
||||
|
||||
// Stage-less overload: deletes the batchlog entry stored in the `initial`
// stage.
mutation get_batchlog_delete_mutation(schema_ptr schema, int32_t version, db_clock::time_point now, const utils::UUID& id) {
    const auto stage = batchlog_stage::initial;
    return get_batchlog_delete_mutation(std::move(schema), version, stage, now, id);
}
|
||||
|
||||
} // namespace db
|
||||
|
||||
const std::chrono::seconds db::batchlog_manager::replay_interval;
|
||||
const uint32_t db::batchlog_manager::page_size;
|
||||
|
||||
db::batchlog_manager::batchlog_manager(cql3::query_processor& qp, db::system_keyspace& sys_ks, batchlog_manager_config config)
|
||||
: _qp(qp)
|
||||
, _sys_ks(sys_ks)
|
||||
, _write_request_timeout(std::chrono::duration_cast<db_clock::duration>(config.write_request_timeout))
|
||||
, _replay_timeout(config.replay_timeout)
|
||||
, _replay_rate(config.replay_rate)
|
||||
, _delay(config.delay)
|
||||
, _replay_cleanup_after_replays(config.replay_cleanup_after_replays)
|
||||
@@ -152,18 +232,75 @@ future<> db::batchlog_manager::stop() {
|
||||
}
|
||||
|
||||
future<size_t> db::batchlog_manager::count_all_batches() const {
|
||||
sstring query = format("SELECT count(*) FROM {}.{} BYPASS CACHE", system_keyspace::NAME, system_keyspace::BATCHLOG);
|
||||
sstring query = format("SELECT count(*) FROM {}.{} BYPASS CACHE", system_keyspace::NAME, system_keyspace::BATCHLOG_V2);
|
||||
return _qp.execute_internal(query, cql3::query_processor::cache_internal::yes).then([](::shared_ptr<cql3::untyped_result_set> rs) {
|
||||
return size_t(rs->one().get_as<int64_t>("count"));
|
||||
});
|
||||
}
|
||||
|
||||
db_clock::duration db::batchlog_manager::get_batch_log_timeout() const {
|
||||
// enough time for the actual write + BM removal mutation
|
||||
return _write_request_timeout * 2;
|
||||
// Migrates the entries of the legacy (v1) batchlog table into the v2 table.
//
// Does nothing once `_migration_done` is set. Otherwise, while holding
// `_gate`, every row of the v1 table is scanned: a row carrying the current
// messaging-service version is re-written into the v2 table in stage
// `failed_replay` and then deleted from v1. Rows with a missing or different
// version are left untouched and only logged.
//
// If the scan throws, a warning is logged and the function returns without
// setting `_migration_done`, so a later call attempts the migration again.
// On success the flag is set on every shard.
future<> db::batchlog_manager::maybe_migrate_v1_to_v2() {
    if (_migration_done) {
        co_return;
    }

    // The lambda below is a coroutine: its captures live in the closure
    // object, not in the coroutine frame. Awaiting with_gate() in place keeps
    // the temporary closure alive until the coroutine completes. Returning the
    // future from a plain function would destroy the closure on return, while
    // the suspended coroutine still needs the captured `this`.
    co_await with_gate(_gate, [this] () mutable -> future<> {
        blogger.info("Migrating batchlog entries from v1 -> v2");

        auto schema_v1 = _qp.db().find_schema(system_keyspace::NAME, system_keyspace::BATCHLOG);
        auto schema_v2 = _qp.db().find_schema(system_keyspace::NAME, system_keyspace::BATCHLOG_V2);

        // Per-row callback: moves one v1 entry over to the v2 table.
        auto batch = [this, schema_v1, schema_v2] (const cql3::untyped_result_set::row& row) -> future<stop_iteration> {
            // check version of serialization format
            if (!row.has("version")) {
                blogger.warn("Not migrating logged batch because of unknown version");
                co_return stop_iteration::no;
            }

            auto version = row.get_as<int32_t>("version");
            if (version != netw::messaging_service::current_version) {
                blogger.warn("Not migrating logged batch because of incorrect version");
                co_return stop_iteration::no;
            }

            auto id = row.get_as<utils::UUID>("id");
            auto written_at = row.get_as<db_clock::time_point>("written_at");
            auto data = row.get_blob_fragmented("data");

            auto& sp = _qp.proxy();

            utils::get_local_injector().inject("batchlog_manager_fail_migration", [] { throw std::runtime_error("Error injection: failing batchlog migration"); });

            // Write the v2 copy first, so the entry is never absent from both tables.
            auto migrate_mut = get_batchlog_mutation_for(schema_v2, std::move(data), version, batchlog_stage::failed_replay, written_at, id);
            co_await sp.mutate_locally(migrate_mut, tracing::trace_state_ptr(), db::commitlog::force_sync::no);

            mutation delete_mut(schema_v1, partition_key::from_single_value(*schema_v1, serialized(id)));
            delete_mut.partition().apply_delete(*schema_v1, clustering_key_prefix::make_empty(), tombstone(api::new_timestamp(), gc_clock::now()));
            co_await sp.mutate_locally(delete_mut, tracing::trace_state_ptr(), db::commitlog::force_sync::no);

            co_return stop_iteration::no;
        };

        try {
            co_await _qp.query_internal(
                    format("SELECT * FROM {}.{} BYPASS CACHE", system_keyspace::NAME, system_keyspace::BATCHLOG),
                    db::consistency_level::ONE,
                    {},
                    page_size,
                    std::move(batch));
        } catch (...) {
            // Leave _migration_done unset so the migration is attempted again.
            blogger.warn("Batchlog v1 to v2 migration failed: {}; will retry", std::current_exception());
            co_return;
        }

        co_await container().invoke_on_all([] (auto& bm) {
            bm._migration_done = true;
        });

        blogger.info("Done migrating batchlog entries from v1 -> v2");
    });
}
|
||||
|
||||
future<db::all_batches_replayed> db::batchlog_manager::replay_all_failed_batches(post_replay_cleanup cleanup) {
|
||||
co_await maybe_migrate_v1_to_v2();
|
||||
|
||||
typedef db_clock::rep clock_type;
|
||||
|
||||
db::all_batches_replayed all_replayed = all_batches_replayed::yes;
|
||||
@@ -172,21 +309,26 @@ future<db::all_batches_replayed> db::batchlog_manager::replay_all_failed_batches
|
||||
auto throttle = _replay_rate / _qp.proxy().get_token_metadata_ptr()->count_normal_token_owners();
|
||||
auto limiter = make_lw_shared<utils::rate_limiter>(throttle);
|
||||
|
||||
auto schema = _qp.db().find_schema(system_keyspace::NAME, system_keyspace::BATCHLOG);
|
||||
auto delete_batch = [this, schema = std::move(schema)] (utils::UUID id) {
|
||||
auto key = partition_key::from_singular(*schema, id);
|
||||
mutation m(schema, key);
|
||||
auto now = service::client_state(service::client_state::internal_tag()).get_timestamp();
|
||||
m.partition().apply_delete(*schema, clustering_key_prefix::make_empty(), tombstone(now, gc_clock::now()));
|
||||
return _qp.proxy().mutate_locally(m, tracing::trace_state_ptr(), db::commitlog::force_sync::no);
|
||||
auto schema = _qp.db().find_schema(system_keyspace::NAME, system_keyspace::BATCHLOG_V2);
|
||||
|
||||
struct replay_stats {
|
||||
std::optional<db_clock::time_point> min_too_fresh;
|
||||
bool need_cleanup = false;
|
||||
};
|
||||
|
||||
auto batch = [this, limiter, delete_batch = std::move(delete_batch), &all_replayed](const cql3::untyped_result_set::row& row) -> future<stop_iteration> {
|
||||
std::unordered_map<int32_t, replay_stats> replay_stats_per_shard;
|
||||
|
||||
// Use a stable `now` across all batches, so skip/replay decisions are the
|
||||
// same across a whole prefix of written_at (across all ids).
|
||||
const auto now = db_clock::now();
|
||||
|
||||
auto batch = [this, cleanup, limiter, schema, &all_replayed, &replay_stats_per_shard, now] (const cql3::untyped_result_set::row& row) -> future<stop_iteration> {
|
||||
const auto stage = static_cast<batchlog_stage>(row.get_as<int8_t>("stage"));
|
||||
const auto batch_shard = row.get_as<int32_t>("shard");
|
||||
auto written_at = row.get_as<db_clock::time_point>("written_at");
|
||||
auto id = row.get_as<utils::UUID>("id");
|
||||
// enough time for the actual write + batchlog entry mutation delivery (two separate requests).
|
||||
auto now = db_clock::now();
|
||||
auto timeout = get_batch_log_timeout();
|
||||
auto timeout = _replay_timeout;
|
||||
|
||||
if (utils::get_local_injector().is_enabled("skip_batch_replay")) {
|
||||
blogger.debug("Skipping batch replay due to skip_batch_replay injection");
|
||||
@@ -194,52 +336,48 @@ future<db::all_batches_replayed> db::batchlog_manager::replay_all_failed_batches
|
||||
co_return stop_iteration::no;
|
||||
}
|
||||
|
||||
// check version of serialization format
|
||||
if (!row.has("version")) {
|
||||
blogger.warn("Skipping logged batch because of unknown version");
|
||||
co_await delete_batch(id);
|
||||
co_return stop_iteration::no;
|
||||
}
|
||||
|
||||
auto version = row.get_as<int32_t>("version");
|
||||
if (version != netw::messaging_service::current_version) {
|
||||
blogger.warn("Skipping logged batch because of incorrect version {}; current version = {}", version, netw::messaging_service::current_version);
|
||||
co_await delete_batch(id);
|
||||
co_return stop_iteration::no;
|
||||
}
|
||||
|
||||
auto data = row.get_blob_unfragmented("data");
|
||||
|
||||
blogger.debug("Replaying batch {}", id);
|
||||
blogger.debug("Replaying batch {} from stage {} and batch shard {}", id, int32_t(stage), batch_shard);
|
||||
|
||||
utils::chunked_vector<mutation> mutations;
|
||||
bool send_failed = false;
|
||||
|
||||
auto& shard_written_at = replay_stats_per_shard.try_emplace(batch_shard, replay_stats{}).first->second;
|
||||
|
||||
try {
|
||||
auto fms = make_lw_shared<std::deque<canonical_mutation>>();
|
||||
utils::chunked_vector<std::pair<canonical_mutation, schema_ptr>> fms;
|
||||
auto in = ser::as_input_stream(data);
|
||||
while (in.size()) {
|
||||
fms->emplace_back(ser::deserialize(in, std::type_identity<canonical_mutation>()));
|
||||
schema_ptr s = _qp.db().find_schema(fms->back().column_family_id());
|
||||
timeout = std::min(timeout, std::chrono::duration_cast<db_clock::duration>(s->tombstone_gc_options().propagation_delay_in_seconds()));
|
||||
auto fm = ser::deserialize(in, std::type_identity<canonical_mutation>());
|
||||
const auto tbl = _qp.db().try_find_table(fm.column_family_id());
|
||||
if (!tbl) {
|
||||
continue;
|
||||
}
|
||||
if (written_at <= tbl->get_truncation_time()) {
|
||||
continue;
|
||||
}
|
||||
schema_ptr s = tbl->schema();
|
||||
if (s->tombstone_gc_options().mode() == tombstone_gc_mode::repair) {
|
||||
timeout = std::min(timeout, std::chrono::duration_cast<db_clock::duration>(s->tombstone_gc_options().propagation_delay_in_seconds()));
|
||||
}
|
||||
fms.emplace_back(std::move(fm), std::move(s));
|
||||
}
|
||||
|
||||
if (now < written_at + timeout) {
|
||||
blogger.debug("Skipping replay of {}, too fresh", id);
|
||||
|
||||
shard_written_at.min_too_fresh = std::min(shard_written_at.min_too_fresh.value_or(written_at), written_at);
|
||||
|
||||
co_return stop_iteration::no;
|
||||
}
|
||||
|
||||
auto size = data.size();
|
||||
|
||||
auto mutations = co_await map_reduce(*fms, [this, written_at] (canonical_mutation& fm) {
|
||||
const auto& cf = _qp.proxy().local_db().find_column_family(fm.column_family_id());
|
||||
return make_ready_future<canonical_mutation*>(written_at > cf.get_truncation_time() ? &fm : nullptr);
|
||||
},
|
||||
utils::chunked_vector<mutation>(),
|
||||
[this] (utils::chunked_vector<mutation> mutations, canonical_mutation* fm) {
|
||||
if (fm) {
|
||||
schema_ptr s = _qp.db().find_schema(fm->column_family_id());
|
||||
mutations.emplace_back(fm->to_mutation(s));
|
||||
}
|
||||
return mutations;
|
||||
});
|
||||
for (const auto& [fm, s] : fms) {
|
||||
mutations.emplace_back(fm.to_mutation(s));
|
||||
co_await maybe_yield();
|
||||
}
|
||||
|
||||
if (!mutations.empty()) {
|
||||
const auto ttl = [written_at]() -> clock_type {
|
||||
@@ -265,7 +403,11 @@ future<db::all_batches_replayed> db::batchlog_manager::replay_all_failed_batches
|
||||
co_await limiter->reserve(size);
|
||||
_stats.write_attempts += mutations.size();
|
||||
auto timeout = db::timeout_clock::now() + write_timeout;
|
||||
co_await _qp.proxy().send_batchlog_replay_to_all_replicas(std::move(mutations), timeout);
|
||||
if (cleanup) {
|
||||
co_await _qp.proxy().send_batchlog_replay_to_all_replicas(mutations, timeout);
|
||||
} else {
|
||||
co_await _qp.proxy().send_batchlog_replay_to_all_replicas(std::move(mutations), timeout);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (data_dictionary::no_such_keyspace& ex) {
|
||||
@@ -279,31 +421,80 @@ future<db::all_batches_replayed> db::batchlog_manager::replay_all_failed_batches
|
||||
// Do _not_ remove the batch, assuming we got a node write error.
|
||||
// Since we don't have hints (which origin is satisfied with),
|
||||
// we have to resort to keeping this batch to next lap.
|
||||
co_return stop_iteration::no;
|
||||
if (!cleanup || stage == batchlog_stage::failed_replay) {
|
||||
co_return stop_iteration::no;
|
||||
}
|
||||
send_failed = true;
|
||||
}
|
||||
|
||||
auto& sp = _qp.proxy();
|
||||
|
||||
if (send_failed) {
|
||||
blogger.debug("Moving batch {} to stage failed_replay", id);
|
||||
auto m = get_batchlog_mutation_for(schema, mutations, netw::messaging_service::current_version, batchlog_stage::failed_replay, written_at, id);
|
||||
co_await sp.mutate_locally(m, tracing::trace_state_ptr(), db::commitlog::force_sync::no);
|
||||
}
|
||||
|
||||
// delete batch
|
||||
co_await delete_batch(id);
|
||||
auto m = get_batchlog_delete_mutation(schema, netw::messaging_service::current_version, stage, written_at, id);
|
||||
co_await _qp.proxy().mutate_locally(m, tracing::trace_state_ptr(), db::commitlog::force_sync::no);
|
||||
|
||||
shard_written_at.need_cleanup = true;
|
||||
|
||||
co_return stop_iteration::no;
|
||||
};
|
||||
|
||||
co_await with_gate(_gate, [this, cleanup, batch = std::move(batch)] () mutable -> future<> {
|
||||
blogger.debug("Started replayAllFailedBatches (cpu {})", this_shard_id());
|
||||
co_await with_gate(_gate, [this, cleanup, &all_replayed, batch = std::move(batch), now, &replay_stats_per_shard] () mutable -> future<> {
|
||||
blogger.debug("Started replayAllFailedBatches with cleanup: {}", cleanup);
|
||||
co_await utils::get_local_injector().inject("add_delay_to_batch_replay", std::chrono::milliseconds(1000));
|
||||
co_await _qp.query_internal(
|
||||
format("SELECT id, data, written_at, version FROM {}.{} BYPASS CACHE", system_keyspace::NAME, system_keyspace::BATCHLOG),
|
||||
db::consistency_level::ONE,
|
||||
{},
|
||||
page_size,
|
||||
std::move(batch)).then([this, cleanup] {
|
||||
if (cleanup == post_replay_cleanup::no) {
|
||||
return make_ready_future<>();
|
||||
|
||||
auto schema = _qp.db().find_schema(system_keyspace::NAME, system_keyspace::BATCHLOG_V2);
|
||||
|
||||
co_await coroutine::parallel_for_each(std::views::iota(0, 16), [&] (int32_t chunk) -> future<> {
|
||||
const int32_t batchlog_chunk_base = chunk * 16;
|
||||
for (int32_t i = 0; i < 16; ++i) {
|
||||
int32_t batchlog_shard = batchlog_chunk_base + i;
|
||||
|
||||
co_await _qp.query_internal(
|
||||
format("SELECT * FROM {}.{} WHERE version = ? AND stage = ? AND shard = ? BYPASS CACHE", system_keyspace::NAME, system_keyspace::BATCHLOG_V2),
|
||||
db::consistency_level::ONE,
|
||||
{data_value(netw::messaging_service::current_version), data_value(int8_t(batchlog_stage::failed_replay)), data_value(batchlog_shard)},
|
||||
page_size,
|
||||
batch);
|
||||
|
||||
co_await _qp.query_internal(
|
||||
format("SELECT * FROM {}.{} WHERE version = ? AND stage = ? AND shard = ? BYPASS CACHE", system_keyspace::NAME, system_keyspace::BATCHLOG_V2),
|
||||
db::consistency_level::ONE,
|
||||
{data_value(netw::messaging_service::current_version), data_value(int8_t(batchlog_stage::initial)), data_value(batchlog_shard)},
|
||||
page_size,
|
||||
batch);
|
||||
|
||||
if (cleanup != post_replay_cleanup::yes) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto it = replay_stats_per_shard.find(batchlog_shard);
|
||||
if (it == replay_stats_per_shard.end() || !it->second.need_cleanup) {
|
||||
// Nothing was replayed on this batchlog shard, nothing to cleanup.
|
||||
continue;
|
||||
}
|
||||
|
||||
const auto write_time = it->second.min_too_fresh.value_or(now - _replay_timeout);
|
||||
const auto end_weight = it->second.min_too_fresh ? bound_weight::before_all_prefixed : bound_weight::after_all_prefixed;
|
||||
auto [key, ckey] = get_batchlog_key(*schema, netw::messaging_service::current_version, batchlog_stage::initial, batchlog_shard, write_time, {});
|
||||
auto end_pos = position_in_partition(partition_region::clustered, end_weight, std::move(ckey));
|
||||
|
||||
range_tombstone rt(position_in_partition::before_all_clustered_rows(), std::move(end_pos), tombstone(api::new_timestamp(), gc_clock::now()));
|
||||
|
||||
blogger.trace("Clean up batchlog shard {} with range tombstone {}", batchlog_shard, rt);
|
||||
|
||||
mutation m(schema, key);
|
||||
m.partition().apply_row_tombstone(*schema, std::move(rt));
|
||||
co_await _qp.proxy().mutate_locally(m, tracing::trace_state_ptr(), db::commitlog::force_sync::no);
|
||||
}
|
||||
// Replaying batches could have generated tombstones, flush to disk,
|
||||
// where they can be compacted away.
|
||||
return replica::database::flush_table_on_all_shards(_qp.proxy().get_db(), system_keyspace::NAME, system_keyspace::BATCHLOG);
|
||||
}).then([] {
|
||||
blogger.debug("Finished replayAllFailedBatches");
|
||||
});
|
||||
|
||||
blogger.debug("Finished replayAllFailedBatches with all_replayed: {}", all_replayed);
|
||||
});
|
||||
|
||||
co_return all_replayed;
|
||||
|
||||
@@ -34,12 +34,17 @@ class system_keyspace;
|
||||
using all_batches_replayed = bool_class<struct all_batches_replayed_tag>;
|
||||
|
||||
struct batchlog_manager_config {
|
||||
std::chrono::duration<double> write_request_timeout;
|
||||
db_clock::duration replay_timeout;
|
||||
uint64_t replay_rate = std::numeric_limits<uint64_t>::max();
|
||||
std::chrono::milliseconds delay = std::chrono::milliseconds(0);
|
||||
unsigned replay_cleanup_after_replays;
|
||||
};
|
||||
|
||||
// Stage of a batchlog entry. Stored as a single signed byte.
enum class batchlog_stage : int8_t {
    // First stage; the default used by the stage-less batchlog helpers.
    initial = 0,
    // Stage for entries that were moved aside after a replay attempt failed.
    failed_replay = 1,
};
|
||||
|
||||
class batchlog_manager : public peering_sharded_service<batchlog_manager> {
|
||||
public:
|
||||
using post_replay_cleanup = bool_class<class post_replay_cleanup_tag>;
|
||||
@@ -59,7 +64,7 @@ private:
|
||||
|
||||
cql3::query_processor& _qp;
|
||||
db::system_keyspace& _sys_ks;
|
||||
db_clock::duration _write_request_timeout;
|
||||
db_clock::duration _replay_timeout;
|
||||
uint64_t _replay_rate;
|
||||
std::chrono::milliseconds _delay;
|
||||
unsigned _replay_cleanup_after_replays = 100;
|
||||
@@ -71,6 +76,14 @@ private:
|
||||
|
||||
gc_clock::time_point _last_replay;
|
||||
|
||||
// Was the v1 -> v2 migration already done since last restart?
|
||||
// The migration is attempted once after each restart. This is redundant but
|
||||
// keeps things simple. Once no upgrade path exists from a ScyllaDB version
|
||||
// which can still produce v1 entries, this migration code can be removed.
|
||||
bool _migration_done = false;
|
||||
|
||||
future<> maybe_migrate_v1_to_v2();
|
||||
|
||||
future<all_batches_replayed> replay_all_failed_batches(post_replay_cleanup cleanup);
|
||||
public:
|
||||
// Takes a QP, not a distributes. Because this object is supposed
|
||||
@@ -85,10 +98,13 @@ public:
|
||||
future<all_batches_replayed> do_batch_log_replay(post_replay_cleanup cleanup);
|
||||
|
||||
future<size_t> count_all_batches() const;
|
||||
db_clock::duration get_batch_log_timeout() const;
|
||||
gc_clock::time_point get_last_replay() const {
|
||||
return _last_replay;
|
||||
}
|
||||
|
||||
const stats& stats() const {
|
||||
return _stats;
|
||||
}
|
||||
private:
|
||||
future<> batchlog_replay_loop();
|
||||
};
|
||||
|
||||
@@ -54,12 +54,14 @@ public:
|
||||
uint64_t applied_mutations = 0;
|
||||
uint64_t corrupt_bytes = 0;
|
||||
uint64_t truncated_at = 0;
|
||||
uint64_t broken_files = 0;
|
||||
|
||||
// Adds the counters of `s` to this object and returns *this.
// Note: `truncated_at` is not merged by this operator.
stats& operator+=(const stats& s) {
|
||||
invalid_mutations += s.invalid_mutations;
|
||||
skipped_mutations += s.skipped_mutations;
|
||||
applied_mutations += s.applied_mutations;
|
||||
corrupt_bytes += s.corrupt_bytes;
|
||||
broken_files += s.broken_files;
|
||||
return *this;
|
||||
}
|
||||
stats operator+(const stats& s) const {
|
||||
@@ -192,6 +194,8 @@ db::commitlog_replayer::impl::recover(const commitlog::descriptor& d, const comm
|
||||
s->corrupt_bytes += e.bytes();
|
||||
} catch (commitlog::segment_truncation& e) {
|
||||
s->truncated_at = e.position();
|
||||
} catch (commitlog::header_checksum_error&) {
|
||||
++s->broken_files;
|
||||
} catch (...) {
|
||||
throw;
|
||||
}
|
||||
@@ -370,6 +374,9 @@ future<> db::commitlog_replayer::recover(std::vector<sstring> files, sstring fna
|
||||
if (stats.truncated_at != 0) {
|
||||
rlogger.warn("Truncated file: {} at position {}.", f, stats.truncated_at);
|
||||
}
|
||||
if (stats.broken_files != 0) {
|
||||
rlogger.warn("Corrupted file header: {}. Skipped.", f);
|
||||
}
|
||||
rlogger.debug("Log replay of {} complete, {} replayed mutations ({} invalid, {} skipped)"
|
||||
, f
|
||||
, stats.applied_mutations
|
||||
|
||||
@@ -1152,7 +1152,7 @@ db::config::config(std::shared_ptr<db::extensions> exts)
|
||||
"Number of threads with which to deliver hints. In multiple data-center deployments, consider increasing this number because cross data-center handoff is generally slower.")
|
||||
, batchlog_replay_throttle_in_kb(this, "batchlog_replay_throttle_in_kb", value_status::Unused, 1024,
|
||||
"Total maximum throttle. Throttling is reduced proportionally to the number of nodes in the cluster.")
|
||||
, batchlog_replay_cleanup_after_replays(this, "batchlog_replay_cleanup_after_replays", liveness::LiveUpdate, value_status::Used, 60,
|
||||
, batchlog_replay_cleanup_after_replays(this, "batchlog_replay_cleanup_after_replays", liveness::LiveUpdate, value_status::Used, 1,
|
||||
"Clean up batchlog memtable after every N replays. Replays are issued on a timer, every 60 seconds. So if batchlog_replay_cleanup_after_replays is set to 60, the batchlog memtable is flushed every 60 * 60 seconds.")
|
||||
/**
|
||||
* @Group Request scheduler properties
|
||||
|
||||
@@ -1262,16 +1262,9 @@ static future<> do_merge_schema(sharded<service::storage_proxy>& proxy, sharded
|
||||
{
|
||||
slogger.trace("do_merge_schema: {}", mutations);
|
||||
schema_applier ap(proxy, ss, sys_ks, reload);
|
||||
std::exception_ptr ex;
|
||||
try {
|
||||
co_await execute_do_merge_schema(proxy, ap, std::move(mutations));
|
||||
} catch (...) {
|
||||
ex = std::current_exception();
|
||||
}
|
||||
co_await ap.destroy();
|
||||
if (ex) {
|
||||
throw ex;
|
||||
}
|
||||
co_await execute_do_merge_schema(proxy, ap, std::move(mutations)).finally([&ap]() {
|
||||
return ap.destroy();
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -65,7 +65,7 @@ future<> snapshot_ctl::run_snapshot_modify_operation(noncopyable_function<future
|
||||
});
|
||||
}
|
||||
|
||||
future<> snapshot_ctl::take_snapshot(sstring tag, std::vector<sstring> keyspace_names, snapshot_options opts) {
|
||||
future<> snapshot_ctl::take_snapshot(sstring tag, std::vector<sstring> keyspace_names, skip_flush sf) {
|
||||
if (tag.empty()) {
|
||||
throw std::runtime_error("You must supply a snapshot name.");
|
||||
}
|
||||
@@ -74,21 +74,21 @@ future<> snapshot_ctl::take_snapshot(sstring tag, std::vector<sstring> keyspace_
|
||||
std::ranges::copy(_db.local().get_keyspaces() | std::views::keys, std::back_inserter(keyspace_names));
|
||||
};
|
||||
|
||||
return run_snapshot_modify_operation([tag = std::move(tag), keyspace_names = std::move(keyspace_names), opts, this] () mutable {
|
||||
return do_take_snapshot(std::move(tag), std::move(keyspace_names), opts);
|
||||
return run_snapshot_modify_operation([tag = std::move(tag), keyspace_names = std::move(keyspace_names), sf, this] () mutable {
|
||||
return do_take_snapshot(std::move(tag), std::move(keyspace_names), sf);
|
||||
});
|
||||
}
|
||||
|
||||
future<> snapshot_ctl::do_take_snapshot(sstring tag, std::vector<sstring> keyspace_names, snapshot_options opts) {
|
||||
future<> snapshot_ctl::do_take_snapshot(sstring tag, std::vector<sstring> keyspace_names, skip_flush sf) {
|
||||
co_await coroutine::parallel_for_each(keyspace_names, [tag, this] (const auto& ks_name) {
|
||||
return check_snapshot_not_exist(ks_name, tag);
|
||||
});
|
||||
co_await coroutine::parallel_for_each(keyspace_names, [this, tag = std::move(tag), opts] (const auto& ks_name) {
|
||||
return replica::database::snapshot_keyspace_on_all_shards(_db, ks_name, tag, opts);
|
||||
co_await coroutine::parallel_for_each(keyspace_names, [this, tag = std::move(tag), sf] (const auto& ks_name) {
|
||||
return replica::database::snapshot_keyspace_on_all_shards(_db, ks_name, tag, bool(sf));
|
||||
});
|
||||
}
|
||||
|
||||
future<> snapshot_ctl::take_column_family_snapshot(sstring ks_name, std::vector<sstring> tables, sstring tag, snapshot_options opts) {
|
||||
future<> snapshot_ctl::take_column_family_snapshot(sstring ks_name, std::vector<sstring> tables, sstring tag, skip_flush sf) {
|
||||
if (ks_name.empty()) {
|
||||
throw std::runtime_error("You must supply a keyspace name");
|
||||
}
|
||||
@@ -99,14 +99,14 @@ future<> snapshot_ctl::take_column_family_snapshot(sstring ks_name, std::vector<
|
||||
throw std::runtime_error("You must supply a snapshot name.");
|
||||
}
|
||||
|
||||
return run_snapshot_modify_operation([this, ks_name = std::move(ks_name), tables = std::move(tables), tag = std::move(tag), opts] () mutable {
|
||||
return do_take_column_family_snapshot(std::move(ks_name), std::move(tables), std::move(tag), opts);
|
||||
return run_snapshot_modify_operation([this, ks_name = std::move(ks_name), tables = std::move(tables), tag = std::move(tag), sf] () mutable {
|
||||
return do_take_column_family_snapshot(std::move(ks_name), std::move(tables), std::move(tag), sf);
|
||||
});
|
||||
}
|
||||
|
||||
future<> snapshot_ctl::do_take_column_family_snapshot(sstring ks_name, std::vector<sstring> tables, sstring tag, snapshot_options opts) {
|
||||
future<> snapshot_ctl::do_take_column_family_snapshot(sstring ks_name, std::vector<sstring> tables, sstring tag, skip_flush sf) {
|
||||
co_await check_snapshot_not_exist(ks_name, tag, tables);
|
||||
co_await replica::database::snapshot_tables_on_all_shards(_db, ks_name, std::move(tables), std::move(tag), opts);
|
||||
co_await replica::database::snapshot_tables_on_all_shards(_db, ks_name, std::move(tables), std::move(tag), bool(sf));
|
||||
}
|
||||
|
||||
future<> snapshot_ctl::clear_snapshot(sstring tag, std::vector<sstring> keyspace_names, sstring cf_name) {
|
||||
|
||||
@@ -38,13 +38,10 @@ class backup_task_impl;
|
||||
|
||||
} // snapshot namespace
|
||||
|
||||
// Bundle of boolean switches passed to the snapshot-taking entry points.
// Both switches default to off.
struct snapshot_options {
|
||||
// NOTE(review): presumably the counterpart of nodetool's -sf/--skip-flush
// ("do not flush memtables before snapshotting") — confirm against callers.
bool skip_flush = false;
|
||||
// NOTE(review): presumably the counterpart of nodetool's
// --use-sstable-identifier ("use the sstable identifier UUID, if available,
// rather than the sstable generation") — confirm against callers.
bool use_sstable_identifier = false;
|
||||
};
|
||||
|
||||
class snapshot_ctl : public peering_sharded_service<snapshot_ctl> {
|
||||
public:
|
||||
using skip_flush = bool_class<class skip_flush_tag>;
|
||||
|
||||
struct table_snapshot_details {
|
||||
int64_t total;
|
||||
int64_t live;
|
||||
@@ -73,8 +70,8 @@ public:
|
||||
*
|
||||
* @param tag the tag given to the snapshot; may not be null or empty
|
||||
*/
|
||||
future<> take_snapshot(sstring tag, snapshot_options opts = {}) {
|
||||
return take_snapshot(tag, {}, opts);
|
||||
future<> take_snapshot(sstring tag, skip_flush sf = skip_flush::no) {
|
||||
return take_snapshot(tag, {}, sf);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -83,7 +80,7 @@ public:
|
||||
* @param tag the tag given to the snapshot; may not be null or empty
|
||||
* @param keyspace_names the names of the keyspaces to snapshot; empty means "all"
|
||||
*/
|
||||
future<> take_snapshot(sstring tag, std::vector<sstring> keyspace_names, snapshot_options opts = {});
|
||||
future<> take_snapshot(sstring tag, std::vector<sstring> keyspace_names, skip_flush sf = skip_flush::no);
|
||||
|
||||
/**
|
||||
* Takes the snapshot of multiple tables. A snapshot name must be specified.
|
||||
@@ -92,7 +89,7 @@ public:
|
||||
* @param tables a vector of tables names to snapshot
|
||||
* @param tag the tag given to the snapshot; may not be null or empty
|
||||
*/
|
||||
future<> take_column_family_snapshot(sstring ks_name, std::vector<sstring> tables, sstring tag, snapshot_options opts = {});
|
||||
future<> take_column_family_snapshot(sstring ks_name, std::vector<sstring> tables, sstring tag, skip_flush sf = skip_flush::no);
|
||||
|
||||
/**
|
||||
* Remove the snapshot with the given name from the given keyspaces.
|
||||
@@ -130,8 +127,8 @@ private:
|
||||
|
||||
friend class snapshot::backup_task_impl;
|
||||
|
||||
future<> do_take_snapshot(sstring tag, std::vector<sstring> keyspace_names, snapshot_options opts = {} );
|
||||
future<> do_take_column_family_snapshot(sstring ks_name, std::vector<sstring> tables, sstring tag, snapshot_options opts = {});
|
||||
future<> do_take_snapshot(sstring tag, std::vector<sstring> keyspace_names, skip_flush sf = skip_flush::no);
|
||||
future<> do_take_column_family_snapshot(sstring ks_name, std::vector<sstring> tables, sstring tag, skip_flush sf = skip_flush::no);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -110,6 +110,7 @@ namespace {
|
||||
system_keyspace::v3::CDC_LOCAL,
|
||||
system_keyspace::DICTS,
|
||||
system_keyspace::VIEW_BUILDING_TASKS,
|
||||
system_keyspace::CLIENT_ROUTES,
|
||||
};
|
||||
if (ks_name == system_keyspace::NAME && tables.contains(cf_name)) {
|
||||
props.enable_schema_commitlog();
|
||||
@@ -137,8 +138,7 @@ namespace {
|
||||
system_keyspace::ROLE_PERMISSIONS,
|
||||
system_keyspace::DICTS,
|
||||
system_keyspace::VIEW_BUILDING_TASKS,
|
||||
// repair tasks
|
||||
system_keyspace::REPAIR_TASKS,
|
||||
system_keyspace::CLIENT_ROUTES,
|
||||
};
|
||||
if (ks_name == system_keyspace::NAME && tables.contains(cf_name)) {
|
||||
props.is_group0_table = true;
|
||||
@@ -215,6 +215,30 @@ schema_ptr system_keyspace::batchlog() {
|
||||
return batchlog;
|
||||
}
|
||||
|
||||
// Returns the schema of the system `batchlog_v2` table.
//
// The schema is built by the lambda below the first time this function is
// called on a thread and is then cached in a thread_local for later calls.
schema_ptr system_keyspace::batchlog_v2() {
    static thread_local auto schema = [] {
        schema_builder b(generate_legacy_id(NAME, BATCHLOG_V2), NAME, BATCHLOG_V2,
            // partition key
            {{"version", int32_type}, {"stage", byte_type}, {"shard", int32_type}},
            // clustering key
            {{"written_at", timestamp_type}, {"id", uuid_type}},
            // regular columns
            {{"data", bytes_type}},
            // static columns
            {},
            // regular column name type
            utf8_type,
            // comment
            "batches awaiting replay"
        );
        // Table-level options: no gc grace period, caching disabled.
        b.set_gc_grace_seconds(0);
        b.set_caching_options(caching_options::get_disabled_caching_options());
        b.with_hash_version();
        return b.build(schema_builder::compact_storage::no);
    }();
    return schema;
}
|
||||
|
||||
/*static*/ schema_ptr system_keyspace::paxos() {
|
||||
static thread_local auto paxos = [] {
|
||||
// FIXME: switch to the new schema_builder interface (with_column(...), etc)
|
||||
@@ -464,24 +488,6 @@ schema_ptr system_keyspace::repair_history() {
|
||||
return schema;
|
||||
}
|
||||
|
||||
// Returns the schema of the system `repair_tasks` table ("Record tablet
// repair tasks"). Built on first use per thread and cached in a thread_local.
schema_ptr system_keyspace::repair_tasks() {
    static thread_local auto cached = [] {
        auto legacy_id = generate_legacy_id(NAME, REPAIR_TASKS);
        return schema_builder(NAME, REPAIR_TASKS, std::optional(legacy_id))
                .with_column("task_uuid", uuid_type, column_kind::partition_key)
                .with_column("operation", utf8_type, column_kind::clustering_key)
                // First and last token of the tablet
                .with_column("first_token", long_type, column_kind::clustering_key)
                .with_column("last_token", long_type, column_kind::clustering_key)
                .with_column("timestamp", timestamp_type)
                .with_column("table_uuid", uuid_type, column_kind::static_column)
                .set_comment("Record tablet repair tasks")
                .with_hash_version()
                .build();
    }();
    return cached;
}
|
||||
|
||||
schema_ptr system_keyspace::built_indexes() {
|
||||
static thread_local auto built_indexes = [] {
|
||||
schema_builder builder(generate_legacy_id(NAME, BUILT_INDEXES), NAME, BUILT_INDEXES,
|
||||
@@ -1411,6 +1417,23 @@ schema_ptr system_keyspace::view_building_tasks() {
|
||||
return schema;
|
||||
}
|
||||
|
||||
// Returns the schema of the system `client_routes` table: rows are keyed by
// connection_id (partition key) and host_id (clustering key) and carry an
// address plus four port columns. Built on first use per thread and cached.
schema_ptr system_keyspace::client_routes() {
    static thread_local auto cached = [] {
        auto legacy_id = generate_legacy_id(NAME, CLIENT_ROUTES);
        return schema_builder(NAME, CLIENT_ROUTES, std::make_optional(legacy_id))
                // key columns
                .with_column("connection_id", utf8_type, column_kind::partition_key)
                .with_column("host_id", uuid_type, column_kind::clustering_key)
                // regular columns
                .with_column("address", utf8_type)
                .with_column("port", int32_type)
                .with_column("tls_port", int32_type)
                .with_column("alternator_port", int32_type)
                .with_column("alternator_https_port", int32_type)
                .with_hash_version()
                .build();
    }();
    return cached;
}
|
||||
|
||||
future<system_keyspace::local_info> system_keyspace::load_local_info() {
|
||||
auto msg = co_await execute_cql(format("SELECT host_id, cluster_name, data_center, rack FROM system.{} WHERE key=?", LOCAL), sstring(LOCAL));
|
||||
|
||||
@@ -2324,14 +2347,13 @@ std::vector<schema_ptr> system_keyspace::all_tables(const db::config& cfg) {
|
||||
std::copy(schema_tables.begin(), schema_tables.end(), std::back_inserter(r));
|
||||
auto auth_tables = system_keyspace::auth_tables();
|
||||
std::copy(auth_tables.begin(), auth_tables.end(), std::back_inserter(r));
|
||||
r.insert(r.end(), { built_indexes(), hints(), batchlog(), paxos(), local(),
|
||||
r.insert(r.end(), { built_indexes(), hints(), batchlog(), batchlog_v2(), paxos(), local(),
|
||||
peers(), peer_events(), range_xfers(),
|
||||
compactions_in_progress(), compaction_history(),
|
||||
sstable_activity(), size_estimates(), large_partitions(), large_rows(), large_cells(),
|
||||
corrupt_data(),
|
||||
scylla_local(), db::schema_tables::scylla_table_schema_history(),
|
||||
repair_history(),
|
||||
repair_tasks(),
|
||||
v3::views_builds_in_progress(), v3::built_views(),
|
||||
v3::scylla_views_builds_in_progress(),
|
||||
v3::truncated(),
|
||||
@@ -2339,7 +2361,7 @@ std::vector<schema_ptr> system_keyspace::all_tables(const db::config& cfg) {
|
||||
v3::cdc_local(),
|
||||
raft(), raft_snapshots(), raft_snapshot_config(), group0_history(), discovery(),
|
||||
topology(), cdc_generations_v3(), topology_requests(), service_levels_v2(), view_build_status_v2(),
|
||||
dicts(), view_building_tasks(), cdc_streams_state(), cdc_streams_history()
|
||||
dicts(), view_building_tasks(), client_routes(), cdc_streams_state(), cdc_streams_history()
|
||||
});
|
||||
|
||||
if (cfg.check_experimental(db::experimental_features_t::feature::BROADCAST_TABLES)) {
|
||||
@@ -2356,7 +2378,9 @@ std::vector<schema_ptr> system_keyspace::all_tables(const db::config& cfg) {
|
||||
}
|
||||
|
||||
static bool maybe_write_in_user_memory(schema_ptr s) {
|
||||
return (s.get() == system_keyspace::batchlog().get()) || (s.get() == system_keyspace::paxos().get())
|
||||
return (s.get() == system_keyspace::batchlog().get())
|
||||
|| (s.get() == system_keyspace::batchlog_v2().get())
|
||||
|| (s.get() == system_keyspace::paxos().get())
|
||||
|| s == system_keyspace::v3::scylla_views_builds_in_progress();
|
||||
}
|
||||
|
||||
@@ -2573,32 +2597,6 @@ future<> system_keyspace::get_repair_history(::table_id table_id, repair_history
|
||||
});
|
||||
}
|
||||
|
||||
// Builds the mutations that insert `entry` into system.repair_tasks, stamped
// with write timestamp `ts`, and returns them as canonical mutations. Nothing
// is applied here; the caller receives the mutations.
future<utils::chunked_vector<canonical_mutation>> system_keyspace::get_update_repair_task_mutations(const repair_task_entry& entry, api::timestamp_type ts) {
    // Default to timing out the repair task entries after 10 days; this should
    // be enough time for the management tools to query them.
    constexpr int ttl_seconds = 10 * 24 * 3600;
    sstring insert_cql = format("INSERT INTO system.{} (task_uuid, operation, first_token, last_token, timestamp, table_uuid) VALUES (?, ?, ?, ?, ?, ?) USING TTL {}", REPAIR_TASKS, ttl_seconds);
    // Bound values follow the column order of the INSERT statement above.
    auto mutations = co_await _qp.get_mutations_internal(insert_cql, internal_system_query_state(), ts,
            {entry.task_uuid.uuid(), repair_task_operation_to_string(entry.operation),
             entry.first_token, entry.last_token, entry.timestamp, entry.table_uuid.uuid()});
    utils::chunked_vector<canonical_mutation> canonical = {mutations.begin(), mutations.end()};
    co_return canonical;
}
|
||||
|
||||
// Reads the rows of system.repair_tasks for `task_uuid` and hands each one to
// the consumer `f` as a repair_task_entry. The row callback always returns
// stop_iteration::no, so it never asks for the iteration to stop early.
future<> system_keyspace::get_repair_task(tasks::task_id task_uuid, repair_task_consumer f) {
    // The task id is formatted into the statement text rather than bound.
    sstring select_cql = format("SELECT * from system.{} WHERE task_uuid = {}", REPAIR_TASKS, task_uuid);
    co_await _qp.query_internal(select_cql, [&f] (const cql3::untyped_result_set::row& r) mutable -> future<stop_iteration> {
        // Each entry field is read from the column of the same name.
        repair_task_entry entry;
        entry.task_uuid = tasks::task_id(r.get_as<utils::UUID>("task_uuid"));
        entry.operation = repair_task_operation_from_string(r.get_as<sstring>("operation"));
        entry.first_token = r.get_as<int64_t>("first_token");
        entry.last_token = r.get_as<int64_t>("last_token");
        entry.timestamp = r.get_as<db_clock::time_point>("timestamp");
        entry.table_uuid = ::table_id(r.get_as<utils::UUID>("table_uuid"));
        co_await f(std::move(entry));
        co_return stop_iteration::no;
    });
}
|
||||
|
||||
future<gms::generation_type> system_keyspace::increment_and_get_generation() {
|
||||
auto req = format("SELECT gossip_generation FROM system.{} WHERE key='{}'", LOCAL, LOCAL);
|
||||
auto rs = co_await _qp.execute_internal(req, cql3::query_processor::cache_internal::yes);
|
||||
@@ -3770,35 +3768,4 @@ future<> system_keyspace::apply_mutation(mutation m) {
|
||||
return _qp.proxy().mutate_locally(m, {}, db::commitlog::force_sync(m.schema()->static_props().wait_for_sync_to_commitlog), db::no_timeout);
|
||||
}
|
||||
|
||||
// Maps every repair_task_operation enumerator to its string name.
// The names are persisted in system tables so should not be changed.
|
||||
static const std::unordered_map<system_keyspace::repair_task_operation, sstring> repair_task_operation_to_name = {
|
||||
{system_keyspace::repair_task_operation::requested, "requested"},
|
||||
{system_keyspace::repair_task_operation::finished, "finished"},
|
||||
};
|
||||
|
||||
static const std::unordered_map<sstring, system_keyspace::repair_task_operation> repair_task_operation_from_name = std::invoke([] {
|
||||
std::unordered_map<sstring, system_keyspace::repair_task_operation> result;
|
||||
for (auto&& [v, s] : repair_task_operation_to_name) {
|
||||
result.emplace(s, v);
|
||||
}
|
||||
return result;
|
||||
});
|
||||
|
||||
// Returns the string name of `op` as listed in repair_task_operation_to_name.
// An enumerator missing from that table is reported through
// on_internal_error(), with the numeric value of `op` in the message.
sstring system_keyspace::repair_task_operation_to_string(system_keyspace::repair_task_operation op) {
    const auto it = repair_task_operation_to_name.find(op);
    if (it != repair_task_operation_to_name.end()) {
        return it->second;
    }
    // NOTE(review): relies on on_internal_error() not returning, exactly as
    // the lookup-then-dereference form did.
    on_internal_error(slogger, format("Invalid repair task operation: {}", static_cast<int>(op)));
}
|
||||
|
||||
// Parses a string name back into a repair_task_operation.
// The lookup uses unordered_map::at(), so a name that is not in
// repair_task_operation_from_name makes this throw std::out_of_range.
system_keyspace::repair_task_operation system_keyspace::repair_task_operation_from_string(const sstring& name) {
    return repair_task_operation_from_name.at(name);
}
|
||||
|
||||
} // namespace db
|
||||
|
||||
// Formats a repair_task_operation as the string returned by
// repair_task_operation_to_string().
auto fmt::formatter<db::system_keyspace::repair_task_operation>::format(const db::system_keyspace::repair_task_operation& op, fmt::format_context& ctx) const
        -> decltype(ctx.out()) {
    const auto name = db::system_keyspace::repair_task_operation_to_string(op);
    return fmt::format_to(ctx.out(), "{}", name);
}
|
||||
|
||||
@@ -57,8 +57,6 @@ namespace paxos {
|
||||
struct topology_request_state;
|
||||
|
||||
class group0_guard;
|
||||
|
||||
class raft_group0_client;
|
||||
}
|
||||
|
||||
namespace netw {
|
||||
@@ -165,6 +163,7 @@ public:
|
||||
static constexpr auto NAME = "system";
|
||||
static constexpr auto HINTS = "hints";
|
||||
static constexpr auto BATCHLOG = "batchlog";
|
||||
static constexpr auto BATCHLOG_V2 = "batchlog_v2";
|
||||
static constexpr auto PAXOS = "paxos";
|
||||
static constexpr auto BUILT_INDEXES = "IndexInfo";
|
||||
static constexpr auto LOCAL = "local";
|
||||
@@ -186,7 +185,6 @@ public:
|
||||
static constexpr auto RAFT_SNAPSHOTS = "raft_snapshots";
|
||||
static constexpr auto RAFT_SNAPSHOT_CONFIG = "raft_snapshot_config";
|
||||
static constexpr auto REPAIR_HISTORY = "repair_history";
|
||||
static constexpr auto REPAIR_TASKS = "repair_tasks";
|
||||
static constexpr auto GROUP0_HISTORY = "group0_history";
|
||||
static constexpr auto DISCOVERY = "discovery";
|
||||
static constexpr auto BROADCAST_KV_STORE = "broadcast_kv_store";
|
||||
@@ -201,6 +199,7 @@ public:
|
||||
static constexpr auto VIEW_BUILD_STATUS_V2 = "view_build_status_v2";
|
||||
static constexpr auto DICTS = "dicts";
|
||||
static constexpr auto VIEW_BUILDING_TASKS = "view_building_tasks";
|
||||
static constexpr auto CLIENT_ROUTES = "client_routes";
|
||||
|
||||
// auth
|
||||
static constexpr auto ROLES = "roles";
|
||||
@@ -258,12 +257,12 @@ public:
|
||||
|
||||
static schema_ptr hints();
|
||||
static schema_ptr batchlog();
|
||||
static schema_ptr batchlog_v2();
|
||||
static schema_ptr paxos();
|
||||
static schema_ptr built_indexes(); // TODO (from Cassandra): make private
|
||||
static schema_ptr raft();
|
||||
static schema_ptr raft_snapshots();
|
||||
static schema_ptr repair_history();
|
||||
static schema_ptr repair_tasks();
|
||||
static schema_ptr group0_history();
|
||||
static schema_ptr discovery();
|
||||
static schema_ptr broadcast_kv_store();
|
||||
@@ -278,6 +277,7 @@ public:
|
||||
static schema_ptr view_build_status_v2();
|
||||
static schema_ptr dicts();
|
||||
static schema_ptr view_building_tasks();
|
||||
static schema_ptr client_routes();
|
||||
|
||||
// auth
|
||||
static schema_ptr roles();
|
||||
@@ -402,22 +402,6 @@ public:
|
||||
int64_t range_end;
|
||||
};
|
||||
|
||||
// Kind of record stored for a repair task. Converted to and from its string
// name by repair_task_operation_to_string() / repair_task_operation_from_string().
enum class repair_task_operation {
|
||||
requested,
|
||||
finished,
|
||||
};
|
||||
static sstring repair_task_operation_to_string(repair_task_operation op);
|
||||
static repair_task_operation repair_task_operation_from_string(const sstring& name);
|
||||
|
||||
// One row of the system repair_tasks table; each member holds the value of
// the column with the same name.
struct repair_task_entry {
|
||||
// Id of the repair task (partition key column).
tasks::task_id task_uuid;
|
||||
// Which operation this record describes (requested / finished).
repair_task_operation operation;
|
||||
// First and last token of the tablet.
int64_t first_token;
|
||||
int64_t last_token;
|
||||
db_clock::time_point timestamp;
|
||||
// Id of the table, stored in the static column `table_uuid`.
table_id table_uuid;
|
||||
};
|
||||
|
||||
struct topology_requests_entry {
|
||||
utils::UUID id;
|
||||
utils::UUID initiating_host;
|
||||
@@ -439,10 +423,6 @@ public:
|
||||
using repair_history_consumer = noncopyable_function<future<>(const repair_history_entry&)>;
|
||||
future<> get_repair_history(table_id, repair_history_consumer f);
|
||||
|
||||
future<utils::chunked_vector<canonical_mutation>> get_update_repair_task_mutations(const repair_task_entry& entry, api::timestamp_type ts);
|
||||
using repair_task_consumer = noncopyable_function<future<>(const repair_task_entry&)>;
|
||||
future<> get_repair_task(tasks::task_id task_uuid, repair_task_consumer f);
|
||||
|
||||
future<> save_truncation_record(const replica::column_family&, db_clock::time_point truncated_at, db::replay_position);
|
||||
future<replay_positions> get_truncated_positions(table_id);
|
||||
future<> drop_truncation_rp_records();
|
||||
@@ -750,8 +730,3 @@ public:
|
||||
}; // class system_keyspace
|
||||
|
||||
} // namespace db
|
||||
|
||||
// fmt support for db::system_keyspace::repair_task_operation.
// Only the declaration of format() lives here; it is defined out of line.
template <>
|
||||
struct fmt::formatter<db::system_keyspace::repair_task_operation> : fmt::formatter<string_view> {
|
||||
auto format(const db::system_keyspace::repair_task_operation&, fmt::format_context& ctx) const -> decltype(ctx.out());
|
||||
};
|
||||
|
||||
@@ -71,7 +71,7 @@ Use "Bash on Ubuntu on Windows" for the same tools and capabilities as on Linux
|
||||
|
||||
### Building the Docs
|
||||
|
||||
1. Run `make preview` to build the documentation.
|
||||
1. Run `make preview` in the `docs/` directory to build the documentation.
|
||||
1. Preview the built documentation locally at http://127.0.0.1:5500/.
|
||||
|
||||
### Cleanup
|
||||
|
||||
@@ -41,6 +41,8 @@ class MetricsProcessor:
|
||||
# Get metrics from the file
|
||||
try:
|
||||
metrics_file = metrics.get_metrics_from_file(relative_path, "scylla_", metrics_info, strict=strict)
|
||||
except SystemExit:
|
||||
pass
|
||||
finally:
|
||||
os.chdir(old_cwd)
|
||||
if metrics_file:
|
||||
|
||||
@@ -102,6 +102,7 @@ Additional Information
|
||||
|
||||
To learn more about TTL, and see a hands-on example, check out `this lesson <https://university.scylladb.com/courses/data-modeling/lessons/advanced-data-modeling/topic/expiring-data-with-ttl-time-to-live/>`_ on ScyllaDB University.
|
||||
|
||||
* `Video: Managing data expiration with Time-To-Live <https://www.youtube.com/watch?v=SXkbu7mFHeA>`_
|
||||
* :doc:`Apache Cassandra Query Language (CQL) Reference </cql/index>`
|
||||
* :doc:`KB Article: How to Change gc_grace_seconds for a Table </kb/gc-grace-seconds/>`
|
||||
* :doc:`KB Article: Time to Live (TTL) and Compaction </kb/ttl-facts/>`
|
||||
|
||||
@@ -236,3 +236,26 @@ the same mechanism for other protocol versions, such as CQLv4.
|
||||
|
||||
The feature is identified by the `SCYLLA_USE_METADATA_ID` key, which is meant to be sent
|
||||
in the SUPPORTED message.
|
||||
|
||||
## Sending the CLIENT_ROUTES_CHANGE event
|
||||
|
||||
This extension allows a driver to update its connections when the
|
||||
`system.client_routes` table is modified.
|
||||
|
||||
In some network topologies a specific mapping of addresses and ports is required (e.g.
|
||||
to support Private Link). This mapping can change dynamically even when no nodes are
|
||||
added or removed. The driver must adapt to those changes; otherwise connectivity can be
|
||||
lost.
|
||||
|
||||
The extension is implemented as a new `EVENT` type: `CLIENT_ROUTES_CHANGE`. The event
|
||||
body consists of:
|
||||
- [string] change
|
||||
- [string list] connection_ids
|
||||
- [string list] host_ids
|
||||
|
||||
There is only one change value: `UPDATE_NODES`, which means at least one client route
|
||||
was inserted, updated, or deleted.
|
||||
|
||||
Events already have a subscription mechanism similar to protocol extensions (that is,
|
||||
the driver only receives the events it explicitly subscribed to), so no additional
|
||||
`cql_protocol_extension` key is introduced for this feature.
|
||||
|
||||
@@ -29,9 +29,6 @@ A CDC generation consists of:
|
||||
|
||||
This is the mapping used to decide on which stream IDs to use when making writes, as explained in the :doc:`./cdc-streams` document. It is a global property of the cluster: it doesn't depend on the table you're making writes to.
|
||||
|
||||
.. caution::
|
||||
The tables mentioned in the following sections: ``system_distributed.cdc_generation_timestamps`` and ``system_distributed.cdc_streams_descriptions_v2`` have been introduced in ScyllaDB 4.4. It is highly recommended to upgrade to 4.4 for efficient CDC usage. The last section explains how to run the below examples in ScyllaDB 4.3.
|
||||
|
||||
When CDC generations change
|
||||
---------------------------
|
||||
|
||||
|
||||
@@ -28,7 +28,8 @@ Incremental Repair is only supported for tables that use the tablets architectur
|
||||
Incremental Repair Modes
|
||||
------------------------
|
||||
|
||||
While incremental repair is the default and recommended mode, you can control its behavior for a given repair operation using the ``incremental_mode`` parameter. This is useful for situations where you might need to force a full data validation.
|
||||
Incremental repair is currently disabled by default. You can control its behavior for a given repair operation using the ``incremental_mode`` parameter.
|
||||
This is useful for enabling incremental repair, or in situations where you might need to force a full data validation.
|
||||
|
||||
The available modes are:
|
||||
|
||||
|
||||
@@ -53,13 +53,13 @@ ScyllaDB nodetool cluster repair command supports the following options:
|
||||
|
||||
nodetool cluster repair --tablet-tokens 1,10474535988
|
||||
|
||||
- ``--incremental-mode`` specifies the incremental repair mode. Can be 'disabled', 'incremental', or 'full'. 'incremental': The incremental repair logic is enabled. Unrepaired sstables will be included for repair. Repaired sstables will be skipped. The incremental repair states will be updated after repair. 'full': The incremental repair logic is enabled. Both repaired and unrepaired sstables will be included for repair. The incremental repair states will be updated after repair. 'disabled': The incremental repair logic is disabled completely. The incremental repair states, e.g., repaired_at in sstables and sstables_repaired_at in the system.tablets table, will not be updated after repair. When the option is not provided, it defaults to incremental.
|
||||
- ``--incremental-mode`` specifies the incremental repair mode. Can be 'disabled', 'incremental', or 'full'. 'incremental': The incremental repair logic is enabled. Unrepaired sstables will be included for repair. Repaired sstables will be skipped. The incremental repair states will be updated after repair. 'full': The incremental repair logic is enabled. Both repaired and unrepaired sstables will be included for repair. The incremental repair states will be updated after repair. 'disabled': The incremental repair logic is disabled completely. The incremental repair states, e.g., repaired_at in sstables and sstables_repaired_at in the system.tablets table, will not be updated after repair. When the option is not provided, it defaults to 'disabled'.
|
||||
|
||||
For example:
|
||||
|
||||
::
|
||||
|
||||
nodetool cluster repair --incremental-mode regular
|
||||
nodetool cluster repair --incremental-mode disabled
|
||||
|
||||
- ``keyspace`` executes a repair on a specific keyspace. The default is all keyspaces.
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ SYNOPSIS
|
||||
[(-u <username> | --username <username>)] snapshot
|
||||
[(-cf <table> | --column-family <table> | --table <table>)]
|
||||
[(-kc <kclist> | --kc.list <kclist>)]
|
||||
[(-sf | --skip-flush)] [--use-sstable-identifier] [(-t <tag> | --tag <tag>)] [--] [<keyspaces...>]
|
||||
[(-sf | --skip-flush)] [(-t <tag> | --tag <tag>)] [--] [<keyspaces...>]
|
||||
|
||||
OPTIONS
|
||||
.......
|
||||
@@ -37,8 +37,6 @@ Parameter Descriptio
|
||||
-------------------------------------------------------------------- -------------------------------------------------------------------------------------
|
||||
-sf / --skip-flush Do not flush memtables before snapshotting (snapshot will not contain unflushed data)
|
||||
-------------------------------------------------------------------- -------------------------------------------------------------------------------------
|
||||
--use-sstable-identifier Use the sstable identifier UUID, if available, rather than the sstable generation.
|
||||
-------------------------------------------------------------------- -------------------------------------------------------------------------------------
|
||||
-t <tag> / --tag <tag> The name of the snapshot
|
||||
==================================================================== =====================================================================================
|
||||
|
||||
|
||||
@@ -64,13 +64,12 @@ ADMIN Logs service level operations: create, alter, drop, attach, detach, l
|
||||
auditing.
|
||||
========= =========================================================================================
|
||||
|
||||
Note that audit for every DML or QUERY might impact performance and consume a lot of storage.
|
||||
Note that enabling audit may negatively impact performance and audit-to-table may consume extra storage. That's especially true when auditing DML and QUERY categories, which generate a high volume of audit messages.
|
||||
|
||||
Configuring Audit Storage
|
||||
---------------------------
|
||||
|
||||
Auditing messages can be sent to :ref:`Syslog <auditing-syslog-storage>` or stored in a Scylla :ref:`table <auditing-table-storage>`.
|
||||
Currently, auditing messages can only be saved to one location at a time. You cannot log into both a table and the Syslog.
|
||||
Auditing messages can be sent to :ref:`Syslog <auditing-syslog-storage>`, stored in a Scylla :ref:`table <auditing-table-storage>`, or both.
|
||||
|
||||
.. _auditing-syslog-storage:
|
||||
|
||||
@@ -193,6 +192,23 @@ For example:
|
||||
2018-03-18 00:00:00+0000 | 10.143.2.108 | 3429b1a5-2a94-11e8-8f4e-000000000001 | DDL | ONE | False | nba | DROP TABLE nba.team_roster ; | 127.0.0.1 | team_roster | Scylla |
|
||||
(1 row)
|
||||
|
||||
.. _auditing-table-and-syslog-storage:
|
||||
|
||||
Storing Audit Messages in a Table and Syslog Simultaneously
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
**Procedure**
|
||||
|
||||
#. Follow both procedures above, and set the ``audit`` parameter in the ``scylla.yaml`` file to both ``syslog`` and ``table``. You need to restart Scylla only once.
|
||||
|
||||
To have both syslog and table you need to specify both backends separated by a comma:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
audit: "syslog,table"
|
||||
|
||||
|
||||
|
||||
Handling Audit Failures
|
||||
---------------------------
|
||||
|
||||
|
||||
@@ -816,7 +816,6 @@ public:
|
||||
future<data_sink> wrap_sink(const sstables::sstable& sst, sstables::component_type type, data_sink sink) override {
|
||||
switch (type) {
|
||||
case sstables::component_type::Scylla:
|
||||
case sstables::component_type::TemporaryScylla:
|
||||
case sstables::component_type::TemporaryTOC:
|
||||
case sstables::component_type::TOC:
|
||||
co_return sink;
|
||||
@@ -845,7 +844,6 @@ public:
|
||||
sstables::component_type type,
|
||||
data_source src) override {
|
||||
switch (type) {
|
||||
case sstables::component_type::TemporaryScylla:
|
||||
case sstables::component_type::Scylla:
|
||||
case sstables::component_type::TemporaryTOC:
|
||||
case sstables::component_type::TOC:
|
||||
|
||||
@@ -143,7 +143,6 @@ public:
|
||||
|
||||
gms::feature tablet_incremental_repair { *this, "TABLET_INCREMENTAL_REPAIR"sv };
|
||||
gms::feature tablet_repair_scheduler { *this, "TABLET_REPAIR_SCHEDULER"sv };
|
||||
gms::feature tablet_repair_tasks_table { *this, "TABLET_REPAIR_TASKS_TABLE"sv };
|
||||
gms::feature tablet_merge { *this, "TABLET_MERGE"sv };
|
||||
gms::feature tablet_rack_aware_view_pairing { *this, "TABLET_RACK_AWARE_VIEW_PAIRING"sv };
|
||||
|
||||
@@ -177,6 +176,7 @@ public:
|
||||
gms::feature rack_list_rf { *this, "RACK_LIST_RF"sv };
|
||||
gms::feature driver_service_level { *this, "DRIVER_SERVICE_LEVEL"sv };
|
||||
gms::feature strongly_consistent_tables { *this, "STRONGLY_CONSISTENT_TABLES"sv };
|
||||
gms::feature client_routes { *this, "CLIENT_ROUTES"sv };
|
||||
public:
|
||||
|
||||
const std::unordered_map<sstring, std::reference_wrapper<feature>>& registered_features() const;
|
||||
|
||||
@@ -15,3 +15,22 @@ with the Apache License (version 2) and ScyllaDB-Source-Available-1.0.
|
||||
They contain the following tag:
|
||||
|
||||
SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
|
||||
|
||||
### `musl libc` files
|
||||
|
||||
`licenses/musl-license.txt` is obtained from:
|
||||
https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT
|
||||
|
||||
`utils/crypt_sha512.cc` is obtained from:
|
||||
https://git.musl-libc.org/cgit/musl/tree/src/crypt/crypt_sha512.c
|
||||
|
||||
Both files are obtained from git.musl-libc.org.
|
||||
Import commit:
|
||||
commit 1b76ff0767d01df72f692806ee5adee13c67ef88
|
||||
Author: Alex Rønne Petersen <alex@alexrp.com>
|
||||
Date: Sun Oct 12 05:35:19 2025 +0200
|
||||
|
||||
s390x: shuffle register usage in __tls_get_offset to avoid r0 as address
|
||||
|
||||
musl as a whole is licensed under the standard MIT license included in
|
||||
`licenses/musl-license.txt`.
|
||||
|
||||
193
licenses/musl-license.txt
Normal file
193
licenses/musl-license.txt
Normal file
@@ -0,0 +1,193 @@
|
||||
musl as a whole is licensed under the following standard MIT license:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
Copyright © 2005-2020 Rich Felker, et al.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
----------------------------------------------------------------------
|
||||
|
||||
Authors/contributors include:
|
||||
|
||||
A. Wilcox
|
||||
Ada Worcester
|
||||
Alex Dowad
|
||||
Alex Suykov
|
||||
Alexander Monakov
|
||||
Andre McCurdy
|
||||
Andrew Kelley
|
||||
Anthony G. Basile
|
||||
Aric Belsito
|
||||
Arvid Picciani
|
||||
Bartosz Brachaczek
|
||||
Benjamin Peterson
|
||||
Bobby Bingham
|
||||
Boris Brezillon
|
||||
Brent Cook
|
||||
Chris Spiegel
|
||||
Clément Vasseur
|
||||
Daniel Micay
|
||||
Daniel Sabogal
|
||||
Daurnimator
|
||||
David Carlier
|
||||
David Edelsohn
|
||||
Denys Vlasenko
|
||||
Dmitry Ivanov
|
||||
Dmitry V. Levin
|
||||
Drew DeVault
|
||||
Emil Renner Berthing
|
||||
Fangrui Song
|
||||
Felix Fietkau
|
||||
Felix Janda
|
||||
Gianluca Anzolin
|
||||
Hauke Mehrtens
|
||||
He X
|
||||
Hiltjo Posthuma
|
||||
Isaac Dunham
|
||||
Jaydeep Patil
|
||||
Jens Gustedt
|
||||
Jeremy Huntwork
|
||||
Jo-Philipp Wich
|
||||
Joakim Sindholt
|
||||
John Spencer
|
||||
Julien Ramseier
|
||||
Justin Cormack
|
||||
Kaarle Ritvanen
|
||||
Khem Raj
|
||||
Kylie McClain
|
||||
Leah Neukirchen
|
||||
Luca Barbato
|
||||
Luka Perkov
|
||||
Lynn Ochs
|
||||
M Farkas-Dyck (Strake)
|
||||
Mahesh Bodapati
|
||||
Markus Wichmann
|
||||
Masanori Ogino
|
||||
Michael Clark
|
||||
Michael Forney
|
||||
Mikhail Kremnyov
|
||||
Natanael Copa
|
||||
Nicholas J. Kain
|
||||
orc
|
||||
Pascal Cuoq
|
||||
Patrick Oppenlander
|
||||
Petr Hosek
|
||||
Petr Skocik
|
||||
Pierre Carrier
|
||||
Reini Urban
|
||||
Rich Felker
|
||||
Richard Pennington
|
||||
Ryan Fairfax
|
||||
Samuel Holland
|
||||
Segev Finer
|
||||
Shiz
|
||||
sin
|
||||
Solar Designer
|
||||
Stefan Kristiansson
|
||||
Stefan O'Rear
|
||||
Szabolcs Nagy
|
||||
Timo Teräs
|
||||
Trutz Behn
|
||||
Will Dietz
|
||||
William Haddon
|
||||
William Pitcock
|
||||
|
||||
Portions of this software are derived from third-party works licensed
|
||||
under terms compatible with the above MIT license:
|
||||
|
||||
The TRE regular expression implementation (src/regex/reg* and
|
||||
src/regex/tre*) is Copyright © 2001-2008 Ville Laurikari and licensed
|
||||
under a 2-clause BSD license (license text in the source files). The
|
||||
included version has been heavily modified by Rich Felker in 2012, in
|
||||
the interests of size, simplicity, and namespace cleanliness.
|
||||
|
||||
Much of the math library code (src/math/* and src/complex/*) is
|
||||
Copyright © 1993,2004 Sun Microsystems or
|
||||
Copyright © 2003-2011 David Schultz or
|
||||
Copyright © 2003-2009 Steven G. Kargl or
|
||||
Copyright © 2003-2009 Bruce D. Evans or
|
||||
Copyright © 2008 Stephen L. Moshier or
|
||||
Copyright © 2017-2018 Arm Limited
|
||||
and labelled as such in comments in the individual source files. All
|
||||
have been licensed under extremely permissive terms.
|
||||
|
||||
The ARM memcpy code (src/string/arm/memcpy.S) is Copyright © 2008
|
||||
The Android Open Source Project and is licensed under a two-clause BSD
|
||||
license. It was taken from Bionic libc, used on Android.
|
||||
|
||||
The AArch64 memcpy and memset code (src/string/aarch64/*) are
|
||||
Copyright © 1999-2019, Arm Limited.
|
||||
|
||||
The implementation of DES for crypt (src/crypt/crypt_des.c) is
|
||||
Copyright © 1994 David Burren. It is licensed under a BSD license.
|
||||
|
||||
The implementation of blowfish crypt (src/crypt/crypt_blowfish.c) was
|
||||
originally written by Solar Designer and placed into the public
|
||||
domain. The code also comes with a fallback permissive license for use
|
||||
in jurisdictions that may not recognize the public domain.
|
||||
|
||||
The smoothsort implementation (src/stdlib/qsort.c) is Copyright © 2011
|
||||
Lynn Ochs and is licensed under an MIT-style license.
|
||||
|
||||
The x86_64 port was written by Nicholas J. Kain and is licensed under
|
||||
the standard MIT terms.
|
||||
|
||||
The mips and microblaze ports were originally written by Richard
|
||||
Pennington for use in the ellcc project. The original code was adapted
|
||||
by Rich Felker for build system and code conventions during upstream
|
||||
integration. It is licensed under the standard MIT terms.
|
||||
|
||||
The mips64 port was contributed by Imagination Technologies and is
|
||||
licensed under the standard MIT terms.
|
||||
|
||||
The powerpc port was also originally written by Richard Pennington,
|
||||
and later supplemented and integrated by John Spencer. It is licensed
|
||||
under the standard MIT terms.
|
||||
|
||||
All other files which have no copyright comments are original works
|
||||
produced specifically for use as part of this library, written either
|
||||
by Rich Felker, the main author of the library, or by one or more
|
||||
contibutors listed above. Details on authorship of individual files
|
||||
can be found in the git version control history of the project. The
|
||||
omission of copyright and license comments in each file is in the
|
||||
interest of source tree size.
|
||||
|
||||
In addition, permission is hereby granted for all public header files
|
||||
(include/* and arch/*/bits/*) and crt files intended to be linked into
|
||||
applications (crt/*, ldso/dlstart.c, and arch/*/crt_arch.h) to omit
|
||||
the copyright notice and permission notice otherwise required by the
|
||||
license, and to use these files without any requirement of
|
||||
attribution. These files include substantial contributions from:
|
||||
|
||||
Bobby Bingham
|
||||
John Spencer
|
||||
Nicholas J. Kain
|
||||
Rich Felker
|
||||
Richard Pennington
|
||||
Stefan Kristiansson
|
||||
Szabolcs Nagy
|
||||
|
||||
all of whom have explicitly granted such permission.
|
||||
|
||||
This file previously contained text expressing a belief that most of
|
||||
the files covered by the above exception were sufficiently trivial not
|
||||
to be subject to copyright, resulting in confusion over whether it
|
||||
negated the permissions granted in the license. In the spirit of
|
||||
permissive licensing, and of not having licensing issues being an
|
||||
obstacle to adoption, that text has been removed.
|
||||
@@ -200,7 +200,10 @@ enum class tablet_repair_incremental_mode : uint8_t {
|
||||
disabled,
|
||||
};
|
||||
|
||||
constexpr tablet_repair_incremental_mode default_tablet_repair_incremental_mode{tablet_repair_incremental_mode::incremental};
|
||||
// FIXME: Incremental repair is disabled by default due to
|
||||
// https://github.com/scylladb/scylladb/issues/26041 and
|
||||
// https://github.com/scylladb/scylladb/issues/27414
|
||||
constexpr tablet_repair_incremental_mode default_tablet_repair_incremental_mode{tablet_repair_incremental_mode::disabled};
|
||||
|
||||
sstring tablet_repair_incremental_mode_to_string(tablet_repair_incremental_mode);
|
||||
tablet_repair_incremental_mode tablet_repair_incremental_mode_from_string(const sstring&);
|
||||
|
||||
@@ -235,9 +235,6 @@ public:
|
||||
const topology& get_topology() const;
|
||||
void debug_show() const;
|
||||
|
||||
/** Return the unique host ID for an end-point. */
|
||||
host_id get_host_id(inet_address endpoint) const;
|
||||
|
||||
/** @return a copy of the endpoint-to-id map for read-only operations */
|
||||
std::unordered_set<host_id> get_host_ids() const;
|
||||
|
||||
|
||||
26
main.cc
26
main.cc
@@ -23,6 +23,7 @@
|
||||
#include <seastar/core/future.hh>
|
||||
#include <seastar/core/signal.hh>
|
||||
#include <seastar/core/timer.hh>
|
||||
#include "service/client_routes.hh"
|
||||
#include "service/qos/raft_service_level_distributed_data_accessor.hh"
|
||||
#include "db/view/view_building_state.hh"
|
||||
#include "tasks/task_manager.hh"
|
||||
@@ -748,8 +749,6 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
|
||||
// inherit Seastar's CPU affinity masks. We want this thread to be free
|
||||
// to migrate between CPUs; we think that's what makes the most sense.
|
||||
auto rpc_dict_training_worker = utils::alien_worker(startlog, 19, "rpc-dict");
|
||||
// niceness=10 is ~10% of normal process time
|
||||
auto hashing_worker = utils::alien_worker(startlog, 10, "pwd-hash");
|
||||
|
||||
return app.run(ac, av, [&] () -> future<int> {
|
||||
|
||||
@@ -779,8 +778,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
|
||||
return seastar::async([&app, cfg, ext, &disk_space_monitor_shard0, &cm, &sstm, &db, &qp, &bm, &proxy, &mapreduce_service, &mm, &mm_notifier, &ctx, &opts, &dirs,
|
||||
&prometheus_server, &cf_cache_hitrate_calculator, &load_meter, &feature_service, &gossiper, &snitch,
|
||||
&token_metadata, &erm_factory, &snapshot_ctl, &messaging, &sst_dir_semaphore, &raft_gr, &service_memory_limiter,
|
||||
&repair, &sst_loader, &auth_cache, &ss, &lifecycle_notifier, &stream_manager, &task_manager, &rpc_dict_training_worker,
|
||||
&hashing_worker, &vector_store_client] {
|
||||
&repair, &sst_loader, &auth_cache, &ss, &lifecycle_notifier, &stream_manager, &task_manager, &rpc_dict_training_worker, &vector_store_client] {
|
||||
try {
|
||||
if (opts.contains("relabel-config-file") && !opts["relabel-config-file"].as<sstring>().empty()) {
|
||||
// calling update_relabel_config_from_file can cause an exception that would stop startup
|
||||
@@ -1798,6 +1796,13 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
|
||||
auth_cache.stop().get();
|
||||
});
|
||||
|
||||
checkpoint(stop_signal, "initializing client routes service");
|
||||
static sharded<service::client_routes_service> client_routes;
|
||||
client_routes.start(std::ref(stop_signal.as_sharded_abort_source()), std::ref(feature_service), std::ref(group0_client), std::ref(qp), std::ref(lifecycle_notifier)).get();
|
||||
auto stop_client_routes = defer_verbose_shutdown("client_routes", [&] {
|
||||
client_routes.stop().get();
|
||||
});
|
||||
|
||||
checkpoint(stop_signal, "initializing storage service");
|
||||
debug::the_storage_service = &ss;
|
||||
ss.start(std::ref(stop_signal.as_sharded_abort_source()),
|
||||
@@ -1806,7 +1811,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
|
||||
std::ref(messaging), std::ref(repair),
|
||||
std::ref(stream_manager), std::ref(lifecycle_notifier), std::ref(bm), std::ref(snitch),
|
||||
std::ref(tablet_allocator), std::ref(cdc_generation_service), std::ref(view_builder), std::ref(view_building_worker), std::ref(qp), std::ref(sl_controller),
|
||||
std::ref(auth_cache),
|
||||
std::ref(auth_cache), std::ref(client_routes),
|
||||
std::ref(tsm), std::ref(vbsm), std::ref(task_manager), std::ref(gossip_address_map),
|
||||
compression_dict_updated_callback,
|
||||
only_on_shard0(&*disk_space_monitor_shard0)
|
||||
@@ -2060,7 +2065,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
|
||||
maintenance_auth_config.authenticator_java_name = sstring{auth::allow_all_authenticator_name};
|
||||
maintenance_auth_config.role_manager_java_name = sstring{auth::maintenance_socket_role_manager_name};
|
||||
|
||||
maintenance_auth_service.start(perm_cache_config, std::ref(qp), std::ref(group0_client), std::ref(mm_notifier), std::ref(mm), maintenance_auth_config, maintenance_socket_enabled::yes, std::ref(auth_cache), std::ref(hashing_worker)).get();
|
||||
maintenance_auth_service.start(perm_cache_config, std::ref(qp), std::ref(group0_client), std::ref(mm_notifier), std::ref(mm), maintenance_auth_config, maintenance_socket_enabled::yes, std::ref(auth_cache)).get();
|
||||
|
||||
cql_maintenance_server_ctl.emplace(maintenance_auth_service, mm_notifier, gossiper, qp, service_memory_limiter, sl_controller, lifecycle_notifier, *cfg, maintenance_cql_sg_stats_key, maintenance_socket_enabled::yes, dbcfg.statement_scheduling_group);
|
||||
|
||||
@@ -2194,6 +2199,11 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
|
||||
});
|
||||
}).get();
|
||||
|
||||
api::set_server_client_routes(ctx, client_routes).get();
|
||||
auto stop_cr_api = defer_verbose_shutdown("client routes API", [&ctx] {
|
||||
api::unset_server_client_routes(ctx).get();
|
||||
});
|
||||
|
||||
checkpoint(stop_signal, "join cluster");
|
||||
// Allow abort during join_cluster since bootstrap or replace
|
||||
// can take a long time.
|
||||
@@ -2336,7 +2346,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
|
||||
auth_config.authenticator_java_name = qualified_authenticator_name;
|
||||
auth_config.role_manager_java_name = qualified_role_manager_name;
|
||||
|
||||
auth_service.start(std::move(perm_cache_config), std::ref(qp), std::ref(group0_client), std::ref(mm_notifier), std::ref(mm), auth_config, maintenance_socket_enabled::no, std::ref(auth_cache), std::ref(hashing_worker)).get();
|
||||
auth_service.start(std::move(perm_cache_config), std::ref(qp), std::ref(group0_client), std::ref(mm_notifier), std::ref(mm), auth_config, maintenance_socket_enabled::no, std::ref(auth_cache)).get();
|
||||
|
||||
std::any stop_auth_service;
|
||||
// Has to be called after node joined the cluster (join_cluster())
|
||||
@@ -2380,7 +2390,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
|
||||
|
||||
checkpoint(stop_signal, "starting batchlog manager");
|
||||
db::batchlog_manager_config bm_cfg;
|
||||
bm_cfg.write_request_timeout = cfg->write_request_timeout_in_ms() * 1ms;
|
||||
bm_cfg.replay_timeout = cfg->write_request_timeout_in_ms() * 1ms * 2;
|
||||
bm_cfg.replay_rate = cfg->batchlog_replay_throttle_in_kb() * 1000;
|
||||
bm_cfg.delay = std::chrono::milliseconds(cfg->ring_delay_ms());
|
||||
bm_cfg.replay_cleanup_after_replays = cfg->batchlog_replay_cleanup_after_replays();
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:80a47fe93866989aaf7e949168fcd308e95841e78c976a61f9eac20bfdd34d96
|
||||
size 6448960
|
||||
oid sha256:3cbe2dd05945f8fb76ebce2ea70864063d2b282c4d5080af1f290ead43321ab3
|
||||
size 6444732
|
||||
|
||||
@@ -551,9 +551,13 @@ void repair_writer_impl::create_writer(lw_shared_ptr<repair_writer> w) {
|
||||
}
|
||||
replica::table& t = _db.local().find_column_family(_schema->id());
|
||||
rlogger.debug("repair_writer: keyspace={}, table={}, estimated_partitions={}", w->schema()->ks_name(), w->schema()->cf_name(), w->get_estimated_partitions());
|
||||
// #17384 don't use off-strategy for repair (etc) if using tablets. sstables generated will
|
||||
// be single token range and can just be added to normal sstable set as is, eventually
|
||||
// handled by normal compaction.
|
||||
auto off_str = t.uses_tablets() ? sstables::offstrategy(false) : is_offstrategy_supported(_reason);
|
||||
auto sharder = get_sharder_helper(t, *(w->schema()), _topo_guard);
|
||||
_writer_done = mutation_writer::distribute_reader_and_consume_on_shards(_schema, sharder.sharder, std::move(_queue_reader),
|
||||
streaming::make_streaming_consumer(sstables::repair_origin, _db, _view_builder, _view_building_worker, w->get_estimated_partitions(), _reason, is_offstrategy_supported(_reason),
|
||||
streaming::make_streaming_consumer(sstables::repair_origin, _db, _view_builder, _view_building_worker, w->get_estimated_partitions(), _reason, off_str,
|
||||
_topo_guard, _repaired_at, w->get_sstable_list_to_mark_as_repaired()),
|
||||
t.stream_in_progress()).then([w] (uint64_t partitions) {
|
||||
rlogger.debug("repair_writer: keyspace={}, table={}, managed to write partitions={} to sstable",
|
||||
@@ -3844,83 +3848,3 @@ future<uint32_t> repair_service::get_next_repair_meta_id() {
|
||||
// Returns the host ID reported by the shard-local gossiper instance
// (`_gossiper.local().my_host_id()`). Declared noexcept.
locator::host_id repair_service::my_host_id() const noexcept {
|
||||
return _gossiper.local().my_host_id();
|
||||
}
|
||||
|
||||
// Counts how many ranges in `ranges1` are fully covered by the union of the
// ranges in `ranges2`.
//
// Range bounds are treated as inclusive on both ends: a range r1 counts as
// covered when a single merged range m satisfies
// m.first_token <= r1.first_token && r1.last_token <= m.last_token.
//
// Both vectors are taken by value because they are sorted in place.
// `ranges2` is first collapsed into a sorted list of disjoint ranges, merging
// ranges that overlap or are directly adjacent (next.first_token ==
// prev.last_token + 1). `ranges1` is then scanned once against that list.
//
// Returns 0 when either input is empty. The loops yield via
// coroutine::maybe_yield() on every iteration.
future<size_t> count_finished_tablets(utils::chunked_vector<tablet_token_range> ranges1, utils::chunked_vector<tablet_token_range> ranges2) {
    if (ranges1.empty() || ranges2.empty()) {
        co_return 0;
    }

    // Orders ranges by first_token, breaking ties by last_token.
    auto sort = [] (utils::chunked_vector<tablet_token_range>& ranges) {
        std::sort(ranges.begin(), ranges.end(), [] (const auto& a, const auto& b) {
            if (a.first_token != b.first_token) {
                return a.first_token < b.first_token;
            }
            return a.last_token < b.last_token;
        });
    };

    // First, merge overlapping and adjacent ranges in ranges2.
    sort(ranges2);
    utils::chunked_vector<tablet_token_range> merged;
    merged.push_back(ranges2[0]);
    for (size_t i = 1; i < ranges2.size(); ++i) {
        co_await coroutine::maybe_yield();
        const auto next_first = ranges2[i].first_token;
        const auto prev_last = merged.back().last_token;
        // Overlap: next_first <= prev_last. Adjacency: next_first - 1 == prev_last.
        // Written as two tests so that neither `prev_last + 1` (overflow at max())
        // nor `next_first - 1` (overflow at min()) is ever evaluated on a boundary
        // value: when next_first is min(), the first test is always true and
        // short-circuits the subtraction.
        if (next_first <= prev_last || next_first - 1 == prev_last) {
            merged.back().last_token = std::max(prev_last, ranges2[i].last_token);
        } else {
            merged.push_back(ranges2[i]);
        }
    }

    // Count covered ranges using a linear scan
    size_t covered_count = 0;
    auto it = merged.begin();
    auto end = merged.end();
    sort(ranges1);
    for (const auto& r1 : ranges1) {
        co_await coroutine::maybe_yield();
        // Advance the merged iterator only if the current merged range ends
        // before the current r1 starts.
        while (it != end && it->last_token < r1.first_token) {
            co_await coroutine::maybe_yield();
            ++it;
        }
        // If we have exhausted the merged ranges, no further r1 can be covered
        if (it == end) {
            break;
        }
        // Check if the current merged range covers r1.
        if (it->first_token <= r1.first_token && r1.last_token <= it->last_token) {
            covered_count++;
        }
    }

    co_return covered_count;
}
|
||||
|
||||
// Computes the progress of the tablet repair task identified by `task_uuid`.
//
// Returns std::nullopt when the tablet_repair_tasks_table feature is not
// enabled. Otherwise walks the task's entries via
// system_keyspace::get_repair_task(), collecting the token ranges recorded as
// "requested" and as "finished", and reports how many requested ranges are
// covered by the finished ones (see count_finished_tablets()).
future<std::optional<repair_task_progress>> repair_service::get_tablet_repair_task_progress(tasks::task_id task_uuid) {
    if (!_db.local().features().tablet_repair_tasks_table) {
        co_return std::nullopt;
    }

    utils::chunked_vector<tablet_token_range> requested_tablets;
    utils::chunked_vector<tablet_token_range> finished_tablets;
    table_id tid;

    co_await _sys_ks.local().get_repair_task(task_uuid, [&tid, &requested_tablets, &finished_tablets] (const db::system_keyspace::repair_task_entry& entry) -> future<> {
        rlogger.debug("repair_task_progress: Get entry operation={} first_token={} last_token={}", entry.operation, entry.first_token, entry.last_token);
        switch (entry.operation) {
        case db::system_keyspace::repair_task_operation::requested:
            requested_tablets.push_back({entry.first_token, entry.last_token});
            break;
        case db::system_keyspace::repair_task_operation::finished:
            finished_tablets.push_back({entry.first_token, entry.last_token});
            break;
        default:
            // Any other operation does not contribute a range.
            break;
        }
        // Every entry carries the table id; the last one seen wins.
        tid = entry.table_uuid;
        co_return;
    });

    // Sizes must be captured before the vectors are moved into the counter.
    const auto requested = requested_tablets.size();
    const auto finished_nomerge = finished_tablets.size();
    const auto finished = co_await count_finished_tablets(std::move(requested_tablets), std::move(finished_tablets));

    auto progress = repair_task_progress{requested, finished, tid};
    rlogger.debug("repair_task_progress: task_uuid={} table_uuid={} requested_tablets={} finished_tablets={} progress={} finished_nomerge={}",
            task_uuid, tid, requested, finished, progress.progress(), finished_nomerge);
    co_return progress;
}
|
||||
|
||||
@@ -99,15 +99,6 @@ public:
|
||||
|
||||
using host2ip_t = std::function<future<gms::inet_address> (locator::host_id)>;
|
||||
|
||||
struct repair_task_progress {
|
||||
size_t requested;
|
||||
size_t finished;
|
||||
table_id table_uuid;
|
||||
float progress() const {
|
||||
return requested == 0 ? 1.0 : float(finished) / requested;
|
||||
}
|
||||
};
|
||||
|
||||
class repair_service : public seastar::peering_sharded_service<repair_service> {
|
||||
sharded<service::topology_state_machine>& _tsm;
|
||||
sharded<gms::gossiper>& _gossiper;
|
||||
@@ -231,9 +222,6 @@ private:
|
||||
public:
|
||||
future<gc_clock::time_point> repair_tablet(gms::gossip_address_map& addr_map, locator::tablet_metadata_guard& guard, locator::global_tablet_id gid, tasks::task_info global_tablet_repair_task_info, service::frozen_topology_guard topo_guard, std::optional<locator::tablet_replica_set> rebuild_replicas, locator::tablet_transition_stage stage);
|
||||
|
||||
|
||||
future<std::optional<repair_task_progress>> get_tablet_repair_task_progress(tasks::task_id task_uuid);
|
||||
|
||||
private:
|
||||
|
||||
future<repair_update_system_table_response> repair_update_system_table_handler(
|
||||
@@ -338,12 +326,3 @@ future<std::list<repair_row>> to_repair_rows_list(repair_rows_on_wire rows,
|
||||
schema_ptr s, uint64_t seed, repair_master is_master,
|
||||
reader_permit permit, repair_hasher hasher);
|
||||
void flush_rows(schema_ptr s, std::list<repair_row>& rows, lw_shared_ptr<repair_writer>& writer, std::optional<small_table_optimization_params> small_table_optimization = std::nullopt, repair_meta* rm = nullptr);
|
||||
|
||||
// A struct to hold the first and last token of a tablet, as raw 64-bit
// signed token values.
// NOTE(review): count_finished_tablets() appears to treat both bounds as
// inclusive — confirm against the code that fills this struct in.
|
||||
struct tablet_token_range {
|
||||
int64_t first_token;
|
||||
int64_t last_token;
|
||||
};
|
||||
|
||||
// Function to count the number of ranges in ranges1 covered by the merged ranges of ranges2.
|
||||
future<size_t> count_finished_tablets(utils::chunked_vector<tablet_token_range> ranges1, utils::chunked_vector<tablet_token_range> ranges2);
|
||||
|
||||
@@ -297,17 +297,17 @@ public:
|
||||
|
||||
const dht::token_range& token_range() const noexcept;
|
||||
|
||||
size_t memtable_count() const noexcept;
|
||||
size_t memtable_count() const;
|
||||
|
||||
const compaction_group_ptr& main_compaction_group() const noexcept;
|
||||
const std::vector<compaction_group_ptr>& split_ready_compaction_groups() const;
|
||||
compaction_group_ptr& select_compaction_group(locator::tablet_range_side) noexcept;
|
||||
|
||||
uint64_t live_disk_space_used() const noexcept;
|
||||
uint64_t live_disk_space_used() const;
|
||||
|
||||
void for_each_compaction_group(std::function<void(const compaction_group_ptr&)> action) const noexcept;
|
||||
utils::small_vector<compaction_group_ptr, 3> compaction_groups() noexcept;
|
||||
utils::small_vector<const_compaction_group_ptr, 3> compaction_groups() const noexcept;
|
||||
void for_each_compaction_group(std::function<void(const compaction_group_ptr&)> action) const;
|
||||
utils::small_vector<compaction_group_ptr, 3> compaction_groups();
|
||||
utils::small_vector<const_compaction_group_ptr, 3> compaction_groups() const;
|
||||
|
||||
utils::small_vector<compaction_group_ptr, 3> split_unready_groups() const;
|
||||
bool split_unready_groups_are_empty() const;
|
||||
@@ -430,7 +430,7 @@ public:
|
||||
virtual storage_group& storage_group_for_token(dht::token) const = 0;
|
||||
virtual utils::chunked_vector<storage_group_ptr> storage_groups_for_token_range(dht::token_range tr) const = 0;
|
||||
|
||||
virtual locator::combined_load_stats table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const noexcept = 0;
|
||||
virtual locator::combined_load_stats table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const = 0;
|
||||
virtual bool all_storage_groups_split() = 0;
|
||||
virtual future<> split_all_storage_groups(tasks::task_info tablet_split_task_info) = 0;
|
||||
virtual future<> maybe_split_compaction_group_of(size_t idx) = 0;
|
||||
|
||||
@@ -96,6 +96,9 @@ public:
|
||||
// Returns the secondary index manager of the replica::table behind the
// data_dictionary handle `t`.
// The const_cast drops constness from the unwrapped table before calling
// get_index_manager(); presumably that accessor is non-const — verify.
virtual const secondary_index::secondary_index_manager& get_index_manager(data_dictionary::table t) const override {
|
||||
return const_cast<replica::table&>(unwrap(t)).get_index_manager();
|
||||
}
|
||||
// Returns the truncation time of the replica::table behind the
// data_dictionary handle `t`, forwarding to table::get_truncation_time().
// The const_cast drops constness from the unwrapped table before the call;
// presumably the accessor is non-const — verify.
virtual db_clock::time_point get_truncation_time(data_dictionary::table t) const override {
|
||||
return const_cast<replica::table&>(unwrap(t)).get_truncation_time();
|
||||
}
|
||||
// Returns the metadata object of the keyspace behind the data_dictionary
// handle `ks`, forwarding to the unwrapped keyspace's metadata().
virtual lw_shared_ptr<keyspace_metadata> get_keyspace_metadata(data_dictionary::keyspace ks) const override {
|
||||
return unwrap(ks).metadata();
|
||||
}
|
||||
|
||||
@@ -2810,26 +2810,26 @@ future<> database::drop_cache_for_keyspace_on_all_shards(sharded<database>& shar
|
||||
});
|
||||
}
|
||||
|
||||
future<> database::snapshot_table_on_all_shards(sharded<database>& sharded_db, table_id uuid, sstring tag, db::snapshot_options opts) {
|
||||
if (!opts.skip_flush) {
|
||||
future<> database::snapshot_table_on_all_shards(sharded<database>& sharded_db, table_id uuid, sstring tag, bool skip_flush) {
|
||||
if (!skip_flush) {
|
||||
co_await flush_table_on_all_shards(sharded_db, uuid);
|
||||
}
|
||||
auto table_shards = co_await get_table_on_all_shards(sharded_db, uuid);
|
||||
co_await table::snapshot_on_all_shards(sharded_db, table_shards, tag, opts);
|
||||
co_await table::snapshot_on_all_shards(sharded_db, table_shards, tag);
|
||||
}
|
||||
|
||||
future<> database::snapshot_tables_on_all_shards(sharded<database>& sharded_db, std::string_view ks_name, std::vector<sstring> table_names, sstring tag, db::snapshot_options opts) {
|
||||
return parallel_for_each(table_names, [&sharded_db, ks_name, tag = std::move(tag), opts] (auto& table_name) {
|
||||
future<> database::snapshot_tables_on_all_shards(sharded<database>& sharded_db, std::string_view ks_name, std::vector<sstring> table_names, sstring tag, bool skip_flush) {
|
||||
return parallel_for_each(table_names, [&sharded_db, ks_name, tag = std::move(tag), skip_flush] (auto& table_name) {
|
||||
auto uuid = sharded_db.local().find_uuid(ks_name, table_name);
|
||||
return snapshot_table_on_all_shards(sharded_db, uuid, tag, opts);
|
||||
return snapshot_table_on_all_shards(sharded_db, uuid, tag, skip_flush);
|
||||
});
|
||||
}
|
||||
|
||||
future<> database::snapshot_keyspace_on_all_shards(sharded<database>& sharded_db, std::string_view ks_name, sstring tag, db::snapshot_options opts) {
|
||||
future<> database::snapshot_keyspace_on_all_shards(sharded<database>& sharded_db, std::string_view ks_name, sstring tag, bool skip_flush) {
|
||||
auto& ks = sharded_db.local().find_keyspace(ks_name);
|
||||
co_await coroutine::parallel_for_each(ks.metadata()->cf_meta_data(), [&, tag = std::move(tag), opts] (const auto& pair) -> future<> {
|
||||
co_await coroutine::parallel_for_each(ks.metadata()->cf_meta_data(), [&, tag = std::move(tag), skip_flush] (const auto& pair) -> future<> {
|
||||
auto uuid = pair.second->id();
|
||||
co_await snapshot_table_on_all_shards(sharded_db, uuid, tag, opts);
|
||||
co_await snapshot_table_on_all_shards(sharded_db, uuid, tag, skip_flush);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -2951,12 +2951,7 @@ future<> database::truncate_table_on_all_shards(sharded<database>& sharded_db, s
|
||||
auto truncated_at = truncated_at_opt.value_or(db_clock::now());
|
||||
auto name = snapshot_name_opt.value_or(
|
||||
format("{:d}-{}", truncated_at.time_since_epoch().count(), cf.schema()->cf_name()));
|
||||
// Use the sstable identifier in snapshot names to allow de-duplication of sstables
|
||||
// at backup time even if they were migrated across shards or nodes and were renamed and given a new generation.
|
||||
// We hard-code that here since we have no way to pass this option to auto-snapshot and
|
||||
// it is always safe to use the sstable identifier for the sstable generation.
|
||||
auto opts = db::snapshot_options{.use_sstable_identifier = true};
|
||||
co_await table::snapshot_on_all_shards(sharded_db, table_shards, name, opts);
|
||||
co_await table::snapshot_on_all_shards(sharded_db, table_shards, name);
|
||||
}
|
||||
|
||||
co_await sharded_db.invoke_on_all([&] (database& db) {
|
||||
|
||||
@@ -1040,12 +1040,12 @@ public:
|
||||
private:
|
||||
using snapshot_file_set = foreign_ptr<std::unique_ptr<std::unordered_set<sstring>>>;
|
||||
|
||||
future<snapshot_file_set> take_snapshot(sstring jsondir, db::snapshot_options opts);
|
||||
future<snapshot_file_set> take_snapshot(sstring jsondir);
|
||||
// Writes the table schema and the manifest of all files in the snapshot directory.
|
||||
future<> finalize_snapshot(const global_table_ptr& table_shards, sstring jsondir, std::vector<snapshot_file_set> file_sets);
|
||||
static future<> seal_snapshot(sstring jsondir, std::vector<snapshot_file_set> file_sets);
|
||||
public:
|
||||
static future<> snapshot_on_all_shards(sharded<database>& sharded_db, const global_table_ptr& table_shards, sstring name, db::snapshot_options opts);
|
||||
static future<> snapshot_on_all_shards(sharded<database>& sharded_db, const global_table_ptr& table_shards, sstring name);
|
||||
|
||||
future<std::unordered_map<sstring, snapshot_details>> get_snapshot_details();
|
||||
static future<snapshot_details> get_snapshot_details(std::filesystem::path snapshot_dir, std::filesystem::path datadir);
|
||||
@@ -1133,7 +1133,7 @@ public:
|
||||
|
||||
// The tablet filter is used to not double account migrating tablets, so it's important that
|
||||
// only one of pending or leaving replica is accounted based on current migration stage.
|
||||
locator::combined_load_stats table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const noexcept;
|
||||
locator::combined_load_stats table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const;
|
||||
|
||||
const db::view::stats& get_view_stats() const {
|
||||
return _view_stats;
|
||||
@@ -2009,9 +2009,9 @@ public:
|
||||
static future<> drop_cache_for_table_on_all_shards(sharded<database>& sharded_db, table_id id);
|
||||
static future<> drop_cache_for_keyspace_on_all_shards(sharded<database>& sharded_db, std::string_view ks_name);
|
||||
|
||||
static future<> snapshot_table_on_all_shards(sharded<database>& sharded_db, table_id id, sstring tag, db::snapshot_options opts);
|
||||
static future<> snapshot_tables_on_all_shards(sharded<database>& sharded_db, std::string_view ks_name, std::vector<sstring> table_names, sstring tag, db::snapshot_options opts);
|
||||
static future<> snapshot_keyspace_on_all_shards(sharded<database>& sharded_db, std::string_view ks_name, sstring tag, db::snapshot_options opts);
|
||||
static future<> snapshot_table_on_all_shards(sharded<database>& sharded_db, table_id id, sstring tag, bool skip_flush);
|
||||
static future<> snapshot_tables_on_all_shards(sharded<database>& sharded_db, std::string_view ks_name, std::vector<sstring> table_names, sstring tag, bool skip_flush);
|
||||
static future<> snapshot_keyspace_on_all_shards(sharded<database>& sharded_db, std::string_view ks_name, sstring tag, bool skip_flush);
|
||||
|
||||
public:
|
||||
bool update_column_family(schema_ptr s);
|
||||
|
||||
@@ -215,7 +215,7 @@ private:
|
||||
output_ck_raw_values.emplace_back(bytes{});
|
||||
}
|
||||
}
|
||||
if (underlying_ck_raw_values.empty()) {
|
||||
if (pos.region() != partition_region::clustered) {
|
||||
output_ck_raw_values.push_back(bytes{});
|
||||
} else {
|
||||
output_ck_raw_values.push_back(data_value(static_cast<int8_t>(pos.get_bound_weight())).serialize_nonnull());
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
#include <seastar/coroutine/as_future.hh>
|
||||
#include <seastar/util/closeable.hh>
|
||||
#include <seastar/util/defer.hh>
|
||||
#include <seastar/json/json_elements.hh>
|
||||
|
||||
#include "dht/decorated_key.hh"
|
||||
#include "replica/database.hh"
|
||||
@@ -708,7 +709,7 @@ public:
|
||||
return *_single_sg;
|
||||
}
|
||||
|
||||
locator::combined_load_stats table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)>) const noexcept override {
|
||||
locator::combined_load_stats table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)>) const override {
|
||||
return locator::combined_load_stats{
|
||||
.table_ls = locator::table_load_stats{
|
||||
.size_in_bytes = _single_sg->live_disk_space_used(),
|
||||
@@ -874,7 +875,7 @@ public:
|
||||
return storage_group_for_id(storage_group_of(token).first);
|
||||
}
|
||||
|
||||
locator::combined_load_stats table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const noexcept override;
|
||||
locator::combined_load_stats table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const override;
|
||||
bool all_storage_groups_split() override;
|
||||
future<> split_all_storage_groups(tasks::task_info tablet_split_task_info) override;
|
||||
future<> maybe_split_compaction_group_of(size_t idx) override;
|
||||
@@ -922,7 +923,7 @@ compaction_group_ptr& storage_group::select_compaction_group(locator::tablet_ran
|
||||
return _main_cg;
|
||||
}
|
||||
|
||||
void storage_group::for_each_compaction_group(std::function<void(const compaction_group_ptr&)> action) const noexcept {
|
||||
void storage_group::for_each_compaction_group(std::function<void(const compaction_group_ptr&)> action) const {
|
||||
action(_main_cg);
|
||||
for (auto& cg : _merging_groups) {
|
||||
action(cg);
|
||||
@@ -932,7 +933,7 @@ void storage_group::for_each_compaction_group(std::function<void(const compactio
|
||||
}
|
||||
}
|
||||
|
||||
utils::small_vector<compaction_group_ptr, 3> storage_group::compaction_groups() noexcept {
|
||||
utils::small_vector<compaction_group_ptr, 3> storage_group::compaction_groups() {
|
||||
utils::small_vector<compaction_group_ptr, 3> cgs;
|
||||
for_each_compaction_group([&cgs] (const compaction_group_ptr& cg) {
|
||||
cgs.push_back(cg);
|
||||
@@ -940,7 +941,7 @@ utils::small_vector<compaction_group_ptr, 3> storage_group::compaction_groups()
|
||||
return cgs;
|
||||
}
|
||||
|
||||
utils::small_vector<const_compaction_group_ptr, 3> storage_group::compaction_groups() const noexcept {
|
||||
utils::small_vector<const_compaction_group_ptr, 3> storage_group::compaction_groups() const {
|
||||
utils::small_vector<const_compaction_group_ptr, 3> cgs;
|
||||
for_each_compaction_group([&cgs] (const compaction_group_ptr& cg) {
|
||||
cgs.push_back(cg);
|
||||
@@ -1890,7 +1891,7 @@ sstables::file_size_stats compaction_group::live_disk_space_used_full_stats() co
|
||||
return _main_sstables->get_file_size_stats() + _maintenance_sstables->get_file_size_stats();
|
||||
}
|
||||
|
||||
uint64_t storage_group::live_disk_space_used() const noexcept {
|
||||
uint64_t storage_group::live_disk_space_used() const {
|
||||
auto cgs = const_cast<storage_group&>(*this).compaction_groups();
|
||||
return std::ranges::fold_left(cgs | std::views::transform(std::mem_fn(&compaction_group::live_disk_space_used)), uint64_t(0), std::plus{});
|
||||
}
|
||||
@@ -2813,7 +2814,7 @@ void table::on_flush_timer() {
|
||||
});
|
||||
}
|
||||
|
||||
locator::combined_load_stats tablet_storage_group_manager::table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const noexcept {
|
||||
locator::combined_load_stats tablet_storage_group_manager::table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const {
|
||||
locator::table_load_stats table_stats;
|
||||
table_stats.split_ready_seq_number = _split_ready_seq_number;
|
||||
|
||||
@@ -2836,7 +2837,7 @@ locator::combined_load_stats tablet_storage_group_manager::table_load_stats(std:
|
||||
};
|
||||
}
|
||||
|
||||
locator::combined_load_stats table::table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const noexcept {
|
||||
locator::combined_load_stats table::table_load_stats(std::function<bool(const locator::tablet_map&, locator::global_tablet_id)> tablet_filter) const {
|
||||
return _sg_manager->table_load_stats(std::move(tablet_filter));
|
||||
}
|
||||
|
||||
@@ -3198,23 +3199,35 @@ db::replay_position table::highest_flushed_replay_position() const {
|
||||
return _highest_flushed_rp;
|
||||
}
|
||||
|
||||
struct manifest_json : public json::json_base {
|
||||
json::json_chunked_list<sstring> files;
|
||||
|
||||
manifest_json() {
|
||||
register_params();
|
||||
}
|
||||
manifest_json(manifest_json&& e) {
|
||||
register_params();
|
||||
files = std::move(e.files);
|
||||
}
|
||||
manifest_json& operator=(manifest_json&& e) {
|
||||
files = std::move(e.files);
|
||||
return *this;
|
||||
}
|
||||
private:
|
||||
void register_params() {
|
||||
add(&files, "files");
|
||||
}
|
||||
};
|
||||
|
||||
future<>
|
||||
table::seal_snapshot(sstring jsondir, std::vector<snapshot_file_set> file_sets) {
|
||||
std::ostringstream ss;
|
||||
int n = 0;
|
||||
ss << "{" << std::endl << "\t\"files\" : [ ";
|
||||
manifest_json manifest;
|
||||
for (const auto& fsp : file_sets) {
|
||||
for (const auto& rf : *fsp) {
|
||||
if (n++ > 0) {
|
||||
ss << ", ";
|
||||
for (auto& rf : *fsp) {
|
||||
manifest.files.push(std::move(rf));
|
||||
}
|
||||
ss << "\"" << rf << "\"";
|
||||
co_await coroutine::maybe_yield();
|
||||
}
|
||||
}
|
||||
ss << " ]" << std::endl << "}" << std::endl;
|
||||
|
||||
auto json = ss.str();
|
||||
auto streamer = json::stream_object(std::move(manifest));
|
||||
auto jsonfile = jsondir + "/manifest.json";
|
||||
|
||||
tlogger.debug("Storing manifest {}", jsonfile);
|
||||
@@ -3224,12 +3237,10 @@ table::seal_snapshot(sstring jsondir, std::vector<snapshot_file_set> file_sets)
|
||||
auto out = co_await make_file_output_stream(std::move(f));
|
||||
std::exception_ptr ex;
|
||||
try {
|
||||
co_await out.write(json.c_str(), json.size());
|
||||
co_await out.flush();
|
||||
co_await streamer(std::move(out));
|
||||
} catch (...) {
|
||||
ex = std::current_exception();
|
||||
}
|
||||
co_await out.close();
|
||||
|
||||
if (ex) {
|
||||
co_await coroutine::return_exception_ptr(std::move(ex));
|
||||
@@ -3268,7 +3279,7 @@ future<> table::write_schema_as_cql(const global_table_ptr& table_shards, sstrin
|
||||
}
|
||||
|
||||
// Runs the orchestration code on an arbitrary shard to balance the load.
|
||||
future<> table::snapshot_on_all_shards(sharded<database>& sharded_db, const global_table_ptr& table_shards, sstring name, db::snapshot_options opts) {
|
||||
future<> table::snapshot_on_all_shards(sharded<database>& sharded_db, const global_table_ptr& table_shards, sstring name) {
|
||||
auto* so = std::get_if<storage_options::local>(&table_shards->get_storage_options().value);
|
||||
if (so == nullptr) {
|
||||
throw std::runtime_error("Snapshotting non-local tables is not implemented");
|
||||
@@ -3291,7 +3302,7 @@ future<> table::snapshot_on_all_shards(sharded<database>& sharded_db, const glob
|
||||
co_await io_check([&jsondir] { return recursive_touch_directory(jsondir); });
|
||||
co_await coroutine::parallel_for_each(smp::all_cpus(), [&] (unsigned shard) -> future<> {
|
||||
file_sets.emplace_back(co_await smp::submit_to(shard, [&] {
|
||||
return table_shards->take_snapshot(jsondir, opts);
|
||||
return table_shards->take_snapshot(jsondir);
|
||||
}));
|
||||
});
|
||||
co_await io_check(sync_directory, jsondir);
|
||||
@@ -3300,22 +3311,19 @@ future<> table::snapshot_on_all_shards(sharded<database>& sharded_db, const glob
|
||||
});
|
||||
}
|
||||
|
||||
future<table::snapshot_file_set> table::take_snapshot(sstring jsondir, db::snapshot_options opts) {
|
||||
tlogger.trace("take_snapshot {}: use_sstable_identifier={}", jsondir, opts.use_sstable_identifier);
|
||||
future<table::snapshot_file_set> table::take_snapshot(sstring jsondir) {
|
||||
tlogger.trace("take_snapshot {}", jsondir);
|
||||
|
||||
auto sstable_deletion_guard = co_await get_sstable_list_permit();
|
||||
|
||||
auto tables = *_sstables->all() | std::ranges::to<std::vector<sstables::shared_sstable>>();
|
||||
auto table_names = std::make_unique<std::unordered_set<sstring>>();
|
||||
|
||||
auto& ks_name = schema()->ks_name();
|
||||
auto& cf_name = schema()->cf_name();
|
||||
co_await _sstables_manager.dir_semaphore().parallel_for_each(tables, [&, opts] (sstables::shared_sstable sstable) -> future<> {
|
||||
auto gen = co_await io_check([sstable, &dir = jsondir, opts] {
|
||||
return sstable->snapshot(dir, opts.use_sstable_identifier);
|
||||
co_await _sstables_manager.dir_semaphore().parallel_for_each(tables, [&jsondir, &table_names] (sstables::shared_sstable sstable) {
|
||||
table_names->insert(sstable->component_basename(sstables::component_type::Data));
|
||||
return io_check([sstable, &dir = jsondir] {
|
||||
return sstable->snapshot(dir);
|
||||
});
|
||||
auto fname = sstable->component_basename(ks_name, cf_name, sstable->get_version(), gen, sstable->get_format(), sstables::component_type::Data);
|
||||
table_names->insert(fname);
|
||||
});
|
||||
co_return make_foreign(std::move(table_names));
|
||||
}
|
||||
@@ -3456,7 +3464,7 @@ size_t compaction_group::memtable_count() const noexcept {
|
||||
return _memtables->size();
|
||||
}
|
||||
|
||||
size_t storage_group::memtable_count() const noexcept {
|
||||
size_t storage_group::memtable_count() const {
|
||||
return std::ranges::fold_left(compaction_groups() | std::views::transform(std::mem_fn(&compaction_group::memtable_count)), size_t(0), std::plus{});
|
||||
}
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@ format_match = re.compile(r'\s*(?:seastar::)?format\(\s*"([^"]+)"\s*,\s*(.*)\s*'
|
||||
def handle_error(message, strict=True, verbose_mode=False):
|
||||
if strict:
|
||||
print(f"[ERROR] {message}")
|
||||
exit(-1)
|
||||
exit(1)
|
||||
elif verbose_mode:
|
||||
print(f"[WARNING] {message}")
|
||||
|
||||
@@ -180,12 +180,11 @@ def get_metrics_from_file(file_name, prefix, metrics_information, verb=None, str
|
||||
groups = {}
|
||||
if clean_name in metrics_information:
|
||||
if (isinstance(metrics_information[clean_name], str) and metrics_information[clean_name] == "skip") or "skip" in metrics_information[clean_name]:
|
||||
exit(0)
|
||||
return {}
|
||||
param_mapping = metrics_information[clean_name]["params"] if clean_name in metrics_information and "params" in metrics_information[clean_name] else {}
|
||||
groups = metrics_information[clean_name]["groups"] if clean_name in metrics_information and "groups" in metrics_information[clean_name] else {}
|
||||
|
||||
metrics = {}
|
||||
multi_line = False
|
||||
names = undefined
|
||||
typ = undefined
|
||||
line_number = 0;
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
"cdc/log.cc":
|
||||
params:
|
||||
cdc_group_name: cdc
|
||||
cdc_group_name: "cdc"
|
||||
part_name;suffix: [["static_row", "total"],["clustering_row", "total"], ["map", "total"], ["set", "total"], ["list", "total"], ["udt", "total"], ["range_tombstone", "total"],["partition_delete", "total"],["row_delete", "total"], ["static_row", "failed"],["clustering_row", "failed"], ["map", "failed"], ["set", "failed"], ["list", "failed"], ["udt", "failed"], ["range_tombstone", "failed"],["partition_delete", "failed"],["row_delete", "failed"]]
|
||||
kind: ["total", "failed"]
|
||||
"db/commitlog/commitlog.cc":
|
||||
@@ -9,7 +9,7 @@
|
||||
"cfg.max_active_flushes": "cfg.max_active_flushes"
|
||||
"cql3/query_processor.cc":
|
||||
groups:
|
||||
"80": query_processor
|
||||
"80": "query_processor"
|
||||
"replica/dirty_memory_manager.cc":
|
||||
params:
|
||||
namestr: ["regular", "system"]
|
||||
@@ -19,10 +19,11 @@
|
||||
"replica/database.cc":
|
||||
params:
|
||||
"_dirty_memory_manager.throttle_threshold()": "throttle threshold"
|
||||
"seastar/apps/metrics_tester/metrics_tester.cc": skip
|
||||
"seastar/tests/unit/metrics_test.cc": skip
|
||||
"seastar/tests/unit/metrics_tester.cc": skip
|
||||
"seastar/tests/unit/prometheus_http_test.cc": skip
|
||||
"seastar/apps/metrics_tester/metrics_tester.cc": "skip"
|
||||
"seastar/tests/unit/metrics_test.cc": "skip"
|
||||
"seastar/tests/unit/metrics_tester.cc": "skip"
|
||||
"seastar/tests/unit/prometheus_http_test.cc": "skip"
|
||||
"seastar/tests/unit/prometheus_text_test.cc": "skip"
|
||||
"service/storage_proxy.cc":
|
||||
params:
|
||||
COORDINATOR_STATS_CATEGORY: "storage_proxy_coordinator"
|
||||
@@ -32,25 +33,25 @@
|
||||
_short_description_prefix: ["total_write_attempts", "write_errors", "background_replica_writes_failed", "read_repair_write_attempts"]
|
||||
_long_description_prefix: ["total number of write requests", "number of write requests that failed", "background_replica_writes_failed", "number of write operations in a read repair context"]
|
||||
_category: "storage_proxy_coordinator"
|
||||
"thrift/server.cc": skip
|
||||
"thrift/server.cc": "skip"
|
||||
"tracing/tracing.cc":
|
||||
params:
|
||||
"max_pending_trace_records + write_event_records_threshold": "max_pending_trace_records + write_event_records_threshold"
|
||||
"transport/server.cc":
|
||||
groups:
|
||||
"200": transport
|
||||
"200": "transport"
|
||||
params:
|
||||
"_config.max_request_size": "max_request_size"
|
||||
"seastar/src/net/dpdk.cc": skip
|
||||
"seastar/src/net/dpdk.cc": "skip"
|
||||
"db/hints/manager.cc":
|
||||
params:
|
||||
"group_name": ["hints_for_views_manager", "hints_manager"]
|
||||
"seastar/src/core/execution_stage.cc":
|
||||
groups:
|
||||
"100": execution_stages
|
||||
"100": "execution_stages"
|
||||
"seastar/src/core/fair_queue.cc":
|
||||
groups:
|
||||
"300": io_queue
|
||||
"300": "io_queue"
|
||||
"seastar/src/net/net.cc":
|
||||
params:
|
||||
_stats_plugin_name: ["stats_plugin_name"]
|
||||
|
||||
@@ -818,6 +818,9 @@ class std_list:
|
||||
self._node = node_header['_M_next']
|
||||
self._end = node_header['_M_next']['_M_prev']
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
def __next__(self):
|
||||
if self._node == self._end:
|
||||
raise StopIteration()
|
||||
|
||||
@@ -3,6 +3,7 @@ target_sources(service
|
||||
PRIVATE
|
||||
broadcast_tables/experimental/lang.cc
|
||||
client_state.cc
|
||||
client_routes.cc
|
||||
mapreduce_service.cc
|
||||
migration_manager.cc
|
||||
misc_services.cc
|
||||
|
||||
137
service/client_routes.cc
Normal file
137
service/client_routes.cc
Normal file
@@ -0,0 +1,137 @@
|
||||
/*
|
||||
* Copyright (C) 2025-present ScyllaDB
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
*/
|
||||
|
||||
#include "service/client_routes.hh"
|
||||
#include "cql3/query_processor.hh"
|
||||
#include "cql3/untyped_result_set.hh"
|
||||
#include "mutation/mutation.hh"
|
||||
#include "service/endpoint_lifecycle_subscriber.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
|
||||
static logging::logger crlogger("client_routes");
|
||||
|
||||
// Returns the query state used for the internal CQL statements issued from
// this file. Every timeout in its timeout_config is 10 seconds.
service::query_state& client_routes_query_state() {
    using namespace std::chrono_literals;
    const auto timeout = 10s;
    // The timeout configuration is a plain function-local static, while the
    // client state and the query state built on it are thread_local.
    static timeout_config tc{ timeout, timeout, timeout, timeout, timeout, timeout, timeout };
    static thread_local service::client_state internal_client(service::client_state::internal_tag{}, tc);
    static thread_local service::query_state internal_query(internal_client, empty_service_permit());
    return internal_query;
}
|
||||
|
||||
// Builds the mutation that deletes the row identified by `key`
// (connection_id, host_id) from the client routes system table, using `ts`
// as the timestamp passed to the query processor. Reports an internal error
// unless the statement produces exactly one mutation.
future<mutation> service::client_routes_service::make_remove_client_route_mutation(api::timestamp_type ts, const service::client_routes_service::client_route_key& key) {
    static const sstring stmt = format("DELETE FROM {}.{} WHERE connection_id = ? and host_id = ?", db::system_keyspace::NAME, db::system_keyspace::CLIENT_ROUTES);

    auto generated = co_await _qp.get_mutations_internal(stmt, client_routes_query_state(), ts, {key.connection_id, key.host_id});
    if (const auto count = generated.size(); count != 1) {
        on_internal_error(crlogger, fmt::format("expected 1 mutation got {}", count));
    }
    co_return std::move(generated[0]);
}
|
||||
|
||||
// Builds the mutation that inserts `route` (all seven columns) into the
// client routes system table, using `ts` as the timestamp passed to the query
// processor. Reports an internal error unless the statement produces exactly
// one mutation.
future<mutation> service::client_routes_service::make_update_client_route_mutation(api::timestamp_type ts, const service::client_routes_service::client_route_entry& route) {
    static const sstring stmt = format("INSERT INTO {}.{} (connection_id, host_id, address, port, tls_port, alternator_port, alternator_https_port) VALUES (?, ?, ?, ?, ?, ?, ?)", db::system_keyspace::NAME, db::system_keyspace::CLIENT_ROUTES);

    // Bound values follow the column order of the INSERT statement above.
    auto generated = co_await _qp.get_mutations_internal(stmt, client_routes_query_state(), ts, {
        route.connection_id,
        route.host_id,
        route.address,
        route.port,
        route.tls_port,
        route.alternator_port,
        route.alternator_https_port
    });
    if (const auto count = generated.size(); count != 1) {
        on_internal_error(crlogger, fmt::format("expected 1 mutation got {}", count));
    }
    co_return std::move(generated[0]);
}
|
||||
|
||||
// Reads every row of the client routes system table and returns them as
// client_route_entry values. The four port columns are read as optionals.
future<std::vector<service::client_routes_service::client_route_entry>> service::client_routes_service::get_client_routes() const {
    static const sstring query = format("SELECT * from {}.{}", db::system_keyspace::NAME, db::system_keyspace::CLIENT_ROUTES);
    auto rows = co_await _qp.execute_internal(query, cql3::query_processor::cache_internal::yes);

    std::vector<service::client_routes_service::client_route_entry> result;
    result.reserve(rows->size());
    for (const auto& row : *rows) {
        result.emplace_back(
            row.get_as<sstring>("connection_id"),
            row.get_as<utils::UUID>("host_id"),
            row.get_as<sstring>("address"),
            row.get_opt<int32_t>("port"),
            row.get_opt<int32_t>("tls_port"),
            row.get_opt<int32_t>("alternator_port"),
            row.get_opt<int32_t>("alternator_https_port")
        );
    }
    co_return result;
}
|
||||
|
||||
// Invokes the lifecycle notifier of every client_routes_service instance in
// the container with the given set of changed keys. The set is captured by
// reference, so it must stay alive until the returned future resolves.
seastar::future<> service::client_routes_service::notify_client_routes_change(const client_route_keys& client_route_keys) {
    auto notify_instance = [&client_route_keys] (service::client_routes_service& instance) {
        return instance._lifecycle_notifier.notify_client_routes_change(client_route_keys);
    };
    co_await container().invoke_on_all(std::move(notify_instance));
}
|
||||
|
||||
// Single attempt at storing `route_entries`: starts a group 0 operation,
// builds one update mutation per entry stamped with the guard's write
// timestamp, and submits them all as one write_mutations command.
seastar::future<> service::client_routes_service::set_client_routes_inner(const std::vector<service::client_routes_service::client_route_entry>& route_entries) {
    auto guard = co_await _group0_client.start_operation(_abort_source, service::raft_timeout{});

    utils::chunked_vector<canonical_mutation> updates;
    for (const auto& route : route_entries) {
        auto update = co_await make_update_client_route_mutation(guard.write_timestamp(), route);
        updates.emplace_back(std::move(update));
    }

    auto command = _group0_client.prepare_command(service::write_mutations{std::move(updates)}, guard, "insert client routes");
    co_await _group0_client.add_entry(std::move(command), std::move(guard), _abort_source);
}
|
||||
|
||||
// Single attempt at removing the routes named by `route_keys`: starts a
// group 0 operation, builds one delete mutation per key stamped with the
// guard's write timestamp, and submits them all as one write_mutations command.
seastar::future<> service::client_routes_service::delete_client_routes_inner(const std::vector<service::client_routes_service::client_route_key>& route_keys) {
    auto guard = co_await _group0_client.start_operation(_abort_source, service::raft_timeout{});

    utils::chunked_vector<canonical_mutation> removals;
    for (const auto& key : route_keys) {
        auto removal = co_await make_remove_client_route_mutation(guard.write_timestamp(), key);
        removals.emplace_back(std::move(removal));
    }

    auto command = _group0_client.prepare_command(service::write_mutations{std::move(removals)}, guard, "delete client routes");
    co_await _group0_client.add_entry(std::move(command), std::move(guard), _abort_source);
}
|
||||
|
||||
// Stores `route_entries` by running set_client_routes_inner() on shard 0
// under with_retry().
seastar::future<> service::client_routes_service::set_client_routes(const std::vector<service::client_routes_service::client_route_entry>& route_entries) {
    // `route_entries` is a const reference, so it can only be copied into the
    // closure; the copy is what the shard-0 invocation reads, independent of
    // the caller's vector.
    return container().invoke_on(0, [route_entries] (service::client_routes_service& cr) -> future<> {
        return cr.with_retry([&] {
            return cr.set_client_routes_inner(route_entries);
        });
    });
}
|
||||
|
||||
// Removes the routes named by `route_keys` by running
// delete_client_routes_inner() on shard 0 under with_retry().
seastar::future<> service::client_routes_service::delete_client_routes(const std::vector<service::client_routes_service::client_route_key>& route_keys) {
    // `route_keys` is a const reference, so it can only be copied into the
    // closure; the copy is what the shard-0 invocation reads, independent of
    // the caller's vector.
    return container().invoke_on(0, [route_keys] (service::client_routes_service& cr) -> future<> {
        return cr.with_retry([&] {
            return cr.delete_client_routes_inner(route_keys);
        });
    });
}
|
||||
|
||||
template <typename Func>
|
||||
seastar::future<> service::client_routes_service::with_retry(Func&& func) const {
|
||||
int retries = 10;
|
||||
while (true) {
|
||||
try {
|
||||
co_await func();
|
||||
} catch (const ::service::group0_concurrent_modification&) {
|
||||
crlogger.warn("Failed to set client routes due to guard conflict, retries={}", retries);
|
||||
if (retries--) {
|
||||
continue;
|
||||
}
|
||||
throw;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
88
service/client_routes.hh
Normal file
88
service/client_routes.hh
Normal file
@@ -0,0 +1,88 @@
|
||||
/*
|
||||
* Copyright (C) 2025-present ScyllaDB
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <seastar/core/abort_source.hh>
|
||||
#include <seastar/core/sharded.hh>
|
||||
|
||||
#include "gms/feature_service.hh"
|
||||
#include "mutation/mutation.hh"
|
||||
#include "service/raft/raft_group0_client.hh"
|
||||
|
||||
namespace service {
|
||||
|
||||
class endpoint_lifecycle_notifier;
|
||||
|
||||
// Sharded service for client route entries: builds and reads their rows via
// the query processor, submits changes through the group 0 client, and
// forwards change notifications to the endpoint lifecycle notifier.
// All collaborators are held by reference.
class client_routes_service : public seastar::peering_sharded_service<client_routes_service> {
public:
    client_routes_service(
        abort_source& abort_source,
        gms::feature_service& feature_service,
        service::raft_group0_client& group0_client,
        cql3::query_processor& qp,
        endpoint_lifecycle_notifier& elc_notif
    )
        : _abort_source(abort_source)
        , _feature_service(feature_service)
        , _group0_client(group0_client)
        , _qp(qp)
        , _lifecycle_notifier(elc_notif) { }

    // Identifies one route: a connection id plus a host id.
    struct client_route_key {
        sstring connection_id;
        utils::UUID host_id;

        // Orders by connection_id first; host_id breaks ties.
        bool operator<(const client_route_key& other) const {
            if (connection_id != other.connection_id) {
                return connection_id < other.connection_id;
            } else {
                return host_id < other.host_id;
            }
        }
    };
    using client_route_keys = std::set<client_route_key>;

    // A full route row: the key columns, the address and the optional ports.
    struct client_route_entry {
        sstring connection_id;
        utils::UUID host_id;
        sstring address;
        // At least one of the ports should be specified
        std::optional<int32_t> port;
        std::optional<int32_t> tls_port;
        std::optional<int32_t> alternator_port;
        std::optional<int32_t> alternator_https_port;
    };

    gms::feature_service& get_feature_service() noexcept {
        return _feature_service;
    }

    // mutations
    future<mutation> make_remove_client_route_mutation(api::timestamp_type ts, const client_route_key& key);
    future<mutation> make_update_client_route_mutation(api::timestamp_type ts, const client_route_entry& entry);
    future<std::vector<client_route_entry>> get_client_routes() const;
    seastar::future<> set_client_routes(const std::vector<client_route_entry>& route_entries);
    seastar::future<> delete_client_routes(const std::vector<client_route_key>& route_keys);

    // notifications
    seastar::future<> notify_client_routes_change(const client_route_keys& client_route_keys);

private:
    // Single-attempt bodies of set_client_routes() / delete_client_routes().
    seastar::future<> set_client_routes_inner(const std::vector<client_route_entry>& route_entries);
    seastar::future<> delete_client_routes_inner(const std::vector<client_route_key>& route_keys);

    // Repeats `func` when it fails with group0_concurrent_modification.
    template <typename Func>
    seastar::future<> with_retry(Func&& func) const;

    abort_source& _abort_source;
    gms::feature_service& _feature_service;
    service::raft_group0_client& _group0_client;
    cql3::query_processor& _qp;
    endpoint_lifecycle_notifier& _lifecycle_notifier;
};
|
||||
|
||||
}
|
||||
@@ -13,6 +13,7 @@
|
||||
#include "gms/inet_address.hh"
|
||||
#include "locator/host_id.hh"
|
||||
#include "utils/atomic_vector.hh"
|
||||
#include "service/client_routes.hh"
|
||||
|
||||
namespace service {
|
||||
|
||||
@@ -65,6 +66,7 @@ public:
|
||||
* @param endpoint the endpoint marked DOWN.
|
||||
*/
|
||||
virtual void on_down(const gms::inet_address& endpoint, locator::host_id host_id) {}
|
||||
virtual void on_client_routes_change(const client_routes_service::client_route_keys& client_route_keys) {}
|
||||
};
|
||||
|
||||
class endpoint_lifecycle_notifier {
|
||||
@@ -79,6 +81,8 @@ public:
|
||||
future<> notify_released(locator::host_id host_id);
|
||||
future<> notify_up(gms::inet_address endpoint, locator::host_id host_id);
|
||||
future<> notify_joined(gms::inet_address endpoint, locator::host_id host_id);
|
||||
|
||||
future<> notify_client_routes_change(const client_routes_service::client_route_keys& client_route_keys);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -124,8 +124,40 @@ bool should_flush_system_topology_after_applying(const mutation& mut, const data
|
||||
return false;
|
||||
}
|
||||
|
||||
future<> write_mutations_to_database(storage_proxy& proxy, gms::inet_address from, utils::chunked_vector<canonical_mutation> cms) {
|
||||
static void collect_client_routes_update(const mutation& mut, client_routes_service::client_route_keys& client_routes_update) {
|
||||
|
||||
auto s_client_routes = db::system_keyspace::client_routes();
|
||||
if (mut.column_family_id() != s_client_routes->id()) {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto pk_components = mut.decorated_key()._key.explode(*s_client_routes);
|
||||
if (pk_components.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto conn_uuid = value_cast<sstring>(utf8_type->deserialize_value(pk_components[0]));
|
||||
for (const rows_entry& re : mut.partition().clustered_rows()) {
|
||||
const auto ck_components = re.key().explode(*s_client_routes);
|
||||
if (ck_components.empty()) {
|
||||
continue;
|
||||
}
|
||||
auto host_uuid = value_cast<utils::UUID>(uuid_type->deserialize_value(ck_components[0]));
|
||||
client_routes_update.emplace(conn_uuid, host_uuid);
|
||||
}
|
||||
}
|
||||
|
||||
// Forwards the collected client route keys to the storage service, or does
// nothing when no keys were collected.
static future<> notify_client_route_change_if_needed(storage_service& storage_service, const client_routes_service::client_route_keys& client_routes_update) {
    if (client_routes_update.empty()) {
        co_return;
    }
    slogger.trace("write_mutations_to_database: notify_client_routes_change routes_update.size()={}", client_routes_update.size());
    co_await storage_service.notify_client_routes_change(client_routes_update);
}
|
||||
|
||||
future<> write_mutations_to_database(storage_service& storage_service, storage_proxy& proxy, gms::inet_address from, utils::chunked_vector<canonical_mutation> cms) {
|
||||
utils::chunked_vector<frozen_mutation_and_schema> mutations;
|
||||
client_routes_service::client_route_keys client_routes_update;
|
||||
|
||||
mutations.reserve(cms.size());
|
||||
bool need_system_topology_flush = false;
|
||||
try {
|
||||
@@ -133,7 +165,12 @@ future<> write_mutations_to_database(storage_proxy& proxy, gms::inet_address fro
|
||||
auto& tbl = proxy.local_db().find_column_family(cm.column_family_id());
|
||||
auto& s = tbl.schema();
|
||||
auto mut = co_await to_mutation_gently(cm, s);
|
||||
|
||||
need_system_topology_flush = need_system_topology_flush || should_flush_system_topology_after_applying(mut, proxy.data_dictionary());
|
||||
if (proxy.data_dictionary().has_schema(db::system_keyspace::NAME, db::system_keyspace::CLIENT_ROUTES)) {
|
||||
collect_client_routes_update(mut, client_routes_update);
|
||||
}
|
||||
|
||||
mutations.emplace_back(co_await freeze_gently(mut), s);
|
||||
}
|
||||
} catch (replica::no_such_column_family& e) {
|
||||
@@ -147,6 +184,8 @@ future<> write_mutations_to_database(storage_proxy& proxy, gms::inet_address fro
|
||||
slogger.trace("write_mutations_to_database: flushing {}.{}", db::system_keyspace::NAME, db::system_keyspace::TOPOLOGY);
|
||||
co_await proxy.get_db().local().flush(db::system_keyspace::NAME, db::system_keyspace::TOPOLOGY);
|
||||
}
|
||||
|
||||
co_await notify_client_route_change_if_needed(storage_service, client_routes_update);
|
||||
}
|
||||
|
||||
group0_state_machine::modules_to_reload group0_state_machine::get_modules_to_reload(const utils::chunked_vector<canonical_mutation>& mutations) {
|
||||
@@ -251,7 +290,7 @@ future<> group0_state_machine::merge_and_apply(group0_state_machine_merger& merg
|
||||
[&] (topology_change& chng) -> future<> {
|
||||
auto modules_to_reload = get_modules_to_reload(chng.mutations);
|
||||
auto tablet_keys = replica::get_tablet_metadata_change_hint(chng.mutations);
|
||||
co_await write_mutations_to_database(_sp, cmd.creator_addr, std::move(chng.mutations));
|
||||
co_await write_mutations_to_database(_ss, _sp, cmd.creator_addr, std::move(chng.mutations));
|
||||
co_await _ss.topology_transition({.tablets_hint = std::move(tablet_keys)});
|
||||
co_await reload_modules(std::move(modules_to_reload));
|
||||
},
|
||||
@@ -263,7 +302,7 @@ future<> group0_state_machine::merge_and_apply(group0_state_machine_merger& merg
|
||||
},
|
||||
[&] (write_mutations& muts) -> future<> {
|
||||
auto modules_to_reload = get_modules_to_reload(muts.mutations);
|
||||
co_await write_mutations_to_database(_sp, cmd.creator_addr, std::move(muts.mutations));
|
||||
co_await write_mutations_to_database(_ss, _sp, cmd.creator_addr, std::move(muts.mutations));
|
||||
co_await reload_modules(std::move(modules_to_reload));
|
||||
}
|
||||
), cmd.change);
|
||||
@@ -393,6 +432,7 @@ future<> group0_state_machine::load_snapshot(raft::snapshot_id id) {
|
||||
|
||||
future<> group0_state_machine::transfer_snapshot(raft::server_id from_id, raft::snapshot_descriptor snp) {
|
||||
try {
|
||||
co_await utils::get_local_injector().inject("block_group0_transfer_snapshot", utils::wait_for_message(300s));
|
||||
// Note that this may bring newer state than the group0 state machine raft's
|
||||
// log, so some raft entries may be double applied, but since the state
|
||||
// machine is idempotent it is not a problem.
|
||||
@@ -451,11 +491,23 @@ future<> group0_state_machine::transfer_snapshot(raft::server_id from_id, raft::
|
||||
co_await _sp.get_db().local().flush(db::system_keyspace::NAME, db::system_keyspace::TOPOLOGY);
|
||||
}
|
||||
|
||||
client_routes_service::client_route_keys client_routes_update;
|
||||
if (raft_snp) {
|
||||
if (_sp.data_dictionary().has_schema(db::system_keyspace::NAME, db::system_keyspace::CLIENT_ROUTES)) {
|
||||
auto s_client_routes = db::system_keyspace::client_routes();
|
||||
for (auto& canonical_mut : raft_snp->mutations) {
|
||||
if (canonical_mut.column_family_id() == s_client_routes->id()) {
|
||||
auto mut = co_await to_mutation_gently(canonical_mut, s_client_routes);
|
||||
slogger.trace("transfer snapshot: raft snapshot includes client_routes mutation");
|
||||
collect_client_routes_update(mut, client_routes_update);
|
||||
}
|
||||
}
|
||||
}
|
||||
co_await mutate_locally(std::move(raft_snp->mutations), _sp);
|
||||
}
|
||||
|
||||
co_await _ss.auth_cache().load_all();
|
||||
co_await notify_client_route_change_if_needed(_ss, client_routes_update);
|
||||
|
||||
co_await _sp.mutate_locally({std::move(history_mut)}, nullptr);
|
||||
} catch (const abort_requested_exception&) {
|
||||
|
||||
@@ -130,6 +130,6 @@ public:
|
||||
bool should_flush_system_topology_after_applying(const mutation& mut, const data_dictionary::database db);
|
||||
|
||||
// Used to write data to topology and other tables except schema tables.
|
||||
future<> write_mutations_to_database(storage_proxy& proxy, gms::inet_address from, utils::chunked_vector<canonical_mutation> cms);
|
||||
future<> write_mutations_to_database(storage_service& storage_service, storage_proxy& proxy, gms::inet_address from, utils::chunked_vector<canonical_mutation> cms);
|
||||
|
||||
} // end of namespace service
|
||||
|
||||
@@ -36,6 +36,7 @@
|
||||
#include <seastar/core/future-util.hh>
|
||||
#include "db/read_repair_decision.hh"
|
||||
#include "db/config.hh"
|
||||
#include "db/batchlog.hh"
|
||||
#include "db/batchlog_manager.hh"
|
||||
#include "db/hints/manager.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
@@ -4281,12 +4282,13 @@ storage_proxy::mutate_atomically_result(utils::chunked_vector<mutation> mutation
|
||||
coordinator_mutate_options _options;
|
||||
|
||||
const utils::UUID _batch_uuid;
|
||||
const db_clock::time_point _batch_write_time;
|
||||
const host_id_vector_replica_set _batchlog_endpoints;
|
||||
|
||||
public:
|
||||
context(storage_proxy & p, utils::chunked_vector<mutation>&& mutations, lw_shared_ptr<cdc::operation_result_tracker>&& cdc_tracker, db::consistency_level cl, clock_type::time_point timeout, tracing::trace_state_ptr tr_state, service_permit permit, coordinator_mutate_options options)
|
||||
: _p(p)
|
||||
, _schema(_p.local_db().find_schema(db::system_keyspace::NAME, db::system_keyspace::BATCHLOG))
|
||||
, _schema(_p.local_db().find_schema(db::system_keyspace::NAME, db::system_keyspace::BATCHLOG_V2))
|
||||
, _ermp(_p.local_db().find_column_family(_schema->id()).get_effective_replication_map())
|
||||
, _mutations(std::move(mutations))
|
||||
, _cdc_tracker(std::move(cdc_tracker))
|
||||
@@ -4297,6 +4299,7 @@ storage_proxy::mutate_atomically_result(utils::chunked_vector<mutation> mutation
|
||||
, _permit(std::move(permit))
|
||||
, _options(std::move(options))
|
||||
, _batch_uuid(utils::UUID_gen::get_time_UUID())
|
||||
, _batch_write_time(db_clock::now())
|
||||
, _batchlog_endpoints(
|
||||
[this]() -> host_id_vector_replica_set {
|
||||
auto local_addr = _p.my_host_id(*_ermp);
|
||||
@@ -4334,17 +4337,14 @@ storage_proxy::mutate_atomically_result(utils::chunked_vector<mutation> mutation
|
||||
}));
|
||||
}
|
||||
future<result<>> sync_write_to_batchlog() {
|
||||
auto m = _p.do_get_batchlog_mutation_for(_schema, _mutations, _batch_uuid, netw::messaging_service::current_version, db_clock::now());
|
||||
auto m = db::get_batchlog_mutation_for(_schema, _mutations, netw::messaging_service::current_version, _batch_write_time, _batch_uuid);
|
||||
tracing::trace(_trace_state, "Sending a batchlog write mutation");
|
||||
return send_batchlog_mutation(std::move(m));
|
||||
};
|
||||
future<> async_remove_from_batchlog() {
|
||||
// delete batch
|
||||
utils::get_local_injector().inject("storage_proxy_fail_remove_from_batchlog", [] { throw std::runtime_error("Error injection: failing remove from batchlog"); });
|
||||
auto key = partition_key::from_exploded(*_schema, {uuid_type->decompose(_batch_uuid)});
|
||||
auto now = service::client_state(service::client_state::internal_tag()).get_timestamp();
|
||||
mutation m(_schema, key);
|
||||
m.partition().apply_delete(*_schema, clustering_key_prefix::make_empty(), tombstone(now, gc_clock::now()));
|
||||
auto m = db::get_batchlog_delete_mutation(_schema, netw::messaging_service::current_version, _batch_write_time, _batch_uuid);
|
||||
|
||||
tracing::trace(_trace_state, "Sending a batchlog remove mutation");
|
||||
return send_batchlog_mutation(std::move(m), db::consistency_level::ANY).then_wrapped([] (future<result<>> f) {
|
||||
@@ -4363,6 +4363,7 @@ storage_proxy::mutate_atomically_result(utils::chunked_vector<mutation> mutation
|
||||
return _p.mutate_prepare(_mutations, _cl, db::write_type::BATCH, _trace_state, _permit, db::allow_per_partition_rate_limit::no, _options).then(utils::result_wrap([this] (unique_response_handler_vector ids) {
|
||||
return sync_write_to_batchlog().then(utils::result_wrap([this, ids = std::move(ids)] () mutable {
|
||||
tracing::trace(_trace_state, "Sending batch mutations");
|
||||
utils::get_local_injector().inject("storage_proxy_fail_send_batch", [] { throw std::runtime_error("Error injection: failing to send batch"); });
|
||||
_p.register_cdc_operation_result_tracker(ids, _cdc_tracker);
|
||||
return _p.mutate_begin(std::move(ids), _cl, _trace_state, _timeout);
|
||||
})).then(utils::result_wrap([this] {
|
||||
@@ -4398,33 +4399,6 @@ storage_proxy::mutate_atomically_result(utils::chunked_vector<mutation> mutation
|
||||
}).then_wrapped(std::move(cleanup));
|
||||
}
|
||||
|
||||
// Builds the batchlog mutation for `mutations` against the system batchlog
// table: resolves that table's schema from the local database and forwards
// all remaining arguments to do_get_batchlog_mutation_for().
mutation storage_proxy::get_batchlog_mutation_for(const utils::chunked_vector<mutation>& mutations, const utils::UUID& id, int32_t version, db_clock::time_point now) {
    return do_get_batchlog_mutation_for(
            local_db().find_schema(db::system_keyspace::NAME, db::system_keyspace::BATCHLOG),
            mutations,
            id,
            version,
            now);
}
|
||||
|
||||
// Builds a single mutation for the given batchlog `schema`, keyed by `id`.
//
// The resulting row (empty clustering key) carries three cells, all written
// with the same freshly generated timestamp:
//   - "version":    the `version` argument,
//   - "written_at": the `now` argument,
//   - "data":       every entry of `mutations`, converted to canonical_mutation
//                   and serialized back-to-back into one buffer.
mutation storage_proxy::do_get_batchlog_mutation_for(schema_ptr schema, const utils::chunked_vector<mutation>& mutations, const utils::UUID& id, int32_t version, db_clock::time_point now) {
    auto pkey = partition_key::from_singular(*schema, id);
    auto write_ts = api::new_timestamp();

    // Serialize the canonical form of each mutation into one contiguous stream.
    bytes_ostream serialized;
    {
        utils::chunked_vector<canonical_mutation> canonical(mutations.begin(), mutations.end());
        for (auto& cm : canonical) {
            ser::serialize(serialized, cm);
        }
    }
    auto payload = std::move(serialized).to_managed_bytes();

    mutation result(schema, pkey);
    const auto row_key = clustering_key_prefix::make_empty();
    result.set_cell(row_key, to_bytes("version"), version, write_ts);
    result.set_cell(row_key, to_bytes("written_at"), now, write_ts);

    // Avoid going through data_value and therefore `bytes`, as it can be large (#24809).
    auto data_column = schema->get_column_definition(to_bytes("data"));
    result.set_cell(row_key, *data_column, atomic_cell::make_live(*data_column->type, write_ts, std::move(payload)));

    return result;
}
|
||||
|
||||
template<typename Range>
|
||||
bool storage_proxy::cannot_hint(const Range& targets, db::write_type type) const {
|
||||
// if hints are disabled we "can always hint" since there's going to be no hint generated in this case
|
||||
@@ -4528,14 +4502,14 @@ future<> storage_proxy::send_hint_to_all_replicas(frozen_mutation_and_schema fm_
|
||||
}
|
||||
|
||||
future<> storage_proxy::send_batchlog_replay_to_all_replicas(utils::chunked_vector<mutation> mutations, clock_type::time_point timeout) {
|
||||
if (utils::get_local_injector().is_enabled("batch_replay_throw")) {
|
||||
throw std::runtime_error("Skipping batch replay due to batch_replay_throw injection");
|
||||
}
|
||||
utils::get_local_injector().inject("storage_proxy_fail_replay_batch", [] { throw std::runtime_error("Error injection: failing to send batch"); });
|
||||
|
||||
utils::chunked_vector<batchlog_replay_mutation> ms = mutations | std::views::transform([] (auto&& m) {
|
||||
return batchlog_replay_mutation(std::move(m));
|
||||
}) | std::ranges::to<utils::chunked_vector<batchlog_replay_mutation>>();
|
||||
|
||||
utils::get_local_injector().inject("storage_proxy_fail_replay_batch", [] { throw std::runtime_error("Error injection: failing to send batch"); });
|
||||
|
||||
return mutate_internal(std::move(ms), db::consistency_level::EACH_QUORUM, nullptr, empty_service_permit(), timeout, db::write_type::BATCH)
|
||||
.then(utils::result_into_future<result<>>);
|
||||
}
|
||||
|
||||
@@ -683,7 +683,6 @@ private:
|
||||
fencing_token caller_token, locator::host_id caller_id,
|
||||
Func&& write_func);
|
||||
|
||||
mutation do_get_batchlog_mutation_for(schema_ptr schema, const utils::chunked_vector<mutation>& mutations, const utils::UUID& id, int32_t version, db_clock::time_point now);
|
||||
future<> drain_on_shutdown();
|
||||
public:
|
||||
void update_fence_version(locator::token_metadata::version_t fence_version);
|
||||
@@ -834,8 +833,6 @@ public:
|
||||
db::consistency_level cl_for_paxos, db::consistency_level cl_for_learn,
|
||||
clock_type::time_point write_timeout, clock_type::time_point cas_timeout, bool write = true, cdc::per_request_options cdc_opts = {});
|
||||
|
||||
mutation get_batchlog_mutation_for(const utils::chunked_vector<mutation>& mutations, const utils::UUID& id, int32_t version, db_clock::time_point now);
|
||||
|
||||
future<> stop();
|
||||
future<> start_hints_manager();
|
||||
void allow_replaying_hints() noexcept;
|
||||
|
||||
@@ -205,6 +205,7 @@ storage_service::storage_service(abort_source& abort_source,
|
||||
cql3::query_processor& qp,
|
||||
sharded<qos::service_level_controller>& sl_controller,
|
||||
auth::cache& auth_cache,
|
||||
sharded<client_routes_service>& client_routes,
|
||||
topology_state_machine& topology_state_machine,
|
||||
db::view::view_building_state_machine& view_building_state_machine,
|
||||
tasks::task_manager& tm,
|
||||
@@ -224,6 +225,7 @@ storage_service::storage_service(abort_source& abort_source,
|
||||
, _snitch(snitch)
|
||||
, _sl_controller(sl_controller)
|
||||
, _auth_cache(auth_cache)
|
||||
, _client_routes(client_routes)
|
||||
, _group0(nullptr)
|
||||
, _async_gate("storage_service")
|
||||
, _node_ops_abort_thread(node_ops_abort_thread())
|
||||
@@ -1420,7 +1422,7 @@ future<> storage_service::raft_initialize_discovery_leader(const join_node_reque
|
||||
_migration_manager.local().get_group0_client().get_history_gc_duration(), "bootstrap: adding myself as the first node to the topology");
|
||||
auto mutation_creator_addr = _sys_ks.local().local_db().get_token_metadata().get_topology().my_address();
|
||||
|
||||
co_await write_mutations_to_database(_qp.proxy(), mutation_creator_addr, std::move(change.mutations));
|
||||
co_await write_mutations_to_database(*this, _qp.proxy(), mutation_creator_addr, std::move(change.mutations));
|
||||
co_await _qp.proxy().mutate_locally({history_append}, nullptr);
|
||||
}
|
||||
|
||||
@@ -6822,7 +6824,6 @@ future<std::unordered_map<sstring, sstring>> storage_service::add_repair_tablet_
|
||||
});
|
||||
}
|
||||
|
||||
auto ts = db_clock::now();
|
||||
for (const auto& token : tokens) {
|
||||
auto tid = tmap.get_tablet_id(token);
|
||||
auto& tinfo = tmap.get_tablet_info(tid);
|
||||
@@ -6836,20 +6837,6 @@ future<std::unordered_map<sstring, sstring>> storage_service::add_repair_tablet_
|
||||
tablet_mutation_builder_for_base_table(guard.write_timestamp(), table)
|
||||
.set_repair_task_info(last_token, repair_task_info, _feature_service)
|
||||
.build());
|
||||
db::system_keyspace::repair_task_entry entry{
|
||||
.task_uuid = tasks::task_id(repair_task_info.tablet_task_id.uuid()),
|
||||
.operation = db::system_keyspace::repair_task_operation::requested,
|
||||
.first_token = dht::token::to_int64(tmap.get_first_token(tid)),
|
||||
.last_token = dht::token::to_int64(tmap.get_last_token(tid)),
|
||||
.timestamp = ts,
|
||||
.table_uuid = table,
|
||||
};
|
||||
if (_feature_service.tablet_repair_tasks_table) {
|
||||
auto cmuts = co_await _sys_ks.local().get_update_repair_task_mutations(entry, guard.write_timestamp());
|
||||
for (auto& m : cmuts) {
|
||||
updates.push_back(std::move(m));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sstring reason = format("Repair tablet by API request tokens={} tablet_task_id={}", tokens, repair_task_info.tablet_task_id);
|
||||
@@ -7546,7 +7533,7 @@ future<join_node_response_result> storage_service::join_node_response_handler(jo
|
||||
&& _join_node_response_done.failed()) {
|
||||
// The topology coordinator accepted the node that was rejected before or failed while handling
|
||||
// the response. Inform the coordinator about it so it moves the node to the left state.
|
||||
throw _join_node_response_done.get_shared_future().get_exception();
|
||||
co_await coroutine::return_exception_ptr(_join_node_response_done.get_shared_future().get_exception());
|
||||
}
|
||||
|
||||
co_return join_node_response_result{};
|
||||
@@ -7717,6 +7704,9 @@ void storage_service::init_messaging_service() {
|
||||
additional_tables.push_back(db::system_keyspace::cdc_streams_state()->id());
|
||||
additional_tables.push_back(db::system_keyspace::cdc_streams_history()->id());
|
||||
}
|
||||
if (ss._feature_service.client_routes) {
|
||||
additional_tables.push_back(db::system_keyspace::client_routes()->id());
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto& table : boost::join(params.tables, additional_tables)) {
|
||||
@@ -8056,6 +8046,18 @@ future<> endpoint_lifecycle_notifier::notify_joined(gms::inet_address endpoint,
|
||||
});
|
||||
}
|
||||
|
||||
// Calls on_client_routes_change() on every registered subscriber, inside a
// seastar::async context. An exception thrown by one subscriber is logged as
// a warning and does not prevent the remaining subscribers from being called.
//
// `client_route_keys` is captured by reference, so the caller must keep it
// alive until the returned future resolves.
future<> endpoint_lifecycle_notifier::notify_client_routes_change(const client_routes_service::client_route_keys& client_route_keys) {
    auto notify_one = [&client_route_keys] (endpoint_lifecycle_subscriber* sub) {
        try {
            sub->on_client_routes_change(client_route_keys);
        } catch (...) {
            slogger.warn("Client routes notification failed: {}", std::current_exception());
        }
    };

    co_await seastar::async([this, &notify_one] {
        _subscribers.thread_for_each(notify_one);
    });
}
|
||||
|
||||
future<> storage_service::notify_joined(inet_address endpoint, locator::host_id hid) {
|
||||
co_await utils::get_local_injector().inject(
|
||||
"storage_service_notify_joined_sleep", std::chrono::milliseconds{500});
|
||||
@@ -8080,6 +8082,10 @@ future<> storage_service::notify_cql_change(inet_address endpoint, locator::host
|
||||
}
|
||||
}
|
||||
|
||||
// Forwards a client-routes change notification to this shard's
// client_routes_service instance and waits for it to complete.
future<> storage_service::notify_client_routes_change(const client_routes_service::client_route_keys& client_route_keys) {
    auto& local_routes = _client_routes.local();
    co_await local_routes.notify_client_routes_change(client_route_keys);
}
|
||||
|
||||
// Returns true when `node` is recorded in _normal_state_handled_on_boot.
bool storage_service::is_normal_state_handled_on_boot(locator::host_id node) {
    const bool handled = _normal_state_handled_on_boot.count(node) != 0;
    return handled;
}
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#include "gms/endpoint_state.hh"
|
||||
#include "gms/i_endpoint_state_change_subscriber.hh"
|
||||
#include "schema/schema_fwd.hh"
|
||||
#include "service/client_routes.hh"
|
||||
#include "service/endpoint_lifecycle_subscriber.hh"
|
||||
#include "service/qos/service_level_controller.hh"
|
||||
#include "service/topology_guard.hh"
|
||||
@@ -202,6 +203,7 @@ private:
|
||||
sharded<locator::snitch_ptr>& _snitch;
|
||||
sharded<qos::service_level_controller>& _sl_controller;
|
||||
auth::cache& _auth_cache;
|
||||
sharded<client_routes_service>& _client_routes;
|
||||
|
||||
// Engaged on shard 0 before `join_cluster`.
|
||||
service::raft_group0* _group0;
|
||||
@@ -269,6 +271,7 @@ public:
|
||||
cql3::query_processor& qp,
|
||||
sharded<qos::service_level_controller>& sl_controller,
|
||||
auth::cache& auth_cache,
|
||||
sharded<client_routes_service>& _client_routes,
|
||||
topology_state_machine& topology_state_machine,
|
||||
db::view::view_building_state_machine& view_building_state_machine,
|
||||
tasks::task_manager& tm,
|
||||
@@ -1138,6 +1141,8 @@ public:
|
||||
future<std::vector<std::byte>> train_dict(utils::chunked_vector<temporary_buffer<char>> sample);
|
||||
future<> publish_new_sstable_dict(table_id, std::span<const std::byte>, service::raft_group0_client&);
|
||||
void set_train_dict_callback(decltype(_train_dict));
|
||||
seastar::future<> notify_client_routes_change(const client_routes_service::client_route_keys& client_route_keys);
|
||||
|
||||
|
||||
friend class join_node_rpc_handshaker;
|
||||
friend class node_ops::node_ops_virtual_task;
|
||||
|
||||
@@ -136,17 +136,6 @@ db::tablet_options combine_tablet_options(R&& opts) {
|
||||
return combined_opts;
|
||||
}
|
||||
|
||||
static std::unordered_set<locator::tablet_id> split_string_to_tablet_id(std::string_view s, char delimiter) {
|
||||
auto tokens_view = s | std::views::split(delimiter)
|
||||
| std::views::transform([](auto&& range) {
|
||||
return std::string_view(&*range.begin(), std::ranges::distance(range));
|
||||
})
|
||||
| std::views::transform([](std::string_view sv) {
|
||||
return locator::tablet_id(std::stoul(std::string(sv)));
|
||||
});
|
||||
return std::unordered_set<locator::tablet_id>{tokens_view.begin(), tokens_view.end()};
|
||||
}
|
||||
|
||||
// Used to compare different migration choices in regard to impact on load imbalance.
|
||||
// There is a total order on migration_badness such that better migrations are ordered before worse ones.
|
||||
struct migration_badness {
|
||||
@@ -904,8 +893,6 @@ public:
|
||||
co_await coroutine::maybe_yield();
|
||||
auto& config = tmap.repair_scheduler_config();
|
||||
auto now = db_clock::now();
|
||||
auto skip = utils::get_local_injector().inject_parameter<std::string_view>("tablet_repair_skip_sched");
|
||||
auto skip_tablets = skip ? split_string_to_tablet_id(*skip, ',') : std::unordered_set<locator::tablet_id>();
|
||||
co_await tmap.for_each_tablet([&] (locator::tablet_id id, const locator::tablet_info& info) -> future<> {
|
||||
auto gid = locator::global_tablet_id{table, id};
|
||||
// Skip tablet that is in transitions.
|
||||
@@ -926,11 +913,6 @@ public:
|
||||
co_return;
|
||||
}
|
||||
|
||||
if (skip_tablets.contains(id)) {
|
||||
lblogger.debug("Skipped tablet repair for tablet={} by error injector", gid);
|
||||
co_return;
|
||||
}
|
||||
|
||||
// Avoid rescheduling a failed tablet repair in a loop
|
||||
// TODO: Allow user to config
|
||||
const auto min_reschedule_time = std::chrono::seconds(5);
|
||||
|
||||
@@ -10,7 +10,6 @@
|
||||
#include "replica/database.hh"
|
||||
#include "service/migration_manager.hh"
|
||||
#include "service/storage_service.hh"
|
||||
#include "repair/row_level.hh"
|
||||
#include "service/task_manager_module.hh"
|
||||
#include "tasks/task_handler.hh"
|
||||
#include "tasks/virtual_task_hint.hh"
|
||||
@@ -110,16 +109,6 @@ future<std::optional<tasks::virtual_task_hint>> tablet_virtual_task::contains(ta
|
||||
tid = tmap.next_tablet(*tid);
|
||||
}
|
||||
}
|
||||
|
||||
// Check if the task id is present in the repair task table
|
||||
auto progress = co_await _ss._repair.local().get_tablet_repair_task_progress(task_id);
|
||||
if (progress && progress->requested > 0) {
|
||||
co_return tasks::virtual_task_hint{
|
||||
.table_id = progress->table_uuid,
|
||||
.task_type = locator::tablet_task_type::user_repair,
|
||||
.tablet_id = std::nullopt,
|
||||
};
|
||||
}
|
||||
co_return std::nullopt;
|
||||
}
|
||||
|
||||
@@ -254,20 +243,7 @@ future<std::optional<status_helper>> tablet_virtual_task::get_status_helper(task
|
||||
size_t sched_nr = 0;
|
||||
auto tmptr = _ss.get_token_metadata_ptr();
|
||||
auto& tmap = tmptr->tablets().get_tablet_map(table);
|
||||
bool repair_task_finished = false;
|
||||
bool repair_task_pending = false;
|
||||
if (is_repair_task(task_type)) {
|
||||
auto progress = co_await _ss._repair.local().get_tablet_repair_task_progress(id);
|
||||
if (progress) {
|
||||
res.status.progress.completed = progress->finished;
|
||||
res.status.progress.total = progress->requested;
|
||||
res.status.progress_units = "tablets";
|
||||
if (progress->requested > 0 && progress->requested == progress->finished) {
|
||||
repair_task_finished = true;
|
||||
} if (progress->requested > 0 && progress->requested > progress->finished) {
|
||||
repair_task_pending = true;
|
||||
}
|
||||
}
|
||||
co_await tmap.for_each_tablet([&] (locator::tablet_id tid, const locator::tablet_info& info) {
|
||||
auto& task_info = info.repair_task_info;
|
||||
if (task_info.tablet_task_id.uuid() == id.uuid()) {
|
||||
@@ -299,17 +275,7 @@ future<std::optional<status_helper>> tablet_virtual_task::get_status_helper(task
|
||||
res.status.state = sched_nr == 0 ? tasks::task_manager::task_state::created : tasks::task_manager::task_state::running;
|
||||
co_return res;
|
||||
}
|
||||
|
||||
if (repair_task_pending) {
|
||||
// When repair_task_pending is true, the res.tablets will be empty iff the request is aborted by user.
|
||||
res.status.state = res.tablets.empty() ? tasks::task_manager::task_state::failed : tasks::task_manager::task_state::running;
|
||||
co_return res;
|
||||
}
|
||||
if (repair_task_finished) {
|
||||
res.status.state = tasks::task_manager::task_state::done;
|
||||
co_return res;
|
||||
}
|
||||
|
||||
// FIXME: Show finished tasks.
|
||||
co_return std::nullopt;
|
||||
}
|
||||
|
||||
|
||||
@@ -1205,8 +1205,6 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
std::unordered_map<locator::tablet_transition_stage, background_action_holder> barriers;
|
||||
// Record the repair_time returned by the repair_tablet rpc call
|
||||
db_clock::time_point repair_time;
|
||||
// Record the repair task update mutations
|
||||
utils::chunked_vector<canonical_mutation> repair_task_updates;
|
||||
service::session_id session_id;
|
||||
};
|
||||
|
||||
@@ -1739,14 +1737,6 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
}
|
||||
dst = dst_opt.value().host;
|
||||
}
|
||||
// Update repair task
|
||||
db::system_keyspace::repair_task_entry entry{
|
||||
.task_uuid = tasks::task_id(tinfo.repair_task_info.tablet_task_id.uuid()),
|
||||
.operation = db::system_keyspace::repair_task_operation::finished,
|
||||
.first_token = dht::token::to_int64(tmap.get_first_token(gid.tablet)),
|
||||
.last_token = dht::token::to_int64(tmap.get_last_token(gid.tablet)),
|
||||
.table_uuid = gid.table,
|
||||
};
|
||||
rtlogger.info("Initiating tablet repair host={} tablet={}", dst, gid);
|
||||
auto session_id = utils::get_local_injector().enter("handle_tablet_migration_repair_random_session") ?
|
||||
service::session_id::create_random_id() : trinfo->session_id;
|
||||
@@ -1755,10 +1745,6 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
auto duration = std::chrono::duration<float>(db_clock::now() - sched_time);
|
||||
auto& tablet_state = _tablets[tablet];
|
||||
tablet_state.repair_time = db_clock::from_time_t(gc_clock::to_time_t(res.repair_time));
|
||||
if (_feature_service.tablet_repair_tasks_table) {
|
||||
entry.timestamp = db_clock::now();
|
||||
tablet_state.repair_task_updates = co_await _sys_ks.get_update_repair_task_mutations(entry, api::new_timestamp());
|
||||
}
|
||||
rtlogger.info("Finished tablet repair host={} tablet={} duration={} repair_time={}",
|
||||
dst, tablet, duration, res.repair_time);
|
||||
})) {
|
||||
@@ -1777,9 +1763,6 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
.set_stage(last_token, locator::tablet_transition_stage::end_repair)
|
||||
.del_repair_task_info(last_token, _feature_service)
|
||||
.del_session(last_token);
|
||||
for (auto& m : tablet_state.repair_task_updates) {
|
||||
updates.push_back(std::move(m));
|
||||
}
|
||||
// Skip update repair time in case hosts filter or dcs filter is set.
|
||||
if (valid && is_filter_off) {
|
||||
auto sched_time = tinfo.repair_task_info.sched_time;
|
||||
|
||||
@@ -27,7 +27,6 @@ enum class component_type {
|
||||
TemporaryTOC,
|
||||
TemporaryStatistics,
|
||||
Scylla,
|
||||
TemporaryScylla,
|
||||
Rows,
|
||||
Partitions,
|
||||
TemporaryHashes,
|
||||
@@ -77,8 +76,6 @@ struct fmt::formatter<sstables::component_type> : fmt::formatter<string_view> {
|
||||
return formatter<string_view>::format("TemporaryStatistics", ctx);
|
||||
case Scylla:
|
||||
return formatter<string_view>::format("Scylla", ctx);
|
||||
case TemporaryScylla:
|
||||
return formatter<string_view>::format("TemporaryScylla", ctx);
|
||||
case Partitions:
|
||||
return formatter<string_view>::format("Partitions", ctx);
|
||||
case Rows:
|
||||
|
||||
@@ -632,10 +632,6 @@ private:
|
||||
std::unique_ptr<file_writer> close_writer(std::unique_ptr<file_writer>& w);
|
||||
|
||||
void close_data_writer();
|
||||
void close_index_writer();
|
||||
void close_rows_writer();
|
||||
void close_partitions_writer();
|
||||
|
||||
void ensure_tombstone_is_written() {
|
||||
if (!_tombstone_written) {
|
||||
consume(tombstone());
|
||||
@@ -948,16 +944,17 @@ void writer::init_file_writers() {
|
||||
_sst._schema->get_compressor_params(),
|
||||
std::move(compressor)), _sst.get_filename());
|
||||
}
|
||||
|
||||
if (_sst.has_component(component_type::Index)) {
|
||||
out = _sst._storage->make_data_or_index_sink(_sst, component_type::Index).get();
|
||||
_index_writer = std::make_unique<crc32_digest_file_writer>(std::move(out), _sst.sstable_buffer_size, _sst.index_filename());
|
||||
_index_writer = std::make_unique<file_writer>(output_stream<char>(std::move(out)), _sst.index_filename());
|
||||
}
|
||||
if (_sst.has_component(component_type::Partitions) && _sst.has_component(component_type::Rows)) {
|
||||
out = _sst._storage->make_data_or_index_sink(_sst, component_type::Rows).get();
|
||||
_rows_writer = std::make_unique<crc32_digest_file_writer>(std::move(out), _sst.sstable_buffer_size, component_name(_sst, component_type::Rows));
|
||||
_rows_writer = std::make_unique<file_writer>(output_stream<char>(std::move(out)), component_name(_sst, component_type::Rows));
|
||||
_bti_row_index_writer = trie::bti_row_index_writer(*_rows_writer);
|
||||
out = _sst._storage->make_data_or_index_sink(_sst, component_type::Partitions).get();
|
||||
_partitions_writer = std::make_unique<crc32_digest_file_writer>(std::move(out), _sst.sstable_buffer_size, component_name(_sst, component_type::Partitions));
|
||||
_partitions_writer = std::make_unique<file_writer>(output_stream<char>(std::move(out)), component_name(_sst, component_type::Partitions));
|
||||
_bti_partition_index_writer = trie::bti_partition_index_writer(*_partitions_writer);
|
||||
}
|
||||
if (_delayed_filter) {
|
||||
@@ -985,41 +982,6 @@ void writer::close_data_writer() {
|
||||
}
|
||||
}
|
||||
|
||||
void writer::close_index_writer() {
|
||||
if (_index_writer) {
|
||||
auto writer = close_writer(_index_writer);
|
||||
auto chksum_wr = static_cast<crc32_digest_file_writer*>(writer.get());
|
||||
_sst.get_components_digests().index_digest = chksum_wr->full_checksum();
|
||||
}
|
||||
}
|
||||
|
||||
void writer::close_partitions_writer() {
|
||||
if (_partitions_writer) {
|
||||
_sst._partitions_db_footer = std::move(*_bti_partition_index_writer).finish(
|
||||
_sst.get_version(),
|
||||
_first_key.value(),
|
||||
_last_key.value());
|
||||
auto writer = close_writer(_partitions_writer);
|
||||
auto chksum_wr = static_cast<crc32_digest_file_writer*>(writer.get());
|
||||
_sst.get_components_digests().partitions_digest = chksum_wr->full_checksum();
|
||||
}
|
||||
}
|
||||
|
||||
void writer::close_rows_writer() {
|
||||
if (_rows_writer) {
|
||||
// Append some garbage padding to the file just to ensure that it's never empty.
|
||||
// (Otherwise it would be empty if the sstable contains only small partitions).
|
||||
// This is a hack to work around some bad interactions between zero-sized files
|
||||
// and object storage. (It seems that e.g. minio considers a zero-sized file
|
||||
// upload to be a no-op, which breaks some assumptions).
|
||||
uint32_t garbage = seastar::cpu_to_be(0x13371337);
|
||||
_rows_writer->write(reinterpret_cast<const char*>(&garbage), sizeof(garbage));
|
||||
auto writer = close_writer(_rows_writer);
|
||||
auto chksum_wr = static_cast<crc32_digest_file_writer*>(writer.get());
|
||||
_sst.get_components_digests().rows_digest = chksum_wr->full_checksum();
|
||||
}
|
||||
}
|
||||
|
||||
void writer::consume_new_partition(const dht::decorated_key& dk) {
|
||||
_c_stats.start_offset = _data_writer->offset();
|
||||
_prev_row_start = _data_writer->offset();
|
||||
@@ -1668,10 +1630,27 @@ void writer::consume_end_of_stream() {
|
||||
_collector.add_compression_ratio(_sst._components->compression.compressed_file_length(), _sst._components->compression.uncompressed_file_length());
|
||||
}
|
||||
|
||||
close_index_writer();
|
||||
if (_index_writer) {
|
||||
close_writer(_index_writer);
|
||||
}
|
||||
|
||||
close_partitions_writer();
|
||||
close_rows_writer();
|
||||
if (_partitions_writer) {
|
||||
_sst._partitions_db_footer = std::move(*_bti_partition_index_writer).finish(
|
||||
_sst.get_version(),
|
||||
_first_key.value(),
|
||||
_last_key.value());
|
||||
close_writer(_partitions_writer);
|
||||
}
|
||||
if (_rows_writer) {
|
||||
// Append some garbage padding to the file just to ensure that it's never empty.
|
||||
// (Otherwise it would be empty if the sstable contains only small partitions).
|
||||
// This is a hack to work around some bad interactions between zero-sized files
|
||||
// and object storage. (It seems that e.g. minio considers a zero-sized file
|
||||
// upload to be a no-op, which breaks some assumptions).
|
||||
uint32_t garbage = seastar::cpu_to_be(0x13371337);
|
||||
_rows_writer->write(reinterpret_cast<const char*>(&garbage), sizeof(garbage));
|
||||
close_writer(_rows_writer);
|
||||
}
|
||||
|
||||
if (_hashes_writer) {
|
||||
close_writer(_hashes_writer);
|
||||
|
||||
@@ -44,7 +44,6 @@ sstable_version_constants::component_map_t sstable_version_constants::create_com
|
||||
{ component_type::Filter, "Filter.db" },
|
||||
{ component_type::Statistics, "Statistics.db" },
|
||||
{ component_type::Scylla, "Scylla.db" },
|
||||
{ component_type::TemporaryScylla, "Scylla.db.tmp" },
|
||||
{ component_type::TemporaryTOC, TEMPORARY_TOC_SUFFIX },
|
||||
{ component_type::TemporaryStatistics, "Statistics.db.tmp" }
|
||||
};
|
||||
|
||||
@@ -956,22 +956,16 @@ future<file_writer> sstable::make_component_file_writer(component_type c, file_o
|
||||
});
|
||||
}
|
||||
|
||||
// Opens a sink for component `c` through the sstable's storage and, once the
// sink is available, wraps it in a crc32_digest_file_writer that uses
// sstable_buffer_size and the component's name.
future<std::unique_ptr<crc32_digest_file_writer>> sstable::make_digests_component_file_writer(component_type c, file_output_stream_options options, open_flags oflags) noexcept {
    auto sink_ready = _storage->make_component_sink(*this, c, oflags, std::move(options));
    return sink_ready.then([this, name = component_name(*this, c)] (data_sink sink) mutable {
        return std::make_unique<crc32_digest_file_writer>(std::move(sink), sstable_buffer_size, name);
    });
}
|
||||
|
||||
// Records `origin` on the sstable, generates the TOC, and then asks the
// storage backend to open this sstable.
void sstable::open_sstable(const sstring& origin) {
    this->_origin = origin;
    this->generate_toc();
    this->_storage->open(*this);
}
|
||||
|
||||
void sstable::write_toc(std::unique_ptr<crc32_digest_file_writer> w) {
|
||||
void sstable::write_toc(file_writer w) {
|
||||
sstlog.debug("Writing TOC file {} ", toc_filename());
|
||||
|
||||
do_write_simple(*w, [&] (version_types v, file_writer& w) {
|
||||
do_write_simple(std::move(w), [&] (version_types v, file_writer& w) {
|
||||
for (auto&& key : _recognized_components) {
|
||||
// new line character is appended to the end of each component name.
|
||||
auto value = sstable_version_constants::get_component_map(v).at(key) + "\n";
|
||||
@@ -979,8 +973,6 @@ void sstable::write_toc(std::unique_ptr<crc32_digest_file_writer> w) {
|
||||
write(v, w, b);
|
||||
}
|
||||
});
|
||||
|
||||
_components_digests.toc_digest = w->full_checksum();
|
||||
}
|
||||
|
||||
void sstable::write_crc(const checksum& c) {
|
||||
@@ -997,7 +989,6 @@ void sstable::write_digest(uint32_t full_checksum) {
|
||||
auto digest = to_sstring<bytes>(full_checksum);
|
||||
write(v, w, digest);
|
||||
}, buffer_size);
|
||||
_components_digests.data_digest = full_checksum;
|
||||
}
|
||||
|
||||
thread_local std::array<std::vector<int>, downsampling::BASE_SAMPLING_LEVEL> downsampling::_sample_pattern_cache;
|
||||
@@ -1054,7 +1045,7 @@ future<> sstable::read_simple(T& component) {
|
||||
});
|
||||
}
|
||||
|
||||
void sstable::do_write_simple(file_writer& writer,
|
||||
void sstable::do_write_simple(file_writer&& writer,
|
||||
noncopyable_function<void (version_types, file_writer&)> write_component) {
|
||||
write_component(_version, writer);
|
||||
_metadata_size_on_disk += writer.offset();
|
||||
@@ -1069,7 +1060,7 @@ void sstable::do_write_simple(component_type type,
|
||||
file_output_stream_options options;
|
||||
options.buffer_size = buffer_size;
|
||||
auto w = make_component_file_writer(type, std::move(options)).get();
|
||||
do_write_simple(w, std::move(write_component));
|
||||
do_write_simple(std::move(w), std::move(write_component));
|
||||
}
|
||||
|
||||
template <component_type Type, typename T>
|
||||
@@ -1079,30 +1070,10 @@ void sstable::write_simple(const T& component) {
|
||||
}, sstable_buffer_size);
|
||||
}
|
||||
|
||||
uint32_t sstable::do_write_simple_with_digest(component_type type,
|
||||
noncopyable_function<void (version_types version, file_writer& writer)> write_component, unsigned buffer_size) {
|
||||
auto file_path = filename(type);
|
||||
sstlog.debug("Writing {} file {}", sstable_version_constants::get_component_map(_version).at(type), file_path);
|
||||
|
||||
file_output_stream_options options;
|
||||
options.buffer_size = buffer_size;
|
||||
auto w = make_digests_component_file_writer(type, std::move(options)).get();
|
||||
do_write_simple(*w, std::move(write_component));
|
||||
return w->full_checksum();
|
||||
}
|
||||
|
||||
template <component_type Type, typename T>
|
||||
uint32_t sstable::write_simple_with_digest(const T& component) {
|
||||
return do_write_simple_with_digest(Type, [&component] (version_types v, file_writer& w) {
|
||||
write(v, w, component);
|
||||
}, sstable_buffer_size);
|
||||
}
|
||||
|
||||
template future<> sstable::read_simple<component_type::Filter>(sstables::filter& f);
|
||||
template void sstable::write_simple<component_type::Filter>(const sstables::filter& f);
|
||||
|
||||
template void sstable::write_simple<component_type::Summary>(const sstables::summary_ka&);
|
||||
template uint32_t sstable::write_simple_with_digest<component_type::Summary>(const sstables::summary_ka&);
|
||||
|
||||
future<> sstable::read_compression() {
|
||||
// FIXME: If there is no compression, we should expect a CRC file to be present.
|
||||
@@ -1121,8 +1092,7 @@ void sstable::write_compression() {
|
||||
return;
|
||||
}
|
||||
|
||||
uint32_t digest = write_simple_with_digest<component_type::CompressionInfo>(_components->compression);
|
||||
_components_digests.compression_digest = digest;
|
||||
write_simple<component_type::CompressionInfo>(_components->compression);
|
||||
}
|
||||
|
||||
void sstable::validate_partitioner() {
|
||||
@@ -1347,8 +1317,7 @@ future<> sstable::read_partitions_db_footer() {
|
||||
}
|
||||
|
||||
void sstable::write_statistics() {
|
||||
auto digest = write_simple_with_digest<component_type::Statistics>(_components->statistics);
|
||||
_components_digests.statistics_digest = digest;
|
||||
write_simple<component_type::Statistics>(_components->statistics);
|
||||
}
|
||||
|
||||
void sstable::mark_as_being_repaired(const service::session_id& id) {
|
||||
@@ -1373,23 +1342,10 @@ void sstable::rewrite_statistics() {
|
||||
|
||||
file_output_stream_options options;
|
||||
options.buffer_size = sstable_buffer_size;
|
||||
auto w = make_digests_component_file_writer(component_type::TemporaryStatistics, std::move(options),
|
||||
auto w = make_component_file_writer(component_type::TemporaryStatistics, std::move(options),
|
||||
open_flags::wo | open_flags::create | open_flags::truncate).get();
|
||||
write(_version, *w, _components->statistics);
|
||||
w->close();
|
||||
|
||||
// When rewriting statistics, we also need to update the scylla component
|
||||
// because it contains the digest of the statistics component.
|
||||
if (has_scylla_component()) {
|
||||
_components_digests.statistics_digest = w->full_checksum();
|
||||
_components->scylla_metadata->data.set<scylla_metadata_type::ComponentsDigests>(components_digests{_components_digests});
|
||||
sstlog.debug("Rewriting scylla component of sstable {}", get_filename());
|
||||
write_simple<component_type::TemporaryScylla>(*_components->scylla_metadata);
|
||||
|
||||
// rename() guarantees atomicity when renaming a file into place.
|
||||
sstable_write_io_check(rename_file, fmt::to_string(filename(component_type::TemporaryScylla)), fmt::to_string(filename(component_type::Scylla))).get();
|
||||
}
|
||||
|
||||
write(_version, w, _components->statistics);
|
||||
w.close();
|
||||
// rename() guarantees atomicity when renaming a file into place.
|
||||
sstable_write_io_check(rename_file, fmt::to_string(filename(component_type::TemporaryStatistics)), fmt::to_string(filename(component_type::Statistics))).get();
|
||||
}
|
||||
@@ -1583,8 +1539,7 @@ void sstable::write_filter() {
|
||||
|
||||
auto&& bs = f->bits();
|
||||
auto filter_ref = sstables::filter_ref(f->num_hashes(), bs.get_storage());
|
||||
uint32_t digest = write_simple_with_digest<component_type::Filter>(filter_ref);
|
||||
_components_digests.filter_digest = digest;
|
||||
write_simple<component_type::Filter>(filter_ref);
|
||||
}
|
||||
|
||||
void sstable::maybe_rebuild_filter_from_index(uint64_t num_partitions) {
|
||||
@@ -2043,8 +1998,6 @@ sstable::read_scylla_metadata() noexcept {
|
||||
}
|
||||
return read_simple<component_type::Scylla>(*_components->scylla_metadata).then([this] {
|
||||
_features = _components->scylla_metadata->get_features();
|
||||
_components_digests = _components->scylla_metadata->get_components_digests();
|
||||
_components->digest = _components_digests.data_digest;
|
||||
});
|
||||
});
|
||||
}
|
||||
@@ -2117,14 +2070,11 @@ sstable::write_scylla_metadata(shard_id shard, struct run_identifier identifier,
|
||||
}
|
||||
|
||||
sstable_id sid;
|
||||
// Force a random sstable_id for testing purposes
|
||||
bool random_sstable_identifier = utils::get_local_injector().is_enabled("random_sstable_identifier");
|
||||
if (!random_sstable_identifier && generation().is_uuid_based()) {
|
||||
if (generation().is_uuid_based()) {
|
||||
sid = sstable_id(generation().as_uuid());
|
||||
} else {
|
||||
sid = sstable_id(utils::UUID_gen::get_time_UUID());
|
||||
auto msg = random_sstable_identifier ? "forced random sstable_id" : "has numerical generation";
|
||||
sstlog.info("SSTable {} {}. SSTable identifier in scylla_metadata set to {}", get_filename(), msg, sid);
|
||||
sstlog.info("SSTable {} has numerical generation. SSTable identifier in scylla_metadata set to {}", get_filename(), sid);
|
||||
}
|
||||
_components->scylla_metadata->data.set<scylla_metadata_type::SSTableIdentifier>(scylla_metadata::sstable_identifier{sid});
|
||||
|
||||
@@ -2137,7 +2087,6 @@ sstable::write_scylla_metadata(shard_id shard, struct run_identifier identifier,
|
||||
sstable_schema.columns.elements.push_back(sstable_column_description{to_sstable_column_kind(col.kind), {col.name()}, {to_bytes(col.type->name())}});
|
||||
}
|
||||
_components->scylla_metadata->data.set<scylla_metadata_type::Schema>(std::move(sstable_schema));
|
||||
_components->scylla_metadata->data.set<scylla_metadata_type::ComponentsDigests>(components_digests(_components_digests));
|
||||
|
||||
write_simple<component_type::Scylla>(*_components->scylla_metadata);
|
||||
}
|
||||
@@ -2538,11 +2487,8 @@ std::vector<std::pair<component_type, sstring>> sstable::all_components() const
|
||||
return all;
|
||||
}
|
||||
|
||||
future<generation_type> sstable::snapshot(const sstring& dir, bool use_sstable_identifier) const {
|
||||
// Use the sstable identifier UUID if available to enable global de-duplication of sstables in backup.
|
||||
generation_type gen = (use_sstable_identifier && _sstable_identifier) ? generation_type(_sstable_identifier->uuid()) : _generation;
|
||||
co_await _storage->snapshot(*this, dir, storage::absolute_path::yes, gen);
|
||||
co_return gen;
|
||||
future<> sstable::snapshot(const sstring& dir) const {
|
||||
return _storage->snapshot(*this, dir, storage::absolute_path::yes);
|
||||
}
|
||||
|
||||
future<> sstable::change_state(sstable_state to, delayed_commit_changes* delay_commit) {
|
||||
@@ -3124,31 +3070,6 @@ void sstable::set_sstable_level(uint32_t new_level) {
|
||||
s.sstable_level = new_level;
|
||||
}
|
||||
|
||||
std::optional<uint32_t> sstable::get_component_digest(component_type c) const {
|
||||
switch (c) {
|
||||
case component_type::Index:
|
||||
return _components_digests.index_digest;
|
||||
case component_type::Summary:
|
||||
return _components_digests.summary_digest;
|
||||
case component_type::TOC:
|
||||
return _components_digests.toc_digest;
|
||||
case component_type::CompressionInfo:
|
||||
return _components_digests.compression_digest;
|
||||
case component_type::Filter:
|
||||
return _components_digests.filter_digest;
|
||||
case component_type::Partitions:
|
||||
return _components_digests.partitions_digest;
|
||||
case component_type::Rows:
|
||||
return _components_digests.rows_digest;
|
||||
case component_type::Data:
|
||||
return _components_digests.data_digest;
|
||||
case component_type::Statistics:
|
||||
return _components_digests.statistics_digest;
|
||||
default:
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
|
||||
future<> sstable::mutate_sstable_level(uint32_t new_level) {
|
||||
if (!has_component(component_type::Statistics)) {
|
||||
return make_ready_future<>();
|
||||
|
||||
@@ -9,7 +9,6 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "sstables/writer.hh"
|
||||
#include "version.hh"
|
||||
#include "shared_sstable.hh"
|
||||
#include "open_info.hh"
|
||||
@@ -397,10 +396,6 @@ public:
|
||||
return _version;
|
||||
}
|
||||
|
||||
format_types get_format() const {
|
||||
return _format;
|
||||
}
|
||||
|
||||
// Returns the total bytes of all components.
|
||||
uint64_t bytes_on_disk() const;
|
||||
file_size_stats get_file_size_stats() const;
|
||||
@@ -442,10 +437,7 @@ public:
|
||||
|
||||
std::vector<std::pair<component_type, sstring>> all_components() const;
|
||||
|
||||
// When use_sstable_identifier is true and the sstable identifier is available,
|
||||
// use it to name the sstable in the snapshot, rather than the sstable generation.
|
||||
// Returns the generation used for snapshot.
|
||||
future<generation_type> snapshot(const sstring& dir, bool use_sstable_identifier = false) const;
|
||||
future<> snapshot(const sstring& dir) const;
|
||||
|
||||
// Delete the sstable by unlinking all sstable files
|
||||
// Ignores all errors.
|
||||
@@ -635,8 +627,6 @@ private:
|
||||
// Total memory reclaimed so far from this sstable
|
||||
size_t _total_memory_reclaimed{0};
|
||||
bool _unlinked{false};
|
||||
|
||||
components_digests _components_digests;
|
||||
public:
|
||||
bool has_component(component_type f) const;
|
||||
sstables_manager& manager() { return _manager; }
|
||||
@@ -657,18 +647,12 @@ private:
|
||||
|
||||
template <component_type Type, typename T>
|
||||
void write_simple(const T& comp);
|
||||
void do_write_simple(file_writer& writer,
|
||||
void do_write_simple(file_writer&& writer,
|
||||
noncopyable_function<void (version_types, file_writer&)> write_component);
|
||||
void do_write_simple(component_type type,
|
||||
noncopyable_function<void (version_types version, file_writer& writer)> write_component,
|
||||
unsigned buffer_size);
|
||||
|
||||
template <component_type Type, typename T>
|
||||
uint32_t write_simple_with_digest(const T& comp);
|
||||
uint32_t do_write_simple_with_digest(component_type type,
|
||||
noncopyable_function<void (version_types version, file_writer& writer)> write_component,
|
||||
unsigned buffer_size);
|
||||
|
||||
void write_crc(const checksum& c);
|
||||
void write_digest(uint32_t full_checksum);
|
||||
|
||||
@@ -679,9 +663,6 @@ private:
|
||||
future<file_writer> make_component_file_writer(component_type c, file_output_stream_options options,
|
||||
open_flags oflags = open_flags::wo | open_flags::create | open_flags::exclusive) noexcept;
|
||||
|
||||
future<std::unique_ptr<crc32_digest_file_writer>> make_digests_component_file_writer(component_type c, file_output_stream_options options,
|
||||
open_flags oflags = open_flags::wo | open_flags::create | open_flags::exclusive) noexcept;
|
||||
|
||||
void generate_toc();
|
||||
void open_sstable(const sstring& origin);
|
||||
|
||||
@@ -712,8 +693,7 @@ private:
|
||||
future<> read_summary() noexcept;
|
||||
|
||||
void write_summary() {
|
||||
uint32_t digest = write_simple_with_digest<component_type::Summary>(_components->summary);
|
||||
_components_digests.summary_digest = digest;
|
||||
write_simple<component_type::Summary>(_components->summary);
|
||||
}
|
||||
|
||||
// To be called when we try to load an SSTable that lacks a Summary. Could
|
||||
@@ -843,7 +823,7 @@ private:
|
||||
|
||||
future<> open_or_create_data(open_flags oflags, file_open_options options = {}) noexcept;
|
||||
// runs in async context (called from storage::open)
|
||||
void write_toc(std::unique_ptr<crc32_digest_file_writer> w);
|
||||
void write_toc(file_writer w);
|
||||
static future<uint32_t> read_digest_from_file(file f);
|
||||
static future<lw_shared_ptr<checksum>> read_checksum_from_file(file f);
|
||||
public:
|
||||
@@ -1033,12 +1013,6 @@ public:
|
||||
return _components->digest;
|
||||
}
|
||||
|
||||
components_digests& get_components_digests() {
|
||||
return _components_digests;
|
||||
}
|
||||
|
||||
std::optional<uint32_t> get_component_digest(component_type c) const;
|
||||
|
||||
// Gets ratio of droppable tombstone. A tombstone is considered droppable here
|
||||
// for cells and tombstones expired before the time point "GC before", which
|
||||
// is the point before which expiring data can be purged.
|
||||
|
||||
@@ -204,13 +204,13 @@ void filesystem_storage::open(sstable& sst) {
|
||||
open_flags::create |
|
||||
open_flags::exclusive,
|
||||
options).get();
|
||||
auto w = std::make_unique<crc32_digest_file_writer>(std::move(sink), sst.sstable_buffer_size, component_name(sst, component_type::TemporaryTOC));
|
||||
auto w = file_writer(output_stream<char>(std::move(sink)), component_name(sst, component_type::TemporaryTOC));
|
||||
|
||||
bool toc_exists = file_exists(fmt::to_string(sst.filename(component_type::TOC))).get();
|
||||
if (toc_exists) {
|
||||
// TOC will exist at this point if write_components() was called with
|
||||
// the generation of a sstable that exists.
|
||||
w->close();
|
||||
w.close();
|
||||
remove_file(fmt::to_string(sst.filename(component_type::TemporaryTOC))).get();
|
||||
throw std::runtime_error(format("SSTable write failed due to existence of TOC file for generation {} of {}.{}", sst._generation, sst._schema->ks_name(), sst._schema->cf_name()));
|
||||
}
|
||||
@@ -670,10 +670,15 @@ void object_storage_base::open(sstable& sst) {
|
||||
sst.manager().sstables_registry().create_entry(owner(), status_creating, sst._state, std::move(desc)).get();
|
||||
|
||||
memory_data_sink_buffers bufs;
|
||||
auto out = data_sink(std::make_unique<memory_data_sink>(bufs));
|
||||
auto w = std::make_unique<crc32_digest_file_writer>(std::move(out), sst.sstable_buffer_size, component_name(sst, component_type::TOC));
|
||||
|
||||
sst.write_toc(std::move(w));
|
||||
sst.write_toc(
|
||||
file_writer(
|
||||
output_stream<char>(
|
||||
data_sink(
|
||||
std::make_unique<memory_data_sink>(bufs)
|
||||
)
|
||||
)
|
||||
)
|
||||
);
|
||||
put_object(make_object_name(sst, component_type::TOC), std::move(bufs)).get();
|
||||
}
|
||||
|
||||
|
||||
@@ -547,7 +547,6 @@ enum class scylla_metadata_type : uint32_t {
|
||||
ExtTimestampStats = 9,
|
||||
SSTableIdentifier = 10,
|
||||
Schema = 11,
|
||||
ComponentsDigests = 12,
|
||||
};
|
||||
|
||||
// UUID is used for uniqueness across nodes, such that an imported sstable
|
||||
@@ -574,24 +573,6 @@ struct sstable_identifier_type {
|
||||
auto describe_type(sstable_version_types v, Describer f) { return f(value); }
|
||||
};
|
||||
|
||||
// Component digests stored in scylla metadata to track integrity of individual components
|
||||
struct components_digests {
|
||||
std::optional<uint32_t> data_digest;
|
||||
std::optional<uint32_t> compression_digest;
|
||||
std::optional<uint32_t> filter_digest;
|
||||
std::optional<uint32_t> statistics_digest;
|
||||
std::optional<uint32_t> summary_digest;
|
||||
std::optional<uint32_t> index_digest;
|
||||
std::optional<uint32_t> toc_digest;
|
||||
std::optional<uint32_t> partitions_digest;
|
||||
std::optional<uint32_t> rows_digest;
|
||||
|
||||
template <typename Describer>
|
||||
auto describe_type(sstable_version_types v, Describer f) {
|
||||
return f(data_digest,compression_digest, filter_digest, statistics_digest, summary_digest, index_digest, toc_digest, partitions_digest, rows_digest);
|
||||
}
|
||||
};
|
||||
|
||||
// Types of large data statistics.
|
||||
//
|
||||
// Note: For extensibility, never reuse an identifier,
|
||||
@@ -675,8 +656,7 @@ struct scylla_metadata {
|
||||
disk_tagged_union_member<scylla_metadata_type, scylla_metadata_type::ScyllaVersion, scylla_version>,
|
||||
disk_tagged_union_member<scylla_metadata_type, scylla_metadata_type::ExtTimestampStats, ext_timestamp_stats>,
|
||||
disk_tagged_union_member<scylla_metadata_type, scylla_metadata_type::SSTableIdentifier, sstable_identifier>,
|
||||
disk_tagged_union_member<scylla_metadata_type, scylla_metadata_type::Schema, sstable_schema>,
|
||||
disk_tagged_union_member<scylla_metadata_type, scylla_metadata_type::ComponentsDigests, components_digests>
|
||||
disk_tagged_union_member<scylla_metadata_type, scylla_metadata_type::Schema, sstable_schema>
|
||||
> data;
|
||||
|
||||
sstable_enabled_features get_features() const {
|
||||
@@ -711,13 +691,6 @@ struct scylla_metadata {
|
||||
auto* sid = data.get<scylla_metadata_type::SSTableIdentifier, scylla_metadata::sstable_identifier>();
|
||||
return sid ? sid->value : sstable_id::create_null_id();
|
||||
}
|
||||
const components_digests get_components_digests() const {
|
||||
auto cd = data.get<scylla_metadata_type::ComponentsDigests, components_digests>();
|
||||
if (!cd) {
|
||||
return {};
|
||||
}
|
||||
return *cd;
|
||||
}
|
||||
|
||||
template <typename Describer>
|
||||
auto describe_type(sstable_version_types v, Describer f) { return f(data); }
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user