alternator: migrate to rapidjson library
Profiling alternator implied that JSON parsing takes up a fair amount of CPU, and as such should be optimized. libjsoncpp is a standard library for handling JSON objects, but it also proves slower than rapidjson, which is hereby used instead. The results indicated that libjsoncpp used roughly 30% of CPU for a single-shard alternator instance under stress, while rapidjson dropped that usage to 18% without optimizations. Future optimizations should include eliding object copying, string copying and perhaps experimenting with different JSON allocators.
This commit is contained in:
@@ -12,6 +12,7 @@
|
||||
#include "log.hh"
|
||||
#include "serialization.hh"
|
||||
#include "error.hh"
|
||||
#include "rapidjson/writer.h"
|
||||
|
||||
static logging::logger slogger("alternator-serialization");
|
||||
|
||||
@@ -45,26 +46,32 @@ type_representation represent_type(alternator_type atype) {
|
||||
return it->second;
|
||||
}
|
||||
|
||||
bytes serialize_item(const Json::Value& item) {
|
||||
if (item.size() != 1) {
|
||||
throw api_error("ValidationException", format("An item can contain only one attribute definition: {}", item.toStyledString()));
|
||||
bytes serialize_item(const rjson::value& item) {
|
||||
if (item.IsNull() || item.MemberCount() != 1) {
|
||||
throw api_error("ValidationException", format("An item can contain only one attribute definition: {}", item));
|
||||
}
|
||||
auto it = item.begin();
|
||||
type_info type_info = type_info_from_string(it.key().asString()); // JSON keys are guaranteed to be strings
|
||||
auto it = item.MemberBegin();
|
||||
type_info type_info = type_info_from_string(it->name.GetString()); // JSON keys are guaranteed to be strings
|
||||
|
||||
if (type_info.atype == alternator_type::NOT_SUPPORTED_YET) {
|
||||
slogger.trace("Non-optimal serialization of type {}", it.key());
|
||||
return bytes{int8_t(type_info.atype)} + to_bytes(item.toStyledString());
|
||||
slogger.trace("Non-optimal serialization of type {}", it->name.GetString());
|
||||
return bytes{int8_t(type_info.atype)} + to_bytes(rjson::print(item));
|
||||
}
|
||||
|
||||
bytes serialized;
|
||||
// Alternator bytes representation does not start with "0x" followed by hex digits as Scylla-JSON does,
|
||||
// but instead uses base64.
|
||||
|
||||
if (type_info.dtype == bytes_type) {
|
||||
std::string raw_value = it->asString();
|
||||
std::string raw_value = it->value.GetString();
|
||||
serialized = base64_decode(std::string_view(raw_value));
|
||||
} else if (type_info.dtype == decimal_type) {
|
||||
serialized = type_info.dtype->from_string(it->value.GetString());
|
||||
} else if (type_info.dtype == boolean_type) {
|
||||
serialized = type_info.dtype->from_json_object(Json::Value(it->value.GetBool()), cql_serialization_format::internal());
|
||||
} else {
|
||||
serialized = type_info.dtype->from_json_object(*it, cql_serialization_format::internal());
|
||||
//FIXME(sarna): Once we have type visitors, this double conversion hack should be replaced with parsing straight from rapidjson
|
||||
serialized = type_info.dtype->from_json_object(Json::Value(rjson::print(it->value)), cql_serialization_format::internal());
|
||||
}
|
||||
|
||||
//NOTICE: redundant copy here, from_json_object should accept bytes' output iterator too.
|
||||
@@ -72,8 +79,8 @@ bytes serialize_item(const Json::Value& item) {
|
||||
return bytes{int8_t(type_info.atype)} + std::move(serialized);
|
||||
}
|
||||
|
||||
Json::Value deserialize_item(bytes_view bv) {
|
||||
Json::Value deserialized;
|
||||
rjson::value deserialize_item(bytes_view bv) {
|
||||
rjson::value deserialized(rapidjson::kObjectType);
|
||||
if (bv.empty()) {
|
||||
throw api_error("ValidationException", "Serialized value empty");
|
||||
}
|
||||
@@ -83,17 +90,18 @@ Json::Value deserialize_item(bytes_view bv) {
|
||||
|
||||
if (atype == alternator_type::NOT_SUPPORTED_YET) {
|
||||
slogger.trace("Non-optimal deserialization of alternator type {}", int8_t(atype));
|
||||
return json::to_json_value(sstring(reinterpret_cast<const char *>(bv.data()), bv.size()));
|
||||
return rjson::parse_raw(reinterpret_cast<const char *>(bv.data()), bv.size());
|
||||
}
|
||||
|
||||
type_representation type_representation = represent_type(atype);
|
||||
if (type_representation.dtype == bytes_type) {
|
||||
deserialized[type_representation.ident] = base64_encode(bv);
|
||||
std::string b64 = base64_encode(bv);
|
||||
rjson::set_with_string_name(deserialized, type_representation.ident, rjson::from_string(b64));
|
||||
} else if (type_representation.dtype == decimal_type) {
|
||||
auto s = decimal_type->to_json_string(bytes(bv)); //FIXME(sarna): unnecessary copy
|
||||
deserialized[type_representation.ident] = Json::Value(reinterpret_cast<const char*>(s.data()), reinterpret_cast<const char*>(s.data()) + s.size());
|
||||
rjson::set_with_string_name(deserialized, type_representation.ident, rjson::from_string(s));
|
||||
} else {
|
||||
deserialized[type_representation.ident] = json::to_json_value(type_representation.dtype->to_json_string(bytes(bv))); //FIXME(sarna): unnecessary copy
|
||||
rjson::set_with_string_name(deserialized, type_representation.ident, rjson::parse(type_representation.dtype->to_string(bytes(bv))));
|
||||
}
|
||||
|
||||
return deserialized;
|
||||
@@ -113,43 +121,41 @@ std::string type_to_string(data_type type) {
|
||||
return it->second;
|
||||
}
|
||||
|
||||
bytes get_key_column_value(const Json::Value& item, const column_definition& column) {
|
||||
bytes get_key_column_value(const rjson::value& item, const column_definition& column) {
|
||||
std::string column_name = column.name_as_text();
|
||||
std::string expected_type = type_to_string(column.type);
|
||||
|
||||
Json::Value key_typed_value = item.get(column_name, Json::nullValue);
|
||||
if (!key_typed_value.isObject() || key_typed_value.size() != 1) {
|
||||
const rjson::value& key_typed_value = rjson::get(item, rjson::value::StringRefType(column_name.c_str()));
|
||||
if (!key_typed_value.IsObject() || key_typed_value.MemberCount() != 1) {
|
||||
throw api_error("ValidationException",
|
||||
format("Missing or invalid value object for key column {}: {}",
|
||||
column_name, item.toStyledString()));
|
||||
format("Missing or invalid value object for key column {}: {}", column_name, item));
|
||||
}
|
||||
auto it = key_typed_value.begin();
|
||||
if (it.key().asString() != expected_type) {
|
||||
auto it = key_typed_value.MemberBegin();
|
||||
if (it->name.GetString() != expected_type) {
|
||||
throw api_error("ValidationException",
|
||||
format("Expected type {} for key column {}, got type {}",
|
||||
expected_type, column_name, it.key().asString()));
|
||||
expected_type, column_name, it->name.GetString()));
|
||||
}
|
||||
if (column.type == bytes_type) {
|
||||
return base64_decode(it->asString());
|
||||
return base64_decode(it->value.GetString());
|
||||
} else {
|
||||
return column.type->from_string(it->asString());
|
||||
return column.type->from_string(it->value.GetString());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Json::Value json_key_column_value(bytes_view cell, const column_definition& column) {
|
||||
rjson::value json_key_column_value(bytes_view cell, const column_definition& column) {
|
||||
if (column.type == bytes_type) {
|
||||
return base64_encode(cell);
|
||||
std::string b64 = base64_encode(cell);
|
||||
return rjson::from_string(b64);
|
||||
} if (column.type == utf8_type) {
|
||||
return Json::Value(reinterpret_cast<const char*>(cell.data()),
|
||||
reinterpret_cast<const char*>(cell.data()) + cell.size());
|
||||
return rjson::from_string(std::string(reinterpret_cast<const char*>(cell.data()), cell.size()));
|
||||
} else if (column.type == decimal_type) {
|
||||
// FIXME: use specialized Alternator number type, not the more
|
||||
// general "decimal_type". A dedicated type can be more efficient
|
||||
// in storage space and in parsing speed.
|
||||
auto s = decimal_type->to_json_string(bytes(cell));
|
||||
return Json::Value(reinterpret_cast<const char*>(s.data()),
|
||||
reinterpret_cast<const char*>(s.data()) + s.size());
|
||||
return rjson::from_string(s);
|
||||
} else {
|
||||
// We shouldn't get here, we shouldn't see such key columns.
|
||||
throw std::runtime_error(format("Unexpected key type: {}", column.type->name()));
|
||||
@@ -157,7 +163,7 @@ Json::Value json_key_column_value(bytes_view cell, const column_definition& colu
|
||||
}
|
||||
|
||||
|
||||
partition_key pk_from_json(const Json::Value& item, schema_ptr schema) {
|
||||
partition_key pk_from_json(const rjson::value& item, schema_ptr schema) {
|
||||
std::vector<bytes> raw_pk;
|
||||
// FIXME: this is a loop, but we really allow only one partition key column.
|
||||
for (const column_definition& cdef : schema->partition_key_columns()) {
|
||||
@@ -167,7 +173,7 @@ partition_key pk_from_json(const Json::Value& item, schema_ptr schema) {
|
||||
return partition_key::from_exploded(raw_pk);
|
||||
}
|
||||
|
||||
clustering_key ck_from_json(const Json::Value& item, schema_ptr schema) {
|
||||
clustering_key ck_from_json(const rjson::value& item, schema_ptr schema) {
|
||||
if (schema->clustering_key_size() == 0) {
|
||||
return clustering_key::make_empty();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user