cql3: optimize the deserialization of collections

Before this patch, deserializing a collection from a (prepared) CQL request
involved deserializing every element and serializing it again. Originally this
was a hacky method of validation, and it was also needed to reserialize nested
frozen collections from the CQLv2 format to the CQLv3 format.

But since then we started doing validation separately (before calls to
from_serialized) and CQLv2 became irrelevant, making reserialization of
elements (which, among other things, involves a memory allocation for every
element) pure waste.

This patch adds a faster path for collections in the v3 format, which does not
involve linearizing or reserializing the elements (since v3 is the same as
our internal format).

After this patch, the path from prepared CQL statements to
atomic_cell_or_collection is almost completely linearization-free. The last
remaining place is collection_mutation_description, where map keys are
linearized.
This commit is contained in:
Michał Chojnowski
2021-03-19 00:14:32 +01:00
parent a0f12b8d63
commit 458878a414
5 changed files with 76 additions and 19 deletions

View File

@@ -126,16 +126,22 @@ lists::literal::to_string() const {
lists::value
lists::value::from_serialized(const raw_value_view& val, const list_type_impl& type, cql_serialization_format sf) {
try {
// Collections have this small hack that validate cannot be called on a serialized object,
// but compose does the validation (so we're fine).
// FIXME: deserializeForNativeProtocol()?!
auto l = val.deserialize<list_type_impl::native_type>(type, sf);
std::vector<managed_bytes_opt> elements;
elements.reserve(l.size());
for (auto&& element : l) {
// elements can be null in lists that represent a set of IN values
// FIXME: decompose to managed_bytes
elements.push_back(element.is_null() ? managed_bytes_opt() : managed_bytes_opt(type.get_elements_type()->decompose(element)));
if (sf.collection_format_unchanged()) {
std::vector<managed_bytes> tmp = val.with_value([sf] (const FragmentedView auto& v) {
return partially_deserialize_listlike(v, sf);
});
elements.reserve(tmp.size());
for (auto&& element : tmp) {
elements.emplace_back(std::move(element));
}
} else [[unlikely]] {
auto l = val.deserialize<list_type_impl::native_type>(type, sf);
elements.reserve(l.size());
for (auto&& element : l) {
// elements can be null in lists that represent a set of IN values
elements.push_back(element.is_null() ? managed_bytes_opt() : managed_bytes_opt(type.get_elements_type()->decompose(element)));
}
}
return value(std::move(elements));
} catch (marshal_exception& e) {

View File

@@ -162,11 +162,21 @@ maps::value::from_serialized(const raw_value_view& fragmented_value, const map_t
// FIXME: deserialize_for_native_protocol?!
auto m = fragmented_value.deserialize<map_type_impl::native_type>(type, sf);
std::map<managed_bytes, managed_bytes, serialized_compare> map(type.get_keys_type()->as_less_comparator());
for (auto&& e : m) {
map.emplace(type.get_keys_type()->decompose(e.first),
type.get_values_type()->decompose(e.second));
if (sf.collection_format_unchanged()) {
std::vector<std::pair<managed_bytes, managed_bytes>> tmp = fragmented_value.with_value([sf] (const FragmentedView auto& v) {
return partially_deserialize_map(v, sf);
});
for (auto&& key_value : tmp) {
map.insert(std::move(key_value));
}
} else [[unlikely]] {
auto m = fragmented_value.deserialize<map_type_impl::native_type>(type, sf);
for (auto&& e : m) {
map.emplace(type.get_keys_type()->decompose(e.first),
type.get_values_type()->decompose(e.second));
}
}
return maps::value { std::move(map) };
return maps::value(std::move(map));
} catch (marshal_exception& e) {
throw exceptions::invalid_request_exception(e.what());
}

View File

@@ -135,13 +135,19 @@ sets::literal::to_string() const {
sets::value
sets::value::from_serialized(const raw_value_view& val, const set_type_impl& type, cql_serialization_format sf) {
try {
// Collections have this small hack that validate cannot be called on a serialized object,
// but compose does the validation (so we're fine).
// FIXME: deserializeForNativeProtocol?!
auto s = val.deserialize<set_type_impl::native_type>(type, sf);
std::set<managed_bytes, serialized_compare> elements(type.get_elements_type()->as_less_comparator());
for (auto&& element : s) {
elements.insert(elements.end(), managed_bytes(type.get_elements_type()->decompose(element)));
if (sf.collection_format_unchanged()) {
std::vector<managed_bytes> tmp = val.with_value([sf] (const FragmentedView auto& v) {
return partially_deserialize_listlike(v, sf);
});
for (auto&& element : tmp) {
elements.insert(std::move(element));
}
} else [[unlikely]] {
auto s = val.deserialize<set_type_impl::native_type>(type, sf);
for (auto&& element : s) {
elements.insert(elements.end(), managed_bytes(type.get_elements_type()->decompose(element)));
}
}
return value(std::move(elements));
} catch (marshal_exception& e) {

View File

@@ -1322,6 +1322,34 @@ set_type_impl::serialize_partially_deserialized_form(
return pack(v.begin(), v.end(), v.size(), sf);
}
// Splits a serialized list-like collection (used for both lists and sets)
// into its individual serialized elements.
//
// The element count is read from the front of `in`, then that many collection
// values are read one after another and stored as managed_bytes. The elements
// are not deserialized any further, and no validation is done here.
// `in` is taken by value; the read_* helpers presumably advance this local
// copy as they consume it (their definitions are not shown here).
template <FragmentedView View>
std::vector<managed_bytes> partially_deserialize_listlike(View in, cql_serialization_format sf) {
    const auto count = read_collection_size(in, sf);
    std::vector<managed_bytes> result;
    result.reserve(count);
    int idx = 0;
    while (idx != count) {
        result.emplace_back(read_collection_value(in, sf));
        ++idx;
    }
    return result;
}
// Explicit instantiations for the two view types this template is used with.
template std::vector<managed_bytes> partially_deserialize_listlike(managed_bytes_view in, cql_serialization_format sf);
template std::vector<managed_bytes> partially_deserialize_listlike(fragmented_temporary_buffer::view in, cql_serialization_format sf);
// Splits a serialized map into its (key, value) entries, each kept in
// serialized form.
//
// The entry count is read from the front of `in`; for every entry a key and
// then a value are read and stored as a pair of managed_bytes. Keys and values
// are not deserialized any further, and no validation is done here.
// `in` is taken by value; the read_* helpers presumably advance this local
// copy as they consume it (their definitions are not shown here).
template <FragmentedView View>
std::vector<std::pair<managed_bytes, managed_bytes>> partially_deserialize_map(View in, cql_serialization_format sf) {
    const auto count = read_collection_size(in, sf);
    std::vector<std::pair<managed_bytes, managed_bytes>> entries;
    entries.reserve(count);
    int idx = 0;
    while (idx != count) {
        // Two separate statements keep the read order fixed: key first, then value.
        managed_bytes entry_key(read_collection_value(in, sf));
        managed_bytes entry_value(read_collection_value(in, sf));
        entries.emplace_back(std::move(entry_key), std::move(entry_value));
        ++idx;
    }
    return entries;
}
// Explicit instantiations for the two view types this template is used with.
template std::vector<std::pair<managed_bytes, managed_bytes>> partially_deserialize_map(managed_bytes_view in, cql_serialization_format sf);
template std::vector<std::pair<managed_bytes, managed_bytes>> partially_deserialize_map(fragmented_temporary_buffer::view in, cql_serialization_format sf);
list_type
list_type_impl::get_instance(data_type elements, bool is_multi_cell) {
return intern::get_instance(elements, is_multi_cell);

View File

@@ -1223,6 +1223,13 @@ void write_collection_value(bytes::iterator& out, cql_serialization_format sf, b
void write_collection_value(managed_bytes_mutable_view&, cql_serialization_format sf, bytes_view val_bytes);
void write_collection_value(managed_bytes_mutable_view&, cql_serialization_format sf, const managed_bytes_view& val_bytes);
// Splits a serialized collection into a vector of elements, but does not recursively deserialize the elements.
// Does not perform validation.
// Declaration only: the template is defined out of line and explicitly
// instantiated for managed_bytes_view and fragmented_temporary_buffer::view.
template <FragmentedView View>
std::vector<managed_bytes> partially_deserialize_listlike(View in, cql_serialization_format sf);
// Map counterpart of partially_deserialize_listlike: splits a serialized map
// into a vector of (key, value) pairs, each left in serialized form.
// Declaration only, with the same two explicit instantiations.
// NOTE(review): presumably also performs no validation - confirm against the definition.
template <FragmentedView View>
std::vector<std::pair<managed_bytes, managed_bytes>> partially_deserialize_map(View in, cql_serialization_format sf);
using user_type = shared_ptr<const user_type_impl>;
using tuple_type = shared_ptr<const tuple_type_impl>;