data/cell: don't overshoot target allocation sizes
data::cell targets 8KB as its maximum allocations size to avoid pressuring the allocator. This 8KB target is used for internal storage -- values small enough to be stored inside the cell itself -- as well for external storage. Externally stored values use 8KB fragment sizes. The problem is that only the size of data itself was considered when making the allocations. For example when allocating the fragments (chunks) for external storage, each fragment stored 8KB of data. But fragments have overhead, they have next and back pointers. This resulted in a 8KB + 2 * sizeof(void*) allocation. IMR uses the allocation strategy mechanism, which works with aligned allocations. As the seastar allocation only guarantees aligned allocations for power of two sizes, it ends up allocating a 16KB slot. This results in the mutation fragment using almost twice as much memory as would be required. This is a huge waste. This patch fixes the problem by considering the overhead of both internal and external storage ensuring allocations are 8KB or less. Fixes: #6043 Tests: unit(debug, dev, release) Signed-off-by: Botond Dénes <bdenes@scylladb.com> Message-Id: <20200910171359.1438029-1-bdenes@scylladb.com>
This commit is contained in:
@@ -208,7 +208,7 @@ size_t atomic_cell_or_collection::external_memory_usage(const abstract_type& t)
|
||||
external_value_size = cell_view.value_size();
|
||||
}
|
||||
// Add overhead of chunk headers. The last one is a special case.
|
||||
external_value_size += (external_value_size - 1) / data::cell::maximum_external_chunk_length * data::cell::external_chunk_overhead;
|
||||
external_value_size += (external_value_size - 1) / data::cell::effective_external_chunk_length * data::cell::external_chunk_overhead;
|
||||
external_value_size += data::cell::external_last_chunk_overhead;
|
||||
}
|
||||
return data::cell::structure::serialized_object_size(_data.get(), ctx)
|
||||
|
||||
28
data/cell.hh
28
data/cell.hh
@@ -44,8 +44,16 @@ template<typename T>
|
||||
class value_writer;
|
||||
|
||||
struct cell {
|
||||
static constexpr size_t maximum_internal_storage_length = value_view::maximum_internal_storage_length;
|
||||
static constexpr size_t maximum_external_chunk_length = value_view::maximum_external_chunk_length;
|
||||
// We make the internal storage 1KB smaller for 2 reasons:
|
||||
// * To ensure the cell storage doesn't exceed 8KB in total (and hence
|
||||
// reverts to a 16KB allocation, wasting a lot of space).
|
||||
// * To make sure we can distinguish between external and internal storage
|
||||
// values based on size. External storage will practically store less than
|
||||
// 8KB in chunks due to chunk header overhead. So in order to be able to
|
||||
// decide on size alone we need to leave sufficient room between the two.
|
||||
static constexpr size_t maximum_internal_storage_length = 7 * 1024;
|
||||
static constexpr size_t maximum_external_chunk_length = 8 * 1024;
|
||||
|
||||
|
||||
struct tags {
|
||||
class cell;
|
||||
@@ -204,14 +212,16 @@ struct cell {
|
||||
/// If a cell value size is above maximum_internal_storage_length it is
|
||||
/// stored externally. Moreover, in order to avoid stressing the memory
|
||||
/// allocators with large allocations values are fragmented in chunks
|
||||
/// no larger than maximum_external_chunk_length. The size of all chunks,
|
||||
/// but the last one is always maximum_external_chunk_length.
|
||||
/// no larger than maximum_external_chunk_length. The gross size (including
|
||||
/// the chunk header) of all chunks, but the last one is always
|
||||
/// maximum_external_chunk_length.
|
||||
using external_chunk = imr::structure<
|
||||
imr::member<tags::chunk_back_pointer, imr::tagged_type<tags::chunk_back_pointer, imr::pod<uint8_t*>>>,
|
||||
imr::member<tags::chunk_next, imr::pod<uint8_t*>>,
|
||||
imr::member<tags::chunk_data, imr::buffer<tags::chunk_data>>
|
||||
>;
|
||||
static constexpr size_t external_chunk_overhead = sizeof(uint8_t*) * 2;
|
||||
static constexpr size_t effective_external_chunk_length = maximum_external_chunk_length - external_chunk_overhead;
|
||||
|
||||
using external_last_chunk_size = imr::pod<uint16_t>;
|
||||
/// The last fragment of an externally stored value
|
||||
@@ -239,7 +249,7 @@ struct cell {
|
||||
|
||||
template<typename Tag>
|
||||
static constexpr size_t size_of() noexcept {
|
||||
return cell::maximum_external_chunk_length;
|
||||
return cell::effective_external_chunk_length;
|
||||
}
|
||||
template<typename Tag, typename... Args>
|
||||
auto context_for(Args&&...) const noexcept {
|
||||
@@ -662,7 +672,7 @@ public:
|
||||
}
|
||||
|
||||
bool is_value_fragmented() const noexcept {
|
||||
return flags_view().template get<tags::external_data>() && value_size() > maximum_external_chunk_length;
|
||||
return flags_view().template get<tags::external_data>() && value_size() > cell::effective_external_chunk_length;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -705,15 +715,15 @@ inline cell::mutable_atomic_cell_view cell::make_atomic_cell_view(const type_inf
|
||||
/// When a cell value is stored externally as a list of fragments we need to
|
||||
/// know when we reach the last fragment. The way to do that is to read the
|
||||
/// total value size from the parent cell object and use the fact that the size
|
||||
/// of all fragments except the last one is cell::maximum_external_chunk_length.
|
||||
/// of all fragments except the last one is cell::effective_external_chunk_length.
|
||||
class fragment_chain_destructor_context : public imr::no_context_t {
|
||||
size_t _total_length;
|
||||
public:
|
||||
explicit fragment_chain_destructor_context(size_t total_length) noexcept
|
||||
: _total_length(total_length) { }
|
||||
|
||||
void next_chunk() noexcept { _total_length -= data::cell::maximum_external_chunk_length; }
|
||||
bool is_last_chunk() const noexcept { return _total_length <= data::cell::maximum_external_chunk_length; }
|
||||
void next_chunk() noexcept { _total_length -= data::cell::effective_external_chunk_length; }
|
||||
bool is_last_chunk() const noexcept { return _total_length <= data::cell::effective_external_chunk_length; }
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -104,19 +104,18 @@ public:
|
||||
|
||||
auto offset = 0;
|
||||
auto migrate_fn_ptr = &cell::lsa_chunk_migrate_fn;
|
||||
while (_value_size - offset > cell::maximum_external_chunk_length) {
|
||||
while (_value_size - offset > cell::effective_external_chunk_length) {
|
||||
imr::placeholder<imr::pod<uint8_t*>> phldr;
|
||||
auto chunk_ser = allocations.template allocate_nested<cell::external_chunk>(migrate_fn_ptr)
|
||||
.serialize(next_pointer_position);
|
||||
next_pointer_position = chunk_ser.position();
|
||||
next_pointer_phldr.serialize(
|
||||
chunk_ser.serialize(phldr)
|
||||
.serialize(cell::maximum_external_chunk_length,
|
||||
write_to_destination(cell::maximum_external_chunk_length))
|
||||
.serialize(cell::effective_external_chunk_length, write_to_destination(cell::effective_external_chunk_length))
|
||||
.done()
|
||||
);
|
||||
next_pointer_phldr = phldr;
|
||||
offset += cell::maximum_external_chunk_length;
|
||||
offset += cell::effective_external_chunk_length;
|
||||
}
|
||||
|
||||
size_t left = _value_size - offset;
|
||||
@@ -156,11 +155,11 @@ inline basic_value_view<is_mutable> cell::variable_value::do_make_view(structure
|
||||
return view.template get<tags::value_data>().visit(make_visitor(
|
||||
[&] (imr::pod<uint8_t*>::view ptr) {
|
||||
auto ex_ptr = static_cast<uint8_t*>(ptr.load());
|
||||
if (size > maximum_external_chunk_length) {
|
||||
if (size > cell::effective_external_chunk_length) {
|
||||
auto ex_ctx = chunk_context(ex_ptr);
|
||||
auto ex_view = external_chunk::make_view(ex_ptr, ex_ctx);
|
||||
auto next = static_cast<uint8_t*>(ex_view.get<tags::chunk_next>().load());
|
||||
return basic_value_view<is_mutable>(ex_view.get<tags::chunk_data>(ex_ctx), size - maximum_external_chunk_length, next);
|
||||
return basic_value_view<is_mutable>(ex_view.get<tags::chunk_data>(ex_ctx), size - cell::effective_external_chunk_length, next);
|
||||
} else {
|
||||
auto ex_ctx = last_chunk_context(ex_ptr);
|
||||
auto ex_view = external_last_chunk::make_view(ex_ptr, ex_ctx);
|
||||
|
||||
@@ -42,9 +42,6 @@ namespace data {
|
||||
template<mutable_view is_mutable>
|
||||
class basic_value_view {
|
||||
public:
|
||||
static constexpr size_t maximum_internal_storage_length = 8 * 1024;
|
||||
static constexpr size_t maximum_external_chunk_length = 8 * 1024;
|
||||
|
||||
using fragment_type = std::conditional_t<is_mutable == mutable_view::no,
|
||||
bytes_view, bytes_mutable_view>;
|
||||
using raw_pointer_type = std::conditional_t<is_mutable == mutable_view::no,
|
||||
|
||||
@@ -30,12 +30,12 @@ inline typename basic_value_view<is_mutable>::iterator& basic_value_view<is_muta
|
||||
{
|
||||
if (!_next) {
|
||||
_view = fragment_type();
|
||||
} else if (_left > maximum_external_chunk_length) {
|
||||
} else if (_left > cell::effective_external_chunk_length) {
|
||||
cell::chunk_context ctx(_next);
|
||||
auto v = cell::external_chunk::make_view(_next, ctx);
|
||||
_next = static_cast<uint8_t*>(v.template get<cell::tags::chunk_next>(ctx).load());
|
||||
_view = v.template get<cell::tags::chunk_data>(ctx);
|
||||
_left -= cell::maximum_external_chunk_length;
|
||||
_left -= cell::effective_external_chunk_length;
|
||||
} else {
|
||||
cell::last_chunk_context ctx(_next);
|
||||
auto v = cell::external_last_chunk::make_view(_next, ctx);
|
||||
|
||||
Reference in New Issue
Block a user