vint: optimise deserialisation routine

At the moment, vint deserialisation uses a naive approach, reading
each byte separately. In practice, vints will most often appear
inside larger buffers. That means we can read 8 bytes at a time and then
figure out the unneeded parts and mask them out. This way we avoid a loop and
do fewer memory loads, which are much more expensive than arithmetic
operations (even if they hit the cache).
This commit is contained in:
Paweł Dziepak
2019-02-21 17:00:39 +00:00
parent 57de2c26b3
commit 552fc0c6b9

View File

@@ -136,7 +136,9 @@ vint_size_type unsigned_vint::serialized_size(uint64_t value) noexcept {
} }
uint64_t unsigned_vint::deserialize(bytes_view v) { uint64_t unsigned_vint::deserialize(bytes_view v) {
const int8_t first_byte = v[0]; auto src = v.data();
auto len = v.size();
const int8_t first_byte = *src;
// No additional bytes, since the most significant bit is not set. // No additional bytes, since the most significant bit is not set.
if (first_byte >= 0) { if (first_byte >= 0) {
@@ -148,11 +150,25 @@ uint64_t unsigned_vint::deserialize(bytes_view v) {
// Extract the bits not used for counting bytes. // Extract the bits not used for counting bytes.
auto result = uint64_t(first_byte) & first_byte_value_mask(extra_bytes_size); auto result = uint64_t(first_byte) & first_byte_value_mask(extra_bytes_size);
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
uint64_t value;
// If we can overread do that. It is cheaper to have a single 64-bit read and
// then mask out the unneeded part than to do 8x 1 byte reads.
if (__builtin_expect(len >= sizeof(uint64_t) + 1, true)) {
std::copy_n(src + 1, sizeof(uint64_t), reinterpret_cast<int8_t*>(&value));
} else {
value = 0;
std::copy_n(src + 1, extra_bytes_size, reinterpret_cast<int8_t*>(&value));
}
value = be_to_cpu(value << (64 - (extra_bytes_size * 8)));
result <<= (extra_bytes_size * 8) % 64;
result |= value;
#else
for (vint_size_type index = 0; index < extra_bytes_size; ++index) { for (vint_size_type index = 0; index < extra_bytes_size; ++index) {
result <<= 8; result <<= 8;
result |= (uint64_t(v[index + 1]) & uint64_t(0xff)); result |= (uint64_t(v[index + 1]) & uint64_t(0xff));
} }
#endif
return result; return result;
} }