From 671affc36c9381375c75bb95b5ebcca4e6b5daa9 Mon Sep 17 00:00:00 2001 From: Piotr Jastrzebski Date: Sun, 18 Dec 2016 18:56:55 +0100 Subject: [PATCH] Implement intrusive set using rbtree_algorithms This new implementation takes less memory because it does not store the comparator. It also uses tree nodes optimized for size. This means that instead of storing an enum field |color| they embed this information inside the pointer to the parent. Signed-off-by: Piotr Jastrzebski --- intrusive_set.hh | 98 ------------- intrusive_set_external_comparator.hh | 199 +++++++++++++++++++++++++++ licenses/boost-license-1.0.txt | 23 ++++ mutation_partition.cc | 24 ++-- mutation_partition.hh | 10 +- 5 files changed, 240 insertions(+), 114 deletions(-) delete mode 100644 intrusive_set.hh create mode 100644 intrusive_set_external_comparator.hh create mode 100644 licenses/boost-license-1.0.txt diff --git a/intrusive_set.hh b/intrusive_set.hh deleted file mode 100644 index 3ebbd64d36..0000000000 --- a/intrusive_set.hh +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (C) 2016 ScyllaDB - */ - -/* - * This file is part of Scylla. - * - * Scylla is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#pragma once - -#include - -namespace bi = boost::intrusive; - -typedef bi::rbtree_algorithms> algo; - -class intrusive_set_member_hook : public bi::set_member_hook> { -public: - intrusive_set_member_hook() = default; - intrusive_set_member_hook(intrusive_set_member_hook&& o) noexcept { - algo::replace_node(o.this_ptr(), this_ptr()); - algo::init(o.this_ptr()); - } -}; - -template -class intrusive_set final { - using set_type = bi::set, - bi::compare>; -public: - typedef Elem value_type; - typedef typename set_type::iterator iterator; - typedef typename set_type::const_iterator const_iterator; - typedef typename set_type::reverse_iterator reverse_iterator; - typedef typename set_type::const_reverse_iterator const_reverse_iterator; -private: - set_type _set; -public: - intrusive_set(Comparator c) : _set(std::move(c)) { } - Comparator key_comp() const { return _set.key_comp(); } - iterator begin() { return _set.begin(); } - const_iterator begin() const { return _set.begin(); } - iterator end() { return _set.end(); } - const_iterator end() const { return _set.end(); } - reverse_iterator rbegin() { return _set.rbegin(); } - const_reverse_iterator rbegin() const { return _set.rbegin(); } - reverse_iterator rend() { return _set.rend(); } - const_reverse_iterator rend() const { return _set.rend(); } - iterator lower_bound(const Elem &key) { return _set.lower_bound(key); } - template - iterator upper_bound(const KeyType& key, KeyTypeKeyCompare comp) { return _set.upper_bound(key, comp); } - template - const_iterator upper_bound(const KeyType& key, KeyTypeKeyCompare comp) const { return _set.upper_bound(key, comp); } - const_iterator lower_bound(const Elem &key) const { return _set.lower_bound(key); } - Elem* unlink_leftmost_without_rebalance() { return _set.unlink_leftmost_without_rebalance(); } - iterator insert_before(const_iterator pos, Elem& value) { return _set.insert_before(pos, value); } - template - void clear_and_dispose(Disposer disposer) { 
_set.clear_and_dispose(disposer); } - template - void clone_from(const intrusive_set &src, Cloner cloner, Disposer disposer) { - _set.clone_from(src._set, cloner, disposer); - } - iterator find(const Elem &key) { return _set.find(key); } - const_iterator find(const Elem &key) const { return _set.find(key); } - template - iterator find(const KeyType &key, KeyTypeKeyCompare comp) { return _set.find(key, comp); } - template - const_iterator find(const KeyType &key, KeyTypeKeyCompare comp) const { return _set.find(key, comp); } - iterator insert(const_iterator hint, Elem& value) { return _set.insert(hint, value); } - template - iterator erase_and_dispose(const_iterator i, Disposer disposer) { - return _set.erase_and_dispose(i, disposer); - } - iterator erase(const_iterator i) { return _set.erase(i); } - iterator erase(const_iterator b, const_iterator e) { return _set.erase(b, e); } - template - iterator erase_and_dispose(const_iterator b, const_iterator e, Disposer disposer) { - return _set.erase_and_dispose(b, e, disposer); - } - bool empty() const { return _set.empty(); } - auto size() const { return _set.size(); } -}; diff --git a/intrusive_set_external_comparator.hh b/intrusive_set_external_comparator.hh new file mode 100644 index 0000000000..3fe2832c06 --- /dev/null +++ b/intrusive_set_external_comparator.hh @@ -0,0 +1,199 @@ +/* + * Copyright (C) 2016 ScyllaDB + */ + +/* + * This file is part of Scylla. + * + * Scylla is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Scylla is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Scylla. If not, see <http://www.gnu.org/licenses/>. + */ + +/* + * (C) Copyright Ion Gaztanaga 2013-2014 + * Distributed under the Boost Software License, Version 1.0. + * (See accompanying file LICENSE_1_0.txt or copy at + * http://www.boost.org/LICENSE_1_0.txt) + */ + +#pragma once + +#include + +namespace bi = boost::intrusive; + +typedef bi::rbtree_algorithms> algo; + +class intrusive_set_external_comparator_member_hook : public bi::set_member_hook> { +public: + intrusive_set_external_comparator_member_hook() = default; + intrusive_set_external_comparator_member_hook(intrusive_set_external_comparator_member_hook&& o) noexcept { + algo::replace_node(o.this_ptr(), this_ptr()); + algo::init(o.this_ptr()); + } +}; + +template +class intrusive_set_external_comparator final { + typedef boost::intrusive::mhtraits value_traits; + typedef typename value_traits::node_traits node_traits; + typedef typename node_traits::node_ptr node_ptr; +public: + typedef Elem value_type; + typedef typename bi::tree_iterator iterator; + typedef typename bi::tree_iterator const_iterator; + typedef typename bi::reverse_iterator reverse_iterator; + typedef typename bi::reverse_iterator const_reverse_iterator; + +private: + intrusive_set_external_comparator_member_hook _header; + value_traits _value_traits; + + typedef typename bi::value_traits_pointers::const_value_traits_ptr const_value_traits_ptr; + typedef typename bi::detail::identity key_of_value; + + const_value_traits_ptr priv_value_traits_ptr() const { + return bi::pointer_traits::pointer_to(_value_traits); + } + template + struct key_node_comp_ret { + typedef bi::detail::key_nodeptr_comp type; + }; + + template + typename key_node_comp_ret::type key_node_comp(KeyTypeKeyCompare comp) const { + return bi::detail::key_nodeptr_comp(comp, &_value_traits); + } + iterator insert_unique_commit(Elem& value, const algo::insert_commit_data &commit_data) { + node_ptr 
to_insert(_value_traits.to_node_ptr(value)); + algo::insert_unique_commit(_header.this_ptr(), to_insert, commit_data); + return iterator(to_insert, priv_value_traits_ptr()); + } +public: + intrusive_set_external_comparator() { algo::init_header(_header.this_ptr()); } + intrusive_set_external_comparator(intrusive_set_external_comparator&& o) : _value_traits(std::move(o._value_traits)) { + algo::swap_tree(_header.this_ptr(), node_ptr(o._header.this_ptr())); + } + iterator begin() { return iterator(algo::begin_node(_header.this_ptr()), priv_value_traits_ptr()); } + const_iterator begin() const { return const_iterator(algo::begin_node(_header.this_ptr()), priv_value_traits_ptr()); } + iterator end() { return iterator(algo::end_node(_header.this_ptr()), priv_value_traits_ptr()); } + const_iterator end() const { return const_iterator(algo::end_node(_header.this_ptr()), priv_value_traits_ptr()); } + reverse_iterator rbegin() { return reverse_iterator(end()); } + const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); } + reverse_iterator rend() { return reverse_iterator(begin()); } + const_reverse_iterator rend() const { return const_reverse_iterator(begin()); } + template + void clear_and_dispose(Disposer disposer) { + algo::clear_and_dispose(_header.this_ptr(), + bi::detail::node_disposer(disposer, &_value_traits)); + algo::init_header(_header.this_ptr()); + } + bool empty() const { return algo::unique(_header.this_ptr()); } + + // WARNING: this method has O(N) time complexity, use with care + auto size() const { return algo::size(_header.this_ptr()); } + iterator erase(const_iterator i) { + const_iterator ret(i); + ++ret; + node_ptr to_erase(i.pointed_node()); + algo::erase(_header.this_ptr(), to_erase); + algo::init(to_erase); + return ret.unconst(); + } + iterator erase(const_iterator b, const_iterator e) { + while (b != e) { + erase(b++); + } + return b.unconst(); + } + template + iterator erase_and_dispose(const_iterator i, Disposer disposer) 
{ + node_ptr to_erase(i.pointed_node()); + iterator ret(erase(i)); + disposer(_value_traits.to_value_ptr(to_erase)); + return ret; + } + template + iterator erase_and_dispose(const_iterator b, const_iterator e, Disposer disposer) { + while (b != e) { + erase_and_dispose(b++, disposer); + } + return b.unconst(); + } + template + void clone_from(const intrusive_set_external_comparator &src, Cloner cloner, Disposer disposer) { + clear_and_dispose(disposer); + if (!src.empty()) { + bi::detail::exception_disposer rollback(*this, disposer); + algo::clone(src._header.this_ptr(), + _header.this_ptr(), + bi::detail::node_cloner(cloner, &_value_traits), + bi::detail::node_disposer(disposer, &_value_traits)); + rollback.release(); + } + } + Elem* unlink_leftmost_without_rebalance() { + node_ptr to_be_disposed(algo::unlink_leftmost_without_rebalance(_header.this_ptr())); + if(!to_be_disposed) + return 0; + algo::init(to_be_disposed); + return _value_traits.to_value_ptr(to_be_disposed); + } + iterator insert_before(const_iterator pos, Elem& value) { + node_ptr to_insert(_value_traits.to_node_ptr(value)); + return iterator(algo::insert_before(_header.this_ptr(), pos.pointed_node(), to_insert), priv_value_traits_ptr()); + } + template + iterator upper_bound(const KeyType& key, KeyTypeKeyCompare comp) { + return iterator(algo::upper_bound(_header.this_ptr(), key, key_node_comp(comp)), priv_value_traits_ptr()); + } + template + const_iterator upper_bound(const KeyType& key, KeyTypeKeyCompare comp) const { + return const_iterator(algo::upper_bound(_header.this_ptr(), key, key_node_comp(comp)), priv_value_traits_ptr()); + } + template + iterator lower_bound(const KeyType &key, KeyTypeKeyCompare comp) { + return iterator(algo::lower_bound(_header.this_ptr(), key, key_node_comp(comp)), priv_value_traits_ptr()); + } + template + const_iterator lower_bound(const KeyType &key, KeyTypeKeyCompare comp) const { + return const_iterator(algo::lower_bound(_header.this_ptr(), key, 
key_node_comp(comp)), priv_value_traits_ptr()); + } + template + iterator find(const KeyType &key, KeyTypeKeyCompare comp) { + return iterator(algo::find(_header.this_ptr(), key, key_node_comp(comp)), priv_value_traits_ptr()); + } + template + const_iterator find(const KeyType &key, KeyTypeKeyCompare comp) const { + return const_iterator(algo::find(_header.this_ptr(), key, key_node_comp(comp)), priv_value_traits_ptr()); + } + template + iterator insert(const_iterator hint, Elem& value, ElemCompare cmp) { + algo::insert_commit_data commit_data; + std::pair ret = + algo::insert_unique_check(_header.this_ptr(), + hint.pointed_node(), + key_of_value()(value), + key_node_comp(cmp), + commit_data); + return ret.second ? insert_unique_commit(value, commit_data) + : iterator(ret.first, priv_value_traits_ptr()); + } +}; diff --git a/licenses/boost-license-1.0.txt b/licenses/boost-license-1.0.txt new file mode 100644 index 0000000000..36b7cd93cd --- /dev/null +++ b/licenses/boost-license-1.0.txt @@ -0,0 +1,23 @@ +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/mutation_partition.cc b/mutation_partition.cc index 78ad0ebcef..8ff0b5e8a6 100644 --- a/mutation_partition.cc +++ b/mutation_partition.cc @@ -32,6 +32,7 @@ #include "mutation_query.hh" #include "service/priority_manager.hh" #include "mutation_compactor.hh" +#include "intrusive_set_external_comparator.hh" template struct reversal_traits; @@ -168,7 +169,7 @@ void revert_intrusive_set_range(const schema& s, mutation_partition::rows_type& // lower_bound() can allocate if linearization is required but it should have // been already performed by the lower_bound() invocation in apply_reversibly_intrusive_set() and // stored in the linearization context. - auto i = dst.find(e); + auto i = dst.find(e, rows_entry::compare(s)); assert(i != dst.end()); rows_entry& dst_e = *i; @@ -196,6 +197,7 @@ void revert_intrusive_set(const schema& s, mutation_partition::rows_type& dst, m auto apply_reversibly_intrusive_set(const schema& s, mutation_partition::rows_type& dst, mutation_partition::rows_type& src) { auto src_i = src.begin(); try { + rows_entry::compare cmp(s); while (src_i != src.end()) { rows_entry& src_e = *src_i; @@ -207,8 +209,8 @@ auto apply_reversibly_intrusive_set(const schema& s, mutation_partition::rows_ty continue; } - auto i = dst.lower_bound(src_e); - if (i == dst.end() || dst.key_comp()(src_e, *i)) { + auto i = dst.lower_bound(src_e, cmp); + if (i == dst.end() || cmp(src_e, *i)) { // Construct neutral entry which will represent missing dst entry for revert. 
rows_entry* empty_e = current_allocator().construct(src_e.key()); [&] () noexcept { @@ -231,7 +233,7 @@ auto apply_reversibly_intrusive_set(const schema& s, mutation_partition::rows_ty mutation_partition::mutation_partition(const mutation_partition& x) : _tombstone(x._tombstone) , _static_row(x._static_row) - , _rows(x._rows.key_comp()) + , _rows() , _row_tombstones(x._row_tombstones) { auto cloner = [] (const auto& x) { return current_allocator().construct>>(x); @@ -243,12 +245,12 @@ mutation_partition::mutation_partition(const mutation_partition& x, const schema query::clustering_key_filter_ranges ck_ranges) : _tombstone(x._tombstone) , _static_row(x._static_row) - , _rows(x._rows.key_comp()) + , _rows() , _row_tombstones(x._row_tombstones) { try { for(auto&& r : ck_ranges) { for (const rows_entry& e : x.range(schema, r)) { - _rows.push_back(*current_allocator().construct(e)); + _rows.insert(_rows.end(), *current_allocator().construct(e), rows_entry::compare(schema)); } } } catch (...) 
{ @@ -447,12 +449,12 @@ mutation_partition::apply_insert(const schema& s, clustering_key_view key, api:: void mutation_partition::insert_row(const schema& s, const clustering_key& key, deletable_row&& row) { auto e = current_allocator().construct(key, std::move(row)); - _rows.insert(_rows.end(), *e); + _rows.insert(_rows.end(), *e, rows_entry::compare(s)); } void mutation_partition::insert_row(const schema& s, const clustering_key& key, const deletable_row& row) { auto e = current_allocator().construct(key, row); - _rows.insert(_rows.end(), *e); + _rows.insert(_rows.end(), *e, rows_entry::compare(s)); } const row* @@ -469,7 +471,7 @@ mutation_partition::clustered_row(const schema& s, clustering_key&& key) { auto i = _rows.find(key, rows_entry::compare(s)); if (i == _rows.end()) { auto e = current_allocator().construct(std::move(key)); - _rows.insert(i, *e); + _rows.insert(i, *e, rows_entry::compare(s)); return e->row(); } return i->row(); @@ -480,7 +482,7 @@ mutation_partition::clustered_row(const schema& s, const clustering_key& key) { auto i = _rows.find(key, rows_entry::compare(s)); if (i == _rows.end()) { auto e = current_allocator().construct(key); - _rows.insert(i, *e); + _rows.insert(i, *e, rows_entry::compare(s)); return e->row(); } return i->row(); @@ -491,7 +493,7 @@ mutation_partition::clustered_row(const schema& s, const clustering_key_view& ke auto i = _rows.find(key, rows_entry::compare(s)); if (i == _rows.end()) { auto e = current_allocator().construct(key); - _rows.insert(i, *e); + _rows.insert(i, *e, rows_entry::compare(s)); return e->row(); } return i->row(); diff --git a/mutation_partition.hh b/mutation_partition.hh index fbabd863f8..79166bfc73 100644 --- a/mutation_partition.hh +++ b/mutation_partition.hh @@ -41,7 +41,7 @@ #include "hashing_partition_visitor.hh" #include "range_tombstone_list.hh" #include "clustering_key_filter.hh" -#include "intrusive_set.hh" +#include "intrusive_set_external_comparator.hh" // // Container for cells of a row. 
Cells are identified by column_id. @@ -439,7 +439,7 @@ public: }; class rows_entry { - intrusive_set_member_hook _link; + intrusive_set_external_comparator_member_hook _link; clustering_key _key; deletable_row _row; friend class mutation_partition; @@ -535,7 +535,7 @@ class serializer; class mutation_partition final { public: - using rows_type = intrusive_set; + using rows_type = intrusive_set_external_comparator; friend class rows_entry; friend class size_calculator; private: @@ -553,11 +553,11 @@ private: public: struct copy_comparators_only {}; mutation_partition(schema_ptr s) - : _rows(rows_entry::compare(*s)) + : _rows() , _row_tombstones(*s) { } mutation_partition(mutation_partition& other, copy_comparators_only) - : _rows(other._rows.key_comp()) + : _rows() , _row_tombstones(other._row_tombstones, range_tombstone_list::copy_comparator_only()) { } mutation_partition(mutation_partition&&) = default;