utils: Use dedicated enum for Bloom filter format instead of a boolean.

An enum better reflects the purpose of the parameter and provides better type safety than a bare boolean.

Signed-off-by: Vladimir Krivopalov <vladimir@scylladb.com>
Message-Id: <10a4fc16dafa0fb3234969041f68f9e7bfc61312.1525899669.git.vladimir@scylladb.com>
This commit is contained in:
Vladimir Krivopalov
2018-05-09 14:01:48 -07:00
committed by Avi Kivity
parent 76c64e1f26
commit e5477c6c6c
5 changed files with 30 additions and 22 deletions

View File

@@ -1413,7 +1413,10 @@ future<> sstable::read_filter(const io_priority_class& pc) {
read_simple<component_type::Filter>(filter, pc).get();
auto nr_bits = filter.buckets.elements.size() * std::numeric_limits<typename decltype(filter.buckets.elements)::value_type>::digits;
large_bitset bs(nr_bits, std::move(filter.buckets.elements));
_components->filter = utils::filter::create_filter(filter.hashes, std::move(bs), (_version != sstable_version_types::mc));
utils::filter_format format = (_version == sstable_version_types::mc)
? utils::filter_format::m_format
: utils::filter_format::k_l_format;
_components->filter = utils::filter::create_filter(filter.hashes, std::move(bs), format);
});
}
@@ -2177,7 +2180,7 @@ components_writer::components_writer(sstable& sst, const schema& s, file_writer&
, _range_tombstones(s)
, _large_partition_handler(cfg.large_partition_handler)
{
_sst._components->filter = utils::i_filter::get_filter(estimated_partitions, _schema.bloom_filter_fp_chance(), true);
_sst._components->filter = utils::i_filter::get_filter(estimated_partitions, _schema.bloom_filter_fp_chance(), utils::filter_format::k_l_format);
_sst._pi_write.desired_block_size = cfg.promoted_index_block_size.value_or(get_config().column_index_size_in_kb() * 1024);
_sst._correctly_serialize_non_compound_range_tombstones = cfg.correctly_serialize_non_compound_range_tombstones;
_index_sampling_state.summary_byte_cost = summary_byte_cost();
@@ -2685,7 +2688,7 @@ public:
_sst._shards = { shard };
_cfg.monitor->on_write_started(_data_writer->offset_tracker());
_sst._components->filter = utils::i_filter::get_filter(estimated_partitions, _schema.bloom_filter_fp_chance(), false);
_sst._components->filter = utils::i_filter::get_filter(estimated_partitions, _schema.bloom_filter_fp_chance(), utils::filter_format::m_format);
_pi_write_m.desired_block_size = cfg.promoted_index_block_size.value_or(get_config().column_index_size_in_kb() * 1024);
_sst._correctly_serialize_non_compound_range_tombstones = _cfg.correctly_serialize_non_compound_range_tombstones;
_index_sampling_state.summary_byte_cost = summary_byte_cost();

View File

@@ -53,10 +53,10 @@ namespace utils {
namespace filter {
template<typename Func>
void for_each_index(hashed_key hk, int count, int64_t max, bool old_bf_hash_order, Func&& func) {
void for_each_index(hashed_key hk, int count, int64_t max, filter_format format, Func&& func) {
auto h = hk.hash();
int64_t base = old_bf_hash_order ? h[0] : h[1];
int64_t inc = old_bf_hash_order ? h[1] : h[0];
int64_t base = (format == filter_format::k_l_format) ? h[0] : h[1];
int64_t inc = (format == filter_format::k_l_format) ? h[1] : h[0];
for (int i = 0; i < count; i++) {
if (func(std::abs(base % max)) == stop_iteration::yes) {
break;
@@ -67,7 +67,7 @@ void for_each_index(hashed_key hk, int count, int64_t max, bool old_bf_hash_orde
bool bloom_filter::is_present(hashed_key key) {
bool result = true;
for_each_index(key, _hash_count, _bitset.size(), _old_bf_hash_order, [this, &result] (auto i) {
for_each_index(key, _hash_count, _bitset.size(), _format, [this, &result] (auto i) {
if (!_bitset.test(i)) {
result = false;
return stop_iteration::yes;
@@ -78,7 +78,7 @@ bool bloom_filter::is_present(hashed_key key) {
}
void bloom_filter::add(const bytes_view& key) {
for_each_index(make_hashed_key(key), _hash_count, _bitset.size(), _old_bf_hash_order, [this] (auto i) {
for_each_index(make_hashed_key(key), _hash_count, _bitset.size(), _format, [this] (auto i) {
_bitset.set(i);
return stop_iteration::no;
});
@@ -88,15 +88,15 @@ bool bloom_filter::is_present(const bytes_view& key) {
return is_present(make_hashed_key(key));
}
filter_ptr create_filter(int hash, large_bitset&& bitset, bool old_bf_hash_order) {
return std::make_unique<murmur3_bloom_filter>(hash, std::move(bitset), old_bf_hash_order);
filter_ptr create_filter(int hash, large_bitset&& bitset, filter_format format) {
return std::make_unique<murmur3_bloom_filter>(hash, std::move(bitset), format);
}
filter_ptr create_filter(int hash, int64_t num_elements, int buckets_per, bool old_bf_hash_order) {
filter_ptr create_filter(int hash, int64_t num_elements, int buckets_per, filter_format format) {
int64_t num_bits = (num_elements * buckets_per) + bloom_calculations::EXCESS;
num_bits = align_up<int64_t>(num_bits, 64); // Seems to be implied in origin
large_bitset bitset(num_bits);
return std::make_unique<murmur3_bloom_filter>(hash, std::move(bitset), old_bf_hash_order);
return std::make_unique<murmur3_bloom_filter>(hash, std::move(bitset), format);
}
}
}

View File

@@ -58,15 +58,15 @@ public:
private:
bitmap _bitset;
int _hash_count;
bool _old_bf_hash_order;
filter_format _format;
public:
int num_hashes() { return _hash_count; }
bitmap& bits() { return _bitset; }
bloom_filter(int hashes, bitmap&& bs, bool old_bf_hash_order)
bloom_filter(int hashes, bitmap&& bs, filter_format format)
: _bitset(std::move(bs))
, _hash_count(hashes)
, _old_bf_hash_order(old_bf_hash_order)
, _format(format)
{}
virtual void add(const bytes_view& key) override;
@@ -88,8 +88,8 @@ public:
struct murmur3_bloom_filter: public bloom_filter {
murmur3_bloom_filter(int hashes, bitmap&& bs, bool old_bf_hash_order)
: bloom_filter(hashes, std::move(bs), old_bf_hash_order)
murmur3_bloom_filter(int hashes, bitmap&& bs, filter_format format)
: bloom_filter(hashes, std::move(bs), format)
{}
};
@@ -114,7 +114,7 @@ struct always_present_filter: public i_filter {
}
};
filter_ptr create_filter(int hash, large_bitset&& bitset, bool old_bf_hash_order);
filter_ptr create_filter(int hash, int64_t num_elements, int buckets_per, bool old_bf_hash_order);
filter_ptr create_filter(int hash, large_bitset&& bitset, filter_format format);
filter_ptr create_filter(int hash, int64_t num_elements, int buckets_per, filter_format format);
}
}

View File

@@ -28,7 +28,7 @@
namespace utils {
static logging::logger filterlog("bloom_filter");
filter_ptr i_filter::get_filter(int64_t num_elements, double max_false_pos_probability, bool old_bf_hash_order) {
filter_ptr i_filter::get_filter(int64_t num_elements, double max_false_pos_probability, filter_format format) {
assert(seastar::thread::running_in_thread());
if (max_false_pos_probability > 1.0) {
@@ -41,7 +41,7 @@ filter_ptr i_filter::get_filter(int64_t num_elements, double max_false_pos_proba
int buckets_per_element = bloom_calculations::max_buckets_per_element(num_elements);
auto spec = bloom_calculations::compute_bloom_spec(buckets_per_element, max_false_pos_probability);
return filter::create_filter(spec.K, num_elements, spec.buckets_per_element, old_bf_hash_order);
return filter::create_filter(spec.K, num_elements, spec.buckets_per_element, format);
}
hashed_key make_hashed_key(bytes_view b) {

View File

@@ -48,6 +48,11 @@ namespace utils {
struct i_filter;
using filter_ptr = std::unique_ptr<i_filter>;
// Selects the Bloom filter format. In filter::for_each_index the format
// decides which half of the hashed key serves as `base` and which as `inc`.
enum class filter_format {
// hash()[0] is the base and hash()[1] the increment. sstable::read_filter
// picks this for every sstable version other than `mc`.
k_l_format,
// hash()[1] is the base and hash()[0] the increment. sstable::read_filter
// picks this for sstable version `mc`.
m_format,
};
class hashed_key {
private:
std::array<uint64_t, 2> _hash;
@@ -78,6 +83,6 @@ struct i_filter {
* Asserts that the given probability can be satisfied using this
* filter.
*/
static filter_ptr get_filter(int64_t num_elements, double max_false_pos_prob, bool old_bf_hash_order);
static filter_ptr get_filter(int64_t num_elements, double max_false_pos_prob, filter_format format);
};
}