Merge "Add multishard_writer support" from Asias

"
We need a multishard_writer which gets mutation fragments from a producer
(e.g., from the network using the rpc streaming) and consumes the mutation
fragments with a consumer (e.g., write to sstable).

The multishard_writer will take care of the mutation fragments that do not
belong to the current shard.

This multishard_writer will be used in the new scylla streaming.
"

* 'asias/multishard_writer_v10.1' of github.com:scylladb/seastar-dev:
  tests: Add multishard_writer_test to test.py
  tests: Add test for multishard_writer
  multishard_writer: Introduce multishard_writer
  tests: Allow random_mutation_generator to generate mutations that belong to a remote shard
This commit is contained in:
Avi Kivity
2018-06-28 12:36:28 +03:00
8 changed files with 388 additions and 16 deletions

View File

@@ -303,6 +303,7 @@ scylla_tests = [
'tests/imr_test',
'tests/partition_data_test',
'tests/reusable_buffer_test',
'tests/multishard_writer_test',
]
perf_tests = [
@@ -629,6 +630,7 @@ scylla_core = (['database.cc',
'utils/arch/powerpc/crc32-vpmsum/crc32_wrapper.cc',
'querier.cc',
'data/cell.cc',
'multishard_writer.cc',
]
+ [Antlr3Grammar('cql3/Cql.g')]
+ [Thrift('interface/cassandra.thrift', 'Cassandra')]

226
multishard_writer.cc Normal file
View File

@@ -0,0 +1,226 @@
/*
* Copyright (C) 2018 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#include "multishard_writer.hh"
#include "mutation_reader.hh"
#include "mutation_fragment.hh"
#include "schema_registry.hh"
#include <vector>
#include <seastar/core/future-util.hh>
#include <seastar/core/queue.hh>
// flat_mutation_reader implementation that is fed from a seastar::queue.
//
// Every engaged mutation_fragment_opt popped from the queue is appended to the
// reader's buffer; a disengaged one marks the end of the stream. The queue is
// held by reference, so it must outlive the reader.
//
// Only forward, sequential consumption is supported: next_partition() and both
// fast_forward_to() overloads throw std::bad_function_call.
class queue_reader final : public flat_mutation_reader::impl {
    seastar::queue<mutation_fragment_opt>& _mq;
public:
    queue_reader(schema_ptr s, seastar::queue<mutation_fragment_opt>& mq)
        : impl(std::move(s)), _mq(mq)
    { }

    // Keeps popping from the queue until either the buffer is full or the
    // end-of-stream marker has been seen.
    virtual future<> fill_buffer(db::timeout_clock::time_point) override {
        auto finished = [this] {
            return is_end_of_stream() || is_buffer_full();
        };
        auto pop_one = [this] {
            return _mq.pop_eventually().then([this] (mutation_fragment_opt fragment) {
                if (fragment) {
                    push_mutation_fragment(std::move(*fragment));
                    return;
                }
                // A disengaged optional is the producer's end-of-stream marker.
                _end_of_stream = true;
            });
        };
        return do_until(std::move(finished), std::move(pop_one));
    }

    virtual void next_partition() override {
        throw std::bad_function_call();
    }

    virtual future<> fast_forward_to(const dht::partition_range&, db::timeout_clock::time_point) override {
        throw std::bad_function_call();
    }

    virtual future<> fast_forward_to(position_range, db::timeout_clock::time_point) override {
        throw std::bad_function_call();
    }
};
class shard_writer {
private:
schema_ptr _s;
flat_mutation_reader _reader;
std::function<future<> (flat_mutation_reader reader)> _consumer;
public:
shard_writer(schema_ptr s,
flat_mutation_reader reader,
std::function<future<> (flat_mutation_reader reader)> consumer);
future<> consume();
};
// multishard_writer pulls mutation_fragments from a producer
// flat_mutation_reader and routes them, partition by partition, to the shard
// the partitioner assigns to the partition's token. Each target shard gets a
// bounded queue, a queue-backed reader and a shard_writer that runs the
// user-supplied consumer on that shard.
//
// The future returned by operator()() resolves to the number of partitions
// seen once distribution has finished and all started consumers have completed.
class multishard_writer {
    schema_ptr _s;
    dht::i_partitioner& _partitioner;
    // Indexed by shard; an entry stays empty until the first partition for
    // that shard shows up.
    std::vector<foreign_ptr<std::unique_ptr<shard_writer>>> _shard_writers;
    // One future per shard writer that has been started.
    std::vector<future<>> _pending_consumers;
    // Indexed by shard; feeds the queue_reader of the corresponding shard writer.
    std::vector<seastar::queue<mutation_fragment_opt>> _queues;
    // Shard owning the partition currently being distributed; -1u until the
    // first partition start has been seen.
    unsigned _current_shard = -1u;
    uint64_t _consumed_partitions = 0;
    flat_mutation_reader _producer;
    std::function<future<> (flat_mutation_reader)> _consumer;

    // Must only be called with a partition_start fragment.
    unsigned shard_for_mf(const mutation_fragment& mf) {
        return _partitioner.shard_of(mf.as_partition_start().key().token());
    }

    future<> make_shard_writer(unsigned shard);
    future<stop_iteration> handle_mutation_fragment(mutation_fragment mf);
    future<stop_iteration> handle_end_of_stream();
    future<> consume(unsigned shard);
    future<> wait_pending_consumers();
    future<> distribute_mutation_fragments();
public:
    multishard_writer(
        schema_ptr s,
        dht::i_partitioner& partitioner,
        flat_mutation_reader producer,
        std::function<future<> (flat_mutation_reader)> consumer);

    future<uint64_t> operator()();
};
// Stores the schema, the reader to drain and the consumer callback.
// All three arguments are taken by value and moved into the members.
shard_writer::shard_writer(schema_ptr s,
    flat_mutation_reader reader,
    std::function<future<> (flat_mutation_reader reader)> consumer)
    // `s` is a by-value sink parameter: move it instead of copying it,
    // matching how multishard_writer's constructor treats its schema.
    : _s(std::move(s))
    , _reader(std::move(reader))
    , _consumer(std::move(consumer)) {
}
// Peeks at the reader and, if it has at least one fragment, passes the reader
// (by move) to the consumer callback. A reader with no fragments resolves
// immediately without invoking the callback.
future<> shard_writer::consume() {
    return _reader.peek().then([this] (mutation_fragment* first_fragment) {
        if (!first_fragment) {
            return make_ready_future<>();
        }
        return _consumer(std::move(_reader));
    });
}
// Sets up one (initially empty) shard-writer slot and one bounded queue per
// shard known to the partitioner. Shard writers themselves are created later,
// when the first partition for a shard is seen.
multishard_writer::multishard_writer(
    schema_ptr s,
    dht::i_partitioner& partitioner,
    flat_mutation_reader producer,
    std::function<future<> (flat_mutation_reader)> consumer)
    : _s(std::move(s))
    , _partitioner(partitioner)
    , _producer(std::move(producer))
    , _consumer(std::move(consumer))
{
    // Capacity of each per-shard queue, in fragments.
    constexpr size_t queue_capacity = 2;
    const auto shard_count = _partitioner.shard_count();

    _shard_writers.resize(shard_count);
    // Reserve up front so the queues are built in place without reallocation.
    _queues.reserve(shard_count);
    for (unsigned shard = 0; shard < shard_count; ++shard) {
        _queues.emplace_back(queue_capacity);
    }
}
// Creates the shard_writer for `shard` and starts its consumer.
//
// A queue_reader over the shard's queue is built here and wrapped in a
// foreign_ptr. On the target shard it is wrapped by make_foreign_reader() and
// given to a new shard_writer together with a copy of the consumer. Once the
// writer is stored, consume(shard) is started and its future is recorded in
// _pending_consumers.
future<> multishard_writer::make_shard_writer(unsigned shard) {
    auto queue_fed_reader = make_flat_mutation_reader<queue_reader>(_s, _queues[shard]);
    auto remote_source = make_foreign(std::make_unique<flat_mutation_reader>(std::move(queue_fed_reader)));

    return smp::submit_to(shard, [gs = global_schema_ptr(_s),
                                  consumer = _consumer,
                                  reader = std::move(remote_source)] () mutable {
        auto local_reader = make_foreign_reader(gs.get(), std::move(reader));
        auto new_writer = std::make_unique<shard_writer>(gs.get(), std::move(local_reader), consumer);
        return make_foreign(std::move(new_writer));
    }).then([this, shard] (foreign_ptr<std::unique_ptr<shard_writer>> writer) {
        _shard_writers[shard] = std::move(writer);
        _pending_consumers.push_back(consume(shard));
    });
}
// Routes one fragment to the queue of the shard owning the current partition.
//
// A partition_start fragment bumps the partition counter and may switch the
// current shard; the shard writer for a shard is created the first time that
// shard is selected. Every fragment, partition_start included, is then pushed
// to the current shard's queue. Always resolves to stop_iteration::no.
future<stop_iteration> multishard_writer::handle_mutation_fragment(mutation_fragment mf) {
    auto writer_ready = make_ready_future<>();

    if (mf.is_partition_start()) {
        ++_consumed_partitions;
        const unsigned target = shard_for_mf(mf);
        if (target != _current_shard) {
            _current_shard = target;
            if (!_shard_writers[target]) {
                writer_ready = make_shard_writer(target);
            }
        }
    }

    return writer_ready.then([this, mf = std::move(mf)] () mutable {
        // The stream must begin with a partition_start, which sets the shard.
        assert(_current_shard != -1u);
        auto& target_queue = _queues[_current_shard];
        return target_queue.push_eventually(mutation_fragment_opt(std::move(mf)));
    }).then([] {
        return stop_iteration::no;
    });
}
// Signals end-of-stream to every shard that has a shard writer by pushing a
// disengaged mutation_fragment_opt to its queue. Shards without a writer are
// skipped. Resolves to stop_iteration::yes when all pushes have completed.
future<stop_iteration> multishard_writer::handle_end_of_stream() {
    return parallel_for_each(boost::irange(0u, _partitioner.shard_count()), [this] (unsigned shard) {
        if (!_shard_writers[shard]) {
            return make_ready_future<>();
        }
        return _queues[shard].push_eventually(mutation_fragment_opt());
    }).then([] {
        return stop_iteration::yes;
    });
}
// Runs the shard writer's consume() on `shard`.
//
// If it fails, every queue is aborted with the same exception before the
// failure is propagated to the caller.
future<> multishard_writer::consume(unsigned shard) {
    auto* remote_writer = _shard_writers[shard].get();
    return smp::submit_to(shard, [remote_writer] () mutable {
        return remote_writer->consume();
    }).handle_exception([this] (std::exception_ptr failure) {
        for (auto& queue : _queues) {
            queue.abort(failure);
        }
        return make_exception_future<>(std::move(failure));
    });
}
// Waits for every consumer future recorded in _pending_consumers.
future<> multishard_writer::wait_pending_consumers() {
    auto first = _pending_consumers.begin();
    auto last = _pending_consumers.end();
    return seastar::when_all_succeed(first, last);
}
// Drains the producer, routing each fragment with handle_mutation_fragment()
// and finishing with handle_end_of_stream() once the producer is exhausted.
//
// If the producer or the routing fails, every queue is aborted with the
// failure before it is propagated. Without this no end-of-stream marker is
// ever pushed, so started consumers would stay blocked popping from their
// queues and operator()(), which waits for them, would never resolve.
future<> multishard_writer::distribute_mutation_fragments() {
    return repeat([this] () mutable {
        return _producer().then([this] (mutation_fragment_opt mf_opt) mutable {
            if (mf_opt) {
                return handle_mutation_fragment(std::move(*mf_opt));
            } else {
                return handle_end_of_stream();
            }
        });
    }).handle_exception([this] (std::exception_ptr ep) {
        // Same handling as a consumer failure in consume(): wake up everyone
        // blocked on a queue with the original error.
        for (auto& q : _queues) {
            q.abort(ep);
        }
        return make_exception_future<>(std::move(ep));
    });
}
// Runs the whole pipeline: distributes all fragments, then waits for the
// started consumers whether or not distribution succeeded. On success it
// resolves to the number of partitions seen.
future<uint64_t> multishard_writer::operator()() {
    auto distribution = distribute_mutation_fragments();
    return distribution.finally([this] {
        return wait_pending_consumers();
    }).then([this] {
        return _consumed_partitions;
    });
}
// Builds a multishard_writer from the arguments, keeps it alive with do_with()
// for the duration of the operation, and runs it. Resolves to the number of
// partitions the writer reports.
future<uint64_t> distribute_reader_and_consume_on_shards(schema_ptr s,
    dht::i_partitioner& partitioner,
    flat_mutation_reader producer,
    std::function<future<> (flat_mutation_reader)> consumer) {
    multishard_writer writer(std::move(s), partitioner, std::move(producer), std::move(consumer));
    return do_with(std::move(writer), [] (multishard_writer& w) {
        return w();
    });
}

35
multishard_writer.hh Normal file
View File

@@ -0,0 +1,35 @@
/*
* Copyright (C) 2018 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "schema.hh"
#include "flat_mutation_reader.hh"
#include "dht/i_partitioner.hh"
// Helper to use multishard_writer to distribute mutation_fragments from the
// producer to the correct shard and consume with the consumer.
//
// s           - schema of the fragments produced by `producer`.
// partitioner - decides, from a partition's token, which shard owns it.
// producer    - source of the mutation fragments to distribute.
// consumer    - invoked on a target shard with a reader over the fragments
//               routed to that shard.
//
// It returns number of partitions consumed.
future<uint64_t> distribute_reader_and_consume_on_shards(schema_ptr s,
dht::i_partitioner& partitioner,
flat_mutation_reader producer,
std::function<future<> (flat_mutation_reader)> consumer);

View File

@@ -112,6 +112,7 @@ boost_tests = [
'sstable_3_x_test',
'meta_test',
'reusable_buffer_test',
'multishard_writer_test',
]
other_tests = [

View File

@@ -0,0 +1,99 @@
/*
* Copyright (C) 2018 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#include <seastar/core/thread.hh>
#include <seastar/tests/test-utils.hh>
#include <seastar/util/bool_class.hh>
#include "mutation_fragment.hh"
#include "mutation_source_test.hh"
#include "flat_mutation_reader.hh"
#include "multishard_writer.hh"
#include "tests/cql_test_env.hh"
// Strongly-typed flag selecting whether the test consumer should fail
// instead of consuming its reader.
struct generate_error_tag { };
using generate_error = bool_class<generate_error_tag>;
// Feeds randomly generated mutations (which may belong to any shard) through
// distribute_reader_and_consume_on_shards() and checks that:
//  - every partition is consumed on the shard the global partitioner assigns it to,
//  - the number of partitions reported and the per-shard partition counts match the input,
//  - a failing consumer makes the whole operation fail.
SEASTAR_TEST_CASE(test_multishard_writer) {
    return do_with_cql_env_thread([] (cql_test_env& e) {
        auto test_random_streams = [] (random_mutation_generator&& gen, size_t partition_nr, generate_error error = generate_error::no) {
            for (auto i = 0; i < 3; i++) {
                auto muts = gen(partition_nr);
                // Expected and observed number of partitions per shard.
                std::vector<size_t> shards_before(smp::count, 0);
                std::vector<size_t> shards_after(smp::count, 0);
                for (auto& m : muts) {
                    auto shard = dht::global_partitioner().shard_of(m.token());
                    shards_before[shard]++;
                }
                schema_ptr s = gen.schema();
                auto source_reader = partition_nr > 0 ? flat_mutation_reader_from_mutations(muts) : make_empty_flat_reader(s);
                size_t partitions_received = distribute_reader_and_consume_on_shards(s,
                    dht::global_partitioner(),
                    std::move(source_reader),
                    [&shards_after, error] (flat_mutation_reader reader) mutable {
                        if (error) {
                            return make_exception_future<>(std::runtime_error("Failed to write"));
                        }
                        return repeat([&shards_after, reader = std::move(reader)] () mutable {
                            return reader().then([&shards_after] (mutation_fragment_opt mf_opt) mutable {
                                if (mf_opt) {
                                    if (mf_opt->is_partition_start()) {
                                        auto shard = dht::global_partitioner().shard_of(mf_opt->as_partition_start().key().token());
                                        BOOST_REQUIRE_EQUAL(shard, engine().cpu_id());
                                        shards_after[shard]++;
                                    }
                                    return make_ready_future<stop_iteration>(stop_iteration::no);
                                } else {
                                    return make_ready_future<stop_iteration>(stop_iteration::yes);
                                }
                            });
                        });
                    }
                ).get0();
                BOOST_REQUIRE_EQUAL(partitions_received, partition_nr);
                BOOST_REQUIRE_EQUAL(shards_after, shards_before);
            }
        };

        // Runs a stream whose consumer always fails and requires the failure to
        // reach the caller. The outcome is recorded in a flag and checked with
        // BOOST_REQUIRE outside the try block: BOOST_ASSERT(false) would be
        // compiled out under NDEBUG (silently passing) and would otherwise
        // abort the process instead of reporting a test failure.
        auto require_failure = [&test_random_streams] (random_mutation_generator::generate_counters counters) {
            bool failed = false;
            try {
                test_random_streams(random_mutation_generator(counters, local_shard_only::no), 1000, generate_error::yes);
            } catch (...) {
                failed = true;
            }
            BOOST_REQUIRE(failed);
        };

        test_random_streams(random_mutation_generator(random_mutation_generator::generate_counters::no, local_shard_only::no), 0);
        test_random_streams(random_mutation_generator(random_mutation_generator::generate_counters::yes, local_shard_only::no), 0);
        test_random_streams(random_mutation_generator(random_mutation_generator::generate_counters::no, local_shard_only::no), 1);
        test_random_streams(random_mutation_generator(random_mutation_generator::generate_counters::yes, local_shard_only::no), 1);
        test_random_streams(random_mutation_generator(random_mutation_generator::generate_counters::no, local_shard_only::no), 1000);
        test_random_streams(random_mutation_generator(random_mutation_generator::generate_counters::yes, local_shard_only::no), 1000);

        require_failure(random_mutation_generator::generate_counters::no);
        require_failure(random_mutation_generator::generate_counters::yes);
    });
}

View File

@@ -1325,6 +1325,7 @@ bytes make_blob(size_t blob_size) {
class random_mutation_generator::impl {
friend class random_mutation_generator;
generate_counters _generate_counters;
local_shard_only _local_shard_only;
const size_t _external_blob_size = 128; // Should be enough to force use of external bytes storage
const size_t n_blobs = 1024;
const column_id column_count = row::max_vector_size * 2;
@@ -1368,7 +1369,7 @@ class random_mutation_generator::impl {
: do_make_schema(bytes_type);
}
public:
explicit impl(generate_counters counters) : _generate_counters(counters) {
explicit impl(generate_counters counters, local_shard_only lso = local_shard_only::yes) : _generate_counters(counters), _local_shard_only(lso) {
std::random_device rd;
// In case of errors, replace the seed with a fixed value to get a deterministic run.
auto seed = rd();
@@ -1377,7 +1378,7 @@ public:
_schema = make_schema();
auto keys = make_local_keys(n_blobs, _schema, _external_blob_size);
auto keys = _local_shard_only ? make_local_keys(n_blobs, _schema, _external_blob_size) : make_keys(n_blobs, _schema, _external_blob_size);
_blobs = boost::copy_range<std::vector<bytes>>(keys | boost::adaptors::transformed([this] (sstring& k) { return to_bytes(k); }));
}
@@ -1596,7 +1597,7 @@ public:
}
std::vector<dht::decorated_key> make_partition_keys(size_t n) {
auto local_keys = make_local_keys(n, _schema);
auto local_keys = _local_shard_only ? make_local_keys(n, _schema) : make_keys(n, _schema);
return boost::copy_range<std::vector<dht::decorated_key>>(local_keys | boost::adaptors::transformed([this] (sstring& key) {
auto pkey = partition_key::from_single_value(*_schema, to_bytes(key));
return dht::global_partitioner().decorate_key(*_schema, std::move(pkey));
@@ -1616,8 +1617,8 @@ public:
random_mutation_generator::~random_mutation_generator() {}
random_mutation_generator::random_mutation_generator(generate_counters counters)
: _impl(std::make_unique<random_mutation_generator::impl>(counters))
random_mutation_generator::random_mutation_generator(generate_counters counters, local_shard_only lso)
: _impl(std::make_unique<random_mutation_generator::impl>(counters, lso))
{ }
mutation random_mutation_generator::operator()() {

View File

@@ -22,6 +22,7 @@
#pragma once
#include "mutation_reader.hh"
#include "tests/sstable_utils.hh"
using populate_fn = std::function<mutation_source(schema_ptr s, const std::vector<mutation>&)>;
@@ -49,7 +50,7 @@ public:
struct generate_counters_tag { };
using generate_counters = bool_class<generate_counters_tag>;
explicit random_mutation_generator(generate_counters);
explicit random_mutation_generator(generate_counters, local_shard_only lso = local_shard_only::yes);
~random_mutation_generator();
mutation operator()();
// Generates n mutations sharing the same schema and sorted by their decorated keys.

View File

@@ -27,6 +27,9 @@
#include <boost/range/irange.hpp>
#include <boost/range/adaptor/map.hpp>
struct local_shard_only_tag { };
using local_shard_only = bool_class<local_shard_only_tag>;
sstables::shared_sstable make_sstable_containing(std::function<sstables::shared_sstable()> sst_factory, std::vector<mutation> muts);
inline future<> write_memtable_to_sstable_for_test(memtable& mt, sstables::shared_sstable sst) {
@@ -35,9 +38,9 @@ inline future<> write_memtable_to_sstable_for_test(memtable& mt, sstables::share
}
//
// Make set of keys sorted by token for current shard.
// Make set of keys sorted by token for current or remote shard.
//
static std::vector<sstring> make_local_keys(unsigned n, const schema_ptr& s, size_t min_key_size = 1) {
static std::vector<sstring> do_make_keys(unsigned n, const schema_ptr& s, size_t min_key_size = 1, local_shard_only lso = local_shard_only::yes) {
std::vector<std::pair<sstring, dht::decorated_key>> p;
p.reserve(n);
@@ -48,9 +51,10 @@ static std::vector<sstring> make_local_keys(unsigned n, const schema_ptr& s, siz
std::copy_n(reinterpret_cast<int8_t*>(&key_id), sizeof(key_id), raw_key.begin());
auto dk = dht::global_partitioner().decorate_key(*s, partition_key::from_single_value(*s, to_bytes(raw_key)));
key_id++;
if (engine_is_ready() && engine().cpu_id() != dht::global_partitioner().shard_of(dk.token())) {
continue;
if (lso) {
if (engine_is_ready() && engine().cpu_id() != dht::global_partitioner().shard_of(dk.token())) {
continue;
}
}
generated++;
p.emplace_back(std::move(raw_key), std::move(dk));
@@ -61,11 +65,14 @@ static std::vector<sstring> make_local_keys(unsigned n, const schema_ptr& s, siz
return boost::copy_range<std::vector<sstring>>(p | boost::adaptors::map_keys);
}
//
// Return one key for current shard. Note that it always returns the same key for a given shard.
//
inline sstring make_local_key(const schema_ptr& s, size_t min_key_size = 1) {
return make_local_keys(1, s, min_key_size).front();
inline std::vector<sstring> make_local_keys(unsigned n, const schema_ptr& s, size_t min_key_size = 1) {
return do_make_keys(n, s, min_key_size, local_shard_only::yes);
}
inline sstring make_local_key(const schema_ptr& s, size_t min_key_size = 1) {
return do_make_keys(1, s, min_key_size, local_shard_only::yes).front();
}
inline std::vector<sstring> make_keys(unsigned n, const schema_ptr& s, size_t min_key_size = 1) {
return do_make_keys(n, s, min_key_size, local_shard_only::no);
}