From 7ed89266b30f782c0584131888b025a0de9b5f8f Mon Sep 17 00:00:00 2001 From: Calle Wilund Date: Wed, 8 Jan 2025 10:37:39 +0000 Subject: [PATCH 01/10] config/config_file: Add exports and definitions of config_type_for<> Required for implementors. Other than config.cc. --- db/config.cc | 3 +++ utils/config_file.hh | 17 +++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/db/config.cc b/db/config.cc index d6adb1e34a..6e5410d5c2 100644 --- a/db/config.cc +++ b/db/config.cc @@ -144,6 +144,9 @@ const config_type config_type_for = config_type("string", value_to_ template <> const config_type config_type_for> = config_type("string list", value_to_json>); +template <> +const config_type config_type_for>> = config_type("string map map", value_to_json>>); + template <> const config_type config_type_for> = config_type("string map", value_to_json>); diff --git a/utils/config_file.hh b/utils/config_file.hh index 771fa77e4a..547c75d98d 100644 --- a/utils/config_file.hh +++ b/utils/config_file.hh @@ -53,6 +53,21 @@ public: template extern const config_type config_type_for; +template<> +extern const config_type config_type_for; + +template<> +extern const config_type config_type_for; + +template<> +extern const config_type config_type_for; + +template<> +extern const config_type config_type_for>; + +template<> +extern const config_type config_type_for>>; + class config_file { static thread_local unsigned s_shard_id; struct any_value { @@ -271,6 +286,8 @@ public: config_file(std::initializer_list = {}); config_file(const config_file&) = delete; + virtual ~config_file() = default; + void add(cfg_ref, std::unique_ptr value); void add(std::initializer_list); void add(const std::vector &); From 9f06a0e3a30dcb0f761e14fbf85e640fbf5f6d31 Mon Sep 17 00:00:00 2001 From: Calle Wilund Date: Wed, 8 Jan 2025 10:38:20 +0000 Subject: [PATCH 02/10] sstables: add get_shared_components accessor To access the shared components. 
--- sstables/sstables.hh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sstables/sstables.hh b/sstables/sstables.hh index 61eabb93ff..820435e5af 100644 --- a/sstables/sstables.hh +++ b/sstables/sstables.hh @@ -783,6 +783,10 @@ private: public: future<> read_toc() noexcept; + shareable_components& get_shared_components() const { + return *_components; + } + schema_ptr get_schema() const { return _schema; } From 511326882ab0c9cb372b979b477f95c23d269448 Mon Sep 17 00:00:00 2001 From: Calle Wilund Date: Wed, 8 Jan 2025 10:39:28 +0000 Subject: [PATCH 03/10] schema/migration_manager: Add schema validate Validates schema before announce. To ensure all extensions are happy. --- schema/schema.hh | 3 +++ service/migration_manager.cc | 13 +++++++++++++ 2 files changed, 16 insertions(+) diff --git a/schema/schema.hh b/schema/schema.hh index 76f2d1918d..ce66a9a59a 100644 --- a/schema/schema.hh +++ b/schema/schema.hh @@ -473,6 +473,9 @@ class partition_slice; class schema_extension { public: virtual ~schema_extension() {}; + virtual future<> validate(const schema&) const { + return make_ready_future<>(); + } virtual bytes serialize() const = 0; virtual bool is_placeholder() const { return false; diff --git a/service/migration_manager.cc b/service/migration_manager.cc index 21d0e730c5..0a1acd0f12 100644 --- a/service/migration_manager.cc +++ b/service/migration_manager.cc @@ -626,6 +626,13 @@ std::vector prepare_new_keyspace_announcement(replica::database& db, l return db::schema_tables::make_create_keyspace_mutations(db.features().cluster_schema_features(), ksm, timestamp); } +static +future<> validate(schema_ptr schema) { + return do_for_each(schema->extensions(), [schema](auto & p) { + return p.second->validate(*schema); + }); +} + static future> include_keyspace( storage_proxy& sp, const keyspace_metadata& keyspace, std::vector mutations) { // Include the serialized keyspace in case the target node missed a CREATE KEYSPACE migration (see CASSANDRA-5631). 
@@ -656,6 +663,7 @@ static future> do_prepare_new_column_family_announcement(s } future> prepare_new_column_family_announcement(storage_proxy& sp, schema_ptr cfm, api::timestamp_type timestamp) { + return validate(cfm).then([&sp, cfm, timestamp] { try { auto& db = sp.get_db().local(); auto ksm = db.find_keyspace(cfm->ks_name()).metadata(); @@ -663,6 +671,7 @@ future> prepare_new_column_family_announcement(storage_pro } catch (const replica::no_such_keyspace& e) { throw exceptions::configuration_exception(format("Cannot add table '{}' to non existing keyspace '{}'.", cfm->cf_name(), cfm->ks_name())); } + }); } future<> prepare_new_column_family_announcement(std::vector& mutations, @@ -677,6 +686,7 @@ future<> prepare_new_column_family_announcement(std::vector& mutations future> prepare_column_family_update_announcement(storage_proxy& sp, schema_ptr cfm, std::vector view_updates, api::timestamp_type ts) { warn(unimplemented::cause::VALIDATION); + co_await validate(cfm); try { auto& db = sp.local_db(); auto&& old_schema = db.find_column_family(cfm->ks_name(), cfm->cf_name()).schema(); // FIXME: Should we lookup by id? 
@@ -826,6 +836,7 @@ future> prepare_type_drop_announcement(storage_proxy& sp, } future> prepare_new_view_announcement(storage_proxy& sp, view_ptr view, api::timestamp_type ts) { + return validate(view).then([&sp, view = std::move(view), ts] { auto& db = sp.local_db(); try { auto keyspace = db.find_keyspace(view->ks_name()).metadata(); @@ -846,9 +857,11 @@ future> prepare_new_view_announcement(storage_proxy& sp, v return make_exception_future>( exceptions::configuration_exception(format("Cannot add view '{}' to non existing keyspace '{}'.", view->cf_name(), view->ks_name()))); } + }); } future> prepare_view_update_announcement(storage_proxy& sp, view_ptr view, api::timestamp_type ts) { + co_await validate(view); auto db = sp.data_dictionary(); try { auto&& keyspace = db.find_keyspace(view->ks_name()).metadata(); From e734fc11ec166d38f726f628c0b1a0080bdf3f58 Mon Sep 17 00:00:00 2001 From: Calle Wilund Date: Wed, 8 Jan 2025 10:40:41 +0000 Subject: [PATCH 04/10] cql_test_env: Add optional query timeout Some tests need queries to actually fail. 
--- test/lib/cql_test_env.cc | 7 ++++--- test/lib/cql_test_env.hh | 2 ++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/test/lib/cql_test_env.cc b/test/lib/cql_test_env.cc index c1c906859e..0a6fd3f920 100644 --- a/test/lib/cql_test_env.cc +++ b/test/lib/cql_test_env.cc @@ -52,6 +52,7 @@ #include "message/messaging_service.hh" #include "gms/gossiper.hh" #include "gms/feature_service.hh" +#include "service/qos/service_level_controller.hh" #include "db/system_keyspace.hh" #include "db/system_distributed_keyspace.hh" #include "db/sstables-format-selector.hh" @@ -172,8 +173,8 @@ private: struct core_local_state { service::client_state client_state; - core_local_state(auth::service& auth_service, qos::service_level_controller& sl_controller) - : client_state(service::client_state::external_tag{}, auth_service, &sl_controller, infinite_timeout_config) + core_local_state(auth::service& auth_service, qos::service_level_controller& sl_controller, timeout_config timeout) + : client_state(service::client_state::external_tag{}, auth_service, &sl_controller, timeout) { client_state.set_login(auth::authenticated_user(testing_superuser)); } @@ -1070,7 +1071,7 @@ private: _group0_client = &group0_client; - _core_local.start(std::ref(_auth_service), std::ref(_sl_controller)).get(); + _core_local.start(std::ref(_auth_service), std::ref(_sl_controller), cfg_in.query_timeout.value_or(infinite_timeout_config)).get(); auto stop_core_local = defer([this] { _core_local.stop().get(); }); if (!local_db().has_keyspace(ks_name)) { diff --git a/test/lib/cql_test_env.hh b/test/lib/cql_test_env.hh index 22e7527353..92e33ed620 100644 --- a/test/lib/cql_test_env.hh +++ b/test/lib/cql_test_env.hh @@ -98,6 +98,8 @@ public: bool ms_listen = false; bool run_with_raft_recovery = false; + std::optional query_timeout; + cql_test_config(); cql_test_config(const cql_test_config&); cql_test_config(shared_ptr); From 723518c3902be15224d8fa3cd7fdf253517e9f45 Mon Sep 17 00:00:00 2001 From: Calle 
Wilund Date: Wed, 8 Jan 2025 10:49:08 +0000 Subject: [PATCH 05/10] EAR: port the ear feature from enterprise Bulk transfer of EAR functionality. Includes all providers etc. Could maybe break up into smaller blocks, but once it gets down to the core of it, would require messing with code instead of just moving. So this is it. Note: KMIP support is disabled unless you happen to have the kmipc SDK in your scylla dir. Adds optional encryption of sstables and commitlog, using block level file encryption. Provides key sourcing from various sources, such as local files or popular KMS systems. --- CMakeLists.txt | 2 + cmake/Findkmip.cmake | 53 + configure.py | 41 +- ent/CMakeLists.txt | 1 + ent/encryption/CMakeLists.txt | 44 + ent/encryption/encrypted_file_impl.cc | 555 ++++++++++ ent/encryption/encrypted_file_impl.hh | 24 + ent/encryption/encryption.cc | 1040 ++++++++++++++++++ ent/encryption/encryption.hh | 196 ++++ ent/encryption/encryption_config.cc | 164 +++ ent/encryption/encryption_config.hh | 33 + ent/encryption/encryption_exceptions.hh | 55 + ent/encryption/gcp_host.cc | 1031 +++++++++++++++++ ent/encryption/gcp_host.hh | 80 ++ ent/encryption/gcp_key_provider.cc | 77 ++ ent/encryption/gcp_key_provider.hh | 25 + ent/encryption/kmip_host.cc | 1222 +++++++++++++++++++++ ent/encryption/kmip_host.hh | 80 ++ ent/encryption/kmip_key_provider.cc | 119 ++ ent/encryption/kmip_key_provider.hh | 40 + ent/encryption/kms_host.cc | 1164 ++++++++++++++++++++ ent/encryption/kms_host.hh | 80 ++ ent/encryption/kms_key_provider.cc | 71 ++ ent/encryption/kms_key_provider.hh | 37 + ent/encryption/local_file_provider.cc | 292 +++++ ent/encryption/local_file_provider.hh | 41 + ent/encryption/replicated_key_provider.cc | 477 ++++++++ ent/encryption/replicated_key_provider.hh | 39 + ent/encryption/symmetric_key.cc | 396 +++++++ ent/encryption/symmetric_key.hh | 154 +++ ent/encryption/system_key.cc | 65 ++ ent/encryption/system_key.hh | 34 + 32 files changed, 7731 insertions(+), 1 
deletion(-) create mode 100644 cmake/Findkmip.cmake create mode 100644 ent/CMakeLists.txt create mode 100644 ent/encryption/CMakeLists.txt create mode 100644 ent/encryption/encrypted_file_impl.cc create mode 100644 ent/encryption/encrypted_file_impl.hh create mode 100644 ent/encryption/encryption.cc create mode 100644 ent/encryption/encryption.hh create mode 100644 ent/encryption/encryption_config.cc create mode 100644 ent/encryption/encryption_config.hh create mode 100644 ent/encryption/encryption_exceptions.hh create mode 100644 ent/encryption/gcp_host.cc create mode 100644 ent/encryption/gcp_host.hh create mode 100644 ent/encryption/gcp_key_provider.cc create mode 100644 ent/encryption/gcp_key_provider.hh create mode 100644 ent/encryption/kmip_host.cc create mode 100644 ent/encryption/kmip_host.hh create mode 100644 ent/encryption/kmip_key_provider.cc create mode 100644 ent/encryption/kmip_key_provider.hh create mode 100644 ent/encryption/kms_host.cc create mode 100644 ent/encryption/kms_host.hh create mode 100644 ent/encryption/kms_key_provider.cc create mode 100644 ent/encryption/kms_key_provider.hh create mode 100644 ent/encryption/local_file_provider.cc create mode 100644 ent/encryption/local_file_provider.hh create mode 100644 ent/encryption/replicated_key_provider.cc create mode 100644 ent/encryption/replicated_key_provider.hh create mode 100644 ent/encryption/symmetric_key.cc create mode 100644 ent/encryption/symmetric_key.hh create mode 100644 ent/encryption/system_key.cc create mode 100644 ent/encryption/system_key.hh diff --git a/CMakeLists.txt b/CMakeLists.txt index 1a19edda5e..cd32630f91 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -267,6 +267,7 @@ add_subdirectory(compaction) add_subdirectory(cql3) add_subdirectory(data_dictionary) add_subdirectory(dht) +add_subdirectory(ent) add_subdirectory(gms) add_subdirectory(idl) add_subdirectory(index) @@ -308,6 +309,7 @@ set(scylla_libs cql3 data_dictionary dht + encryption gms idl index diff --git 
a/cmake/Findkmip.cmake b/cmake/Findkmip.cmake new file mode 100644 index 0000000000..02b9169711 --- /dev/null +++ b/cmake/Findkmip.cmake @@ -0,0 +1,53 @@ +# +# Copyright 2024-present ScyllaDB +# + +# +# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 +# + +set(kmip_ver "2.1.0t") + +cmake_host_system_information( + RESULT distrib_id QUERY DISTRIB_ID) +if(distrib_id MATCHES "centos|fedora|rhel") + set(kmip_distrib "rhel84") +else() + message(FATAL_ERROR "Could not locate kmipc library for ${distrib_id}") +endif() + +if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64") + set(kmip_arch "aarch64") +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64|x86_64") + set(kmip_arch "64") +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "(powerpc|ppc)64le") + set(kmip_arch "ppc64le") +endif() + +set(kmip_ROOT "${PROJECT_SOURCE_DIR}/kmipc/kmipc-${kmip_ver}-${kmip_distrib}_${kmip_arch}") +find_library(kmip_LIBRARY + NAMES kmip + HINTS ${kmip_ROOT}/lib) + +find_path(kmip_INCLUDE_DIR + NAMES kmip.h + HINTS ${kmip_ROOT}/include) + +mark_as_advanced( + kmip_LIBRARY + kmip_INCLUDE_DIR) + +find_package_handle_standard_args(kmip + REQUIRED_VARS + kmip_LIBRARY + kmip_INCLUDE_DIR) + +if(kmip_FOUND) + if (NOT TARGET KMIP::kmipc) + add_library(KMIP::kmipc UNKNOWN IMPORTED) + set_target_properties(KMIP::kmipc PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${kmip_INCLUDE_DIR}" + IMPORTED_LINK_INTERFACE_LANGUAGES "C" + IMPORTED_LOCATION "${kmip_LIBRARY}") + endif() +endif() diff --git a/configure.py b/configure.py index 03fe64ba8e..5e930ff6db 100755 --- a/configure.py +++ b/configure.py @@ -1123,6 +1123,19 @@ scylla_core = (['message/messaging_service.cc', 'utils/arch/powerpc/crc32-vpmsum/crc32_wrapper.cc', 'querier.cc', 'mutation_writer/multishard_writer.cc', + 'ent/encryption/encryption_config.cc', + 'ent/encryption/encryption.cc', + 'ent/encryption/symmetric_key.cc', + 'ent/encryption/local_file_provider.cc', + 'ent/encryption/replicated_key_provider.cc', + 'ent/encryption/system_key.cc', + 
'ent/encryption/encrypted_file_impl.cc', + 'ent/encryption/kmip_host.cc', + 'ent/encryption/kmip_key_provider.cc', + 'ent/encryption/kms_host.cc', + 'ent/encryption/kms_key_provider.cc', + 'ent/encryption/gcp_host.cc', + 'ent/encryption/gcp_key_provider.cc', 'multishard_mutation_query.cc', 'reader_concurrency_semaphore.cc', 'sstables_loader.cc', @@ -2000,7 +2013,7 @@ pkgs = ['libsystemd', pkgs.append('lua53' if have_pkg('lua53') else 'lua') -libs = ' '.join([maybe_static(args.staticyamlcpp, '-lyaml-cpp'), '-latomic', '-lz', '-lsnappy', +libs = ' '.join([maybe_static(args.staticyamlcpp, '-lyaml-cpp'), '-latomic', '-lz', '-lsnappy', '-lcrypto', ' -lstdc++fs', ' -lcrypt', ' -lcryptopp', ' -lpthread', # Must link with static version of libzstd, since # experimental APIs that we use are only present there. @@ -2022,6 +2035,32 @@ user_ldflags += ' -fvisibility=hidden' if args.staticcxx: user_ldflags += " -static-libstdc++" +kmip_lib_ver = '1.9.2a'; + +def kmiplib(): + os_ids = get_os_ids() + for id in os_ids: + if id in { 'centos', 'fedora', 'rhel' }: + return 'rhel84' + print('Could not resolve libkmip.a for platform {}'.format(os_ids)) + sys.exit(1) + +def target_cpu(): + cpu, _, _ = subprocess.check_output([cxx, '-dumpmachine']).decode('utf-8').partition('-') + return cpu + +def kmip_arch(): + arch = target_cpu() + if arch == 'x86_64': + return '64' + return arch + +kmipc_dir = f'kmipc/kmipc-2.1.0t-{kmiplib()}_{kmip_arch()}' +kmipc_lib = f'{kmipc_dir}/lib/libkmip.a' +libs += ' -lboost_filesystem' +if os.path.exists(kmipc_lib): + libs += f' {kmipc_lib}' + user_cflags += f' -I{kmipc_dir}/include -DHAVE_KMIP' def get_extra_cxxflags(mode, mode_config, cxx, debuginfo): cxxflags = [] diff --git a/ent/CMakeLists.txt b/ent/CMakeLists.txt new file mode 100644 index 0000000000..7ca6eb6617 --- /dev/null +++ b/ent/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(encryption) diff --git a/ent/encryption/CMakeLists.txt b/ent/encryption/CMakeLists.txt new file mode 100644 index 
0000000000..2bee22124b --- /dev/null +++ b/ent/encryption/CMakeLists.txt @@ -0,0 +1,44 @@ +include(add_whole_archive) + +find_package(cpp-jwt REQUIRED) +find_package(kmip) + +add_library(scylla_encryption STATIC) +target_sources(scylla_encryption + PRIVATE + encrypted_file_impl.cc + encryption.cc + encryption_config.cc + gcp_host.cc + gcp_key_provider.cc + kmip_host.cc + kmip_key_provider.cc + kms_host.cc + kms_key_provider.cc + local_file_provider.cc + replicated_key_provider.cc + symmetric_key.cc + system_key.cc) +target_include_directories(scylla_encryption + PUBLIC + ${CMAKE_SOURCE_DIR}) +target_link_libraries(scylla_encryption + PUBLIC + Seastar::seastar + PRIVATE + cql3 + utils + cpp-jwt::cpp-jwt) +if(kmip_FOUND) + target_link_libraries(scylla_encryption + PRIVATE + KMIP::kmipc) + target_compile_definitions(scylla_encryption + PUBLIC + HAVE_KMIP) +endif() + +check_headers(check-headers scylla_encryption + GLOB_RECURSE ${CMAKE_CURRENT_SOURCE_DIR}/*.hh) + +add_whole_archive(encryption scylla_encryption) diff --git a/ent/encryption/encrypted_file_impl.cc b/ent/encryption/encrypted_file_impl.cc new file mode 100644 index 0000000000..7c3cf86d49 --- /dev/null +++ b/ent/encryption/encrypted_file_impl.cc @@ -0,0 +1,555 @@ +/* + * Copyright (C) 2018 ScyllaDB + * + */ + +/* + * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 + */ + +#include +#include +#include +#include + +#include "symmetric_key.hh" +#include "encryption.hh" +#include "utils/serialization.hh" +#include "encrypted_file_impl.hh" + +namespace encryption { + +static inline bool is_aligned(size_t n, size_t a) { + return (n & (a - 1)) == 0; +} + +/** + * Very simple block-encrypting file wrapper. + * + * Uses user provided symmetric key + ESSIV block IV calculation + * to encrypt data. + * + * The essiv block key is created by generating the SHA256 hash + * of the provided data encryption key bytes, truncated to block_key_len/8 + * and generating an AES/ECB key using this data. 
+ * + * The file is divided in N blocks of `block_size` size. + * Each block is encrypted (unpadded) with the provided key and + * block mode, using an IV derived by (essiv): + * + * bytes tmp[] = { 0, ..., uint64_t-little-endian(<block number>) } + * iv = block_key->encrypt(tmp); + * + * All encryption is done unpadded. To handle file sizes we use + * a slightly shaky scheme: + * + * Since all writes are assumed to be done by us, and must be aligned, + * we can assume in turn that any resizing should be made by our truncate + * method. If we attempt to truncate to a size not a multiple of our + * _key_ block size (typically 16), we add the same size to the actual + * truncation size. + * On read we then check the file size. If we're reading from a file + * with unaligned size, we know there is key-block-size junk at the end. + * We can align down the last decryption call to match block size, then + * discard the excessive bytes from the result. + * + * If we're in a read/write situation, we need to keep size updated, and + * we could possibly race with disk op/continuations. + * But we are really only intended for ro/wo cases. + */ +class encrypted_file_impl : public seastar::file_impl { + file _file; + ::shared_ptr _key; + ::shared_ptr _block_key; + bytes _hash_salt; + + std::optional _file_length; + + // this is somewhat large, but we assume this is for bulky stuff like sstables/commitlog + // so large alignment should be preferable to recalculating block IV too often. 
+ static constexpr size_t block_size = 4096; + static constexpr size_t block_key_len = 256; + + class my_file_handle_impl; + friend class my_file_handle_impl; + + bytes iv_for(uint64_t pos) const; + + using mode = symmetric_key::mode; + + temporary_buffer transform(uint64_t, const void* buffer, size_t len, mode); + size_t transform(uint64_t, const void* buffer, size_t len, void*, mode); + + future<> verify_file_length(); + void maybe_set_length(uint64_t); + void clear_length(); + + static ::shared_ptr generate_block_key(::shared_ptr); + +public: + encrypted_file_impl(file, ::shared_ptr); + + future write_dma(uint64_t pos, const void* buffer, size_t len, io_intent*) override; + future write_dma(uint64_t pos, std::vector iov, io_intent*) override; + future read_dma(uint64_t pos, void* buffer, size_t len, io_intent*) override; + future read_dma(uint64_t pos, std::vector iov, io_intent*) override; + + + future<> flush() override { + return _file.flush(); + } + future stat(void) override; + future<> truncate(uint64_t length) override; + future<> discard(uint64_t offset, uint64_t length) override { + return _file.discard(offset, length); + } + future<> allocate(uint64_t position, uint64_t length) override { + return _file.allocate(position, length); + } + future size(void) override; + future<> close() override { + return _file.close(); + } + std::unique_ptr dup() override; + + subscription list_directory(std::function (directory_entry de)> next) override { + return _file.list_directory(std::move(next)); + } + future> dma_read_bulk(uint64_t offset, size_t range_size, io_intent*) override; +}; + +/** + * Note: ESSIV block iv generation implementation. + * See: http://securityevaluators.com/knowledge/papers/fde_whitepaper_draft_20170627.pdf + * + * We generate a key based on the sha256 of the data key, then encrypt each block number + * using this to get per-block IV. 
+ * The key is AES-256, using ECB (non-iv) encryption + * + */ +::shared_ptr encrypted_file_impl::generate_block_key(::shared_ptr key) { + auto hash = calculate_sha256(key->key()); + hash.resize(block_key_len / 8); + return ::make_shared(key_info{"AES/ECB", block_key_len }, hash); +} + +encrypted_file_impl::encrypted_file_impl(file f, ::shared_ptr key) + : _file(std::move(f)) + , _key(std::move(key)) + , _block_key(generate_block_key(_key)) +{ + _memory_dma_alignment = std::max(_file.memory_dma_alignment(), block_size); + _disk_read_dma_alignment = std::max(_file.disk_read_dma_alignment(), block_size); + _disk_write_dma_alignment = std::max(_file.disk_write_dma_alignment(), block_size); +} + +static future calculate_file_length(const file& f, size_t key_block_size) { + return f.size().then([key_block_size](uint64_t s) { + if (!is_aligned(s, key_block_size)) { + if (s < key_block_size) { + throw std::domain_error(fmt::format("file size {}, expected 0 or at least {}", s, key_block_size)); + } + s -= key_block_size; + } + return s; + }); +} + +future<> encrypted_file_impl::verify_file_length() { + if (_file_length) { + return make_ready_future(); + } + return calculate_file_length(_file, _key->block_size()).then([this](uint64_t s) { + _file_length = s; + }); +} + +void encrypted_file_impl::maybe_set_length(uint64_t s) { + if (s > _file_length.value_or(0)) { + _file_length = s; + } +} + +void encrypted_file_impl::clear_length() { + _file_length = std::nullopt; +} + +bytes encrypted_file_impl::iv_for(uint64_t pos) const { + assert(!(pos & (block_size - 1))); + + // #658. ECB block mode has no IV. Bad for security, + // but must handle. 
+ size_t iv_len = _key->iv_len(); + if (iv_len == 0) { + return bytes{}; + } + + assert(iv_len >= _key->block_size()); + assert(iv_len >= sizeof(uint64_t)); + + bytes b(bytes::initialized_later(), std::max(iv_len, _block_key->block_size())); + std::fill(b.begin(), b.end() - sizeof(uint64_t), 0); + + // write block pos as little endian IV-len integer + auto block = pos / block_size; + write_le(reinterpret_cast(b.end()) - sizeof(uint64_t), block); + + // encrypt the encoded block number to build an IV + _block_key->encrypt_unpadded(b.data(), b.size(), b.data()); + + b.resize(iv_len); + + return b; +} + +size_t encrypted_file_impl::transform(uint64_t pos, const void* buffer, size_t len, void* dst, mode m) { + assert(!(pos & (block_size - 1))); + assert(_file_length || m == mode::encrypt); + + auto o = reinterpret_cast(dst); + auto i = reinterpret_cast(buffer); + auto l = _file_length.value_or(std::numeric_limits::max()); + auto b = _key->block_size(); + + size_t off = 0; + + for (; off < len; off += block_size) { + auto iv = iv_for(pos + off); + auto rem = std::min(block_size, len - off); + + if (rem < block_size || ((pos + off + rem) > l && m == symmetric_key::mode::decrypt)) { + // truncated block. should be the last one. 
+ if (m != symmetric_key::mode::decrypt) { + throw std::invalid_argument("Output data not aligned"); + } + _key->transform_unpadded(m, i + off, align_down(rem, b), o + off, iv.data()); + return l - pos; + } + _key->transform_unpadded(m, i + off, block_size, o + off, iv.data()); + } + + return off; +} + +temporary_buffer encrypted_file_impl::transform(uint64_t pos, const void* buffer, size_t len, mode m) { + assert(!(len & (block_size - 1))); + auto tmp = temporary_buffer::aligned(_file.memory_dma_alignment(), len); + auto s = transform(pos, buffer, len, tmp.get_write(), m); + tmp.trim(s); + return tmp; +} + +future encrypted_file_impl::write_dma(uint64_t pos, const void* buffer, size_t len, io_intent* intent) { + assert(!(len & (block_size - 1))); + auto tmp = transform(pos, buffer, len, mode::encrypt); + assert(tmp.size() == len); // writing + auto p = tmp.get(); + return _file.dma_write(pos, p, len, intent).then([this, tmp = std::move(tmp), pos](size_t s) { + maybe_set_length(pos + s); + return s; + }); +} + +future encrypted_file_impl::write_dma(uint64_t pos, std::vector iov, io_intent* intent) { + std::vector> tmp; + tmp.reserve(iov.size()); + size_t n = 0; + for (auto& i : iov) { + assert(!(i.iov_len & (block_size - 1))); + + tmp.emplace_back(transform(pos + n, i.iov_base, i.iov_len, mode::encrypt)); + assert(tmp.back().size() == i.iov_len); // writing + n += i.iov_len; + i = iovec{ tmp.back().get_write(), tmp.back().size() }; + } + return _file.dma_write(pos, std::move(iov), intent).then([this, tmp = std::move(tmp), pos](size_t s) { + maybe_set_length(pos + s); + return s; + }); +} + +future encrypted_file_impl::read_dma(uint64_t pos, void* buffer, size_t len, io_intent* intent) { + assert(!(len & (block_size - 1))); + return verify_file_length().then([this, pos, buffer, len, intent] { + return _file.dma_read(pos, buffer, len, intent).then([this, pos, buffer](size_t len) { + return transform(pos, buffer, len, buffer, mode::decrypt); + }); + }); +} + +future 
encrypted_file_impl::read_dma(uint64_t pos, std::vector iov, io_intent* intent) { + return verify_file_length().then([this, pos, iov = std::move(iov), intent]() mutable { + auto f = _file.dma_read(pos, iov, intent); + return f.then([this, pos, iov = std::move(iov)](size_t len) mutable { + size_t off = 0; + for (auto& i : iov) { + off += transform(pos + off, i.iov_base, i.iov_len, i.iov_base, mode::decrypt); + } + return off; + }); + }); +} + +future> encrypted_file_impl::dma_read_bulk(uint64_t offset, size_t range_size, io_intent* intent) { + return verify_file_length().then([this, offset, range_size, intent]() mutable { + auto front = offset & (block_size - 1); + offset -= front; + range_size += front; + // enterprise #925 + // If caller is clever and asks for the last chunk of file + // explicitly (as in offset = N, range_size = size() - N), + // or any other unaligned size, we need to add enough padding + // to get the actual full block to decode. + auto block_size = align_up(range_size, _key->block_size()); + return _file.dma_read_bulk(offset, block_size, intent).then([this, offset, front, range_size](temporary_buffer result) { + auto s = transform(offset, result.get(), result.size(), result.get_write(), mode::decrypt); + // never give back more than asked for. + result.trim(std::min(s, range_size)); + result.trim_front(front); + return result; + }); + }); +} + +future<> encrypted_file_impl::truncate(uint64_t length) { + return size().then([this, length](uint64_t s) { + if (s >= length) { + auto kb = _key->block_size(); + auto n = length; + if (!is_aligned(length, kb)) { + n += kb; + } + return _file.truncate(n).then([this, length] { + _file_length = length; + }); + } + + // crap. we need to pad zeros. But zeros here means + // encrypted zeros. So we must do this surprisingly + // expensively, by actually writing said zeros block + // by block. Anyone hoping for sparse files is now + // severely disappointed! 
+ + auto buf_size = align_up(std::min(length, 32 * block_size), block_size); + auto aligned_size = align_down(s, block_size); + + temporary_buffer buf(buf_size); + std::fill(buf.get_write(), buf.get_write() + buf_size, 0); + + struct trunc { + temporary_buffer buf; + uint64_t aligned_size; + uint64_t size; + uint64_t length; + }; + + return do_with(trunc{std::move(buf), aligned_size, s, length}, [this](trunc & t) { + return repeat([this, &t] { + if (t.aligned_size >= t.length) { + return make_ready_future(stop_iteration::yes); + } + auto n = std::min(t.buf.size(), align_up(size_t(t.length - t.aligned_size), block_size)); + if (t.aligned_size < t.size) { + return read_dma(t.aligned_size, t.buf.get_write(), n, nullptr).then([&, n](size_t r) mutable { + auto rem = size_t(t.size - t.aligned_size); + auto ar = align_up(r, block_size); + assert(ar <= t.buf.size()); + if (rem < ar) { + std::fill(t.buf.get_write() + rem, t.buf.get_write() + ar, 0); + } + return write_dma(t.aligned_size, t.buf.get(), ar, nullptr).then([&, n](size_t w) { + t.aligned_size += w; + // #1869. On btrfs, we get the buffer potentially clobbered up to "n" (max read amount) + // even when "r" (actual bytes read) is less. 
+ std::fill(t.buf.get_write(), t.buf.get_write() + n, 0); + return make_ready_future(stop_iteration::no); + }); + }); + } + return write_dma(t.aligned_size, t.buf.get(), n, nullptr).then([&](size_t w) { + t.aligned_size += w; + return make_ready_future(stop_iteration::no); + }); + }); + }).then([this, length] { + return truncate(length); + });; + }); +} + +future encrypted_file_impl::stat() { + return _file.stat().then([this](struct stat s) { + return verify_file_length().then([this, s]() mutable { + s.st_size = *_file_length; + return s; + }); + }); +} + +future encrypted_file_impl::size() { + return verify_file_length().then([this] { + return *_file_length; + }); +} + + +std::unique_ptr encrypted_file_impl::dup() { + class my_file_handle_impl : public seastar::file_handle_impl { + seastar::file_handle _handle; + key_info _info; + bytes _key; + public: + my_file_handle_impl(seastar::file_handle h, const key_info& info, const bytes& key) + : _handle(std::move(h)) + , _info(info) + , _key(key) + {} + std::unique_ptr clone() const override { + return std::make_unique(_handle, _info, _key); + } + seastar::shared_ptr to_file() && override { + return seastar::make_shared(_handle.to_file(), ::make_shared(_info, _key)); + } + }; + + return std::make_unique(_file.dup(), _key->info(), _key->key()); +} + +shared_ptr make_encrypted_file(file f, ::shared_ptr k) { + return ::make_shared(std::move(f), std::move(k)); +} + +class indirect_encrypted_file_impl : public file_impl { + ::shared_ptr _impl; + file _f; + size_t _key_block_size; + get_key_func _get; + + future<> get() { + if (_impl) { + return make_ready_future<>(); + } + return _get().then([this](::shared_ptr k) { + // #978 could be running the getting more than once. 
+ // Only write _impl once though + if (!_impl) { + _impl = make_encrypted_file(_f, std::move(k)); + } + }); + } +public: + indirect_encrypted_file_impl(file f, size_t key_block_size, get_key_func get) + : _f(f), _key_block_size(key_block_size), _get(std::move(get)) + {} + + future write_dma(uint64_t pos, const void* buffer, size_t len, io_intent* intent) override { + return get().then([this, pos, buffer, len, intent]() { + return _impl->write_dma(pos, buffer, len, intent); + }); + } + future write_dma(uint64_t pos, std::vector iov, io_intent* intent) override { + return get().then([this, pos, iov = std::move(iov), intent]() mutable { + return _impl->write_dma(pos, std::move(iov), intent); + }); + } + future read_dma(uint64_t pos, void* buffer, size_t len, io_intent* intent) override { + return get().then([this, pos, buffer, len, intent]() { + return _impl->read_dma(pos, buffer, len, intent); + }); + } + future read_dma(uint64_t pos, std::vector iov, io_intent* intent) override { + return get().then([this, pos, iov = std::move(iov), intent]() mutable { + return _impl->read_dma(pos, std::move(iov), intent); + }); + } + future> dma_read_bulk(uint64_t offset, size_t range_size, io_intent* intent) override { + return get().then([this, offset, range_size, intent]() { + return _impl->dma_read_bulk(offset, range_size, intent); + }); + } + future<> flush(void) override { + if (_impl) { + return _impl->flush(); + } + return _f.flush(); + } + future stat(void) override { + if (_impl) { + return _impl->stat(); + } + return _f.stat().then([this](struct stat s) { + return calculate_file_length(_f, _key_block_size).then([s](uint64_t fs) mutable { + s.st_size = fs; + return s; + }); + }); + } + future<> truncate(uint64_t length) override { + if (_impl) { + return _impl->truncate(length); + } + return _f.truncate(length); + } + future<> discard(uint64_t offset, uint64_t length) override { + if (_impl) { + return _impl->discard(offset, length); + } + return _f.discard(offset, 
length); + } + future<> allocate(uint64_t position, uint64_t length) override { + if (_impl) { + return _impl->allocate(position, length); + } + return _f.allocate(position, length); + } + future size(void) override { + if (_impl) { + return _impl->size(); + } + return calculate_file_length(_f, _key_block_size); + } + future<> close() override { + if (_impl) { + return _impl->close(); + } + return _f.close(); + } + std::unique_ptr dup() override { + if (_impl) { + return _impl->dup(); + } + class my_file_handle_impl : public seastar::file_handle_impl { + seastar::file_handle _handle; + size_t _key_block_size; + get_key_func _get; + public: + my_file_handle_impl(seastar::file_handle h, size_t key_block_size, get_key_func get) + : _handle(std::move(h)) + , _key_block_size(key_block_size) + , _get(std::move(get)) + {} + std::unique_ptr clone() const override { + return std::make_unique(_handle, _key_block_size, _get); + } + seastar::shared_ptr to_file() && override { + return make_delayed_encrypted_file(_handle.to_file(), _key_block_size, _get); + } + }; + return std::make_unique(_f.dup(), _key_block_size, _get); + } + + subscription list_directory(std::function (directory_entry de)> next) override { + if (_impl) { + return _impl->list_directory(std::move(next)); + } + return _f.list_directory(std::move(next)); + } +}; + +shared_ptr make_delayed_encrypted_file(file f, size_t key_block_size, get_key_func get) { + return ::make_shared(std::move(f), key_block_size, std::move(get)); +} + + +} + diff --git a/ent/encryption/encrypted_file_impl.hh b/ent/encryption/encrypted_file_impl.hh new file mode 100644 index 0000000000..f4ecc7719b --- /dev/null +++ b/ent/encryption/encrypted_file_impl.hh @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2018 ScyllaDB + * + */ + +/* + * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 + */ + +#include +#include + +#include "symmetric_key.hh" + +namespace encryption { + +class symmetric_key; + +shared_ptr make_encrypted_file(file, 
::shared_ptr); + +using get_key_func = std::function>()>; + +shared_ptr make_delayed_encrypted_file(file, size_t, get_key_func); +} diff --git a/ent/encryption/encryption.cc b/ent/encryption/encryption.cc new file mode 100644 index 0000000000..0031b06843 --- /dev/null +++ b/ent/encryption/encryption.cc @@ -0,0 +1,1040 @@ +/* + * Copyright (C) 2015 ScyllaDB + * + */ + +/* + * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 + */ +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include "utils/to_string.hh" + +#include "compress.hh" +#include "encryption.hh" +#include "symmetric_key.hh" +#include "local_file_provider.hh" +#include "replicated_key_provider.hh" +#include "kmip_key_provider.hh" +#include "kmip_host.hh" +#include "kms_key_provider.hh" +#include "kms_host.hh" +#include "gcp_key_provider.hh" +#include "gcp_host.hh" +#include "bytes.hh" +#include "utils/class_registrator.hh" +#include "cql3/query_processor.hh" +#include "db/extensions.hh" +#include "db/system_keyspace.hh" +#include "serializer.hh" +#include "serializer_impl.hh" +#include "schema/schema.hh" +#include "sstables/sstables.hh" +#include "service/storage_service.hh" +#include "service/migration_manager.hh" +#include "db/commitlog/commitlog_extensions.hh" +#include "encrypted_file_impl.hh" +#include "encryption_config.hh" +#include "utils/UUID_gen.hh" +#include "init.hh" + +static seastar::logger logg{"encryption"}; + +namespace encryption { + +static constexpr auto REPLICATED_KEY_PROVIDER_FACTORY = "ReplicatedKeyProviderFactory"; +static constexpr auto LOCAL_FILE_SYSTEM_KEY_PROVIDER_FACTORY = "LocalFileSystemKeyProviderFactory"; +static constexpr auto KMIP_KEY_PROVIDER_FACTORY = "KmipKeyProviderFactory"; +static constexpr auto KMS_KEY_PROVIDER_FACTORY = "KmsKeyProviderFactory"; +static 
constexpr auto GCP_KEY_PROVIDER_FACTORY = "GcpKeyProviderFactory"; + +bytes base64_decode(const sstring& s, size_t off, size_t len) { + if (off >= s.size()) { + throw std::out_of_range("Invalid offset"); + } + len = std::min(len, s.size() - off); + auto n = (len / 4) * 3; + bytes b{bytes::initialized_later(), n}; + + // EVP_DecodeBlock does not handle padding well (i.e. it returns + // data with actual padding. This is not what we want, since + // we need to allow zeros in data. + // Must thus do decoding the hard way... + + std::unique_ptr ctxt(EVP_ENCODE_CTX_new(), &EVP_ENCODE_CTX_free); + + ::EVP_DecodeInit(ctxt.get()); + + int outl = 0; + auto r = ::EVP_DecodeUpdate(ctxt.get(), reinterpret_cast(b.data()), &outl, reinterpret_cast(s.data() + off), + int(len)); + if (r < 0) { + throw std::invalid_argument("Could not decode: " + s); + } + + int outl2 = 0; + r = ::EVP_DecodeFinal(ctxt.get(), reinterpret_cast(b.data() + outl), &outl2); + if (r < 0) { + throw std::invalid_argument("Could not decode: " + s); + } + b.resize(outl + outl2); + return b; +} + +sstring base64_encode(const bytes& b, size_t off, size_t len) { + if (off >= b.size()) { + throw std::out_of_range("Invalid offset"); + } + len = std::min(len, b.size() - off); + auto n = ((len + 2) / 3) * 4; + sstring s{sstring::initialized_later(), n}; + auto r = EVP_EncodeBlock(reinterpret_cast(s.data()), + reinterpret_cast(b.data() + off), int(len)); + if (r < 0) { + throw std::invalid_argument("Could not encode"); + } + s.resize(r); + return s; +} + +bytes calculate_md5(const bytes& b, size_t off, size_t len) { + if (off >= b.size()) { + throw std::out_of_range("Invalid offset"); + } + len = std::min(len, b.size() - off); + bytes res{bytes::initialized_later(), MD5_DIGEST_LENGTH}; +#if OPENSSL_VERSION_NUMBER >= (3<<28) + EVP_MD_CTX *md5 = EVP_MD_CTX_new(); + EVP_DigestInit_ex(md5, EVP_md5(), nullptr); + EVP_DigestUpdate(md5, b.data() + off, len); + EVP_DigestFinal_ex(md5, reinterpret_cast(res.data()), nullptr); + 
/**
 * Parses an optional expiry string of the form "<integer>[unit]".
 *
 * Supported units: "ms", "s", "h", "d". A missing unit means seconds.
 *
 * @param in the textual value, or nullopt
 * @return the duration in milliseconds, or nullopt if `in` is nullopt
 * @throws std::invalid_argument if the unit is not supported (std::stoll
 *         additionally throws if no leading integer can be parsed)
 */
std::optional<std::chrono::milliseconds> parse_expiry(std::optional<std::string> in) {
    using std::chrono::duration_cast;
    using std::chrono::milliseconds;
    // Same period as std::chrono::days, spelled out so the function does not
    // depend on the C++20 calendar additions.
    using whole_days = std::chrono::duration<long long, std::ratio<86400>>;

    if (!in.has_value()) {
        return std::nullopt;
    }

    size_t consumed = 0;
    const auto count = std::stoll(*in, &consumed);
    // Whatever follows the number is the unit suffix (possibly empty).
    const auto suffix = in->substr(consumed);

    if (suffix == "ms") {
        return milliseconds(count);
    }
    if (suffix == "h") {
        return duration_cast<milliseconds>(std::chrono::hours(count));
    }
    if (suffix == "d") {
        return duration_cast<milliseconds>(whole_days(count));
    }
    if (suffix.empty() || suffix == "s") {
        // we assume seconds
        return duration_cast<milliseconds>(std::chrono::seconds(count));
    }
    throw std::invalid_argument("Unsupported time unit: " + suffix);
}
if (!s.empty() && k != nullptr) { + auto b = base64_decode(s); + auto iv = calculate_sha256(k->key()); + iv.resize(k->block_size(), 0); + bytes dst(bytes::initialized_later(), b.size()); + auto len = k->decrypt(b.data(), b.size(), dst.data(), dst.size(), iv.data()); + return sstring(dst.begin(), dst.begin() + len); + } + return s; +} + +class encryption_schema_extension; + +class encryption_context_impl : public encryption_context { + // poor mans per-thread instance variable. We need a lookup map + // per shard, so preallocate it, much like a "sharded" thing would, + // but without all the fancy start/stop stuff. + // Allows this object to be effectively stateless, except for the + // objects in the maps. + std::vector>> _per_thread_provider_cache; + std::vector>> _per_thread_system_key_cache; + std::vector>> _per_thread_kmip_host_cache; + std::vector>> _per_thread_kms_host_cache; + std::vector>> _per_thread_gcp_host_cache; + std::vector> _per_thread_global_user_extension; + std::unique_ptr _cfg; + sharded* _qp;; + sharded* _mm; + sharded* _db; + sharded* _ss; + shared_ptr _cfg_encryption_key; + bool _allow_per_table_encryption; +public: + encryption_context_impl(std::unique_ptr cfg, const service_set& services) + : _per_thread_provider_cache(smp::count) + , _per_thread_system_key_cache(smp::count) + , _per_thread_kmip_host_cache(smp::count) + , _per_thread_kms_host_cache(smp::count) + , _per_thread_gcp_host_cache(smp::count) + , _per_thread_global_user_extension(smp::count) + , _cfg(std::move(cfg)) + , _qp(find_or_null(services)) + , _mm(find_or_null(services)) + , _db(find_or_null(services)) + , _ss(find_or_null(services)) + , _allow_per_table_encryption(_cfg->allow_per_table_encryption()) + {} + + template + static sharded* find_or_null(const service_set& services) { + try { + return std::addressof(services.find()); + } catch (std::out_of_range&) { + // TODO: would be great if we could verify we are in tool mode here. 
+ return nullptr; + } + } + + shared_ptr get_provider(const options& map) override { + opt_wrapper opts(map); + + auto provider_class = opts(KEY_PROVIDER); + if (!provider_class) { + provider_class = opts(SECRET_KEY_PROVIDER_FACTORY_CLASS).value_or(REPLICATED_KEY_PROVIDER_FACTORY); + } + if (provider_class->empty() || ::strcasecmp(provider_class->c_str(), "none") == 0) { + return {}; + } + static const std::unordered_map> providers = [] { + std::unordered_map> map; + + map[REPLICATED_KEY_PROVIDER_FACTORY] = std::make_unique(); + map[LOCAL_FILE_SYSTEM_KEY_PROVIDER_FACTORY] = std::make_unique(); + map[KMIP_KEY_PROVIDER_FACTORY] = std::make_unique(); + map[KMS_KEY_PROVIDER_FACTORY] = std::make_unique(); + map[GCP_KEY_PROVIDER_FACTORY] = std::make_unique(); + + return map; + }(); + + unqualified_name qn(namespace_prefix, *provider_class); + + try { + return providers.at(qn)->get_provider(*this, map); + } catch (std::out_of_range&) { + throw std::invalid_argument("Unknown provider: " + *provider_class); + } + } + shared_ptr get_cached_provider(const sstring& id) const override { + auto& cache = _per_thread_provider_cache[this_shard_id()]; + auto i = cache.find(id); + if (i != cache.end()) { + return i->second; + } + return {}; + } + void cache_provider(const sstring& id, shared_ptr p) override { + _per_thread_provider_cache[this_shard_id()][id] = std::move(p); + } + + shared_ptr get_system_key(const sstring& name) override { + auto& cache = _per_thread_system_key_cache[this_shard_id()]; + auto i = cache.find(name); + if (i != cache.end()) { + return i->second; + } + + shared_ptr k; + + if (kmip_system_key::is_kmip_path(name)) { + k = make_shared(*this, name); + } else { + k = make_shared(*this, name); + } + + if (k != nullptr) { + cache[name] = k; + } + + return k; + } + + shared_ptr get_kmip_host(const sstring& host) override { + auto& cache = _per_thread_kmip_host_cache[this_shard_id()]; + auto i = cache.find(host); + if (i != cache.end()) { + return i->second; + } + 
+ auto j = _cfg->kmip_hosts().find(host); + if (j != _cfg->kmip_hosts().end()) { + auto result = ::make_shared(*this, host, j->second); + cache.emplace(host, result); + return result; + } + + throw std::invalid_argument("No such host: "+ host); + } + + shared_ptr get_kms_host(const sstring& host) override { + auto& cache = _per_thread_kms_host_cache[this_shard_id()]; + auto i = cache.find(host); + if (i != cache.end()) { + return i->second; + } + + auto j = _cfg->kms_hosts().find(host); + if (j != _cfg->kms_hosts().end()) { + auto result = ::make_shared(*this, host, j->second); + cache.emplace(host, result); + return result; + } + + throw std::invalid_argument("No such host: "+ host); + } + + shared_ptr get_gcp_host(const sstring& host) override { + auto& cache = _per_thread_gcp_host_cache[this_shard_id()]; + auto i = cache.find(host); + if (i != cache.end()) { + return i->second; + } + + auto j = _cfg->gcp_hosts().find(host); + if (j != _cfg->gcp_hosts().end()) { + auto result = ::make_shared(*this, host, j->second); + cache.emplace(host, result); + return result; + } + + throw std::invalid_argument("No such host: "+ host); + } + + + const encryption_config& config() const override { + return *_cfg; + } + shared_ptr get_config_encryption_key() const override { + return _cfg_encryption_key; + } + future<> load_config_encryption_key(const sstring & name) { + return get_system_key(name)->get_key().then([this](auto k) { + _cfg_encryption_key = std::move(k); + }); + } + /** + * This looks like checking too late, but since these are only used by + * replicated provider, they will be checked very early anyway, unless + * running tool mode, in which case they don't exist. 
+ */ + template + T& check_service_object(T* t) const { + if (t == nullptr) { + throw std::runtime_error(fmt::format("Service {} not registered", typeid(T).name())); + } + return *t; + } + distributed& get_query_processor() const override { + return check_service_object(_qp); + } + distributed& get_storage_service() const override { + return check_service_object(_ss); + } + distributed& get_database() const override { + return check_service_object(_db); + } + distributed& get_migration_manager() const override { + return check_service_object(_mm); + } + + future<> start() override { + if (_qp && _ss && _db && _mm) { + co_await replicated_key_provider_factory::on_started(get_database().local(), get_migration_manager().local()); + } + } + future<> stop() override { + return smp::invoke_on_all([this]() -> future<> { + for (auto&& [id, h] : _per_thread_kmip_host_cache[this_shard_id()]) { + co_await h->disconnect(); + } + _per_thread_provider_cache[this_shard_id()].clear(); + _per_thread_system_key_cache[this_shard_id()].clear(); + _per_thread_kmip_host_cache[this_shard_id()].clear(); + _per_thread_kms_host_cache[this_shard_id()].clear(); + _per_thread_gcp_host_cache[this_shard_id()].clear(); + _per_thread_global_user_extension[this_shard_id()] = {}; + }); + } + + void add_global_user_encryption(shared_ptr ext) { + _per_thread_global_user_extension[this_shard_id()] = std::move(ext); + } + + shared_ptr get_global_user_encryption() const { + return _per_thread_global_user_extension[this_shard_id()]; + } + bool allow_per_table_encryption() const { + return _allow_per_table_encryption; + } +}; + +class encryption_schema_extension; + +std::ostream& operator<<(std::ostream& os, const encryption_schema_extension& ext); + +} + +template <> struct fmt::formatter : fmt::ostream_formatter {}; + +namespace encryption { + +class encryption_schema_extension : public schema_extension { + key_info _info; + shared_ptr _provider; + std::map _options; + std::optional _key_block_size; + + 
friend std::ostream& operator<<(std::ostream&, const encryption_schema_extension&); +public: + encryption_schema_extension(key_info, shared_ptr, std::map); + + using extension_ptr = ::shared_ptr; + + static extension_ptr create(encryption_context_impl&, std::map); + static extension_ptr create(encryption_context_impl&, const bytes&); + + static extension_ptr parse(encryption_context_impl& ctxt, db::extensions::schema_ext_config cfg) { + struct { + encryption_context_impl& _ctxt; + + extension_ptr operator()(const sstring&) const { + throw std::invalid_argument("Malformed extension"); + } + extension_ptr operator()(const std::map& opts) const { + return create(_ctxt, opts); + } + extension_ptr operator()(const bytes& v) const { + return create(_ctxt, v); + } + } v{ctxt}; + + auto res = std::visit(v, cfg); + // Note: We always allow _disbling_ per-table encryption, i.e. if user encryption is active, we fall back to node-local + if (res && !ctxt.allow_per_table_encryption() && ctxt.get_global_user_encryption()) { + throw std::invalid_argument(fmt::format("Node global user encryption is active and per-table encryption attributes have been prohibited ({})", *res)); + } + return res; + } + + static options parse_options(const bytes& v) { + return ser::deserialize_from_buffer(v, std::type_identity(), 0); + } + + future<::shared_ptr> key_for_read(opt_bytes id) const { + return _provider->key(_info, std::move(id)).then([](std::tuple k_id) { + return std::get<0>(std::move(k_id)); + }); + } + future, opt_bytes>> key_for_write(opt_bytes id = {}) const { + return _provider->key(_info, std::move(id)); + } + + bytes serialize() const override { + return ser::serialize_to_buffer(_options, 0); + } + future<> validate(const schema& s) const override { + try { + co_await _provider->validate(); + auto k = co_await key_for_write(); + logg.info("Added encryption extension to {}.{}", s.ks_name(), s.cf_name()); + logg.info(" Options: {}", _options); + logg.info(" Key Algorithm: {}", 
_info); + logg.info(" Provider: {}", *_provider); + + auto problems = std::get<0>(k)->validate_exact_info_result(); + if (!problems.empty()) { + logg.warn("{}", problems); + } + } catch (...) { + std::throw_with_nested(exceptions::configuration_exception((std::stringstream{} << "Validation failed:" << std::current_exception()).str())); + } + } + + bool should_delay_read(const opt_bytes& id) { + return _provider->should_delay_read(id); + } + size_t key_block_size() { + if (!_key_block_size) { + _key_block_size = symmetric_key(_info).block_size(); + } + return *_key_block_size; + } +}; + +std::ostream& operator<<(std::ostream& os, const encryption_schema_extension& ext) { + fmt::print(os, "{}, alg={}, provider={}", ext._options, ext._info, *ext._provider); + return os; +} + +encryption_schema_extension::encryption_schema_extension(key_info info, shared_ptr provider, std::map options) + : _info(std::move(info)) + , _provider(std::move(provider)) + , _options(std::move(options)) +{} + +::shared_ptr encryption_schema_extension::create(encryption_context_impl& ctxt, const bytes& v) { + auto map = parse_options(v); + return create(ctxt, map); +} + +::shared_ptr encryption_schema_extension::create(encryption_context_impl& ctxt, std::map map) { + key_info info = get_key_info(map); + auto provider = ctxt.get_provider(map); + if (!provider) { + return {}; + } + return ::make_shared(std::move(info), std::move(provider), std::move(map)); +} + +class encryption_file_io_extension : public sstables::file_io_extension { + ::shared_ptr _ctxt; +public: + encryption_file_io_extension(::shared_ptr ctxt) + : _ctxt(std::move(ctxt)) + {} + + attr_value_map get_attributes(const sstables::sstable& sst) const override { + auto& sc = sst.get_shared_components(); + if (!sc.scylla_metadata) { + return {}; + } + auto* exta = sc.scylla_metadata->get_extension_attributes(); + if (!exta) { + return {}; + } + + auto i = exta->map.find(encryption_attribute_ds); + if (i == exta->map.end()) { + return 
{}; + } + auto opts = encryption_schema_extension::parse_options(i->second.value); + + if (exta->map.count(key_id_attribute_ds)) { + auto id = exta->map.at(key_id_attribute_ds).value; + auto id_str = id.size() == utils::UUID::serialized_size() + ? sstring(fmt::format("{}", utils::UUID_gen::get_UUID(id))) + : to_hex(id) + ; + opts["key_id"] = std::move(id_str); + } + + if (exta->map.count(encrypted_components_attribute_ds)) { + std::vector ccs; + ccs.reserve(9); + auto mask = ser::deserialize_from_buffer(exta->map.at(encrypted_components_attribute_ds).value, std::type_identity{}, 0); + for (auto c : { sstables::component_type::Index, + sstables::component_type::CompressionInfo, + sstables::component_type::Data, + sstables::component_type::Summary, + sstables::component_type::Digest, + sstables::component_type::CRC, + sstables::component_type::Filter, + sstables::component_type::Statistics, + sstables::component_type::TemporaryStatistics, + }) { + if (mask & int(c)) { + ccs.emplace_back(c); + } + } + opts["components"] = fmt::to_string(fmt::join(ccs, ", ")); + } else { + opts["components"] = "Data"; + } + attr_value_map res; + res["encryption_info"] = std::move(opts); + return res; + } + + future wrap_file(sstables::sstable& sst, sstables::component_type type, file f, open_flags flags) override { + switch (type) { + case sstables::component_type::Scylla: + case sstables::component_type::TemporaryTOC: + case sstables::component_type::TOC: + co_return file{}; + default: + break; + } + + if (flags == open_flags::ro) { + // open existing. check read opts. + auto& sc = sst.get_shared_components(); + if (sc.scylla_metadata) { + auto* exta = sc.scylla_metadata->get_extension_attributes(); + if (exta) { + auto i = exta->map.find(encryption_attribute_ds); + // note: earlier builds of encryption extension would only encrypt data component, + // so iff we are opening old sstables we need to check if this component is actually + // encrypted. We use a bitmask attribute for this. 
+ + bool ok = i != exta->map.end(); + if (ok && type != sstables::component_type::Data) { + ok = exta->map.count(encrypted_components_attribute_ds) && + (ser::deserialize_from_buffer(exta->map.at(encrypted_components_attribute_ds).value, std::type_identity{}, 0) & (1 << int(type))); + } + + if (ok) { + auto esx = encryption_schema_extension::create(*_ctxt, i->second.value); + opt_bytes id; + + if (exta->map.count(key_id_attribute_ds)) { + id = exta->map.at(key_id_attribute_ds).value; + } + + if (esx->should_delay_read(id)) { + logg.debug("Encrypted sstable component {} using delayed opening {} (id: {})", sst.component_basename(type), *esx, id); + + co_return make_delayed_encrypted_file(f, esx->key_block_size(), [esx, comp = sst.component_basename(type), id = std::move(id)] { + logg.trace("Delayed component {} using {} (id: {}) resolve", comp, *esx, id); + return esx->key_for_read(id); + }); + } + + logg.debug("Open encrypted sstable component {} using {} (id: {})", sst.component_basename(type), *esx, id); + + auto k = co_await esx->key_for_read(std::move(id)); + co_return make_encrypted_file(f, std::move(k)); + } + } + } + } else { + auto s = sst.get_schema(); + shared_ptr esx; + auto e = s->extensions().find(encryption_attribute); + // #4844 - don't allow schema encryption to be used for writing + // iff it is disallowed by config -> placeholder here + // (See schema_tables.cc::prepare_builder_from_table_row - if an extension + // is unavailable/non-creatable at load time a dummy object is inserted + // ) + if (e != s->extensions().end() && !e->second->is_placeholder()) { + esx = static_pointer_cast(e->second); + } else if (!is_system_keyspace(s->ks_name())) { + esx = _ctxt->get_global_user_encryption(); + } + if (esx) { + auto& sc = sst.get_shared_components(); + if (!sc.scylla_metadata) { + sc.scylla_metadata.emplace(); + } + auto& ext = sc.scylla_metadata->get_or_create_extension_attributes(); + opt_bytes id; + + // We are writing more than one component. 
If we used a named key before + // we need to make sure we use the exact same one for all components, + // even if something like KMIP key invalidation replaced it. + // This will also speed up key lookup in some cases, as both repl + // and kmip cache id bound keys. + if (ext.map.count(key_id_attribute_ds)) { + id = ext.map.at(key_id_attribute_ds).value; + } + + logg.debug("Write encrypted sstable component {} using {} (id: {})", sst.component_basename(type), *esx, id); + + /** + * #3954 We can be (and are) called with two components simultaneously (hello index, data). + * If this case we could block on the below "key" call and iff provider has certain cache behaviour (hello replicated) + * or caches expire, we could end up with different keys for respective components, leading to one + * of the components ending up unreadable. + */ + for (;;) { + auto [k, k_id] = co_await esx->key_for_write(std::move(id)); + + if (k_id && ext.map.count(key_id_attribute_ds)) { + id = ext.map.at(key_id_attribute_ds).value; + if (k_id != id) { + continue; + } + } + + id = std::move(k_id); + + if (!ext.map.count(encryption_attribute_ds)) { + ext.map.emplace(encryption_attribute_ds, sstables::disk_string{esx->serialize()}); + } + if (id) { + ext.map.emplace(key_id_attribute_ds, sstables::disk_string{*id}); + } + if (type != sstables::component_type::Data) { + uint32_t mask = 0; + if (ext.map.count(encrypted_components_attribute_ds)) { + mask = ser::deserialize_from_buffer(ext.map.at(encrypted_components_attribute_ds).value, std::type_identity{}, 0); + } + mask |= (1 << int(type)); + // just a marker. 
see above + ext.map[encrypted_components_attribute_ds] = sstables::disk_string{ser::serialize_to_buffer(mask, 0)}; + } + co_return make_encrypted_file(f, std::move(k)); + } + } + } + + co_return file{}; + } +}; + +std::string encryption_provider(const sstables::sstable& sst) { + auto& sc = sst.get_shared_components(); + if (!sc.scylla_metadata) { + return {}; + } + auto* exta = sc.scylla_metadata->get_extension_attributes(); + if (!exta) { + return {}; + } + + auto i = exta->map.find(encryption_attribute_ds); + if (i == exta->map.end()) { + return {}; + } + auto options = encryption_schema_extension::parse_options(i->second.value); + opt_wrapper opts(options); + + return opts(KEY_PROVIDER).value_or(std::string{}); +} + +namespace bfs = std::filesystem; + +class encryption_commitlog_file_extension : public db::commitlog_file_extension { + const ::shared_ptr _ctxt; + const options _opts; + + static const inline std::regex prop_expr = std::regex("^([^=]+)=(\\S+)$"); + static const inline sstring id_key = "key_id"; + static const inline std::string end_of_file_mark = "#-- end of file"; + +public: + encryption_commitlog_file_extension(::shared_ptr ctxt, options opts) + : _ctxt(ctxt) + , _opts(std::move(opts)) + {} + sstring config_name(const sstring& filename) const { + bfs::path p(filename); + auto dir = p.parent_path(); + auto file = p.filename(); + return (dir / bfs::path("." + file.string())).string(); + } + future wrap_file(const sstring& filename, file f, open_flags flags) override { + auto cfg_file = config_name(filename); + + if (flags == open_flags::ro) { + return file_exists(cfg_file).then([=, this](bool exists) { + if (!exists) { + // #1681 if file system errors caused the options file to simply not exist, + // we can at least hope that the file itself is not very encrypted either. + // But who knows. Will probably cause data corruption. + logg.info("Commitlog segment {} has no encryption info. 
Opening unencrypted.", filename); + return make_ready_future(std::move(f)); + } + return read_text_file_fully(cfg_file).then([f, this, filename](temporary_buffer buf) { + std::istringstream ss(std::string(buf.begin(), buf.end())); + options opts; + std::string line; + bool has_eof = false; + while (std::getline(ss, line)) { + std::smatch m; + if (std::regex_match(line, m, prop_expr)) { + auto k = m[1].str(); + auto v = m[2].str(); + opts[k] = v; + } else if (line == end_of_file_mark) { + has_eof = true; + } + } + + // #1682 - if we crashed while writing the options file, + // it is quite possible that we are eventually trying to + // open + replay an (empty) CL file, but cannot read the + // properties now, since _our_ metadata is empty/truncated + if (!has_eof) { + // just return the unwrapped file. + logg.info("Commitlog segment {} has incomplete encryption info. Opening unencrypted.", filename); + return make_ready_future(std::move(f)); + } + opt_bytes id; + if (opts.count(id_key)) { + id = base64_decode(opts[id_key]); + } + + auto provider = _ctxt->get_provider(opts); + + logg.debug("Open commitlog segment {} using {} (id: {})", filename, *provider, id); + auto info = make_shared(get_key_info(opts)); + return provider->key(*info, id).then([f, info](std::tuple, opt_bytes> k) { + return make_ready_future(make_encrypted_file(f, std::get<0>(k))); + }); + }); + }); + } else { + auto provider = _ctxt->get_provider(_opts); + auto info = make_shared(get_key_info(_opts)); + return provider->key(*info).then([f, this, info, cfg_file, filename, &provider = *provider](std::tuple, opt_bytes> k_id) { + auto&& k = std::get<0>(k_id); + auto&& id = std::get<1>(k_id); + std::ostringstream ss; + for (auto&p : _opts) { + ss << p.first << "=" << p.second << std::endl; + } + if (id) { + ss << id_key << "=" << base64_encode(*id) << std::endl; + } + ss << end_of_file_mark << std::endl; + + logg.debug("Creating commitlog segment {} using {} (id: {})", filename, provider, id); + + return 
write_text_file_fully(cfg_file, ss.str()).then([f, k] { + return make_ready_future(make_encrypted_file(f, k)); + }); + }); + } + } + future<> before_delete(const sstring& filename) override { + auto cfg_file = config_name(filename); + return file_exists(cfg_file).then([cfg_file](bool b) { + return b ? remove_file(cfg_file) : make_ready_future(); + }); + } +}; + +future> register_extensions(const db::config&, std::unique_ptr cfg_in, db::extensions& exts, const ::service_set& services) { + auto& cfg = *cfg_in; + auto ctxt = ::make_shared(std::move(cfg_in), services); + // Note: extensions are immutable and shared across shards. + // Object in them must be stateless. We anchor the context in the + // extension objects, and while it is not as such 100% stateless, + // it is close enough. + exts.add_schema_extension(encryption_attribute, [ctxt](auto v) { + return encryption_schema_extension::parse(*ctxt, std::move(v)); + }); + exts.add_sstable_file_io_extension(encryption_attribute, std::make_unique(ctxt)); + + auto maybe_get_options = [&](const utils::config_file::string_map& map, const sstring& what) -> std::optional { + options opts(map.begin(), map.end()); + opt_wrapper sie(opts); + if (!::strcasecmp(sie("enabled").value_or("false").c_str(), "false")) { + return std::nullopt; + } + // commitlog/system table encryption/global user encryption should not use replicated keys, + // We default to local keys, but KMIP/KMS is ok as well (better in fact). + opts[KEY_PROVIDER] = sie(KEY_PROVIDER).value_or(LOCAL_FILE_SYSTEM_KEY_PROVIDER_FACTORY); + if (opts[KEY_PROVIDER] == LOCAL_FILE_SYSTEM_KEY_PROVIDER_FACTORY && !sie(SECRET_KEY_FILE)) { + // system encryption uses different key folder than user tables. + // explicitly set the key file path + opts[SECRET_KEY_FILE] = (bfs::path(cfg.system_key_directory()) / bfs::path("system") / bfs::path(sie("key_name").value_or("system_table_keytab"))).string(); + } + // forbid replicated. 
we cannot guarantee being able to open sstables on populate + if (opts[KEY_PROVIDER] == REPLICATED_KEY_PROVIDER_FACTORY) { + throw std::invalid_argument("Replicated provider is not allowed for " + what); + } + return opts; + }; + + future<> f = make_ready_future<>(); + + auto opts = maybe_get_options(cfg.system_info_encryption(), "system table encryption"); + + if (opts) { + logg.info("Adding system info encryption using {}", *opts); + + exts.add_commitlog_file_extension(encryption_attribute, std::make_unique(ctxt, *opts)); + + // modify schemas for tables holding sensitive data to use encryption w. key described + // by the opts. + // since schemas are duplicated across shards, we must call to each shard and augument + // them all. + // Since we are in pre-init phase, this should be safe. + f = f.then([opts = *opts, &exts] { + return smp::invoke_on_all([opts = make_lw_shared(opts), &exts] () mutable { + auto& f = exts.schema_extensions().at(encryption_attribute); + for (auto& s : { db::system_keyspace::paxos(), db::system_keyspace::batchlog(), db::system_keyspace::dicts() }) { + exts.add_extension_to_schema(s, encryption_attribute, f(*opts)); + } + }); + }); + } + + if (cfg.config_encryption_active()) { + f = f.then([&cfg, ctxt] { + return ctxt->load_config_encryption_key(cfg.config_encryption_key_name()); + }); + } + + + if (!cfg.kmip_hosts().empty()) { + // only pre-create on shard 0. + f = f.then([&cfg, ctxt] { + return parallel_for_each(cfg.kmip_hosts(), [ctxt](auto& p) { + auto host = ctxt->get_kmip_host(p.first); + return host->connect(); + }); + }); + } + + if (!cfg.kms_hosts().empty()) { + // only pre-create on shard 0. + f = f.then([&cfg, ctxt] { + return parallel_for_each(cfg.kms_hosts(), [ctxt](auto& p) { + auto host = ctxt->get_kms_host(p.first); + return host->init(); + }); + }); + } + + if (!cfg.gcp_hosts().empty()) { + // only pre-create on shard 0. 
+ f = f.then([&cfg, ctxt] { + return parallel_for_each(cfg.gcp_hosts(), [ctxt](auto& p) { + auto host = ctxt->get_gcp_host(p.first); + return host->init(); + }); + }); + } + + replicated_key_provider_factory::init(exts); + + auto user_opts = maybe_get_options(cfg.user_info_encryption(), "user table encryption"); + + if (user_opts) { + logg.info("Adding user info encryption using {}", *user_opts); + + f = f.then([user_opts = *user_opts, ctxt] { + return smp::invoke_on_all([user_opts = make_lw_shared(user_opts), ctxt]() { + auto ext = encryption_schema_extension::create(*ctxt, *user_opts); + ctxt->add_global_user_encryption(std::move(ext)); + }); + }); + } + + return f.then([ctxt]() -> ::shared_ptr { + return ctxt; + }); +} + +} + diff --git a/ent/encryption/encryption.hh b/ent/encryption/encryption.hh new file mode 100644 index 0000000000..d70fd3ae19 --- /dev/null +++ b/ent/encryption/encryption.hh @@ -0,0 +1,196 @@ +/* + * Copyright (C) 2018 ScyllaDB + * + */ + +/* + * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 + */ + +#pragma once + +#include + +#include +#include +#include +#include + +#include +#include + +#include "../../bytes.hh" +#include "../../compress.hh" + +class service_set; + +namespace replica { +class database; +} + +namespace db { +class config; +class extensions; +} + +namespace cql3 { +class query_processor; +} +namespace service { +class storage_service; +class migration_manager; +} + +namespace sstables { + class sstable; +} + +namespace encryption { +inline const sstring KEY_PROVIDER = "key_provider"; +inline const sstring SECRET_KEY_PROVIDER_FACTORY_CLASS = "secret_key_provider_factory_class"; +inline const sstring SECRET_KEY_FILE = "secret_key_file"; +inline const sstring SYSTEM_KEY_FILE = "system_key_file"; +inline const sstring CIPHER_ALGORITHM = "cipher_algorithm"; +inline const sstring SECRET_KEY_STRENGTH = "secret_key_strength"; + +inline const sstring HOST_NAME = "kmip_host"; +inline const sstring TEMPLATE_NAME = 
"template_name"; +inline const sstring KEY_NAMESPACE = "key_namespace"; + +bytes base64_decode(const sstring&, size_t off = 0, size_t n = sstring::npos); +sstring base64_encode(const bytes&, size_t off = 0, size_t n = bytes::npos); +bytes calculate_md5(const bytes&, size_t off = 0, size_t n = bytes::npos); +bytes calculate_sha256(const bytes&, size_t off = 0, size_t n = bytes::npos); +bytes calculate_sha256(bytes_view); +bytes hmac_sha256(bytes_view msg, bytes_view key); + +future> read_text_file_fully(const sstring&); +future<> write_text_file_fully(const sstring&, temporary_buffer); +future<> write_text_file_fully(const sstring&, const sstring&); + +std::optional parse_expiry(std::optional); + +class symmetric_key; +struct key_info; + +using options = std::map; +using opt_bytes = std::optional; +using key_ptr = shared_ptr; + +/** + * wrapper for "options" (map) to provide an + * interface returning empty optionals for + * non-available values. Makes query simpler + * and allows .value_or(...)-statements, which + * are neat for default values. 
+ * + * In the long run one could contemplate + * using non-std maps with similar built-in + * functionality for all our various configs + * in the system, but for now we are firmly + * entrenched in map + */ +template +class map_wrapper { + const Map& _options; +public: + using mapped_type = typename Map::mapped_type; + using key_type = typename Map::key_type; + + map_wrapper(const Map& opts) + : _options(opts) + {} + + std::optional operator()(const key_type& k) const { + auto i = _options.find(k); + if (i != _options.end()) { + return i->second; + } + return std::nullopt; + } +}; + +using opt_wrapper = map_wrapper; + +key_info get_key_info(const options&); + +class encryption_context; + +class key_provider { +public: + virtual ~key_provider() + {} + virtual future> key(const key_info&, opt_bytes = {}) = 0; + virtual future<> validate() const { + return make_ready_future<>(); + } + virtual bool should_delay_read(const opt_bytes&) const { + return false; + } +private: + friend std::ostream& operator<<(std::ostream&, const key_provider&); + virtual void print(std::ostream&) const = 0; +}; + +std::ostream& operator<<(std::ostream&, const key_provider&); + +} + +template <> struct fmt::formatter : fmt::ostream_formatter {}; + +namespace encryption { + +class key_provider_factory { +public: + virtual ~key_provider_factory() + {} + virtual shared_ptr get_provider(encryption_context& c, const options&) = 0; +}; + +class encryption_config; +class system_key; +class kmip_host; +class kms_host; +class gcp_host; + +/** + * Context is a singleton object, shared across shards. I.e. even though there are obvious mutating + * calls in it, it guarantees thread/shard safety. + * + * Why is this not a sharded thingamajing? Because its own instance methods need to send itself + * as a shard-safe object forwards, and thus need to know that same shard, which breaks the circle of + * ownership and stuff. 
+ */ +class encryption_context { +public: + virtual ~encryption_context() = default; + virtual shared_ptr get_provider(const options&) = 0; + virtual shared_ptr get_system_key(const sstring&) = 0; + virtual shared_ptr get_kmip_host(const sstring&) = 0; + virtual shared_ptr get_kms_host(const sstring&) = 0; + virtual shared_ptr get_gcp_host(const sstring&) = 0; + + virtual shared_ptr get_cached_provider(const sstring& id) const = 0; + virtual void cache_provider(const sstring& id, shared_ptr) = 0; + + virtual const encryption_config& config() const = 0; + virtual shared_ptr get_config_encryption_key() const = 0; + + virtual distributed& get_query_processor() const = 0; + virtual distributed& get_storage_service() const = 0; + virtual distributed& get_database() const = 0; + virtual distributed& get_migration_manager() const = 0; + + sstring maybe_decrypt_config_value(const sstring&) const; + + virtual future<> start() = 0; + virtual future<> stop() = 0; +}; + +future> +register_extensions(const db::config&, std::unique_ptr, db::extensions&, const ::service_set&); + +// for testing +std::string encryption_provider(const sstables::sstable&); +} + diff --git a/ent/encryption/encryption_config.cc b/ent/encryption/encryption_config.cc new file mode 100644 index 0000000000..46e819025d --- /dev/null +++ b/ent/encryption/encryption_config.cc @@ -0,0 +1,164 @@ +/* + * Copyright (C) 2015 ScyllaDB + * + */ + +/* + * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 + */ + +#include + +#include "db/config.hh" +#include "utils/config_file_impl.hh" + +#include "init.hh" +#include "encryption_config.hh" +#include "encryption.hh" + +#include + +encryption::encryption_config::encryption_config() + : config_file() +// BEGIN entry definitions + + , system_key_directory(this, "system_key_directory", value_status::Used, "/etc/scylla/conf/resources/system_keys", + R"foo(The directory where system keys are kept + +This directory should have 700 permissions and belong to 
the scylla user)foo") + + , config_encryption_active(this, "config_encryption_active", value_status::Used, false, "") + + , config_encryption_key_name(this, "config_encryption_key_name", value_status::Used, "system_key", + "Set to the local encryption key filename or KMIP key URL to use for configuration file property value decryption") + + , system_info_encryption(this, "system_info_encryption", value_status::Used, + { { "enabled", "false" }, { CIPHER_ALGORITHM, + "AES/CBC/PKCS5Padding" }, { + SECRET_KEY_STRENGTH, "128" }, + }, + R"foo(System information encryption settings + +If enabled, system tables that may contain sensitive information (system.batchlog, +system.paxos), hints files and commit logs are encrypted with the +encryption settings below. + +When enabling system table encryption on a node with existing data, run +`nodetool upgradesstables -a` on the listed tables to encrypt existing data. + +When tracing is enabled, sensitive info will be written into the tables in the +system_traces keyspace. Those tables should be configured to encrypt their data +on disk. + +It is recommended to use remote encryption keys from a KMIP server when using +Transparent Data Encryption (TDE) features. +Local key support is provided when a KMIP server is not available. + +See the scylla documentation for available key providers and their properties. +)foo") + , kmip_hosts(this, "kmip_hosts", value_status::Used, { }, + R"foo(KMIP host(s). + +The unique name of kmip host/cluster that can be referenced in table schema. + +host.yourdomain.com={ hosts=[, ...], keyfile=/path/to/keyfile, truststore=/path/to/truststore.pem, key_cache_millis=, timeout= }:... + +The KMIP connection management only supports failover, so all requests will go through a +single KMIP server. There is no load balancing, as no KMIP servers (at the time of this writing) +support read replication, or other strategies for availability. + +Hosts are tried in the order they appear here. 
Add them in the same sequence they'll fail over in. + +KMIP requests will fail over/retry 'max_command_retries' times (default 3) + +)foo") + , kms_hosts(this, "kms_hosts", value_status::Used, { }, + R"foo(KMS host(s). + +The unique name of kms host that can be referenced in table schema. + +host.yourdomain.com={ endpoint=, aws_access_key_id=, aws_secret_access_key=, aws_profile, aws_region=, aws_use_ec2_credentials, aws_use_ec2_region=, aws_assume_role_arn=, master_key=, keyfile=/path/to/keyfile, truststore=/path/to/truststore.pem, key_cache_millis=, timeout= }:... + +Actual connection can be either an explicit endpoint (:), or selected automatic via aws_region. + +If aws_use_ec2_region is true, regions is instead queried from EC2 metadata. + +Authentication can be explicit with aws_access_key_id and aws_secret_access_key. Either secret or both can be ommitted +in which case the provider will try to read them from AWS credentials in ~/.aws/credentials + +If aws_use_ec2_credentials is true, authentication is instead queried from EC2 metadata. + +If aws_assume_role_arn is set, scylla will issue an AssumeRole command and use the resulting security token for key operations. + +master_key is an AWS KMS key id or alias from which all keys used for actual encryption of scylla data will be derived. +This key must be pre-created with access policy allowing the above AWS id Encrypt, Decrypt and GenerateDataKey operations. + +)foo") + , gcp_hosts(this, "gcp_hosts", value_status::Used, { }, + R"foo(Google Compute Engine KMS host(s). + +The unique name of GCP kms host that can be referenced in table schema. + +gcp_project_id=, gcp_location=, master_key=, gcp_credentials_file=, gcp_impersonate_service_account=,keyfile=/path/to/keyfile, truststore=/path/to/truststore.pem, key_cache_millis=, timeout= }:... + +Authentication can be explicit with auth_file or by resolving default credentials (see google docs). 
+ +If use_gcp_machine_credentials is true, authentication is instead queried from GCP metadata. + +auth_file can contain either a user, service or impersonated service account. + +master_key is an GCP KMS key name from which all keys used for actual encryption of scylla data will be derived. +This key must be pre-created with access policy allowing the above credentials Encrypt and Decrypt operations. + +)foo") + , user_info_encryption(this, "user_info_encryption", value_status::Used, + { { "enabled", "false" }, { CIPHER_ALGORITHM, + "AES/CBC/PKCS5Padding" }, { + SECRET_KEY_STRENGTH, "128" }, + }, + R"foo(Global user table encryption settings. If enabled, all user tables + will be encrypted using the provided settings, unless overridden + by table scylla_encryption_options.)foo") + , allow_per_table_encryption(this, "allow_per_table_encryption", value_status::Used, true, + "If 'user_info_encryption` is enabled this controls whether specifying per-table encryption using create/alter table is allowed" + ) + + +// END entry definitions +{} + +static class : public configurable { + std::unordered_map> _cfgs; + +public: + void append_options(db::config& cfg, boost::program_options::options_description_easy_init& init) override { + // While it is fine for normal execution to have just one, static (us) encryption config, + // it does not work well with unit testing, where we repeatedly create new cql_test_envs etc, + // since new config values will not be overwritten due to the actual named_values being shared here. + // Fix this (temporarily) by simply keeping a local map cfg->ecfg and using these. + // TODO: improve this by allowing db::config to hold named sub->configs (mapping config file objects). + if (_cfgs.count(&cfg)) { + throw std::runtime_error("Config already processed"); + } + auto& ccfg = _cfgs.emplace(&cfg, std::make_unique()).first->second; + // hook into main scylla.yaml. 
+ cfg.add(ccfg->values()); + } + future initialize_ex(const boost::program_options::variables_map& opts, const db::config& cfg, db::extensions& exts, const service_set& services) override { + auto ccfg = _cfgs.count(&cfg) ? std::move(_cfgs.at(&cfg)) : std::make_unique(); + _cfgs.erase(&cfg); + auto ctxt = co_await encryption::register_extensions(cfg, std::move(ccfg), exts, services); + co_return [ctxt](system_state e) -> future<> { + switch (e) { + case system_state::started: + co_await ctxt->start(); + break; + case system_state::stopped: + co_await ctxt->stop(); + break; + default: + break; + } + }; + } +} cfg; diff --git a/ent/encryption/encryption_config.hh b/ent/encryption/encryption_config.hh new file mode 100644 index 0000000000..ced6f8ecf1 --- /dev/null +++ b/ent/encryption/encryption_config.hh @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2018 ScyllaDB + * + */ + +/* + * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 + */ + +#pragma once + +#include "../../utils/config_file.hh" + +namespace encryption { + +class encryption_config : public utils::config_file { +public: + encryption_config(); + + typedef std::unordered_map string_string_map; + + named_value system_key_directory; + named_value config_encryption_active; + named_value config_encryption_key_name; + named_value system_info_encryption; + named_value kmip_hosts; + named_value kms_hosts; + named_value gcp_hosts; + named_value user_info_encryption; + named_value allow_per_table_encryption; +}; + +} diff --git a/ent/encryption/encryption_exceptions.hh b/ent/encryption/encryption_exceptions.hh new file mode 100644 index 0000000000..7f551a7418 --- /dev/null +++ b/ent/encryption/encryption_exceptions.hh @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2024 ScyllaDB + * + */ + +/* + * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 + */ + +#pragma once + +#include "db/extensions.hh" + +namespace encryption { + +using base_error = db::extension_storage_exception; + +class 
permission_error : public db::extension_storage_permission_error { +public: + using mybase = db::extension_storage_permission_error; + using mybase::mybase; +}; + +class configuration_error : public db::extension_storage_misconfigured { +public: + using mybase = db::extension_storage_misconfigured; + using mybase::mybase; +}; + +class service_error : public base_error { +public: + using base_error::base_error; +}; + +class missing_resource_error : public db::extension_storage_resource_unavailable { +public: + using mybase = db::extension_storage_resource_unavailable; + using mybase::mybase; +}; + +// #4970 - not 100% correct, but network errors are +// generally intermittent/recoverable. Mark as a non-isolating +// error. +class network_error : public missing_resource_error { +public: + using missing_resource_error::missing_resource_error; +}; + +class malformed_response_error : public service_error { +public: + using service_error::service_error; +}; + +} + diff --git a/ent/encryption/gcp_host.cc b/ent/encryption/gcp_host.cc new file mode 100644 index 0000000000..23b2c39644 --- /dev/null +++ b/ent/encryption/gcp_host.cc @@ -0,0 +1,1031 @@ +/* + * Copyright (C) 2024 ScyllaDB + * + */ + +/* + * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 + */ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#define CPP_JWT_USE_VENDORED_NLOHMANN_JSON +#include + +#include +#include +#include +#include "utils/to_string.hh" + +#include "gcp_host.hh" +#include "encryption.hh" +#include "encryption_exceptions.hh" +#include "symmetric_key.hh" +#include "utils/hash.hh" +#include "utils/loading_cache.hh" +#include "utils/UUID.hh" +#include "utils/UUID_gen.hh" +#include "utils/rjson.hh" +#include "marshal_exception.hh" +#include "db/config.hh" + +using namespace std::chrono_literals; +using namespace 
std::string_literals; + +logger gcp_log("gcp"); + +namespace encryption { +bool operator==(const gcp_host::credentials_source& k1, const gcp_host::credentials_source& k2) { + return k1.gcp_credentials_file == k2.gcp_credentials_file && k1.gcp_impersonate_service_account == k2.gcp_impersonate_service_account; +} +} + +template<> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } + auto format(const encryption::gcp_host::credentials_source& d, fmt::format_context& ctxt) const { + return fmt::format_to(ctxt.out(), "{{ gcp_credentials_file = {}, gcp_impersonate_service_account = {} }}", d.gcp_credentials_file, d.gcp_impersonate_service_account); + } +}; + +template<> +struct std::hash { + size_t operator()(const encryption::gcp_host::credentials_source& a) const { + return utils::tuple_hash{}(std::tie(a.gcp_credentials_file, a.gcp_impersonate_service_account)); + } +}; + +class encryption::gcp_host::impl { +public: + // set a rather long expiry. normal KMS policies are 365-day rotation of keys. + // we can do with 10 minutes. CMH. maybe even longer. 
+ // (see comments below on what keys are here) + static inline constexpr std::chrono::milliseconds default_expiry = 600s; + static inline constexpr std::chrono::milliseconds default_refresh = 1200s; + + impl(encryption_context& ctxt, const std::string& name, const host_options& options) + : _ctxt(ctxt) + , _name(name) + , _options(options) + , _attr_cache(utils::loading_cache_config{ + .max_size = std::numeric_limits::max(), + .expiry = options.key_cache_expiry.value_or(default_expiry), + .refresh = options.key_cache_refresh.value_or(default_refresh)}, gcp_log, std::bind_front(&impl::create_key, this)) + , _id_cache(utils::loading_cache_config{ + .max_size = std::numeric_limits::max(), + .expiry = options.key_cache_expiry.value_or(default_expiry), + .refresh = options.key_cache_refresh.value_or(default_refresh)}, gcp_log, std::bind_front(&impl::find_key, this)) + {} + ~impl() = default; + + future<> init(); + const host_options& options() const { + return _options; + } + + future, id_type>> get_or_create_key(const key_info&, const option_override* = nullptr); + future> get_key_by_id(const id_type&, const key_info&, const option_override* = nullptr); + + using scopes_type = std::string; // space separated. avoids some transforms. makes other easy. 
+private: + class httpclient; + using key_and_id_type = std::tuple, id_type>; + + struct attr_cache_key { + credentials_source src; + std::string master_key; + key_info info; + bool operator==(const attr_cache_key& v) const = default; + }; + + friend struct fmt::formatter; + + struct attr_cache_key_hash { + size_t operator()(const attr_cache_key& k) const { + return utils::tuple_hash()(std::tie(k.master_key, k.src, k.info.len)); + } + }; + + struct id_cache_key { + credentials_source src; + id_type id; + bool operator==(const id_cache_key& v) const = default; + }; + + friend struct fmt::formatter; + + struct id_cache_key_hash { + size_t operator()(const id_cache_key& k) const { + return utils::tuple_hash()(std::tie(k.id, k.src)); + } + }; + + future create_key(const attr_cache_key&); + future find_key(const id_cache_key&); + + using timeout_clock = std::chrono::system_clock; + using timestamp_type = typename timeout_clock::time_point; + + struct access_token; + struct user_credentials; + struct service_account_credentials; + struct impersonated_service_account_credentials; + struct compute_engine_credentials{}; + + struct google_credentials; + + + struct access_token { + access_token() = default; + access_token(const rjson::value&); + + std::string token; + timestamp_type expiry; + scopes_type scopes; + + bool empty() const; + bool expired() const; + }; + + struct user_credentials { + user_credentials(const rjson::value&); + + std::string client_id; + std::string client_secret; + std::string refresh_token; + std::string access_token; + std::string quota_project_id; + }; + + using p_key = std::unique_ptr; + + struct service_account_credentials { + service_account_credentials(const rjson::value&); + + std::string client_id; + std::string client_email; + std::string private_key_id; + std::string private_key_pkcs8; + std::string token_server_uri; + std::string project_id; + std::string quota_project_id; + }; + + struct impersonated_service_account_credentials { + 
impersonated_service_account_credentials(std::string principal, google_credentials&&); + impersonated_service_account_credentials(const rjson::value&); + + std::vector delegates; + std::vector scopes; + std::string quota_project_id; + std::string iam_endpoint_override; + std::string target_principal; + + std::unique_ptr source_credentials; + access_token token; + }; + + using credentials_variant = std::variant< + user_credentials, + service_account_credentials, + impersonated_service_account_credentials, + compute_engine_credentials + >; + + struct google_credentials { + google_credentials(google_credentials&&) = default; + google_credentials(credentials_variant&& c) + : credentials(std::move(c)) + {} + google_credentials& operator=(google_credentials&&) = default; + credentials_variant credentials; + access_token token; + }; + + google_credentials from_data(std::string_view) const; + google_credentials from_data(const temporary_buffer& buf) const { + return from_data(std::string_view(buf.get(), buf.size())); + } + future from_file(const std::string& path) const { + auto buf = co_await read_text_file_fully(path); + co_return from_data(std::string_view(buf.get(), buf.size())); + } + + future get_default_credentials(); + + future get_access_token(const google_credentials&, const scopes_type& scopes) const; + + future<> refresh(google_credentials&, const scopes_type&) const; + + using key_values = std::initializer_list>; + + static std::string body(key_values kv); + + future send_request(std::string_view uri, std::string body, std::string_view content_type, httpd::operation_type = httpd::operation_type::GET, key_values headers = {}) const; + future send_request(std::string_view uri, const rjson::value& body, httpd::operation_type = httpd::operation_type::GET, key_values headers = {}) const; + future<> send_request(std::string_view uri, std::string body, std::string_view content_type, const std::function&, httpd::operation_type = httpd::operation_type::GET, key_values 
headers = {}) const; + + static std::tuple parse_key(std::string_view); + + future gcp_auth_post_with_retry(std::string_view uri, const rjson::value& body, const credentials_source&); + + encryption_context& _ctxt; + std::string _name; + host_options _options; + + std::unordered_map _cached_credentials; + + utils::loading_cache, attr_cache_key_hash> _attr_cache; + utils::loading_cache, id_cache_key_hash> _id_cache; + shared_ptr _creds; + std::unordered_map> _cache; + bool _initialized = false; + bool _checked_is_on_gce = false; + bool _is_on_gce = false; +}; + +template +static T get_option(const encryption::gcp_host::option_override* oov, std::optional C::* f, const T& def) { + if (oov) { /* per-call override present: use its value if set, else the default */ + return (oov->*f).value_or(def); + } + return def; /* no override object at all: fall back to the host-level default, not an empty value */ +}; + +future, encryption::gcp_host::id_type>> encryption::gcp_host::impl::get_or_create_key(const key_info& info, const option_override* oov) { + attr_cache_key key { + .src = { + .gcp_credentials_file = get_option(oov, &option_override::gcp_credentials_file, _options.gcp_credentials_file), + .gcp_impersonate_service_account = get_option(oov, &option_override::gcp_impersonate_service_account, _options.gcp_impersonate_service_account), + }, + .master_key = get_option(oov, &option_override::master_key, _options.master_key), + .info = info, + }; + + if (key.master_key.empty()) { + throw configuration_error("No master key set in gcp host config or encryption attributes"); + } + try { + co_return co_await _attr_cache.get(key); + } catch (base_error&) { + throw; + } catch (std::invalid_argument& e) { + std::throw_with_nested(configuration_error(fmt::format("get_or_create_key: {}", e.what()))); + } catch (...) 
{ + std::throw_with_nested(service_error(fmt::format("get_or_create_key: {}", std::current_exception()))); + } +} + +future> encryption::gcp_host::impl::get_key_by_id(const id_type& id, const key_info& info, const option_override* oov) { + // note: since KMS does not really have any actual "key" association of id -> key, + // we only cache/query raw bytes of some length. (See below). + // Thus keys returned are always new objects. But they are not huge... + id_cache_key key { + .src = { + .gcp_credentials_file = get_option(oov, &option_override::gcp_credentials_file, _options.gcp_credentials_file), + .gcp_impersonate_service_account = get_option(oov, &option_override::gcp_impersonate_service_account, _options.gcp_impersonate_service_account), + }, + .id = id, + }; + try { + auto data = co_await _id_cache.get(key); + co_return make_shared(info, data); + } catch (base_error&) { + throw; + } catch (std::invalid_argument& e) { + std::throw_with_nested(configuration_error(fmt::format("get_key_by_id: {}", e.what()))); + } catch (...) 
{ + std::throw_with_nested(service_error(fmt::format("get_key_by_id: {}", std::current_exception()))); + } +} + +static const char CREDENTIAL_ENV_VAR[] = "GOOGLE_APPLICATION_CREDENTIALS"; +static const char WELL_KNOWN_CREDENTIALS_FILE[] = "application_default_credentials.json"; +static const char CLOUDSDK_CONFIG_DIRECTORY[] = "gcloud"; + +static const char USER_FILE_TYPE[] = "authorized_user"; +static const char SERVICE_ACCOUNT_FILE_TYPE[] = "service_account"; +static const char IMPERSONATED_SERVICE_ACCOUNT_FILE_TYPE[] = "impersonated_service_account"; + +static const char GCE_METADATA_HOST_ENV_VAR[] = "GCE_METADATA_HOST"; + +static const char DEFAULT_METADATA_SERVER_URL[] = "http://metadata.google.internal"; + +static const char METADATA_FLAVOR[] = "Metadata-Flavor"; +static const char GOOGLE[] = "Google"; + +static const char TOKEN_SERVER_URI[] = "https://oauth2.googleapis.com/token"; + +static const char AUTHORIZATION[] = "Authorization"; + +static const char KMS_SCOPE[] = "https://www.googleapis.com/auth/cloudkms"; +static const char CLOUD_PLATFORM_SCOPE[] = "https://www.googleapis.com/auth/cloud-platform"; + +//static const char[] CLOUD_SHELL_ENV_VAR = "DEVSHELL_CLIENT_PORT"; +//static const char[] SKIP_APP_ENGINE_ENV_VAR = "GOOGLE_APPLICATION_CREDENTIALS_SKIP_APP_ENGINE"; +//static const char[] NO_GCE_CHECK_ENV_VAR = "NO_GCE_CHECK"; +//static const char[] GCE_METADATA_HOST_ENV_VAR = "GCE_METADATA_HOST"; + +bool encryption::gcp_host::impl::access_token::empty() const { + return token.empty(); +} + +bool encryption::gcp_host::impl::access_token::expired() const { + if (empty()) { + return true; + } + return timeout_clock::now() >= this->expiry; +} + +encryption::gcp_host::impl::user_credentials::user_credentials(const rjson::value& v) + : client_id(rjson::get(v, "client_id")) + , client_secret(rjson::get(v, "client_secret")) + , refresh_token(rjson::get(v, "refresh_token")) + , quota_project_id(rjson::get_opt(v, "quota_project_id").value_or("")) /* optional field; empty when absent */ +{} + 
+encryption::gcp_host::impl::service_account_credentials::service_account_credentials(const rjson::value& v) + : client_id(rjson::get(v, "client_id")) + , client_email(rjson::get(v, "client_email")) + , private_key_id(rjson::get(v, "private_key_id")) + , private_key_pkcs8(rjson::get(v, "private_key")) + , token_server_uri([&] { + auto token_uri = rjson::get_opt(v, "token_uri"); + if (token_uri) { + // TODO: verify uri + return *token_uri; + } + return std::string{}; + }()) + , project_id(rjson::get_opt(v, "project_id").value_or("")) + , quota_project_id(rjson::get_opt(v, "quota_project_id").value_or("")) /* optional field; empty when absent */ +{} + + +encryption::gcp_host::impl::impersonated_service_account_credentials::impersonated_service_account_credentials(std::string principal, google_credentials&& c) + : target_principal(std::move(principal)) + , source_credentials(std::make_unique(std::move(c))) +{} + +encryption::gcp_host::impl::impersonated_service_account_credentials::impersonated_service_account_credentials(const rjson::value& v) + : delegates([&] { + std::vector res; + auto tmp = rjson::find(v, "delegates"); + if (tmp) { + if (!tmp->IsArray()) { + throw configuration_error("Malformed json"); + } + + for (const auto& d : tmp->GetArray()) { + res.emplace_back(std::string(rjson::to_string_view(d))); + } + } + return res; + }()) + , quota_project_id(rjson::get_opt(v, "quota_project_id").value_or("")) + , target_principal([&] { + auto url = rjson::get(v, "service_account_impersonation_url"); + + auto si = url.find_last_of('/'); + auto ei = url.find(":generateAccessToken"); + + if (si != std::string::npos && ei != std::string::npos && si < ei) { + return url.substr(si + 1, ei - si - 1); + } + throw configuration_error( "Unable to determine target principal from service account impersonation URL."); + }()) + , source_credentials([&]() -> decltype(source_credentials) { + auto& scjson = rjson::get(v, "source_credentials"); + auto type = rjson::get(scjson, "type"); + + if (type == USER_FILE_TYPE) { + 
return std::make_unique(user_credentials(scjson)); + } else if (type == SERVICE_ACCOUNT_FILE_TYPE) { + return std::make_unique(service_account_credentials(scjson)); + } + throw configuration_error(fmt::format("A credential of type {} is not supported as source credential for impersonation.", type)); + }()) +{} + +encryption::gcp_host::impl::google_credentials +encryption::gcp_host::impl::from_data(std::string_view content) const { + auto json = rjson::parse(content); + auto type = rjson::get_opt(json, "type"); + + if (!type) { + throw configuration_error("Error reading credentials from stream, 'type' field not specified."); + } + if (type == USER_FILE_TYPE) { + return google_credentials(user_credentials(json)); + } + if (type == SERVICE_ACCOUNT_FILE_TYPE) { + return google_credentials(service_account_credentials(json)); + } + if (type == IMPERSONATED_SERVICE_ACCOUNT_FILE_TYPE) { + return google_credentials(impersonated_service_account_credentials(json)); + } + throw configuration_error(fmt::format( + "Error reading credentials from stream, 'type' value '{}' not recognized. Expecting '{}', '{}' or '{}'." + , type, USER_FILE_TYPE, SERVICE_ACCOUNT_FILE_TYPE, IMPERSONATED_SERVICE_ACCOUNT_FILE_TYPE)); +} + +static std::string get_metadata_server_url() { + auto meta_host = std::getenv(GCE_METADATA_HOST_ENV_VAR); + auto token_uri = meta_host ? std::string("http://") + meta_host : DEFAULT_METADATA_SERVER_URL; + return token_uri; +} + +future +encryption::gcp_host::impl::get_default_credentials() { + auto credentials_path = std::getenv(CREDENTIAL_ENV_VAR); + + if (credentials_path != nullptr && strlen(credentials_path)) { + gcp_log.debug("Attempting to load credentials from file: {}", credentials_path); + + try { + co_return co_await from_file(credentials_path); + } catch (...) 
{
+            std::throw_with_nested(configuration_error(fmt::format(
+                "Error reading credential file from environment variable {}, value '{}'"
+                , CREDENTIAL_ENV_VAR
+                , credentials_path
+                ))
+            );
+        }
+    }
+
+    {
+        // Next, the gcloud "well known" credentials file.
+        // CLOUDSDK_CONFIG, when set, names the gcloud configuration directory itself; otherwise
+        // the directory is $HOME/.config/<CLOUDSDK_CONFIG_DIRECTORY>. A literal "~" is only
+        // expanded by shells, never by filesystem calls, so HOME is resolved explicitly.
+        std::string well_known_file;
+        auto env_path = std::getenv("CLOUDSDK_CONFIG");
+        if (env_path != nullptr && strlen(env_path)) {
+            well_known_file = fmt::format("{}/{}", env_path, WELL_KNOWN_CREDENTIALS_FILE);
+        } else if (auto home = std::getenv("HOME"); home != nullptr && strlen(home)) {
+            well_known_file = fmt::format("{}/.config/{}/{}", home, CLOUDSDK_CONFIG_DIRECTORY, WELL_KNOWN_CREDENTIALS_FILE);
+        }
+
+        // An empty path means neither variable was usable; skip this source in that case.
+        if (!well_known_file.empty() && co_await seastar::file_exists(well_known_file)) {
+            gcp_log.debug("Attempting to load credentials from well known file: {}", well_known_file);
+            try {
+                co_return co_await from_file(well_known_file);
+            } catch (...) {
+                std::throw_with_nested(configuration_error(fmt::format(
+                    "Error reading credential file from location {}"
+                    , well_known_file
+                    ))
+                );
+            }
+        }
+    }
+
+    {
+        // Then try Compute Engine and GAE 8 standard environment
+        gcp_log.debug("Attempting to load credentials from GCE");
+
+        // Probes the metadata server once and caches the answer in _is_on_gce.
+        auto is_on_gce = [this]() -> future {
+            if (_checked_is_on_gce) {
+                co_return _is_on_gce;
+            }
+
+            auto token_uri = get_metadata_server_url();
+
+            for (int i = 1; i <= 3; ++i) {
+                try {
+                    co_await send_request(token_uri, std::string{}, "", [&](const http::reply& rep, std::string_view) {
+                        _checked_is_on_gce = true;
+                        _is_on_gce = rep.get_header(METADATA_FLAVOR) == GOOGLE;
+                    }, httpd::operation_type::GET, { { METADATA_FLAVOR, GOOGLE } });
+                    if (_checked_is_on_gce) {
+                        co_return _is_on_gce;
+                    }
+                } catch (...) 
{ + // TODO: handle timeout + break; + } + } + + auto linux_path = "/sys/class/dmi/id/product_name"; + if (co_await seastar::file_exists(linux_path)) { + auto f = file_desc::open(linux_path, O_RDONLY | O_CLOEXEC); + char buf[128] = {}; + f.read(buf, 128); + _is_on_gce = std::string_view(buf).find(GOOGLE) == 0; + } + + _checked_is_on_gce = true; + co_return _is_on_gce; + }; + + if (co_await is_on_gce()) { + co_return compute_engine_credentials{}; + } + } + + throw configuration_error("Could not determine initial credentials"); +} + +template +static void for_each_scope(const encryption::gcp_host::impl::scopes_type& s, Func&& f) { + size_t i = 0; + while(i < s.size()) { + auto j = s.find(' ', i + 1); + f(s.substr(i, j - i)); + i = j; + } +} + +encryption::gcp_host::impl::access_token::access_token(const rjson::value& json) + : token(rjson::get(json, "access_token")) + , expiry(timeout_clock::now() + std::chrono::seconds(rjson::get(json, "expires_in"))) + , scopes(rjson::get_opt(json, "scope").value_or("")) +{} + +std::string encryption::gcp_host::impl::body(key_values kv) { + std::ostringstream ss; + std::string_view sep = ""; + for (auto& [k, v] : kv) { + ss << sep << k << "=" << http::internal::url_encode(v); + sep = "&"; + } + return ss.str(); +} + +future encryption::gcp_host::impl::send_request(std::string_view uri, const rjson::value& body, httpd::operation_type op, key_values headers) const { + return send_request(uri, rjson::print(body), "application/json", op, std::move(headers)); +} + +future encryption::gcp_host::impl::send_request(std::string_view uri, std::string body, std::string_view content_type, httpd::operation_type op, key_values headers) const { + rjson::value v; + co_await send_request(uri, std::move(body), content_type, [&](const http::reply& rep, std::string_view s) { + if (rep._status != http::reply::status_type::ok) { + gcp_log.trace("Got unexpected reponse ({})", rep._status); + for (auto& [k, v] : rep._headers) { + gcp_log.trace("{}: {}", 
k, v); + } + gcp_log.trace("{}", s); + throw httpd::unexpected_status_error(rep._status); + } + v = rjson::parse(s); + }, op, std::move(headers)); + co_return v; +} + +future<> encryption::gcp_host::impl::send_request(std::string_view uri, std::string body, std::string_view content_type, const std::function& handler, httpd::operation_type op, key_values headers) const { + // Extremely simplified URI parsing. Does not handle any params etc. But we do not expect such here. + static boost::regex simple_url(R"foo((https?):\/\/([^\/:]+)(:\d+)?(\/.*)?)foo"); + + boost::smatch m; + std::string tmp(uri); + if (!boost::regex_match(tmp, m, simple_url)) { + throw std::invalid_argument(fmt::format("Could not parse URI {}", uri)); + } + + auto scheme = m[1].str(); + auto host = m[2].str(); + auto port = m[3].str(); + auto path = m[4].str(); + + auto addr = co_await net::dns::resolve_name(host, net::inet_address::family::INET /* CMH our client does not handle ipv6 well?*/); + auto certs = scheme == "https" + ? ::make_shared() + : shared_ptr() + ; + if (certs) { + if (!_options.priority_string.empty()) { + certs->set_priority_string(_options.priority_string); + } else { + certs->set_priority_string(db::config::default_tls_priority); + } + if (!_options.certfile.empty()) { + co_await certs->set_x509_key_file(_options.certfile, _options.keyfile, seastar::tls::x509_crt_format::PEM); + } + if (!_options.truststore.empty()) { + co_await certs->set_x509_trust_file(_options.truststore, seastar::tls::x509_crt_format::PEM); + } else { + co_await certs->set_system_trust(); + } + } + + uint16_t pi = port.empty() ? (certs ? 443 : 80) : uint16_t(std::stoi(port.substr(1))); + auto client = certs + ? 
http::experimental::client(socket_address(addr, pi), std::move(certs), host) + : http::experimental::client(socket_address(addr, pi)) + ; + if (path.empty()) { + path = "/"; + } + + gcp_log.trace("Resolved {} -> {}:{}{}", uri, addr, pi, path); + + auto req = http::request::make(op, host, path); + + for (auto& [k, v] : headers) { + req._headers[sstring(k)] = sstring(v); + } + + if (!body.empty()) { + if (content_type.empty()) { + content_type = "application/x-www-form-urlencoded"; + } + req.write_body("", std::move(body)); + req.set_mime_type(sstring(content_type)); + } + + gcp_log.trace("Sending {} request to {} ({}): {}", content_type, uri, headers, body); + + co_await client.make_request(std::move(req), [&] (const http::reply& rep, input_stream&& in) -> future<> { + auto&lh = handler; + auto lin = std::move(in); + auto result = co_await util::read_entire_stream_contiguous(lin); + gcp_log.trace("Got reponse {}: {}", int(rep._status), result); + lh(rep, result); + }); + + co_await client.close(); +} + + +future<> encryption::gcp_host::impl::refresh(google_credentials& c, const scopes_type& scopes) const { + if (!c.token.expired() && c.token.scopes == scopes) { + co_return; + } + c.token = co_await get_access_token(c, scopes); +} + +future +encryption::gcp_host::impl::get_access_token(const google_credentials& creds, const scopes_type& scope) const { + co_return co_await std::visit(overloaded_functor { + [&](const user_credentials& c) -> future { + assert(!c.refresh_token.empty()); + auto json = co_await send_request(TOKEN_SERVER_URI, body({ + { "client_id", c.client_id }, + { "client_secret", c.client_secret }, + { "refresh_token", c.refresh_token }, + { "grant_type", "grant_type" }, + }), "", httpd::operation_type::POST); + + co_return access_token{ json }; + }, + [&](const service_account_credentials& c) -> future { + using namespace jwt::params; + + jwt::jwt_object obj{algorithm("RS256"), secret(c.private_key_pkcs8), headers({{"kid", c.private_key_id }})}; + + 
auto uri = c.token_server_uri.empty() ? TOKEN_SERVER_URI : c.token_server_uri; + obj.add_claim("iss", c.client_email) + .add_claim("iat", timeout_clock::now()) + .add_claim("exp", timeout_clock::now() + std::chrono::seconds(3600)) + .add_claim("scope", scope) + .add_claim("aud", uri) + ; + auto sign = obj.signature(); + + auto json = co_await send_request(uri, body({ + { "grant_type", "urn:ietf:params:oauth:grant-type:jwt-bearer" }, + { "assertion", sign } + }), "", httpd::operation_type::POST); + co_return access_token{ json }; + }, + [&](const impersonated_service_account_credentials& c) -> future { + auto json_body = rjson::empty_object(); + auto scopes = rjson::empty_array(); + for_each_scope(scope, [&](std::string s) { + rjson::push_back(scopes, rjson::from_string(s)); + }); + + rjson::add(json_body, "scope", std::move(scopes)); + + if (!c.delegates.empty()) { + auto delegates = rjson::empty_array(); + for (auto& d : c.delegates) { + rjson::push_back(delegates, rjson::from_string(d)); + } + rjson::add(json_body, "delegates", std::move(delegates)); + } + + rjson::add(json_body, "lifetime", "3600s"); + + co_await refresh(*c.source_credentials, CLOUD_PLATFORM_SCOPE); + + auto endpoint = c.iam_endpoint_override.empty() + ? 
fmt::format("https://iamcredentials.googleapis.com/v1/projects/-/serviceAccounts/{}:generateAccessToken", c.target_principal) + : c.iam_endpoint_override + ; + auto json = co_await send_request(endpoint, json_body, httpd::operation_type::POST, { + { AUTHORIZATION, fmt::format("Bearer {}", c.source_credentials->token.token) }, + }); + + struct tm tmp; + ::strptime(rjson::get(json, "expireTime").data(), "%FT%TZ", &tmp); + + access_token a; + + a.expiry = timeout_clock::from_time_t(::mktime(&tmp)); + a.scopes = scope; + a.token = rjson::get(json, "accessToken"); + + co_return a; + }, + [this](const compute_engine_credentials& c) -> future { + auto meta_uri = get_metadata_server_url(); + auto token_uri = meta_uri + "/computeMetadata/v1/instance/service-accounts/default/token"; + try { + auto json = co_await send_request(token_uri, std::string{}, "", httpd::operation_type::GET, { { METADATA_FLAVOR, GOOGLE } }); + co_return access_token{ json }; + } catch (...) { + std::throw_with_nested(service_error("Unexpected Error code trying to get security access token from Compute Engine metadata for the default service account")); + } + } + }, creds.credentials); +} + +future encryption::gcp_host::impl::gcp_auth_post_with_retry(std::string_view uri, const rjson::value& body, const credentials_source& src) { + auto i = _cached_credentials.find(src); + if (i == _cached_credentials.end()) { + try { + auto c = !src.gcp_credentials_file.empty() + ? co_await from_file(src.gcp_credentials_file) + : co_await get_default_credentials() + ; + if (!src.gcp_credentials_file.empty()) { + gcp_log.trace("Loaded credentials from {}", src.gcp_credentials_file); + } + if (!src.gcp_impersonate_service_account.empty()) { + c = google_credentials(impersonated_service_account_credentials(src.gcp_impersonate_service_account, std::move(c))); + } + i = _cached_credentials.emplace(src, std::move(c)).first; + } catch (...) 
{ + gcp_log.warn("Error resolving credentials for {}: {}", src, std::current_exception()); + throw; + } + } + + assert(i != _cached_credentials.end()); // should either be set now or we threw. + + auto& creds = i->second; + + int retries = 0; + + for (;;) { + try { + co_await this->refresh(creds, KMS_SCOPE); + } catch (...) { + std::throw_with_nested(permission_error("Error refreshing credentials")); + } + + try { + auto res = co_await send_request(uri, body, httpd::operation_type::POST, { + { AUTHORIZATION, fmt::format("Bearer {}", creds.token.token) }, + }); + co_return res; + } catch (httpd::unexpected_status_error& e) { + gcp_log.debug("{}: Got unexpected response: {}", uri, e.status()); + if (e.status() == http::reply::status_type::unauthorized && retries++ < 3) { + // refresh access token and retry. + continue; + } + if (e.status() == http::reply::status_type::unauthorized) { + std::throw_with_nested(permission_error(std::string(uri))); + } + std::throw_with_nested(service_error(std::string(uri))); + } catch (...) { + std::throw_with_nested(network_error(std::string(uri))); + } + } +} + +static constexpr char GCP_KMS_QUERY_TEMPLATE[] = "https://cloudkms.googleapis.com/v1/projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}:{}"; + +future<> encryption::gcp_host::impl::init() { + if (_initialized) { + co_return; + } + + if (!_options.master_key.empty()) { + gcp_log.debug("Looking up master key"); + + attr_cache_key k{ + .src = _options, + .master_key = _options.master_key, + .info = key_info{ .alg = "AES", .len = 128 }, + }; + co_await create_key(k); + gcp_log.debug("Master key exists"); + } else { + gcp_log.info("No default master key configured. Not verifying."); + } + _initialized = true; +} + +std::tuple encryption::gcp_host::impl::parse_key(std::string_view spec) { + auto i = spec.find_last_of('/'); + if (i == std::string_view::npos) { + throw std::invalid_argument(fmt::format("Invalid master key spec '{}'. 
Must be in format /", spec)); + } + return std::make_tuple(std::string(spec.substr(0, i)), std::string(spec.substr(i + 1))); +} + +future encryption::gcp_host::impl::create_key(const attr_cache_key& k) { + auto& info = k.info; + + /** + * Google GCP KMS does allow us to create keys, but like AWS this would + * force us to deal with permissions and assignments etc. We instead + * require a pre-prepared key. + * + * Like AWS, we cannot get the actual key out, nor can we really bulk + * encrypt/decrypt things. So we do just like with AWS KMS, and generate + * a data key, and encrypt it as the key ID. + * + * For ID -> key, we simply split the ID into the encrypted key part, and + * the master key name part, decrypt the first using the second (GCP KMS decrypt), + * and create a local key using the result. + * + * Data recovery: + * Assuming you have data encrypted using a KMS generated key, you will have + * metadata detailing algorithm, key length etc (see sstable metadata, and key info). + * Metadata will also include a byte blob representing the ID of the encryption key. + * For GCP KMS, the ID will actually be a text string: + * {keyring}/{keyname}:{base64 encoded ciphertext} + * + * I.e. something like: + * mykeyring/mykey:e56sadfafa3324ff=/wfsdfwssdf + * + * The actual data key can be retrieved by doing a KMS "Decrypt" of the data blob part + * using the KMS key referenced by the key ID. This gives back actual key data that can + * be used to create a symmetric_key with algo, length etc as specified by metadata. + * + */ + + // avoid creating too many keys and too many calls. If we are not shard 0, delegate there. + if (this_shard_id() != 0) { + auto [data, id] = co_await smp::submit_to(0, [this, k]() -> future> { + auto host = _ctxt.get_gcp_host(_name); + auto [key, id] = co_await host->_impl->_attr_cache.get(k); + co_return std::make_tuple(key != nullptr ? key->key() : bytes{}, id); + }); + co_return key_and_id_type{ + data.empty() ? 
nullptr : make_shared(info, data), + id + }; + } + + // note: since external keys are _not_ stored, + // there is nothing we can "look up" or anything. Always + // new key here. + + gcp_log.debug("Creating new key: {}", info); + + auto [keyring, keyname] = parse_key(k.master_key); + + auto key = make_shared(info); + auto url = fmt::format(GCP_KMS_QUERY_TEMPLATE, + _options.gcp_project_id, + _options.gcp_location, + keyring, + keyname, + "encrypt" + ); + auto query = rjson::empty_object(); + rjson::add(query, "plaintext", std::string(base64_encode(key->key()))); + + auto response = co_await gcp_auth_post_with_retry(url, query, k.src); + auto cipher = rjson::get(response, "ciphertext"); + auto data = base64_decode(cipher); + + auto sid = fmt::format("{}/{}:{}", keyring, keyname, cipher); + bytes id(sid.begin(), sid.end()); + + gcp_log.trace("Created key id {}", sid); + + co_return key_and_id_type{ key, id }; +} + +future encryption::gcp_host::impl::find_key(const id_cache_key& k) { + // avoid creating too many keys and too many calls. If we are not shard 0, delegate there. + if (this_shard_id() != 0) { + co_return co_await smp::submit_to(0, [this, k]() -> future { + auto host = _ctxt.get_gcp_host(_name); + auto bytes = co_await host->_impl->_id_cache.get(k); + co_return bytes; + }); + } + + // See create_key. ID consists of :. + // master id can contain ':', but blob will not. 
+ // (we are being wasteful, and keeping the base64 encoding - easier to read) + std::string_view id(reinterpret_cast(k.id.data()), k.id.size()); + gcp_log.debug("Finding key: {}", id); + + auto pos = id.find_last_of(':'); + auto pos2 = id.find_last_of('/', pos - 1); + if (pos == id_type::npos || pos2 == id_type::npos || pos2 >= pos) { + throw std::invalid_argument(fmt::format("Not a valid key id: {}", id)); + } + + std::string keyring(id.begin(), id.begin() + pos2); + std::string keyname(id.begin() + pos2 + 1, id.begin() + pos); + std::string enc(id.begin() + pos + 1, id.end()); + + auto url = fmt::format(GCP_KMS_QUERY_TEMPLATE, + _options.gcp_project_id, + _options.gcp_location, + keyring, + keyname, + "decrypt" + ); + auto query = rjson::empty_object(); + rjson::add(query, "ciphertext", enc); + + auto response = co_await gcp_auth_post_with_retry(url, query, k.src); + auto data = base64_decode(rjson::get(response, "plaintext")); + + // we know nothing about key type etc, so just return data. 
+ co_return data; +} + +encryption::gcp_host::gcp_host(encryption_context& ctxt, const std::string& name, const host_options& options) + : _impl(std::make_unique(ctxt, name, options)) +{} + +encryption::gcp_host::gcp_host(encryption_context& ctxt, const std::string& name, const std::unordered_map& map) + : gcp_host(ctxt, name, [&map] { + host_options opts; + map_wrapper> m(map); + + opts.master_key = m("master_key").value_or(""); + + opts.gcp_project_id = m("gcp_project_id").value_or(""); + opts.gcp_location = m("gcp_location").value_or(""); + + opts.gcp_credentials_file = m("gcp_credentials_file").value_or(""); + opts.gcp_impersonate_service_account = m("gcp_impersonate_service_account").value_or(""); + + opts.certfile = m("certfile").value_or(""); + opts.keyfile = m("keyfile").value_or(""); + opts.truststore = m("truststore").value_or(""); + opts.priority_string = m("priority_string").value_or(""); + + opts.key_cache_expiry = parse_expiry(m("key_cache_expiry")); + opts.key_cache_refresh = parse_expiry(m("key_cache_refresh")); + + return opts; + }()) +{} + +encryption::gcp_host::~gcp_host() = default; + +future<> encryption::gcp_host::init() { + return _impl->init(); +} + +const encryption::gcp_host::host_options& encryption::gcp_host::options() const { + return _impl->options(); +} + +future, encryption::gcp_host::id_type>> encryption::gcp_host::get_or_create_key(const key_info& info, const option_override* oov) { + return _impl->get_or_create_key(info, oov); +} + +future> encryption::gcp_host::get_key_by_id(const id_type& id, const key_info& info, const option_override* oov) { + return _impl->get_key_by_id(id, info, oov); +} + +template<> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } + auto format(const encryption::gcp_host::impl::attr_cache_key& d, fmt::format_context& ctxt) const { + return fmt::format_to(ctxt.out(), "{},{},{}", d.master_key, d.src.gcp_credentials_file, 
d.src.gcp_impersonate_service_account); + } +}; + +template<> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } + auto format(const encryption::gcp_host::impl::id_cache_key& d, fmt::format_context& ctxt) const { + return fmt::format_to(ctxt.out(), "{},{},{}", d.id, d.src.gcp_credentials_file, d.src.gcp_impersonate_service_account); + } +}; diff --git a/ent/encryption/gcp_host.hh b/ent/encryption/gcp_host.hh new file mode 100644 index 0000000000..8764c0d24a --- /dev/null +++ b/ent/encryption/gcp_host.hh @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2024 ScyllaDB + * + */ + +/* + * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 + */ + +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + +#include "symmetric_key.hh" + +namespace encryption { + +class encryption_context; +struct key_info; + +class gcp_host { +public: + class impl; + + template + struct t_credentials_source { + // Path to credentials JSON file (exported from gcloud console) + T gcp_credentials_file; + // Optional service account (email address) to impersonate + T gcp_impersonate_service_account; + }; + + using credentials_source = t_credentials_source; + + struct host_options : public credentials_source { + std::string gcp_project_id; + std::string gcp_location; + + // GCP KMS Key to encrypt data keys with. Format: / + std::string master_key; + + // tls. if unspeced, use system for https + // GCP does not (afaik?) allow certificate auth + // but we keep the option available just in case. 
+ std::string certfile; + std::string keyfile; + std::string truststore; + std::string priority_string; + + std::optional key_cache_expiry; + std::optional key_cache_refresh; + }; + + using id_type = bytes; + + gcp_host(encryption_context&, const std::string& name, const host_options&); + gcp_host(encryption_context&, const std::string& name, const std::unordered_map&); + ~gcp_host(); + + future<> init(); + const host_options& options() const; + + struct option_override : public t_credentials_source> { + std::optional master_key; + }; + + future, id_type>> get_or_create_key(const key_info&, const option_override* = nullptr); + future> get_key_by_id(const id_type&, const key_info&, const option_override* = nullptr); +private: + std::unique_ptr _impl; +}; + +} diff --git a/ent/encryption/gcp_key_provider.cc b/ent/encryption/gcp_key_provider.cc new file mode 100644 index 0000000000..1c00b969ac --- /dev/null +++ b/ent/encryption/gcp_key_provider.cc @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2024 ScyllaDB + * + */ + +/* + * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 + */ + +#include +#include + +#include "gcp_key_provider.hh" +#include "gcp_host.hh" + +namespace encryption { + +class gcp_key_provider : public key_provider { +public: + gcp_key_provider(::shared_ptr gcp_host, std::string name, gcp_host::option_override oov) + : _gcp_host(std::move(gcp_host)) + , _name(std::move(name)) + , _oov(std::move(oov)) + {} + future> key(const key_info& info, opt_bytes id) override { + if (id) { + return _gcp_host->get_key_by_id(*id, info, &_oov).then([id](key_ptr k) { + return make_ready_future>(std::tuple(k, id)); + }); + } + return _gcp_host->get_or_create_key(info, &_oov).then([](std::tuple k_id) { + return make_ready_future>(k_id); + }); + } + void print(std::ostream& os) const override { + os << _name; + } +private: + ::shared_ptr _gcp_host; + std::string _name; + gcp_host::option_override _oov; +}; + +shared_ptr 
gcp_key_provider_factory::get_provider(encryption_context& ctxt, const options& map) { + opt_wrapper opts(map); + auto gcp_host = opts("gcp_host"); + + + gcp_host::option_override oov { + .master_key = opts("master_key"), + }; + + oov.gcp_credentials_file = opts("gcp_credentials_file"); + oov.gcp_impersonate_service_account = opts("gcp_impersonate_service_account"); + + if (!gcp_host) { + throw std::invalid_argument("gcp_host must be provided"); + } + + auto host = ctxt.get_gcp_host(*gcp_host); + auto id = gcp_host.value() + + ":" + oov.master_key.value_or(host->options().master_key) + + ":" + oov.gcp_credentials_file.value_or(host->options().gcp_credentials_file) + + ":" + oov.gcp_impersonate_service_account.value_or(host->options().gcp_impersonate_service_account) + ; + + auto provider = ctxt.get_cached_provider(id); + + if (!provider) { + provider = ::make_shared(host, *gcp_host, std::move(oov)); + ctxt.cache_provider(id, provider); + } + + return provider; +} + +} diff --git a/ent/encryption/gcp_key_provider.hh b/ent/encryption/gcp_key_provider.hh new file mode 100644 index 0000000000..76ddb674f6 --- /dev/null +++ b/ent/encryption/gcp_key_provider.hh @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2024 ScyllaDB + * + */ + +/* + * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 + */ + +#pragma once + +#include "encryption.hh" +#include "system_key.hh" + +namespace encryption { + +class gcp_key_provider_factory : public key_provider_factory { +public: + shared_ptr get_provider(encryption_context&, const options&) override; +}; + +/** + * See comment for AWS KMS regarding system key support. 
+ */ +} diff --git a/ent/encryption/kmip_host.cc b/ent/encryption/kmip_host.cc new file mode 100644 index 0000000000..1e2bf5dd0b --- /dev/null +++ b/ent/encryption/kmip_host.cc @@ -0,0 +1,1222 @@ +/* + * Copyright (C) 2018 ScyllaDB + * + */ + +/* + * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 + */ + +#ifdef HAVE_KMIP + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +// workaround cryptsoft sdk issue: +#define strcasestr kmip_strcasestr +#include +#include +#undef strcasestr + +#include "kmip_host.hh" +#include "encryption.hh" +#include "encryption_exceptions.hh" +#include "symmetric_key.hh" +#include "utils/hash.hh" +#include "utils/loading_cache.hh" +#include "utils/UUID.hh" +#include "utils/UUID_gen.hh" +#include "marshal_exception.hh" +#include "db/config.hh" + +using namespace std::chrono_literals; + +static logger kmip_log("kmip"); +static constexpr uint16_t kmip_port = 5696u; +// default for command execution/failover retry. +static constexpr int default_num_cmd_retry = 5; +static constexpr int min_num_cmd_retry = 2; +static constexpr auto base_backoff_time = 100ms; + +std::ostream& operator<<(std::ostream& os, KMIP* kmip) { + auto* s = KMIP_dump_str(kmip, KMIP_DUMP_FORMAT_DEFAULT); + os << s; + free(s); + return os; +} + +static void kmip_logger(void *cb_arg, unsigned char *str, unsigned long len) { + // kmipc likes to write a log of white space and newlines. Skip these. 
+ std::string_view v(reinterpret_cast(str), len); + if (std::find_if(v.begin(), v.end(), [](char c) { return !::isspace(c); }) == v.end()) { + return; + } + kmip_log.trace("kmipcmd: {}", v); +} + +namespace encryption { + +bool operator==(const kmip_host::key_options& l, const kmip_host::key_options& r) { + return std::tie(l.template_name, l.key_namespace) == std::tie(r.template_name, r.key_namespace); +} + +class kmip_error_category : public std::error_category { +public: + constexpr kmip_error_category() noexcept : std::error_category{} {} + const char * name() const noexcept { + return "KMIP"; + } + std::string message(int error) const { + return KMIP_error2string(error); + } +}; + +static const kmip_error_category kmip_errorc; + +class kmip_error : public std::system_error { +public: + kmip_error(int res) + : system_error(res, kmip_errorc) + {} + kmip_error(int res, const std::string& msg) + : system_error(res, kmip_errorc, msg) + {} +}; + +// Checks a KMIP return value. +// Anything other than KMIP_ERROR_NONE -> throws kmip_error, with the command result details when they can be read. +static void kmip_chk(int res, KMIP_CMD * cmd = nullptr) { + if (res != KMIP_ERROR_NONE) { + int status=0, reason=0; + char* message = nullptr; + + if (KMIP_CMD_get_result(cmd, &status, &reason, &message) == KMIP_ERROR_NONE) { + auto* ctxt = cmd != nullptr ? KMIP_CMD_get_ctx(cmd) : "(unknown cmd)"; + auto s = fmt::format("{}: status={}, reason={}, message={}", + ctxt, + KMIP_RESULT_STATUS_to_string(status, 0, nullptr), + KMIP_RESULT_REASON_to_string(reason, 0, nullptr), + message ? 
message : "" + ); + throw kmip_error(res, s); + } + throw kmip_error(res); + } +} + + +class kmip_host::impl { +public: + struct kmip_key_info { + key_info info; + key_options options; + bool operator==(const kmip_key_info& i) const { + return info == i.info && options == i.options; + } + friend std::ostream& operator<<(std::ostream& os, const kmip_key_info& info) { + return os << info.info << ":" << info.options; + } + }; + struct kmip_key_info_hash { + size_t operator()(const kmip_key_info& i) const { + return utils::tuple_hash()( + std::tie(i.info.alg, i.info.len, + i.options.template_name, + i.options.key_namespace)); + } + }; + + using key_and_id_type = std::tuple, id_type>; + + inline static constexpr std::chrono::milliseconds default_expiry = 30s; + inline static constexpr std::chrono::milliseconds default_refresh = 100s; + inline static constexpr uintptr_t max_hosts = 1<<8; + + inline static constexpr size_t def_max_pooled_connections_per_host = 8; + + impl(encryption_context& ctxt, const sstring& name, const host_options& options) + : _ctxt(ctxt), _name(name), _options(options), _attr_cache( + utils::loading_cache_config{ + .max_size = std::numeric_limits::max(), + .expiry = options.key_cache_expiry.value_or( + default_expiry), + .refresh = options.key_cache_refresh.value_or(default_refresh)}, + kmip_log, + std::bind(&impl::create_key, this, + std::placeholders::_1)), + _id_cache( + utils::loading_cache_config{ + .max_size = std::numeric_limits::max(), + .expiry = options.key_cache_expiry.value_or( + default_expiry), + .refresh = options.key_cache_refresh.value_or(default_refresh), + }, + kmip_log, + std::bind(&impl::find_key, this, + std::placeholders::_1)), + _max_retry(std::max(size_t(min_num_cmd_retry), options.max_command_retries.value_or(default_num_cmd_retry))) + { + if (_options.hosts.size() > max_hosts) { + throw std::invalid_argument("Too many hosts"); + } + + KMIP_CMD_set_default_logfile(nullptr, nullptr); // disable logfile + 
KMIP_CMD_set_default_logger(kmip_logger, nullptr); // send logs to us instead + } + + future<> connect(); + future<> disconnect(); + future, id_type>> get_or_create_key(const key_info&, const key_options& = {}); + future> get_key_by_id(const id_type&, const std::optional& = {}); + + id_type kmip_id_to_id(const sstring&) const; + sstring id_to_kmip_string(const id_type&) const; +private: + future create_key(const kmip_key_info&); + future> find_key(const id_type&); + future> find_matching_keys(const kmip_key_info&, std::optional max = {}); + + static shared_ptr ensure_compatible_key(shared_ptr, const key_info&); + + template + class kmip_handle; + class kmip_cmd; + class kmip_data_list; + class connection; + + std::tuple make_attributes(const kmip_key_info&, bool include_template = true) const; + + union userdata { + void * ptr; + const char* host; + }; + + friend std::ostream& operator<<(std::ostream& os, const impl& me) { + fmt::print(os, "{}", me._name); + return os; + } + + using con_ptr = ::shared_ptr; + using opt_int = std::optional; + + template + future do_cmd(kmip_cmd, Func &&); + template + future do_cmd(KMIP_CMD*, con_ptr, Func&, bool retain_connection_after_command = false); + + future get_connection(KMIP_CMD*); + future get_connection(const sstring&); + future<> clear_connections(const sstring& host); + + void release(KMIP_CMD*, con_ptr, bool retain_connection = false); + + size_t max_pooled_connections_per_host() const { + return _options.max_pooled_connections_per_host.value_or(def_max_pooled_connections_per_host); + } + bool is_current_host(const sstring& host) { + return host == _options.hosts.at(_index % _options.hosts.size()); + } + + encryption_context& _ctxt; + sstring _name; + host_options _options; + utils::loading_cache, + kmip_key_info_hash> _attr_cache; + + utils::loading_cache, 2, + utils::loading_cache_reload_enabled::yes, + utils::simple_entry_size<::shared_ptr>> _id_cache; + + using connections = std::deque; + using host_to_connections 
= std::unordered_map; + + host_to_connections _host_connections; + // current default host. If a host fails, incremented and + // we try another in the host ip list. + size_t _index = 0; + size_t _max_retry = default_num_cmd_retry; +}; + +} + +template <> struct fmt::formatter : fmt::ostream_formatter {}; +template <> struct fmt::formatter : fmt::ostream_formatter {}; + +namespace encryption { + +class kmip_host::impl::connection { +public: + connection(const sstring& host, host_options& options) + : _host(host) + , _options(options) + {} + ~connection() + {} + + const sstring& host() const { + return _host; + } + + void attach(KMIP_CMD*); + + future<> connect(); + future<> wait_for_io(); + future<> close(); +private: + static int io_callback(KMIP*, void*, int, void*, unsigned int, unsigned int*); + + int send(void*, unsigned int, unsigned int*); + int recv(void*, unsigned int, unsigned int*); + + friend std::ostream& operator<<(std::ostream& os, const connection& me) { + return os << me._host; + } + + sstring _host; + host_options& _options; + output_stream _output; + input_stream _input; + seastar::connected_socket _socket; + std::optional> _in_buffer; + std::optional> _pending; +}; + +} + +template <> struct fmt::formatter : fmt::ostream_formatter {}; + +namespace encryption { + +future<> kmip_host::impl::connection::connect() { + auto cred = ::make_shared(); + auto f = make_ready_future(); + + kmip_log.debug("connecting {}", _host); + + if (!_options.priority_string.empty()) { + cred->set_priority_string(_options.priority_string); + } else { + cred->set_priority_string(db::config::default_tls_priority); + } + + if (!_options.certfile.empty()) { + f = f.then([this, cred] { + return cred->set_x509_key_file(_options.certfile, _options.keyfile, seastar::tls::x509_crt_format::PEM); + }); + } + if (!_options.truststore.empty()) { + f = f.then([this, cred] { + return cred->set_x509_trust_file(_options.truststore, seastar::tls::x509_crt_format::PEM); + }); + } + return 
f.then([this, cred] { + // TODO, find if we should do hostname verification + // TODO: connect all failovers already? + + auto i = _host.find_last_of(':'); + auto name = _host.substr(0, i); + auto port = i != sstring::npos ? std::stoul(_host.substr(i + 1)) : kmip_port; + + return seastar::net::dns::resolve_name(name).then([this, cred, port](seastar::net::inet_address addr) { + return seastar::tls::connect(cred, seastar::ipv4_addr{addr, uint16_t(port)}).then([this](seastar::connected_socket s) { + kmip_log.debug("Successfully connected {}", _host); + // #998 Set keepalive to try avoiding connection going stale inbetween commands. + s.set_keepalive_parameters(net::tcp_keepalive_params{60s, 60s, 10}); + s.set_keepalive(true); + _input = s.input(); + _output = s.output(); + }); + }); + }); +} + +future<> kmip_host::impl::connection::wait_for_io() { + kmip_log.trace("{}: Waiting...", *this); + auto o = std::exchange(_pending, std::nullopt); + return o ? std::move(*o) : make_ready_future(); +} + +int kmip_host::impl::connection::send(void* data, unsigned int len, unsigned int*) { + if (_pending) { + kmip_log.trace("{}: operation pending...", *this); + return KMIP_ERROR_RETRY; + } + kmip_log.trace("{}: Sending {} bytes", *this, len); + + auto f = _output.write(reinterpret_cast(data), len).then([this] { + kmip_log.trace("{}: send done. flushing...", *this); + return _output.flush(); + }); + // if the call failed already, we still want to + // drop back to "wait_for_io()", because we cannot throw + // exceptions through the kmipc code frames. 
+ if (!f.available() || f.failed()) { + _pending.emplace(std::move(f)); + } + return KMIP_ERROR_NONE; +} + +int kmip_host::impl::connection::recv(void* data, unsigned int len, unsigned int* outlen) { + kmip_log.trace("{}: Waiting for data ({})", *this, len); + for (;;) { + if (_in_buffer) { + auto n = std::min(unsigned(_in_buffer->size()), len); + *outlen = n; + kmip_log.trace("{}: returning {} ({}) bytes", *this, n, _in_buffer->size()); + std::copy(_in_buffer->begin(), _in_buffer->begin() + n, reinterpret_cast(data)); + _in_buffer->trim_front(n); + if (_in_buffer->empty()) { + _in_buffer = std::nullopt; + } + // #998 cryptsoft example returns error on EOF. + if (n == 0) { + return KMIP_ERROR_IO; + } + break; + } + + if (_pending) { + kmip_log.trace("{}: operation pending...", *this); + return KMIP_ERROR_RETRY; + } + + kmip_log.trace("{}: issue read", *this); + auto f = _input.read().then([this](temporary_buffer buf) { + kmip_log.trace("{}: got {} bytes", *this, buf.size()); + _in_buffer = std::move(buf); + }); + + // if the call failed already, we still want to + // drop back to "wait_for_io()", because we cannot throw + // exceptions through the kmipc code frames. + if (!f.available() || f.failed()) { + _pending.emplace(std::move(f)); + } + } + return KMIP_ERROR_NONE; +} + +int kmip_host::impl::connection::io_callback(KMIP *kmip, void *cb_arg, int op, void *data, unsigned int len, unsigned int *outlen) { + auto* conn = reinterpret_cast(cb_arg); + try { + switch(op) { + default: + return KMIP_ERROR_NOT_SUPPORTED; + case KMIP_IO_CMD_SEND: + return conn->send(data, len, outlen); + case KMIP_IO_CMD_RECV: + return conn->recv(data, len, outlen); + } + } catch (...) 
{ + kmip_log.warn("Error in KMIP IO: {}", std::current_exception()); + return KMIP_ERROR_IO; + } +} + +void kmip_host::impl::connection::attach(KMIP_CMD* cmd) { + kmip_log.trace("{} Attach: {}", *this, reinterpret_cast(cmd)); + if (cmd == nullptr) { + return; + } + + if (!_options.username.empty()) { + kmip_chk( + KMIP_CMD_set_credential_username(cmd, + const_cast(_options.username.c_str()), + const_cast(_options.password.c_str()))); + } + + /* because we haven't passed in anything to the KMIP_CMD layer + * that would provide it with the protocol version details we + * have to separately indicate that here + */ + kmip_chk(KMIP_CMD_set_lib_protocol(cmd, KMIP_LIB_PROTOCOL_KMIP1)); + /* handle all IO via the callback */ + kmip_chk( + KMIP_CMD_set_io_cb(cmd, &connection::io_callback, + reinterpret_cast(this))); +} + +future<> kmip_host::impl::connection::close() { + return _output.close().finally([this] { + return _input.close(); + }); +} + +template +class kmip_host::impl::kmip_handle { +public: + kmip_handle(T * ptr) + : _ptr(ptr, FreeFunc) + {} + kmip_handle(kmip_handle&&) = default; + kmip_handle& operator=(kmip_handle&&) = default; + + T* get() const { + return _ptr.get(); + } + operator T*() const { + return _ptr.get(); + } + explicit operator bool() const { + return _ptr != nullptr; + } +private: + using ptr_type = std::unique_ptr; + ptr_type _ptr; +}; + +class kmip_host::impl::kmip_cmd : public kmip_handle { +public: + kmip_cmd(int flags = KMIP_CMD_FLAGS_DEFAULT|KMIP_CMD_FLAGS_LOG|KMIP_CMD_FLAGS_LOG_XML) + : kmip_handle([flags] { + KMIP_CMD* cmd; + kmip_chk(KMIP_CMD_new_ex(flags, nullptr, &cmd)); + return cmd; + }()) + {} + kmip_cmd(kmip_cmd&&) = default; + kmip_cmd& operator=(kmip_cmd&&) = default; + + friend std::ostream& operator<<(std::ostream& os, const kmip_cmd& cmd) { + return os << KMIP_CMD_get_request(cmd); + } +}; + +} + +template <> struct fmt::formatter : fmt::ostream_formatter {}; + +namespace encryption { + +class kmip_host::impl::kmip_data_list : 
public kmip_handle { +public: + kmip_data_list(int flags = KMIP_DATA_LIST_FLAGS_DEFAULT) + : kmip_handle([flags] { + KMIP_DATA_LIST* kdl; + kmip_chk(KMIP_DATA_LIST_new(flags, &kdl)); + return kdl; + }()) + {} + kmip_data_list(kmip_data_list&&) = default; + kmip_data_list& operator=(kmip_data_list&&) = default; +}; + +/** + * Clears and releases a connection cp. Release connection after. + * If retain_connection is true, the connection is only cleared of command data and + * can be reused by caller, otherwise it is either added to the connection pool + * or dropped. +*/ +void kmip_host::impl::release(KMIP_CMD* cmd, con_ptr cp, bool retain_connection) { + auto i = _host_connections.find(cp->host()); + userdata u; + u.host = i->first.c_str(); + if (cmd) { + KMIP_CMD_set_userdata(cmd, u.ptr); + } + if (!retain_connection && is_current_host(i->first) && max_pooled_connections_per_host() > i->second.size()) { + i->second.emplace_back(std::move(cp)); + } +} + +/** + * Run a function on a KMIP command using connection cp. Release connection after. + * If retain_connection_after_command is true, the connection is only cleared of command data and + * can be reused by caller. +*/ +template +future kmip_host::impl::do_cmd(KMIP_CMD* cmd, con_ptr cp, Func& f, bool retain_connection_after_command) { + cp->attach(cmd); + + return repeat_until_value([this, cmd, &f, cp, retain_connection_after_command] { + int res = f(cmd); + switch (res) { + case KMIP_ERROR_RETRY: + return cp->wait_for_io().then([] { + return opt_int(); + }).handle_exception([cp](auto ep) { + // get here if we had any wire exceptions below. + // make sure to force flush and stuff here as well. + return cp->close().then_wrapped([ep = std::move(ep)](auto f) mutable { + try { + f.get(); + } catch (...) { + } + return make_exception_future(std::move(ep)); + }); + }); + case 0: + release(cmd, cp, retain_connection_after_command); + return make_ready_future(res); + default: + // error. connection is dicarded. close it. 
+ return cp->close().then_wrapped([cp, res](auto f) { + // ignore any exception thrown from the close. + // ensure we provide the kmip error instead. + try { + f.get(); + } catch (...) { + } + return make_ready_future(res); + }); + } + }).finally([cp] {}); +} + +template +future kmip_host::impl::do_cmd(kmip_cmd cmd_in, Func && f) { + kmip_log.trace("{}: begin do_cmd", *this, cmd_in); + KMIP_CMD* cmd = cmd_in; + + // #998 Need to do retry loop, because we can have either timed out connection, + // lost it (connected server went down) or some other network error. + return do_with(std::move(f), [this, cmd](Func& f) { + return repeat_until_value([this, cmd, &f, retry = _max_retry]() mutable { + --retry; + return get_connection(cmd).handle_exception([this, cmd, retry](std::exception_ptr ep) { + if (retry) { + // failed to connect. do more serious backing off. + // we only retry this once, since get_connection + // will either give back cached connections, + // or explicitly try all avail hosts. + // In the first case, we will do the lower retry + // loop if something is stale/borked, the latter is + // more or less dead. + auto sleeptime = base_backoff_time * (_max_retry - retry); + kmip_log.debug("{}: Connection failed. backoff {}", *this, std::chrono::duration_cast(sleeptime).count()); + return seastar::sleep(sleeptime).then([this, cmd] { + kmip_log.debug("{}: retrying...", *this); + return get_connection(cmd); + }); + } + return make_exception_future(std::move(ep)); + }).then([this, cmd, &f, retry](con_ptr cp) mutable { + auto host = cp->host(); + auto res = do_cmd(cmd, std::move(cp), f); + kmip_log.trace("{}: request {}", *this, fmt::ptr(KMIP_CMD_get_request(cmd))); + return res.then([this, retry, host = std::move(host)](int res) { + if (res == KMIP_ERROR_IO) { + kmip_log.debug("{}: request error {}", *this, kmip_errorc.message(res)); + if (retry) { + // do some backing off unless this is the first retry, which + // might be a stale connection. 
Clear out all caches for the + // current host first, then retry. + auto f = clear_connections(host); + if (retry != (_max_retry - 1)) { + f = f.then([this] { + auto sleeptime = base_backoff_time; + kmip_log.debug("{}: backoff {}ms", *this, std::chrono::duration_cast(sleeptime).count()); + return seastar::sleep(sleeptime); + }); + } + return f.then([this] { + kmip_log.debug("{}: retrying...", *this); + return opt_int{}; + }); + } + } + return make_ready_future(res); + }); + }); + }); + }).then([this, cmd = std::move(cmd_in)](int res) mutable { + kmip_chk(res, cmd); + kmip_log.trace("{}: result {}", *this, fmt::ptr(KMIP_CMD_get_response(cmd))); + return std::move(cmd); + }); +} + +future kmip_host::impl::get_connection(const sstring& host) { + // TODO: if a pooled connection is stale, the command run will fail, + // and the connection will be discarded. Would be good if we could detect this case + // and re-run command with a new connection. Maybe always verify connection, even if + // it is old? 
+ auto& q = _host_connections[host]; + + if (!q.empty()) { + auto cp = q.front(); + q.pop_front(); + return make_ready_future<::shared_ptr>(cp); + } + + auto cp = ::make_shared(host, _options); + kmip_log.trace("{}: connecting to {}", *this, host); + return cp->connect().then([this, cp, host] { + kmip_log.trace("{}: verifying {}", *this, host); + kmip_cmd cmd; + static auto connection_query = [](KMIP_CMD* cmd) { + static const std::array query_options = { + KMIP_QUERY_FUNCTION_QUERY_OPERATIONS, + KMIP_QUERY_FUNCTION_QUERY_OBJECTS, + }; + return KMIP_CMD_query(cmd, const_cast(query_options.data()), unsigned(query_options.size())); + }; + // when/if this succeeds, it will push the connection onto the available stack + auto f = do_cmd(cmd, cp, connection_query, true /* keep cp */); + return f.then([this, host, cmd = std::move(cmd), cp](int res) { + kmip_chk(res, cmd); + kmip_log.trace("{}: connected {}", *this, host); + return cp; + }); + }); +} + + +future kmip_host::impl::get_connection(KMIP_CMD* cmd) { + userdata u{ KMIP_CMD_get_userdata(cmd) }; + if (u.host != nullptr) { + return get_connection(u.host).then([](con_ptr cp) { + return cp; + }); + } + + using con_ptr = ::shared_ptr; + using con_opt = std::optional; + + return repeat_until_value([this, i = size_t(0)]() mutable { + if (i++ == _options.hosts.size()) { + throw missing_resource_error("Could not connect to any server"); + } + auto& host = _options.hosts[_index % _options.hosts.size()]; + return get_connection(host).then([](con_ptr cp) { + return con_opt(std::move(cp)); + }).handle_exception([this, host](auto) { + ++_index; + // if we fail one host, clear out any + // caches for it just in case. 
+ return clear_connections(host).then([] { + return con_opt(); + }); + }); + }); +} + +future<> kmip_host::impl::clear_connections(const sstring& host) { + auto q = std::exchange(_host_connections[host], {}); + return parallel_for_each(q.begin(), q.end(), [](con_ptr c) { + return c->close().handle_exception([c](auto ep) { + // ignore exceptions + }); + }); +} + +future<> kmip_host::impl::connect() { + return do_for_each(_options.hosts, [this](const sstring& host) { + return get_connection(host).then([this](auto cp) { + release(nullptr, cp); + }); + }); +} + +future<> kmip_host::impl::disconnect() { + return do_for_each(_options.hosts, [this](const sstring& host) { + return clear_connections(host); + }); +} + +static unsigned from_str(unsigned (*f)(char*, int, int*), const sstring& s, const sstring& what) { + int found = 0; + auto res = f(const_cast(s.c_str()), CODE2STR_FLAG_STR_CASE, &found); + if (!found) { + throw std::invalid_argument(format("Unsupported {}: {}", what, s)); + } + return res; +} + +std::tuple kmip_host::impl::make_attributes(const kmip_key_info& info, bool include_template) const { + kmip_data_list kdl_attrs; + + if (!info.options.template_name.empty()) { + kmip_chk(KMIP_DATA_LIST_add_attr_str_by_tag(kdl_attrs, + KMIP_TAG_TEMPLATE, + const_cast(info.options.template_name.c_str())) + ); + } + if (!info.options.key_namespace.empty()) { + kmip_chk(KMIP_DATA_LIST_add_attr_str(kdl_attrs, + const_cast("x-key-namespace"), + const_cast(info.options.key_namespace.c_str())) + ); + } + sstring type, mode, padd; + std::tie(type, mode, padd) = parse_key_spec_and_validate_defaults(info.info.alg); + + try { + auto crypt_alg = from_str(&KMIP_string_to_CRYPTOGRAPHIC_ALGORITHM, type, "cryptographic algorithm"); + return std::make_tuple(std::move(kdl_attrs), crypt_alg); + } catch (std::invalid_argument& e) { + std::throw_with_nested(std::invalid_argument("Invalid algorithm: " + info.info.alg)); + } +} + +kmip_host::id_type kmip_host::impl::kmip_id_to_id(const 
sstring& s) const { + try { + // #2205 - we previously made all ID:s into uuids (because the literal functions + // are called KMIP_CMD_get_uuid etc). This has issues with Keysecure which apparently + // does _not_ give back UUID format strings, but "other" things. + // Could just always store ascii as bytes instead, but that would now + // break existing installations, so we check for UUID, and if it does not + // match we encode it. + utils::UUID uuid(s); + return uuid.serialize(); + } catch (marshal_exception&) { + // very simple exncoding scheme: add a "len" byte at the end. + // iff byte size of id + 1 (len) equals 16 (length of UUID), + // add a padding byte. + size_t len = s.size() + 1; + if (len == 16) { + ++len; + } + bytes res(len, 0); + std::copy(s.begin(), s.end(), res.begin()); + res.back() = int8_t(len - s.size()); + return res; + } +} + +sstring kmip_host::impl::id_to_kmip_string(const id_type& id) const { + // see comment above for encoding scheme. + if (id.size() == 16) { + // if byte size is UUID it must be a UUID. No "old" id:s are + // not, and we never encode non-uuid as 16 bytes. + auto uuid = utils::UUID_gen::get_UUID(id); + return fmt::format("{}", uuid); + } + auto len = id.size() - id.back(); + return sstring(id.begin(), id.begin() + len); +} + +future kmip_host::impl::create_key(const kmip_key_info& info) { + if (this_shard_id() == 0) { + // #1039 First try looking for existing keys on server + return find_matching_keys(info, 1).then([this, info](std::vector ids) { + if (!ids.empty()) { + // got it + return get_key_by_id(ids.front(), info.info).then([id = ids.front()](shared_ptr k) { + return key_and_id_type(std::move(k), id); + }); + } + + kmip_log.debug("{}: Creating key {}", _name, info); + + auto kdl_attrs_crypt_alg = make_attributes(info); + auto&& kdl_attrs = std::get<0>(kdl_attrs_crypt_alg); + auto&& crypt_alg = std::get<1>(kdl_attrs_crypt_alg); + + // TODO: this is inefficient. We can probably put this in a single batch. 
+ kmip_cmd cmd; + KMIP_CMD_set_ctx(cmd, const_cast("Create key")); + + return do_cmd(std::move(cmd), [info, kdl_attrs = std::move(kdl_attrs), crypt_alg](KMIP_CMD* cmd) { + return KMIP_CMD_create_smpl(cmd, KMIP_OBJECT_TYPE_SYMMETRIC_KEY, + crypt_alg, + KMIP_CRYPTOGRAPHIC_USAGE_ENCRYPT|KMIP_CRYPTOGRAPHIC_USAGE_DECRYPT, + int(info.info.len), + KMIP_DATA_LIST_attrs(kdl_attrs), KMIP_DATA_LIST_n_attrs(kdl_attrs) + ); + }).then([this, info](kmip_cmd cmd) { + /* now get the details (the value of the key) */ + char* new_id; + kmip_chk(KMIP_CMD_get_uuid(cmd, 0, &new_id), cmd); + sstring uuid(new_id); + + kmip_log.debug("{}: Created {}:{}", _name, info, uuid); + + KMIP_CMD_set_ctx(cmd, const_cast("activate")); + + return do_cmd(std::move(cmd), [new_id](KMIP_CMD* cmd) { + return KMIP_CMD_activate(cmd, new_id); + }).then([this, info, uuid](kmip_cmd cmd) { + auto id = kmip_id_to_id(uuid); + kmip_log.debug("{}: Activated {}", _name, uuid); + return get_key_by_id(id, info.info).then([id](auto k) { + return key_and_id_type(k, id); + }); + }); + }); + }); + } + + return smp::submit_to(0, [this, info] { + return _ctxt.get_kmip_host(_name)->get_or_create_key(info.info, info.options).then([](std::tuple, id_type> k_id) { + auto&& [k, id] = k_id; + return make_ready_future>(std::tuple(k->info(), k->key(), id)); + }); + }).then([](std::tuple info_b_id) { + auto&& [info, b, id] = info_b_id; + return make_ready_future(key_and_id_type(make_shared(info, b), id)); + }); +} + +future> kmip_host::impl::find_matching_keys(const kmip_key_info& info, std::optional max) { + kmip_log.debug("{}: Finding matching key {}", _name, info); + + auto [kdl_attrs, crypt_alg] = make_attributes(info, false); + + static const char kmip_tag_cryptographic_length[] = KMIP_TAG_CRYPTOGRAPHIC_LENGTH_STR; + static const char kmip_tag_cryptographic_usage_mask[] = KMIP_TAG_CRYPTOGRAPHIC_USAGE_MASK_STR; + + // #1079. 
Query mask apparently ignores things like cryptographic + // attribute set of options, instead we must specify the query + // as a list of attributes. + kmip_chk(KMIP_DATA_LIST_add_attr_enum_by_tag(kdl_attrs, + KMIP_TAG_OBJECT_TYPE, + KMIP_OBJECT_TYPE_SYMMETRIC_KEY) + ); + kmip_chk(KMIP_DATA_LIST_add_attr_enum_by_tag(kdl_attrs, + KMIP_TAG_CRYPTOGRAPHIC_ALGORITHM, + int(crypt_alg)) + ); + kmip_chk(KMIP_DATA_LIST_add_attr_int(kdl_attrs, + // our kmip sdk is broken/const-challenged + const_cast(kmip_tag_cryptographic_length), + int(info.info.len)) + ); + kmip_chk(KMIP_DATA_LIST_add_attr_enum_by_tag(kdl_attrs, + KMIP_TAG_STATE, + KMIP_STATE_ACTIVE) + ); + kmip_chk(KMIP_DATA_LIST_add_attr_int(kdl_attrs, + const_cast(kmip_tag_cryptographic_usage_mask), + KMIP_CRYPTOGRAPHIC_USAGE_ENCRYPT|KMIP_CRYPTOGRAPHIC_USAGE_DECRYPT) + ); + + kmip_cmd cmd; + KMIP_CMD_set_ctx(cmd, const_cast("Find matching key")); + + std::unique_ptr mp; + int* maxp = nullptr; + if (max) { + mp = std::make_unique(*max); + maxp = mp.get(); + } + + return do_cmd(std::move(cmd), [kdl_attrs = std::move(kdl_attrs), maxp](KMIP_CMD* cmd) { + return KMIP_CMD_locate(cmd, maxp, nullptr, KMIP_DATA_LIST_attrs(kdl_attrs), KMIP_DATA_LIST_n_attrs(kdl_attrs)); + }).then([this, info, mp = std::move(mp)](kmip_cmd cmd) { + std::vector result; + + for (int i = 0; ; ++i) { + char* new_id; + auto err = KMIP_CMD_get_uuid(cmd, i, &new_id); + if (err == KMIP_ERROR_NOT_FOUND) { + break; + } + kmip_chk(err, cmd); + result.emplace_back(kmip_id_to_id(new_id)); + } + + kmip_log.debug("{}: Found {} matching keys {}", _name, result.size(), info); + + return result; + }); +} + +future> kmip_host::impl::find_key(const id_type& id) { + if (this_shard_id() == 0) { + kmip_cmd cmd; + KMIP_CMD_set_ctx(cmd, const_cast("Find key")); + + auto uuid = id_to_kmip_string(id); + kmip_log.debug("{}: Finding {}", _name, uuid); + + // Batch operation. 
Nothing is sent/received until xmit below + kmip_chk(KMIP_CMD_batch_start(cmd)); + kmip_chk(KMIP_CMD_set_batch_order(cmd, 1)); + { + int key_format_type = KMIP_KEY_FORMAT_TYPE_RAW; + kmip_chk(KMIP_CMD_get(cmd, const_cast(uuid.c_str()), &key_format_type, nullptr, nullptr)); + } + kmip_chk(KMIP_CMD_get_attributes(cmd, const_cast(uuid.c_str()), nullptr, 0)); + + return do_cmd(std::move(cmd), [](KMIP_CMD* cmd) { + return KMIP_CMD_batch_xmit(cmd); + }).then([this, uuid](kmip_cmd cmd) { + auto nb = KMIP_CMD_get_batch_count(cmd); + if (nb != 2) { + throw malformed_response_error("Invalid batch count in response: " + std::to_string(nb)); + } + + sstring alg; + sstring mode; + sstring padd; + + // "Get" result + auto kdl_res = KMIP_CMD_get_batch(cmd, 0); + + /* get a reference to the key material (the actual key value) */ + unsigned char* key; + unsigned int keylen; + kmip_chk(KMIP_DATA_LIST_get_data(kdl_res, KMIP_TAG_KEY_MATERIAL, 0, &key, &keylen)); + + auto tag_to_string = [](auto f, auto val) { + int found; + auto p = f(val, CODE2STR_FLAG_STR_CASE, &found); + if (!found) { + throw malformed_response_error("Invalid tag: " + std::to_string(val)); + } + return sstring(p); + }; + + int crypto_alg; + kmip_chk(KMIP_DATA_LIST_get_32(kdl_res, KMIP_TAG_CRYPTOGRAPHIC_ALGORITHM, 0, &crypto_alg)); + alg = tag_to_string(&KMIP_CRYPTOGRAPHIC_ALGORITHM_to_string, crypto_alg); + + // "Attribute list" result + // This will apparently most of the time _not_ contain the info we want, + // depending on server, but we record as much as we can anyway. + // The actual resulting keys used will be based on external config. Only + // key data and verifying that it is compatible with said info is + // important for us. 
+ auto kdl_attr = KMIP_CMD_get_batch(cmd, 1); + + unsigned int attr_count = 0; + kmip_chk(KMIP_DATA_LIST_get_count(kdl_attr, KMIP_TAG_ATTRIBUTE, &attr_count)); + + for (unsigned int i = 0; i < attr_count; i++) { + KMIP_DATA *attr = nullptr; + int n_attr = 0; + + kmip_chk(KMIP_DATA_LIST_get_struct(kdl_attr, KMIP_TAG_ATTRIBUTE, i, &attr, &n_attr, NULL)); + + + KMIP_DATA *attr_val = nullptr; + kmip_chk(KMIP_DATA_get(attr, n_attr,KMIP_TAG_ATTRIBUTE_VALUE, 0, &attr_val)); + + switch (attr_val->tag) { + case KMIP_TAG_BLOCK_CIPHER_MODE: + mode = tag_to_string(&KMIP_BLOCK_CIPHER_MODE_to_string, attr_val->data32); + break; + case KMIP_TAG_PADDING_METHOD: + padd = tag_to_string(&KMIP_PADDING_METHOD_to_string, attr_val->data32); + break; + default: + break; + } + } + + if (alg.empty()) { + throw configuration_error("Could not find algorithm"); + } + if (mode.empty() != padd.empty()) { + throw configuration_error("Invalid block mode/padding"); + } + + auto str = mode.empty() || padd.empty() ? alg : alg + "/" + mode + "/" + padd; + key_info derived_info{ str, keylen*8}; + + kmip_log.trace("{}: Found {}:{} {}", _name, uuid, derived_info.alg, derived_info.len); + + return make_shared(derived_info, bytes(key, key + keylen)); + }); + } + + return smp::submit_to(0, [this, id] { + return _ctxt.get_kmip_host(_name)->get_key_by_id(id).then([](shared_ptr k) { + return make_ready_future>(std::tuple(k->info(), k->key())); + }); + }).then([](std::tuple info_b) { + auto&& [info, b] = info_b; + return make_shared(info, b); + }); +} + +shared_ptr kmip_host::impl::ensure_compatible_key(shared_ptr k, const key_info& info) { + // keys we get back are typically void + // of block mode/padding info (because this is meaningless + // from the standpoint of the kmip server). + // Check and re-init the actual key used based + // on what the user wants so we adhere to block mode etc. 
+ if (!info.compatible(k->info())) { + throw malformed_response_error(fmt::format("Incompatible key: {}", k->info())); + } + if (k->info() != info) { + k = ::make_shared(info, k->key()); + } + return k; +} + +[[noreturn]] +static void translate_kmip_error(const kmip_error& e) { + switch (e.code().value()) { + case KMIP_ERROR_BAD_CONNECT: case KMIP_ERROR_IO: + std::throw_with_nested(network_error(e.what())); + case KMIP_ERROR_BAD_PROTOCOL: + std::throw_with_nested(configuration_error(e.what())); + case KMIP_ERROR_NOT_FOUND: + std::throw_with_nested(missing_resource_error(e.what())); + case KMIP_ERROR_AUTH_FAILED: case KMIP_ERROR_CERT_AUTH_FAILED: + std::throw_with_nested(permission_error(e.what())); + default: + std::throw_with_nested(service_error(e.what())); + } +} + +future, kmip_host::id_type>> kmip_host::impl::get_or_create_key(const key_info& info, const key_options& opts) { + kmip_log.debug("{}: Lookup key {}:{}", _name, info, opts); + try { + auto linfo = info; + auto kinfo = co_await _attr_cache.get(kmip_key_info{info, opts}); + co_return std::tuple(ensure_compatible_key(std::get<0>(kinfo), linfo), std::get<1>(kinfo)); + } catch (kmip_error& e) { + translate_kmip_error(e); + } catch (base_error&) { + throw; + } catch (std::invalid_argument& e) { + std::throw_with_nested(configuration_error(fmt::format("get_or_create_key: {}", e.what()))); + } catch (...) { + std::throw_with_nested(service_error(fmt::format("get_or_create_key: {}", std::current_exception()))); + } +} + +future> kmip_host::impl::get_key_by_id(const id_type& id, const std::optional& info) { + try { + auto linfo = info; // maintain on stack + auto k = co_await _id_cache.get(id); + if (linfo) { + k = ensure_compatible_key(k, *linfo); + } + co_return k; + } catch (kmip_error& e) { + translate_kmip_error(e); + } catch (base_error&) { + throw; + } catch (std::invalid_argument& e) { + std::throw_with_nested(configuration_error(fmt::format("get_key_by_id: {}", e.what()))); + } catch (...) 
{ + std::throw_with_nested(service_error(fmt::format("get_key_by_id: {}", std::current_exception()))); + } +} + +kmip_host::kmip_host(encryption_context& ctxt, const sstring& name, const std::unordered_map& map) + : kmip_host(ctxt, name, [&ctxt, &map] { + host_options opts; + map_wrapper> m(map); + + try { + static const std::regex wsc("\\s*,\\s*"); // comma+whitespace + + std::string hosts = m("hosts").value(); + + auto i = std::sregex_token_iterator(hosts.begin(), hosts.end(), wsc, -1); + auto e = std::sregex_token_iterator(); + + std::for_each(i, e, [&](const std::string & s) { + opts.hosts.emplace_back(s); + }); + } catch (std::bad_optional_access&) { + throw std::invalid_argument("No KMIP host names provided"); + } + + opts.certfile = m("certificate").value_or(""); + opts.keyfile = m("keyfile").value_or(""); + opts.truststore = m("truststore").value_or(""); + opts.priority_string = m("priority_string").value_or(""); + + opts.username = m("username").value_or(""); + opts.password = ctxt.maybe_decrypt_config_value(m("password").value_or("")); + + if (m("max_command_retries")) { + opts.max_command_retries = std::stoul(*m("max_command_retries")); + } + + opts.key_cache_expiry = parse_expiry(m("key_cache_expiry")); + opts.key_cache_refresh = parse_expiry(m("key_cache_refresh")); + + return opts; + }()) +{} + +kmip_host::kmip_host(encryption_context& ctxt, const sstring& name, const host_options& opts) + : _impl(std::make_unique(ctxt, name, opts)) +{} + +kmip_host::~kmip_host() = default; + +future<> kmip_host::connect() { + return _impl->connect(); +} + +future<> kmip_host::disconnect() { + return _impl->disconnect(); +} + +future, kmip_host::id_type>> kmip_host::get_or_create_key(const key_info& info, const key_options& opts) { + return _impl->get_or_create_key(info, opts); +} + +future> kmip_host::get_key_by_id(const id_type& id, std::optional info) { + return _impl->get_key_by_id(id, info); +} + +future> kmip_host::get_key_by_name(const sstring& name) { + return 
_impl->get_key_by_id(_impl->kmip_id_to_id(name)); +} + +std::ostream& operator<<(std::ostream& os, const kmip_host::key_options& opts) { + return os << opts.template_name << ":" << opts.key_namespace; +} + +} + +#else + +#include "kmip_host.hh" + +namespace encryption { + +class kmip_host::impl { +}; + +kmip_host::kmip_host(encryption_context& ctxt, const sstring& name, const std::unordered_map& map) { + throw std::runtime_error("KMIP support not enabled"); +} + +kmip_host::kmip_host(encryption_context& ctxt, const sstring& name, const host_options& opts) { + throw std::runtime_error("KMIP support not enabled"); +} + +kmip_host::~kmip_host() = default; + +future<> kmip_host::connect() { + throw std::runtime_error("KMIP support not enabled"); +} + +future<> kmip_host::disconnect() { + throw std::runtime_error("KMIP support not enabled"); +} + +future, kmip_host::id_type>> kmip_host::get_or_create_key(const key_info& info, const key_options& opts) { + throw std::runtime_error("KMIP support not enabled"); +} + +future> kmip_host::get_key_by_id(const id_type& id, std::optional info) { + throw std::runtime_error("KMIP support not enabled"); +} + +future> kmip_host::get_key_by_name(const sstring& name) { + throw std::runtime_error("KMIP support not enabled"); +} + +std::ostream& operator<<(std::ostream& os, const kmip_host::key_options& opts) { + return os << opts.template_name << ":" << opts.key_namespace; +} + +} + +#endif diff --git a/ent/encryption/kmip_host.hh b/ent/encryption/kmip_host.hh new file mode 100644 index 0000000000..05963e2784 --- /dev/null +++ b/ent/encryption/kmip_host.hh @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2018 ScyllaDB + * + */ + +/* + * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 + */ + +#pragma once + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include "../../bytes.hh" + +#include "symmetric_key.hh" + +namespace encryption { + +class symmetric_key; +class 
encryption_context; +struct key_info; + +class kmip_host { +public: + struct host_options { + std::vector hosts; + + sstring username; + sstring password; + + sstring certfile; + sstring keyfile; + sstring truststore; + sstring priority_string; + + std::optional key_cache_expiry; + std::optional key_cache_refresh; + + std::optional max_pooled_connections_per_host; + std::optional max_command_retries; + }; + struct key_options { + sstring template_name; + sstring key_namespace; + }; + using id_type = bytes; + + kmip_host(encryption_context&, const sstring& name, const host_options&); + kmip_host(encryption_context&, const sstring& name, const std::unordered_map&); + ~kmip_host(); + + future<> connect(); + future<> disconnect(); + future, id_type>> get_or_create_key(const key_info&, const key_options& = {}); + future> get_key_by_id(const id_type&, std::optional = std::nullopt); + + /** for system key(s) */ + future> get_key_by_name(const sstring&); + +private: + class impl; + std::unique_ptr _impl; +}; + +std::ostream& operator<<(std::ostream&, const kmip_host::key_options&); + +} + +template <> struct fmt::formatter : fmt::ostream_formatter {}; diff --git a/ent/encryption/kmip_key_provider.cc b/ent/encryption/kmip_key_provider.cc new file mode 100644 index 0000000000..6d98a217c0 --- /dev/null +++ b/ent/encryption/kmip_key_provider.cc @@ -0,0 +1,119 @@ +/* + * Copyright (C) 2018 ScyllaDB + * + */ + +/* + * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 + */ + +#include +#include + +#include "utils/UUID.hh" +#include "utils/UUID_gen.hh" + +#include "kmip_key_provider.hh" +#include "kmip_host.hh" + +namespace encryption { + +class kmip_key_provider : public key_provider { +public: + kmip_key_provider(::shared_ptr kmip_host, kmip_host::key_options kopts, sstring name) + : _kmip_host(std::move(kmip_host)) + , _kopts(std::move(kopts)) + , _name(std::move(name)) + {} + future> key(const key_info& info, opt_bytes id) override { + if (id) { + return 
_kmip_host->get_key_by_id(*id, info).then([id](key_ptr k) { + return make_ready_future>(std::tuple(k, id)); + }); + } + return _kmip_host->get_or_create_key(info, _kopts).then([](std::tuple k_id) { + return make_ready_future>(k_id); + }); + } + void print(std::ostream& os) const override { + os << _name; + if (!_kopts.key_namespace.empty()) { + os << ", namespace=" << _kopts.key_namespace; + } + if (!_kopts.template_name.empty()) { + os << ", template=" << _kopts.template_name; + } + } + +private: + ::shared_ptr _kmip_host; + kmip_host::key_options _kopts; + sstring _name; +}; + + +shared_ptr kmip_key_provider_factory::get_provider(encryption_context& ctxt, const options& map) { + opt_wrapper opts(map); + auto host = opts(HOST_NAME); + if (!host) { + throw std::invalid_argument("kmip_host must be provided"); + } + kmip_host::key_options kopts = { + opts(TEMPLATE_NAME).value_or(""), + opts(KEY_NAMESPACE).value_or(""), + }; + + auto cache_key = *host + ":" + boost::lexical_cast(kopts); + auto provider = ctxt.get_cached_provider(cache_key); + + if (!provider) { + provider = ::make_shared(ctxt.get_kmip_host(*host), std::move(kopts), *host); + ctxt.cache_provider(cache_key, provider); + } + + return provider; +} + +static std::optional> parse_kmip_host_and_path(const sstring & s) { + static const std::regex kmip_ex("kmip://([^/]+)/([\\w/]+)"); + + std::match_results m; + if (std::regex_match(s.begin(), s.end(), m, kmip_ex)) { + return std::make_pair(sstring(m[1]), sstring(m[2])); + } + return std::nullopt; +} + +kmip_system_key::kmip_system_key(encryption_context& ctxt, const sstring& s) { + auto p = parse_kmip_host_and_path(s); + if (!p) { + throw std::invalid_argument("Not a kmip path: " + s); + } + + _host = ctxt.get_kmip_host(p->first); + _name = p->second; +} + +kmip_system_key::~kmip_system_key() = default; + +bool kmip_system_key::is_kmip_path(const sstring& s) { + return parse_kmip_host_and_path(s) != std::nullopt; +} + +future> kmip_system_key::get_key() { + if 
(_key) { + return make_ready_future>(_key); + } + return _host->get_key_by_name(_name).then([this](shared_ptr k) { + _key = k; + return k; + }); +} + +const sstring& kmip_system_key::name() const { + return _name; +} + + +} + diff --git a/ent/encryption/kmip_key_provider.hh b/ent/encryption/kmip_key_provider.hh new file mode 100644 index 0000000000..c383b7fb0f --- /dev/null +++ b/ent/encryption/kmip_key_provider.hh @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2018 ScyllaDB + * + */ + +/* + * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 + */ +#pragma once + +#include "encryption.hh" +#include "system_key.hh" + +namespace encryption { + +class kmip_key_provider_factory : public key_provider_factory { +public: + shared_ptr get_provider(encryption_context&, const options&) override; +}; + +class kmip_host; + +class kmip_system_key : public system_key { + shared_ptr _key; + shared_ptr _host; + sstring _name; +public: + kmip_system_key(encryption_context&, const sstring&); + ~kmip_system_key(); + + static bool is_kmip_path(const sstring&); + + future> get_key() override; + const sstring& name() const override; + bool is_local() const override { + return false; + } +}; + +} diff --git a/ent/encryption/kms_host.cc b/ent/encryption/kms_host.cc new file mode 100644 index 0000000000..2827f54efa --- /dev/null +++ b/ent/encryption/kms_host.cc @@ -0,0 +1,1164 @@ +/* + * Copyright (C) 2022 ScyllaDB + * + */ + +/* + * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 + */ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include "utils/to_string.hh" + +#include "kms_host.hh" +#include "encryption.hh" +#include "encryption_exceptions.hh" +#include "symmetric_key.hh" +#include "utils/hash.hh" +#include "utils/loading_cache.hh" +#include "utils/UUID.hh" +#include "utils/UUID_gen.hh" +#include "utils/rjson.hh" +#include 
"marshal_exception.hh" +#include "db/config.hh" + +template struct fmt::formatter> : fmt::ostream_formatter {}; +template <> struct fmt::formatter : fmt::ostream_formatter {}; + +using namespace std::chrono_literals; +using namespace std::string_literals; + +logger kms_log("kms"); + +class kms_error : public std::exception { + std::string _type, _msg; +public: + kms_error(std::string_view type, std::string_view msg) + : _type(type) + , _msg(fmt::format("{}: {}", type, msg)) + {} + const std::string& type() const { + return _type; + } + const char* what() const noexcept override { + return _msg.c_str(); + } +}; + +namespace kms_errors { + [[maybe_unused]] static const char* AccessDeniedException = "AccessDeniedException"; + [[maybe_unused]] static const char* IncompleteSignature = "IncompleteSignature"; + [[maybe_unused]] static const char* InternalFailure = "InternalFailure"; + [[maybe_unused]] static const char* InvalidAction = "InvalidAction"; + [[maybe_unused]] static const char* InvalidClientTokenId = "InvalidClientTokenId"; + [[maybe_unused]] static const char* InvalidParameterCombination = "InvalidParameterCombination"; + [[maybe_unused]] static const char* InvalidParameterValue = "InvalidParameterValue"; + [[maybe_unused]] static const char* InvalidQueryParameter = "InvalidQueryParameter"; + [[maybe_unused]] static const char* MalformedQueryString = "MalformedQueryString"; + [[maybe_unused]] static const char* MissingAction = "MissingAction"; + [[maybe_unused]] static const char* MissingAuthenticationToken = "MissingAuthenticationToken"; + [[maybe_unused]] static const char* MissingParameter = "MissingParameter"; + [[maybe_unused]] static const char* NotAuthorized = "NotAuthorized"; + [[maybe_unused]] static const char* OptInRequired = "OptInRequired"; + [[maybe_unused]] static const char* RequestExpired = "RequestExpired"; + [[maybe_unused]] static const char* ServiceUnavailable = "ServiceUnavailable"; + [[maybe_unused]] static const char* 
ThrottlingException = "ThrottlingException"; + [[maybe_unused]] static const char* ValidationError = "ValidationError"; + [[maybe_unused]] static const char* DependencyTimeoutException = "DependencyTimeoutException"; + [[maybe_unused]] static const char* InvalidArnExceptio = "InvalidArnException"; + [[maybe_unused]] static const char* KMSInternalException = "KMSInternalException"; + [[maybe_unused]] static const char* NotFoundException = "NotFoundException"; + [[maybe_unused]] static const char* AlreadyExistsException = "AlreadyExistsException"; +} + +namespace beast = boost::beast; // from +// Note: switch http -> bhttp to deal with namespace ambiguity. +namespace bhttp = beast::http; // from +namespace shttp = seastar::http; + +static std::string to_lower(std::string_view s) { + std::string tmp(s.size(), 0); + std::transform(s.begin(), s.end(), tmp.begin(), ::tolower); + return tmp; +} + +static bool is_true(std::string_view s) { + auto tmp = to_lower(s); + return tmp == "true" || tmp == "1" || tmp == "yes" || tmp == "on"; +} + +class encryption::kms_host::impl { +public: + // set a rather long expiry. normal KMS policies are 365-day rotation of keys. + // we can do with 10 minutes. CMH. maybe even longer. 
+ // (see comments below on what keys are here) + static inline constexpr std::chrono::milliseconds default_expiry = 600s; + static inline constexpr std::chrono::milliseconds default_refresh = 1200s; + + impl(encryption_context& ctxt, const std::string& name, const host_options& options) + : _ctxt(ctxt) + , _name(name) + , _options(options) + , _attr_cache(utils::loading_cache_config{ + .max_size = std::numeric_limits::max(), + .expiry = options.key_cache_expiry.value_or(default_expiry), + .refresh = options.key_cache_refresh.value_or(default_refresh)}, kms_log, std::bind(&impl::create_key, this, std::placeholders::_1)) + , _id_cache(utils::loading_cache_config{ + .max_size = std::numeric_limits::max(), + .expiry = options.key_cache_expiry.value_or(default_expiry), + .refresh = options.key_cache_refresh.value_or(default_refresh)}, kms_log, std::bind(&impl::find_key, this, std::placeholders::_1)) + { + // check if we have an explicit endpoint set. + if (!_options.endpoint.empty()) { + static std::regex simple_url(R"foo((https?):\/\/(?:([\w\.]+)|\[([\w:]+)\]):?(\d+)?\/?)foo"); + std::transform(_options.endpoint.begin(), _options.endpoint.end(), _options.endpoint.begin(), ::tolower); + std::smatch m; + if (!std::regex_match(_options.endpoint, m, simple_url)) { + throw std::invalid_argument(fmt::format("Could not parse URL: {}", _options.endpoint)); + } + _options.https = m[1].str() == "https"; + _options.host = m[2].length() > 0 ? m[2].str() : m[3].str(); + _options.port = m[4].length() > 0 ? std::stoi(m[4].str()) : 0; + } + if (_options.endpoint.empty() && _options.host.empty() && _options.aws_region.empty() && !_options.aws_use_ec2_region) { + throw std::invalid_argument("No AWS region or endpoint specified"); + } + if (_options.port == 0) { + _options.port = _options.https ? 
443 : 80; + } + if (_options.aws_profile.empty()) { + auto profile = std::getenv("AWS_PROFILE"); + if (profile) { + _options.aws_profile = profile; + } else { + _options.aws_profile = "default"; + } + } + kms_log.trace("Added KMS node {}={}", name, _options.endpoint.empty() + ? (_options.host.empty() ? _options.aws_region : _options.host) + : _options.endpoint + ); + } + ~impl() = default; + + future<> init(); + const host_options& options() const { + return _options; + } + + future, id_type>> get_or_create_key(const key_info&, const option_override* = nullptr); + future> get_key_by_id(const id_type&, const key_info&, const option_override* = nullptr); +private: + class httpclient; + using key_and_id_type = std::tuple, id_type>; + + struct attr_cache_key { + std::string master_key; + std::string aws_assume_role_arn; + key_info info; + + bool operator==(const attr_cache_key& v) const = default; + friend std::ostream& operator<<(std::ostream& os, const attr_cache_key& k) { + fmt::print(os, "{}", std::tie(k.master_key, k.aws_assume_role_arn, k.info)); + return os; + } + }; + + struct attr_cache_key_hash { + size_t operator()(const attr_cache_key& k) const { + return utils::tuple_hash()(std::tie(k.master_key, k.aws_assume_role_arn, k.info.len)); + } + }; + + struct id_cache_key { + id_type id; + std::string aws_assume_role_arn; + bool operator==(const id_cache_key& v) const = default; + friend std::ostream& operator<<(std::ostream& os, const id_cache_key& k) { + fmt::print(os, "{{{}, {}}}", k.id, k.aws_assume_role_arn); + return os; + } + }; + + struct id_cache_key_hash { + size_t operator()(const id_cache_key& k) const { + return utils::tuple_hash()(std::tie(k.id, k.aws_assume_role_arn)); + } + }; + + struct aws_query; + using result_type = bhttp::response; + + future post(aws_query); + future post(std::string_view target, std::string_view aws_assume_role_arn, const rjson::value& query); + + future create_key(const attr_cache_key&); + future find_key(const 
id_cache_key&); + + encryption_context& _ctxt; + std::string _name; + host_options _options; + utils::loading_cache, attr_cache_key_hash> _attr_cache; + utils::loading_cache, id_cache_key_hash> _id_cache; + shared_ptr _creds; + std::unordered_map> _cache; + bool _initialized = false; +}; + +template <> struct fmt::formatter : fmt::ostream_formatter {}; +template <> struct fmt::formatter : fmt::ostream_formatter {}; + +/** + * Not in seastar. Because nowhere near complete, thought through or + * capable of dealing with anything but tiny aws messages. + * + * TODO: formalize and move to seastar + */ +class encryption::kms_host::impl::httpclient { +public: + httpclient(std::string host, uint16_t port, shared_ptr = {}); + + httpclient& add_header(std::string_view key, std::string_view value); + void clear_headers(); + + using result_type = kms_host::impl::result_type; + using request_type = bhttp::request; + + future send(); + + using method_type = bhttp::verb; + + void method(method_type); + void content(std::string_view); + void target(std::string_view); + + request_type& request() { + return _req; + } + const request_type& request() const { + return _req; + } + const std::string& host() const { + return _host; + } + uint16_t port() const { + return _port; + } +private: + + std::string _host; + uint16_t _port; + shared_ptr _creds; + request_type _req; +}; + +encryption::kms_host::impl::httpclient::httpclient(std::string host, uint16_t port, shared_ptr creds) + : _host(std::move(host)) + , _port(port) + , _creds(std::move(creds)) +{} + +encryption::kms_host::impl::httpclient& encryption::kms_host::impl::httpclient::add_header(std::string_view key, std::string_view value) { + _req.set(beast::string_view(key.data(), key.size()), beast::string_view(value.data(), value.size())); + return *this; +} + +void encryption::kms_host::impl::httpclient::clear_headers() { + _req.clear(); +} + +future encryption::kms_host::impl::httpclient::send() { + auto addr = co_await 
net::dns::resolve_name(_host); + socket_address sa(addr, _port); + connected_socket s = co_await (_creds + ? tls::connect(_creds, sa) + : seastar::connect(sa) + ); + + s.set_keepalive(true); + s.set_nodelay(true); + + auto out = s.output(); + auto in = s.input(); + + bhttp::serializer ser(_req); + + beast::error_code ec; + std::exception_ptr ex; + + bhttp::parser p(result_type{}); + + try { + while (!ser.is_done()) { + future<> f = make_ready_future<>(); + ser.next(ec, [&](beast::error_code& ec, auto&& buffers) { + for (auto const buffer : beast::buffers_range (buffers)) { + f = f.then([&out, data = buffer.data(), size = buffer.size()] { + return out.write(static_cast(data), size); + }); + } + ser.consume(beast::buffer_bytes(buffers)); + }); + + co_await std::move(f); + + if (ec.failed()) { + break; + } + } + + co_await out.flush(); + + p.eager(true); + p.skip(false); + + if (!ec.failed()) { + while (!p.is_done()) { + auto buf = co_await in.read(); + if (buf.empty()) { + break; + } + // parse + boost::asio::const_buffer wrap(buf.get(), buf.size()); + p.put(wrap, ec); + if (ec.failed() && ec != bhttp::error::need_more) { + break; + } + ec.clear(); + } + } + } catch (...) { + ex = std::current_exception(); + } + + try { + co_await out.close(); + } catch (...) { + if (!ex) { + ex = std::current_exception(); + } + } + try { + co_await in.close(); + } catch (...) 
{ + if (!ex) { + ex = std::current_exception(); + } + } + + if (ec.failed()) { + throw std::system_error(ec); + } + if (ex) { + std::rethrow_exception(ex); + } + + co_return p.release(); +} + +void encryption::kms_host::impl::httpclient::method(method_type m) { + _req.method(m); +} + +void encryption::kms_host::impl::httpclient::content(std::string_view body) { + _req.body().assign(body.begin(), body.end()); + _req.set(bhttp::field::content_length, std::to_string(_req.body().size())); +} + +void encryption::kms_host::impl::httpclient::target(std::string_view target) { + _req.target(std::string(target)); +} + +static std::string get_option(const encryption::kms_host::option_override* oov, std::optional encryption::kms_host::option_override::* f, const std::string& def) { + if (oov) { + return (oov->*f).value_or(def); + } + return {}; +}; + +[[noreturn]] +static void translate_kms_error(const kms_error& e) { + using namespace kms_errors; + using namespace encryption; + + if (e.type() == AccessDeniedException || e.type() == MissingAuthenticationToken || e.type() == NotAuthorized) { + std::throw_with_nested(permission_error(e.what())); + } + if (e.type() == OptInRequired || e.type() == InvalidClientTokenId || e.type() == InvalidAction) { + std::throw_with_nested(configuration_error(e.what())); + } + if (e.type() == NotFoundException || e.type() == DependencyTimeoutException) { + std::throw_with_nested(missing_resource_error(e.what())); + } + std::throw_with_nested(service_error(e.what())); +} + +future, encryption::kms_host::id_type>> encryption::kms_host::impl::get_or_create_key(const key_info& info, const option_override* oov) { + attr_cache_key key { + .master_key = get_option(oov, &option_override::master_key, _options.master_key), + .aws_assume_role_arn = get_option(oov, &option_override::aws_assume_role_arn, _options.aws_assume_role_arn), + .info = info, + }; + + if (key.master_key.empty() && _options.master_key.empty()) { + throw configuration_error("No master 
key set in kms host config or encryption attributes"); + } + try { + co_return co_await _attr_cache.get(key); + } catch (kms_error& e) { + translate_kms_error(e); + } catch (base_error&) { + throw; + } catch (std::system_error& e) { + std::throw_with_nested(network_error(e.what())); + } catch (...) { + std::throw_with_nested(service_error(fmt::format("get_or_create_key: {}", std::current_exception()))); + } +} + +future> encryption::kms_host::impl::get_key_by_id(const id_type& id, const key_info& info, const option_override* oov) { + // note: since KMS does not really have any actual "key" association of id -> key, + // we only cache/query raw bytes of some length. (See below). + // Thus keys returned are always new objects. But they are not huge... + id_cache_key key { + .id = id, + .aws_assume_role_arn = get_option(oov, &option_override::aws_assume_role_arn, _options.aws_assume_role_arn), + }; + try { + auto data = co_await _id_cache.get(key); + co_return make_shared(info, data); + } catch (kms_error& e) { + translate_kms_error(e); + } catch (base_error&) { + throw; + } catch (std::system_error& e) { + std::throw_with_nested(network_error(e.what())); + } catch (std::invalid_argument& e) { + std::throw_with_nested(configuration_error(fmt::format("get_key_by_id: {}", e.what()))); + } catch (...) { + std::throw_with_nested(service_error(fmt::format("get_key_by_id: {}", std::current_exception()))); + } +} + +std::string make_aws_host(std::string_view aws_region, std::string_view service) { + static const char AWS_GLOBAL[] = "aws-global"; + static const char US_EAST_1[] = "us-east-1"; // US East (N. Virginia) + static const char CN_NORTH_1[] = "cn-north-1"; // China (Beijing) + static const char CN_NORTHWEST_1[] = "cn-northwest-1"; // China (Ningxia) + static const char US_ISO_EAST_1[] = "us-iso-east-1"; // US ISO East + static const char US_ISOB_EAST_1[] = "us-isob-east-1"; // US ISOB East (Ohio) + + // Fallback to us-east-1 if global endpoint does not exist. 
+ auto region = aws_region == AWS_GLOBAL ? US_EAST_1 : aws_region; + + std::stringstream ss; + ss << service << "." << region; + + if (region == CN_NORTH_1 || region == CN_NORTHWEST_1) { + ss << ".amazonaws.com.cn"; + } else if (region == US_ISO_EAST_1) { + ss << ".c2s.ic.gov"; + } else if (region == US_ISOB_EAST_1) { + ss << ".sc2s.sgov.gov"; + } else { + ss << ".amazonaws.com"; + } + return ss.str(); +} + +struct encryption::kms_host::impl::aws_query { + std::string_view host; + + std::string_view service; + std::string_view target; + std::string_view content_type; + std::string_view content; + + std::string_view aws_access_key_id; + std::string_view aws_secret_access_key; + std::string_view security_token; + + uint16_t port; +}; + +future encryption::kms_host::impl::post(std::string_view target, std::string_view aws_assume_role_arn, const rjson::value& query) { + static auto get_response_error = [](const result_type& res) -> std::string { + switch (res.result()) { + case bhttp::status::unauthorized: case bhttp::status::forbidden: return "AccessDenied"; + case bhttp::status::not_found: return "ResourceNotFound"; + case bhttp::status::too_many_requests: return "SlowDown"; + case bhttp::status::internal_server_error: return "InternalError"; + case bhttp::status::service_unavailable: return "ServiceUnavailable"; + case bhttp::status::request_timeout: case bhttp::status::gateway_timeout: + case bhttp::status::network_connect_timeout_error: + return "RequestTimeout"; + default: + return format("{}", res.result()); + } + }; + + static auto query_ec2_meta = [](std::string_view target, std::string token = {}) -> future> { + static auto get_env_def = [](std::string_view var, std::string_view def) { + auto val = std::getenv(var.data()); + return val ? 
std::string_view(val) : def; + }; + + static const std::string ec2_meta_host(get_env_def("AWS_EC2_METADATA_ADDRESS", "169.254.169.254")); + static const int ec2_meta_port = std::stoi(get_env_def("AWS_EC2_METADATA_PORT", "80").data()); + + kms_log.debug("Query ec2 metadata"); + + httpclient client(ec2_meta_host, ec2_meta_port); + + static constexpr auto X_AWS_EC2_METADATA_TOKEN_TTL_SECONDS = "X-aws-ec2-metadata-token-ttl-seconds"; + static constexpr auto X_AWS_EC2_METADATA_TOKEN = "X-aws-ec2-metadata-token"; + static constexpr const char* HOST_HEADER = "host"; + + static auto logged_send = [](httpclient& client) -> future { + kms_log.trace("Request: {}", client.request()); + result_type res; + try { + res = co_await client.send(); + } catch (std::system_error& e) { + std::throw_with_nested(network_error(fmt::format("Error sending to host {}:{}: {}", client.host(), client.port(), e.what()))); + } catch (std::exception& e) { + std::throw_with_nested(service_error(fmt::format("Error sending to host {}:{}: {}", client.host(), client.port(), e.what()))); + } + kms_log.trace("Result: status={}, response={}", res.result_int(), res); + if (res.result() != bhttp::status::ok) { + throw kms_error(get_response_error(res), "EC2 metadata query"); + } + co_return res; + }; + + client.add_header(HOST_HEADER, ec2_meta_host); + + if (token.empty()) { + client.add_header(X_AWS_EC2_METADATA_TOKEN_TTL_SECONDS, "21600"); + client.method(httpclient::method_type::put); + client.target("/latest/api/token"); + + + auto res = co_await logged_send(client); + + if (res.result() != bhttp::status::ok) { + throw kms_error(get_response_error(res), "EC2 metadata token query"); + } + + token = res.body(); + client.clear_headers(); + } + + client.add_header(X_AWS_EC2_METADATA_TOKEN, token); + client.add_header(HOST_HEADER, ec2_meta_host); + client.method(httpclient::method_type::get); + client.target(target); + + auto res = co_await logged_send(client); + co_return std::make_tuple(std::move(res), 
token); + }; + + std::string gtoken; + + if (_options.aws_region.empty() && _options.host.empty()) { + assert(_options.aws_use_ec2_region); + httpclient::result_type res; + std::tie(res, gtoken) = co_await query_ec2_meta("/latest/meta-data/placement/region"); + _options.aws_region = res.body(); + } + + if (_options.host.empty()) { + // resolve region -> endpoint + assert(!_options.aws_region.empty()); + _options.host = make_aws_host(_options.aws_region, "kms"); + } + + auto should_resolve_options_credentials = [this] { + if (_options.aws_use_ec2_credentials) { + return false; + } + return _options.aws_access_key_id.empty() || _options.aws_secret_access_key.empty(); + }; + + // if we did not get full auth info in config, we can try to + // retrieve it from environment + if (should_resolve_options_credentials()) { + auto key_id = std::getenv("AWS_ACCESS_KEY_ID"); + auto key = std::getenv("AWS_SECRET_ACCESS_KEY"); + if (_options.aws_access_key_id.empty() && key_id) { + kms_log.debug("No aws id specified. Using environment AWS_ACCESS_KEY_ID"); + _options.aws_access_key_id = key_id; + } + if (_options.aws_secret_access_key.empty() && key) { + kms_log.debug("No aws secret specified. Using environment AWS_SECRET_ACCESS_KEY"); + _options.aws_secret_access_key = key; + } + } + + // if we did not get full auth info in config or env, we can try to + // retrieve it from ~/.aws/credentials + if (should_resolve_options_credentials()) { + auto home = std::getenv("HOME"); + if (home) { + auto credentials = std::string(home) + "/.aws/credentials"; + auto credentials_exists = co_await seastar::file_exists(credentials); + if (credentials_exists) { + kms_log.debug("No aws id/secret specified. 
Trying to read credentials from {}", credentials); + try { + auto buf = co_await read_text_file_fully(credentials); + std::string profile; + + static std::regex cred_line(R"foo(\s*\[(?:profile\s+)?(\w+)\]|([^\s]+)\s*=\s*([^\s]+)\s*\n)foo"); + std::cregex_iterator i(buf.get(), buf.get() + buf.size(), cred_line), e; + + std::string id, secret; + while (i != e) { + if ((*i)[1].length() > 0) { + profile = (*i)[1].str(); + kms_log.trace("Found profile {} ({})", profile, credentials); + } else if (profile == _options.aws_profile) { + std::string key((*i)[2].str()); + std::string val((*i)[3].str()); + if (key == "aws_access_key_id") { + id = val; + } else if (key == "aws_secret_access_key") { + secret = val; + } + } + ++i; + } + + if (!id.empty() && !_options.aws_access_key_id.empty() && id != _options.aws_access_key_id) { + throw configuration_error(fmt::format("Mismatched aws id: {} != {}", id, _options.aws_access_key_id)); + } + if (!id.empty() && _options.aws_access_key_id.empty()) { + _options.aws_access_key_id = id; + } + if (!secret.empty() && _options.aws_secret_access_key.empty()) { + _options.aws_secret_access_key = secret; + } + if (_options.aws_access_key_id.empty() || _options.aws_secret_access_key.empty()) { + throw configuration_error(fmt::format("Could not find credentials for profile {}", _options.aws_profile)); + } + kms_log.debug("Read credentials from {} ({}:{}{})", credentials, _options.aws_access_key_id + , _options.aws_secret_access_key.substr(0, 2) + , std::string(_options.aws_secret_access_key.size()-2, '-') + ); + } catch (...) 
{ + kms_log.debug("Could not read credentials: {}", std::current_exception()); + } + } + } + } + + auto aws_access_key_id = _options.aws_access_key_id; + auto aws_secret_access_key = _options.aws_secret_access_key; + auto session = ""s; + + if (_options.aws_use_ec2_credentials) { + auto [res, token] = co_await query_ec2_meta("/latest/meta-data/iam/security-credentials/", gtoken); + auto role = res.body(); + + std::tie(res, std::ignore) = co_await query_ec2_meta("/latest/meta-data/iam/security-credentials/" + role, token); + auto body = rjson::parse(std::string_view(res.body().data(), res.body().size())); + + try { + aws_access_key_id = rjson::get(body, "AccessKeyId"); + aws_secret_access_key = rjson::get(body, "SecretAccessKey"); + session = rjson::get(body, "Token"); + } catch (rjson::malformed_value&) { + std::throw_with_nested(kms_error("AccessDenied", fmt::format("Code={}, Message={}" + , rjson::get_opt(body, "Code") + , rjson::get_opt(body, "Message") + ))); + } + } + + // Note: allowing user code to potentially reset aws_assume_role_arn='' -> no assumerole. + // Not 100% sure this is needed. 
+ + if (!aws_assume_role_arn.empty()) { + auto sts_host = make_aws_host(_options.aws_region, "sts"); + auto now = db_clock::now(); + auto rs_id = utils::UUID_gen::get_time_UUID(std::chrono::system_clock::time_point(now.time_since_epoch())); + auto role_session = fmt::format("ScyllaDB-{}", rs_id); + + kms_log.debug("Assume role: {} (RoleSessionID={})", aws_assume_role_arn, role_session); + + auto res = co_await post(aws_query{ + .host = sts_host, + .service = "sts", + .content_type = "application/x-www-form-urlencoded; charset=utf-8", + .content = "Action=AssumeRole&Version=2011-06-15&RoleArn=" + + shttp::internal::url_encode(aws_assume_role_arn) + + "&RoleSessionName=" + role_session, + .aws_access_key_id = aws_access_key_id, + .aws_secret_access_key = aws_secret_access_key, + .security_token = session, + .port = _options.port, + }); + + if (res.result() != bhttp::status::ok) { + throw kms_error(get_response_error(res), "AssumeRole"); + } + + rapidxml::xml_document<> doc; + try { + doc.parse<0>(res.body().data()); + + using node_type = rapidxml::xml_node; + static auto get_xml_node = [](node_type* node, const char* what) { + auto res = node->first_node(what); + if (!res) { + throw kms_error("XML parse error", what); + } + return res; + }; + + auto arrsp = get_xml_node(&doc, "AssumeRoleResponse"); + auto arres = get_xml_node(arrsp, "AssumeRoleResult"); + auto creds = get_xml_node(arres, "Credentials"); + auto keyid = get_xml_node(creds, "AccessKeyId"); + auto key = get_xml_node(creds, "SecretAccessKey"); + auto token = get_xml_node(creds, "SessionToken"); + + aws_access_key_id = keyid->value(); + aws_secret_access_key = key->value(); + session = token->value(); + + } catch (const rapidxml::parse_error& e) { + std::throw_with_nested(kms_error("XML parse error", "AssumeRole")); + } + } + + auto res = co_await post(aws_query{ + .host = _options.host, + .service = "kms", + .target = target, + .content_type = "application/x-amz-json-1.1", + .content = 
rjson::print(query), + .aws_access_key_id = aws_access_key_id, + .aws_secret_access_key = aws_secret_access_key, + .security_token = session, + .port = _options.port, + }); + + auto body = rjson::empty_object(); + + if (!res.body().empty()) { + try { + body = rjson::parse(std::string_view(res.body().data(), res.body().size())); + } catch (...) { + if (res.result() == bhttp::status::ok) { + throw; + } + // assume non-json formatted error. fall back to parsing below + } + } + + if (res.result() != bhttp::status::ok) { + // try to format as good an error as we can. + static const char* message_lc_header = "message"; + static const char* message_cc_header = "Message"; + static const char* error_type_header = "x-amzn-ErrorType"; + static const char* type_header = "__type"; + + auto o = rjson::get_opt(body, message_lc_header); + if (!o) { + o = rjson::get_opt(body, message_cc_header); + } + auto msg = o.value_or("Unknown error"); + + o = rjson::get_opt(body, error_type_header); + if (!o) { + o = rjson::get_opt(body, type_header); + } + // this should never happen with aws, but... + auto type = o ? *o : get_response_error(res); + + throw kms_error(type, msg); + } + + co_return body; +} + +// helper to build AWS request and parse result. 
+future encryption::kms_host::impl::post(aws_query query) { + auto creds = _creds; + // if we are https, we need at least a credentials object that says "use system trust" + if (!creds && _options.https) { + creds = ::make_shared(); + + if (!_options.priority_string.empty()) { + creds->set_priority_string(_options.priority_string); + } else { + creds->set_priority_string(db::config::default_tls_priority); + } + + if (!_options.certfile.empty()) { + co_await creds->set_x509_key_file(_options.certfile, _options.keyfile, seastar::tls::x509_crt_format::PEM); + } + if (!_options.truststore.empty()) { + co_await creds->set_x509_trust_file(_options.truststore, seastar::tls::x509_crt_format::PEM); + } else { + co_await creds->set_system_trust(); + } + _creds = creds; + } + + // some of this could be shared with alternator + static constexpr const char* CONTENT_TYPE_HEADER = "content-type"; + static constexpr const char* HOST_HEADER = "host"; + static constexpr const char* X_AWS_DATE_HEADER = "X-Amz-Date"; + static constexpr const char* AWS_AUTHORIZATION_HEADER = "authorization"; + static constexpr const char* AMZ_SDK_INVOCATION_ID = "amz-sdk-invocation-id"; + static constexpr const char* X_AMZ_SECURITY_TOKEN = "X-Amz-Security-Token"; + + static constexpr const char* AMZ_TARGET_HEADER = "x-amz-target"; + static constexpr const char* AWS_HMAC_SHA256 = "AWS4-HMAC-SHA256"; + static constexpr const char* AWS4_REQUEST = "aws4_request"; + static constexpr const char* SIGNING_KEY = "AWS4"; + static constexpr const char* CREDENTIAL = "Credential"; + static constexpr const char* SIGNATURE = "Signature"; + static constexpr const char* SIGNED_HEADERS = "SignedHeaders"; + [[maybe_unused]] static constexpr const char* ACTION_HEADER = "Action"; + + static constexpr const char* ISO_8601_BASIC = "{:%Y%m%dT%H%M%SZ}"; + static constexpr const char* SIMPLE_DATE_FORMAT_STR = "{:%Y%m%d}"; + static constexpr auto NEWLINE = '\n'; + + auto now = db_clock::now(); + auto req_id = 
utils::UUID_gen::get_time_UUID(std::chrono::system_clock::time_point(now.time_since_epoch())); + + kms_log.trace("Building request: {} ({}:{}) {}", query.target, query.host, query.port, req_id); + + httpclient client(std::string(query.host), query.port, std::move(creds)); + + auto t_now = fmt::gmtime(db_clock::to_time_t(now)); + auto timestamp = fmt::format(ISO_8601_BASIC, t_now); + + // see https://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html + // see AWS SDK. + // see https://docs.aws.amazon.com/general/latest/gr/sigv4-signed-request-examples.html + std::stringstream signedHeadersStream; + std::stringstream canonicalRequestStream; + + canonicalRequestStream + << "POST" << NEWLINE + << "/" << NEWLINE << NEWLINE + ; + + auto add_signed_header = [&](std::string_view name, std::string_view value) { + client.add_header(name, value); + auto lname = to_lower(name); + canonicalRequestStream << lname << ":" << value << NEWLINE; + if (signedHeadersStream.tellp() != 0) { + signedHeadersStream << ';'; + } + signedHeadersStream << lname; + }; + + // headers must be sorted! 
+ + add_signed_header(CONTENT_TYPE_HEADER, query.content_type); + add_signed_header(HOST_HEADER, query.host); + add_signed_header(X_AWS_DATE_HEADER, timestamp); + if (!query.target.empty()) { + add_signed_header(AMZ_TARGET_HEADER, "TrentService."s + std::string(query.target)); + } + + if (!query.security_token.empty()) { + //add_signed_header(X_AMZ_SECURITY_TOKEN, query.security_token); + client.add_header(X_AMZ_SECURITY_TOKEN, query.security_token); + } + + client.add_header(AMZ_SDK_INVOCATION_ID, fmt::format("{}", req_id)); + client.add_header("Accept-Encoding", "identity"); + client.add_header("Accept", "*/*"); + + auto make_hash = [&](std::string_view s) { + auto sha256 = calculate_sha256(bytes_view(reinterpret_cast(s.data()), s.size())); + auto hash = to_hex(sha256); + return hash; + }; + + auto hash = make_hash(query.content); + + auto signedHeadersValue = signedHeadersStream.str(); + canonicalRequestStream << NEWLINE << signedHeadersValue << NEWLINE << hash; + auto canonicalRequestString = canonicalRequestStream.str(); + auto canonicalRequestHash = make_hash(canonicalRequestString); + + kms_log.trace("Canonical request: {}", canonicalRequestString); + + auto simpleDate = fmt::format(SIMPLE_DATE_FORMAT_STR, t_now); + + std::stringstream stringToSignStream; + stringToSignStream << AWS_HMAC_SHA256 << NEWLINE + << timestamp << NEWLINE + << simpleDate << "/" << _options.aws_region << "/" + << query.service << "/" << AWS4_REQUEST << NEWLINE + << canonicalRequestHash + ; + auto stringToSign = stringToSignStream.str(); + + // these log messages intentionally made to mimic aws sdk/boto3 + kms_log.trace("StringToSign: {}", stringToSign); + + std::string finalSignature; + + { + auto tobv = [](std::string_view s) { + return bytes_view(reinterpret_cast(s.data()), s.size()); + }; + + auto signingKey = SIGNING_KEY + std::string(query.aws_secret_access_key); + auto kDate = hmac_sha256(tobv(simpleDate), tobv(signingKey)); + auto kRegion = 
hmac_sha256(tobv(_options.aws_region), kDate); + auto kService = hmac_sha256(tobv(query.service), kRegion); + auto hashResult = hmac_sha256(tobv(AWS4_REQUEST), kService); + auto finalHash = hmac_sha256(tobv(stringToSign), hashResult); + finalSignature = to_hex(finalHash); + } + + std::stringstream authStream; + authStream << AWS_HMAC_SHA256 << " " + << CREDENTIAL << "=" << query.aws_access_key_id << "/" << simpleDate << "/" << _options.aws_region + << "/" << query.service << "/" << AWS4_REQUEST << ", " << SIGNED_HEADERS + << "=" << signedHeadersValue << ", " << SIGNATURE << "=" << finalSignature + ; + + auto awsAuthString = authStream.str(); + + client.add_header(AWS_AUTHORIZATION_HEADER, awsAuthString); + client.target("/"); + client.content(query.content); + client.method(httpclient::method_type::post); + + kms_log.trace("Request: {}", client.request()); + + auto res = co_await client.send(); + + kms_log.trace("Result: status={}, response={}", res.result_int(), res); + + co_return res; +} + +static std::optional make_opt(const std::string& s) { + if (s.empty()) { + return std::nullopt; + } + return s; +} + +future<> encryption::kms_host::impl::init() { + if (_initialized) { + co_return; + } + + if (!_options.master_key.empty()) { + kms_log.debug("Looking up master key"); + auto query = rjson::empty_object(); + rjson::add(query, "KeyId", _options.master_key); + auto response = co_await post("DescribeKey", _options.aws_assume_role_arn, query); + kms_log.debug("Master key exists"); + } else { + kms_log.info("No default master key configured. Not verifying."); + } + _initialized = true; +} + +future encryption::kms_host::impl::create_key(const attr_cache_key& k) { + auto& master_key = k.master_key; + auto& aws_assume_role_arn = k.aws_assume_role_arn; + auto& info = k.info; + + /** + * AWS KMS does _not_ allow us to actually have "named keys" that can be used externally, + * i.e. exported to us, here, for bulk encryption. 
+ * All named keys are 100% internal, the only option we have is using the + * "GenerateDataKey" API. This creates a new (ephemeral) key, encrypts it + * using a named (internal) key, and gives us both raw and encrypted blobs + * for usage as a local key. + * To be able to actually re-use this key again, on decryption of data, + * we employ the strategy recommended (https://docs.aws.amazon.com/kms/latest/APIReference/API_GenerateDataKey.html) + * namely actually embedding the encrypted key in the key ID associated with + * the locally encrypted data. So IDs become pretty big. + * + * For ID -> key, we simply split the ID into the encrypted key part, and + * the master key name part, decrypt the first using the second (AWS KMS Decrypt), + * and create a local key using the result. + * + * Data recovery: + * Assuming you have data encrypted using a KMS generated key, you will have + * metadata detailing algorithm, key length etc (see sstable metadata, and key info). + * Metadata will also include a byte blob representing the ID of the encryption key. + * For KMS, the ID will actually be a text string: + * <KMS key ID>:<base64-encoded encrypted data key> + * + * I.e. something like: + * 761f258a-e2e9-40b3-8891-602b1b8b947e:e56sadfafa3324ff=/wfsdfwssdf + * or + * arn:aws:kms:us-east-1:797456418907:key/761f258a-e2e9-40b3-8891-602b1b8b947e:e56sadfafa3324ff=/wfsdfwssdf + * + * (last colon is separator) + * + * The actual data key can be retrieved by doing a KMS "Decrypt" of the data blob part + * using the KMS key referenced by the key ID. This gives back actual key data that can + * be used to create a symmetric_key with algo, length etc as specified by metadata. + * + */ + + // avoid creating too many keys and too many calls. If we are not shard 0, delegate there. 
+ if (this_shard_id() != 0) { + auto [data, id] = co_await smp::submit_to(0, [this, info, master_key, aws_assume_role_arn]() -> future> { + auto host = _ctxt.get_kms_host(_name); + option_override oov { + .master_key = make_opt(master_key), + .aws_assume_role_arn = make_opt(aws_assume_role_arn), + }; + auto [k, id] = co_await host->_impl->get_or_create_key(info, &oov); + co_return std::make_tuple(k != nullptr ? k->key() : bytes{}, id); + }); + co_return key_and_id_type{ + data.empty() ? nullptr : make_shared(info, data), + id + }; + } + + // note: since external keys are _not_ stored, + // there is nothing we can "look up" or anything. Always + // new key here. + + kms_log.debug("Creating new key: {}", info); + + auto query = rjson::empty_object(); + + rjson::add(query, "KeyId", std::string(master_key.begin(), master_key.end())); + rjson::add(query, "NumberOfBytes", info.len/8); + + auto response = co_await post("GenerateDataKey", aws_assume_role_arn, query); + auto data = base64_decode(rjson::get(response, "Plaintext")); + auto enc = rjson::get(response, "CiphertextBlob"); + auto kid = rjson::get(response, "KeyId"); + + try { + auto key = make_shared(info, data); + bytes id(kid.size() + 1 + enc.size(), 0); + auto i = std::copy(kid.begin(), kid.end(), id.begin()); + *i++ = ':'; + std::copy(enc.begin(), enc.end(), i); + + co_return key_and_id_type{ key, id }; + } catch (std::invalid_argument& e) { + std::throw_with_nested(configuration_error(e.what())); + } +} + +future encryption::kms_host::impl::find_key(const id_cache_key& k) { + // avoid creating too many keys and too many calls. If we are not shard 0, delegate there. + if (this_shard_id() != 0) { + co_return co_await smp::submit_to(0, [this, k]() -> future { + auto host = _ctxt.get_kms_host(_name); + auto bytes = co_await host->_impl->_id_cache.get(k); + co_return bytes; + }); + } + + // See create_key. ID consists of :. + // master id can (and will) contain ':', but blob will not. 
+ // (we are being wasteful, and keeping the base64 encoding - easier to read) + auto& id = k.id; + auto pos = id.find_last_of(':'); + if (pos == id_type::npos) { + throw std::invalid_argument(format("Not a valid key id: {}", id)); + } + + kms_log.debug("Finding key: {}", id); + + std::string kid(id.begin(), id.begin() + pos); + std::string enc(id.begin() + pos + 1, id.end()); + + auto query = rjson::empty_object(); + rjson::add(query, "CiphertextBlob", enc); + rjson::add(query, "KeyId", kid); + + auto response = co_await post("Decrypt", k.aws_assume_role_arn, query); + auto data = base64_decode(rjson::get(response, "Plaintext")); + + // we know nothing about key type etc, so just return data. + co_return data; +} + +encryption::kms_host::kms_host(encryption_context& ctxt, const std::string& name, const host_options& options) + : _impl(std::make_unique(ctxt, name, options)) +{} + +encryption::kms_host::kms_host(encryption_context& ctxt, const std::string& name, const std::unordered_map& map) + : kms_host(ctxt, name, [&map] { + host_options opts; + map_wrapper> m(map); + + opts.aws_access_key_id = m("aws_access_key_id").value_or(""); + opts.aws_secret_access_key = m("aws_secret_access_key").value_or(""); + opts.aws_region = m("aws_region").value_or(""); + opts.aws_profile = m("aws_profile").value_or(""); + opts.aws_assume_role_arn = m("aws_assume_role_arn").value_or(""); + opts.aws_use_ec2_credentials = is_true(m("aws_use_ec2_credentials").value_or("false")); + opts.aws_use_ec2_region = is_true(m("aws_use_ec2_region").value_or("false")); + + // use "endpoint" semantics to match AWS configs. 
+ opts.endpoint = m("endpoint").value_or(""); + opts.host = m("host").value_or(""); + opts.port = std::stoi(m("port").value_or("0")); + + opts.master_key = m("master_key").value_or(""); + opts.certfile = m("certfile").value_or(""); + opts.keyfile = m("keyfile").value_or(""); + opts.truststore = m("truststore").value_or(""); + opts.priority_string = m("priority_string").value_or(""); + + opts.key_cache_expiry = parse_expiry(m("key_cache_expiry")); + opts.key_cache_refresh = parse_expiry(m("key_cache_refresh")); + return opts; + }()) +{} + +encryption::kms_host::~kms_host() = default; + +future<> encryption::kms_host::init() { + return _impl->init(); +} + +const encryption::kms_host::host_options& encryption::kms_host::options() const { + return _impl->options(); +} + +future, encryption::kms_host::id_type>> encryption::kms_host::get_or_create_key(const key_info& info, const option_override* oov) { + return _impl->get_or_create_key(info, oov); +} + +future> encryption::kms_host::get_key_by_id(const id_type& id, const key_info& info, const option_override* oov) { + return _impl->get_key_by_id(id, info, oov); +} + diff --git a/ent/encryption/kms_host.hh b/ent/encryption/kms_host.hh new file mode 100644 index 0000000000..ac9fd1de41 --- /dev/null +++ b/ent/encryption/kms_host.hh @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2022 ScyllaDB + * + */ + +/* + * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 + */ + +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + +#include "symmetric_key.hh" + +namespace encryption { + +class encryption_context; +struct key_info; + +class kms_host { +public: + struct host_options { + std::string endpoint; + // or... 
+ std::string host; + uint16_t port = 0; // defaults to 0, the same fallback the map-based constructor applies + bool https = true; + // auth + std::string aws_access_key_id; + std::string aws_secret_access_key; + std::string aws_region; + std::string aws_profile; + std::string aws_assume_role_arn; + + bool aws_use_ec2_credentials = false; // defaults to false, the same fallback the map-based constructor applies + bool aws_use_ec2_region = false; // defaults to false, the same fallback the map-based constructor applies + + // key to use for keys + std::string master_key; + // tls. if unspeced, use system for https + // AWS does not (afaik?) allow certificate auth + // but we keep the option available just in case. + std::string certfile; + std::string keyfile; + std::string truststore; + std::string priority_string; + + std::optional key_cache_expiry; + std::optional key_cache_refresh; + }; + using id_type = bytes; + + kms_host(encryption_context&, const std::string& name, const host_options&); + kms_host(encryption_context&, const std::string& name, const std::unordered_map&); + ~kms_host(); + + future<> init(); + const host_options& options() const; + + struct option_override { + std::optional master_key; + std::optional aws_assume_role_arn; + }; + + future, id_type>> get_or_create_key(const key_info&, const option_override* = nullptr); + future> get_key_by_id(const id_type&, const key_info&, const option_override* = nullptr); +private: + class impl; + std::unique_ptr _impl; +}; + +} diff --git a/ent/encryption/kms_key_provider.cc b/ent/encryption/kms_key_provider.cc new file mode 100644 index 0000000000..3140de1513 --- /dev/null +++ b/ent/encryption/kms_key_provider.cc @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2022 ScyllaDB + * + */ + +/* + * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 + */ + +#include +#include + +#include "kms_key_provider.hh" +#include "kms_host.hh" + +namespace encryption { + +class kms_key_provider : public key_provider { +public: + kms_key_provider(::shared_ptr kms_host, std::string name, kms_host::option_override oov) + : _kms_host(std::move(kms_host)) + , _name(std::move(name)) + , _oov(std::move(oov)) + {} + future> key(const key_info& info, opt_bytes 
id) override { + if (id) { + return _kms_host->get_key_by_id(*id, info, &_oov).then([id](key_ptr k) { + return make_ready_future>(std::tuple(k, id)); + }); + } + return _kms_host->get_or_create_key(info, &_oov).then([](std::tuple k_id) { + return make_ready_future>(k_id); + }); + } + void print(std::ostream& os) const override { + os << _name; + } +private: + ::shared_ptr _kms_host; + std::string _name; + kms_host::option_override _oov; +}; + +shared_ptr kms_key_provider_factory::get_provider(encryption_context& ctxt, const options& map) { + opt_wrapper opts(map); + auto kms_host = opts("kms_host"); + kms_host::option_override oov { + .master_key = opts("master_key"), + .aws_assume_role_arn = opts("aws_assume_role_arn"), + }; + + if (!kms_host) { + throw std::invalid_argument("kms_host must be provided"); + } + + auto host = ctxt.get_kms_host(*kms_host); + auto id = kms_host.value() + + ":" + oov.master_key.value_or(host->options().master_key) + + ":" + oov.aws_assume_role_arn.value_or(host->options().aws_assume_role_arn) + ; + auto provider = ctxt.get_cached_provider(id); + + if (!provider) { + provider = ::make_shared(host, *kms_host, std::move(oov)); + ctxt.cache_provider(id, provider); + } + + return provider; +} + +} diff --git a/ent/encryption/kms_key_provider.hh b/ent/encryption/kms_key_provider.hh new file mode 100644 index 0000000000..ae0cf0eeba --- /dev/null +++ b/ent/encryption/kms_key_provider.hh @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2022 ScyllaDB + * + */ + +/* + * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 + */ + +#pragma once + +#include "encryption.hh" +#include "system_key.hh" + +namespace encryption { + +class kms_key_provider_factory : public key_provider_factory { +public: + shared_ptr get_provider(encryption_context&, const options&) override; +}; + +/** + * As it stands today, given system_key api (gives keys), and + * what it is used for (config encryption), we cannot provide + * a KMS system key. 
This is because: + * + * a.) KMS does not allow us to store a named object (key) in a secure(ish) way. + * We can encrypt/decrypt and create one-off keys for local usage, which are + * encoded in their own ID (see kms_host), but having a unique key from + * a "path" is not possible. Esp. due to key rotation, encrypted data preamble + * etc. We could keep the encrypted key material in a local file, then decrypt + * it using a named key on startup, but given b.) it is dubious if this is useful. + * b.) System keys are only used for config encryption. The authentication config for + * AWS/KMS access is typically one of the things that should be encrypted. Thus + * we would create a big chicken and egg problem here. + */ +} diff --git a/ent/encryption/local_file_provider.cc b/ent/encryption/local_file_provider.cc new file mode 100644 index 0000000000..2fe1a3e9fa --- /dev/null +++ b/ent/encryption/local_file_provider.cc @@ -0,0 +1,292 @@ +/* + * Copyright (C) 2018 ScyllaDB + * + */ + +/* + * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 + */ +#include +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include + +#include "local_file_provider.hh" +#include "symmetric_key.hh" +#include "encryption.hh" +#include "encryption_exceptions.hh" +#include "encryption_config.hh" +#include "db/config.hh" + +namespace encryption { + +namespace bfs = std::filesystem; + +const sstring default_key_file_path = (bfs::path(db::config::get_conf_dir()) / "data_encryption_keys").string(); + +static const key_info system_key_info{ "System", 0 }; + +class local_file_provider : public key_provider { +public: + local_file_provider(encryption_context& ctxt, const bfs::path& path, bool must_exist = false) + : local_file_provider(ctxt, sstring(bfs::absolute(path).string()), must_exist) + {} + local_file_provider(encryption_context& ctxt, const sstring& path, bool must_exist = false) + : _ctxt(ctxt) + , _path(path) + , _sem(1) + , 
_must_exist(must_exist) + {} + future> key(const key_info& info, opt_bytes = {}) override { + // TODO: assert options -> my key + auto i = _keys.find(info); + if (i != _keys.end()) { + return make_ready_future>(std::tuple(i->second, std::nullopt)); + } + return load_or_create(info).then([](key_ptr k) { + return make_ready_future>(std::tuple(k, std::nullopt)); + }); + } + future<> validate() const override { + auto f = make_ready_future<>(); + if (!_must_exist) { + return f; + } + // if we must exist, we don't change. Ok to open from all shards. + return f.then([this] { + return open_file_dma(_path, open_flags::ro).then([](file f) { + return f.close(); + }); + }).handle_exception([this](auto ep) { + try { + std::rethrow_exception(ep); + } catch (...) { + std::throw_with_nested(missing_resource_error("Could not read '" + _path + "'")); + } + }); + } + + const sstring& path() const { + return _path; + } + void print(std::ostream& os) const override { + os << "key=" << _path; + } + +private: + future load_or_create(const key_info&); + future load_or_create_local(const key_info&); + future<> read_key_file(); + future write_key_file(key_info); + + std::unordered_map _keys; + encryption_context& _ctxt; + sstring _path; + semaphore _sem; + bool _read_file = false; + bool _must_exist = false; +}; + +shared_ptr local_file_provider_factory::find(encryption_context& ctxt, const sstring& path) { + auto p = ctxt.get_cached_provider(path); + if (!p) { + p = make_shared(ctxt, path); + ctxt.cache_provider(path, p); + } + return p; +} + +shared_ptr local_file_provider_factory::get_provider(encryption_context& ctxt, const options& map) { + opt_wrapper opts(map); + return find(ctxt, opts(SECRET_KEY_FILE).value_or(default_key_file_path)); +} + +future +local_file_provider::load_or_create(const key_info& info) { + // if someone uses a system key as a table key, we could still race + // here. 
but that is a user error, so ignore + if (this_shard_id() == 0 || &info == &system_key_info) { + return load_or_create_local(info); + } + + struct data { + bytes key; + key_info info; + }; + + /** + * Key files are singular. Not sharded. This would be ok if we only read from them. + * But in keeping with dse compat, we don't. So rather than dealing with lock files + * or whatnot, we simply say that a single file is handled by a single key object, + * and only on shard 0. So if we are not shard 0, we call to there, find our + * counterpart object (local_file_provider_factory::find), and as him about the + * key data instead. He in turn will sync on his semaphore. + * + * The downside is that we are not resilient against multiple processes messing + * with the key file, but neither is dse + */ + return do_with(data{bytes(bytes::initialized_later(), info.len/8), info}, [this](data& i) { + return smp::submit_to(0, [this, &i]{ + auto kp = static_pointer_cast(local_file_provider_factory::find(_ctxt, _path)); + auto f = kp->load_or_create_local(i.info); + return f.then([&i, kp](key_ptr k) { + auto& kd = k->key(); + i.key.resize(kd.size()); + std::copy(kd.begin(), kd.end(), i.key.begin()); + }); + }).then([this, &i] { + auto k = make_shared(i.info, i.key); + _keys.emplace(i.info, k); + return make_ready_future(std::move(k)); + }); + }); +} + +future +local_file_provider::load_or_create_local(const key_info& info) { + if (_keys.count(info)) { + return make_ready_future(_keys.at(info)); + } + return read_key_file().then([this, info] { + if (_keys.count(info)) { + return make_ready_future(_keys.at(info)); + } + if (info == system_key_info) { + if (_keys.size() != 1) { + _keys.clear(); + return make_exception_future(std::invalid_argument("System key must contain exactly one entry")); + } + auto k = _keys.begin()->second; + _keys.clear(); + _keys.emplace(info, k); + return make_ready_future(k); + } + // create it. 
+ return write_key_file(info); + }); +} + +future<> local_file_provider::read_key_file() { + if (_read_file) { + return make_ready_future(); + } + + // #1923 - a key can have a descriptor string line "AES:128:" iff user relies on + // defaults. Must match this as well. + static const std::regex key_line_expr(R"foo((\w+(?:\/\w+)?(?:\/\w+)?)\:(\d+)\:(\S+)\s*)foo"); + + return with_semaphore(_sem, 1, [this] { + // could do this twice, but it is only reading + return read_text_file_fully(_path).then([this](temporary_buffer buf) { + auto i = std::cregex_iterator(buf.begin(), buf.end(), key_line_expr); + auto e = std::cregex_iterator(); + + while (i != e) { + std::cmatch m = *i; + auto alg = m[1].str(); + auto len = std::stoul(m[2].str()); + auto key = m[3].str(); + + auto info = key_info{alg, unsigned(len)}; + if (!_keys.count(info)) { + auto kb = base64_decode(key); + auto k = make_shared(info, kb); + _keys.emplace(info, std::move(k)); + } + ++i; + } + _read_file = true; + }).handle_exception([this](auto ep) { + try { + std::rethrow_exception(ep); + } catch (std::system_error& e) { + if (e.code() == std::error_code(ENOENT, std::system_category())) { + if (!_must_exist) { + return; + } + std::throw_with_nested(configuration_error("Key file '" + _path + "' does not exist")); + } + std::throw_with_nested(service_error("read_key_file")); + } catch (std::invalid_argument& e) { + std::throw_with_nested(configuration_error(fmt::format("read_key_file: {}", e.what()))); + } catch (...) { + std::throw_with_nested(service_error(fmt::format("read_key_file: {}", std::current_exception()))); + } + }); + }); +} + +future local_file_provider::write_key_file(key_info info) { + return with_semaphore(_sem, 1, [this, info] { + // we can get here more than once if shards race. + // however, we only need to use/write the first key matching + // the required info. 
+ if (_keys.count(info)) { + return make_ready_future(_keys.at(info)); + } + + auto k = make_shared(info); + + std::ostringstream ss; + for (auto& p : _keys) { + ss << p.first.alg << ":" << p.first.len << ":" << base64_encode(p.second->key()) << std::endl; + } + ss << info.alg << ":" << info.len << ":" << base64_encode(k->key()) << std::endl; + auto s = ss.str(); + auto tmpnam = _path + ".tmp"; + auto f = make_ready_future<>(); + if (!_must_exist) { + f = seastar::recursive_touch_directory((bfs::path(tmpnam).remove_filename()).string()); + } + return f.then([this, tmpnam, s] { + return write_text_file_fully(tmpnam, s).then([this, tmpnam] { + return rename_file(tmpnam, _path); + }); + }).then([this, k, info] { + // don't cache until written + _keys[info] = k; + return make_ready_future(k); + }); + }).handle_exception([this](auto ep) -> key_ptr{ + try { + std::rethrow_exception(ep); + } catch (...) { + std::throw_with_nested(service_error("Could not write key file '" + _path + "'")); + } + }); +} + +local_system_key::local_system_key(encryption_context& ctxt, const sstring& path) + : _provider(make_shared(ctxt, bfs::path(ctxt.config().system_key_directory()) / bfs::path(path), true)) +{} + +local_system_key::~local_system_key() +{} + +future> local_system_key::get_key() { + return _provider->key(system_key_info).then([](std::tuple k_id) { + return make_ready_future>(std::get<0>(std::move(k_id))); + }); +} + +future<> local_system_key::validate() const { + // first, just validate the file provider itself + co_await _provider->validate(); + // second, do an early load of the actual key to ensure file contents. 
+ co_await _provider->key(system_key_info); +} + +const sstring& local_system_key::name() const { + return _provider->path(); +} + +} diff --git a/ent/encryption/local_file_provider.hh b/ent/encryption/local_file_provider.hh new file mode 100644 index 0000000000..9378e413c8 --- /dev/null +++ b/ent/encryption/local_file_provider.hh @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2018 ScyllaDB + * + */ + +/* + * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 + */ + +#pragma once + +#include "encryption.hh" +#include "system_key.hh" + +namespace encryption { + +const extern sstring default_key_file_path; + +class local_file_provider; + +class local_file_provider_factory : public key_provider_factory { +public: + static shared_ptr find(encryption_context&, const sstring& path); + shared_ptr get_provider(encryption_context&, const options&) override; +}; + +class local_system_key : public system_key { + shared_ptr _provider; +public: + local_system_key(encryption_context&, const sstring&); + ~local_system_key(); + + future> get_key() override; + future<> validate() const override; + const sstring& name() const override; + bool is_local() const override { + return true; + } +}; + +} diff --git a/ent/encryption/replicated_key_provider.cc b/ent/encryption/replicated_key_provider.cc new file mode 100644 index 0000000000..9a06950fcc --- /dev/null +++ b/ent/encryption/replicated_key_provider.cc @@ -0,0 +1,477 @@ +/* + * Copyright (C) 2015 ScyllaDB + * + */ + +/* + * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 + */ +#include +#include +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include "utils/to_string.hh" + +#include "replicated_key_provider.hh" +#include "encryption.hh" +#include "encryption_exceptions.hh" +#include "local_file_provider.hh" +#include "symmetric_key.hh" +#include "replica/database.hh" +#include "cql3/query_processor.hh" +#include 
"cql3/untyped_result_set.hh" +#include "utils/UUID.hh" +#include "utils/UUID_gen.hh" +#include "utils/hash.hh" +#include "service/storage_service.hh" +#include "service/migration_manager.hh" +#include "compaction/compaction_manager.hh" +#include "replica/distributed_loader.hh" +#include "schema/schema_builder.hh" +#include "db/system_keyspace.hh" +#include "db/extensions.hh" +#include "locator/everywhere_replication_strategy.hh" + +namespace encryption { + +static auto constexpr KSNAME = "system_replicated_keys"; +static auto constexpr TABLENAME = "encrypted_keys"; + +static logger log("replicated_key_provider"); + +using utils::UUID; + +class replicated_key_provider : public key_provider { +public: + static constexpr int8_t version = 0; + /** + * Header: + * 1 byte version + * 16 bytes UUID of key + * 16 bytes MD5 of UUID + */ + static const size_t header_size = 33; + + struct key_id { + key_info info; + opt_bytes id; + + key_id(key_info k, opt_bytes b = {}) + : info(std::move(k)) + , id(std::move(b)) + {} + bool operator==(const key_id& v) const { + return info == v.info && id == v.id; + } + }; + + struct key_id_hash { + size_t operator()(const key_id& id) const { + return utils::tuple_hash()(std::tie(id.info.alg, id.info.len, id.id)); + } + }; + + replicated_key_provider(encryption_context& ctxt, shared_ptr system_key, shared_ptr local_provider) + : _ctxt(ctxt) + , _system_key(std::move(system_key)) + , _local_provider(std::move(local_provider)) + {} + + + future> key(const key_info&, opt_bytes = {}) override; + future<> validate() const override; + future<> maybe_initialize_tables(); + static future<> do_initialize_tables(::replica::database& db, service::migration_manager&); + + bool should_delay_read(const opt_bytes& id) const override { + if (!id || _initialized) { + return false; + } + if (!_initialized) { + return true; + } + auto& qp = _ctxt.get_query_processor(); + // This check should be ok, and even somewhat redundant. 
"Initialized" above + // will only be set once we've generated/queried a key not passing through here + // (i.e. a key for write _or_ commit log (should we allow this)). This can only be + // done if: + // a.) Encryption was already set up, thus table existed and we waited + // for distributed_tables in "ensure_populated" + // b.) Encryption was added. In which case we are way past bootstrap + // and can receive user commands. + // c.) System table/commit log write, with either first use of this provider, + // in which case we're creating the table (here at least) - thus fine, + // or again, we've waited through "ensure_populated", so keys are + // readable. At worst, we create a few extra keys. + // Note: currently c.) is not relevant, as we don't support system/commitlog + // encryption using repl_prov. + return !qp.local_is_initialized(); // NOTE(review): looks unreachable, since the two early returns at the top of this function already cover both values of _initialized; confirm which behaviour is intended + } + + void print(std::ostream& os) const override { + os << "system_key=" << _system_key->name() << ", local=" << *_local_provider; + } + +private: + void store_key(const key_id&, const UUID&, key_ptr); + + static opt_bytes decode_id(const opt_bytes&); + static bytes encode_id(const UUID&); + + future> get_key(const key_info&, opt_bytes = {}); + + future load_or_create(const key_info&); + future load_or_create_local(const key_info&); + future<> read_key_file(); + future<> write_key_file(); + + template + future<::shared_ptr> query(sstring, Args&& ...); + + future<> force_blocking_flush(); + + encryption_context& _ctxt; + shared_ptr _system_key; + shared_ptr _local_provider; + std::unordered_map, key_id_hash> _keys; + + bool _initialized = false; + bool _use_cache = true; + + friend class replicated_key_provider_factory; + + static const utils::UUID local_fallback_uuid; + static const bytes local_fallback_id; + static const bytes_view local_fallback_bytes; +}; + +using namespace std::chrono_literals; + +static const timeout_config rkp_db_timeout_config { + 5s, 5s, 5s, 5s, 5s, 5s, 5s, +}; + +static service::query_state& 
rkp_db_query_state() { + static thread_local service::client_state cs(service::client_state::internal_tag{}, rkp_db_timeout_config); + static thread_local service::query_state qs(cs, empty_service_permit()); + return qs; +} + +template +future<::shared_ptr> replicated_key_provider::query(sstring q, Args&& ...params) { + auto mode = co_await _ctxt.get_storage_service().local().get_operation_mode(); + if (mode != service::storage_service::mode::STARTING) { + co_return co_await _ctxt.get_query_processor().local().execute_internal(q, { std::forward(params)...}, cql3::query_processor::cache_internal::no); + } + co_return co_await _ctxt.get_query_processor().local().execute_internal(q, db::consistency_level::ONE, rkp_db_query_state(), { std::forward(params)...}, cql3::query_processor::cache_internal::no); +} + +future<> replicated_key_provider::force_blocking_flush() { + return _ctxt.get_database().invoke_on_all([](replica::database& db) { + // if (!Boolean.getBoolean("cassandra.unsafesystem")) + replica::column_family& cf = db.find_column_family(KSNAME, TABLENAME); + return cf.flush(); + }); +} + +void replicated_key_provider::store_key(const key_id& id, const UUID& uuid, key_ptr k) { + if (!_use_cache) { + return; + } + _keys[id] = std::make_pair(uuid, k); + if (!id.id) { + _keys[key_id(id.info, uuid.serialize())] = std::make_pair(uuid, k); + } +} + +opt_bytes replicated_key_provider::decode_id(const opt_bytes& b) { // returns the 16-byte UUID embedded in an encoded id, or nullopt if the id is absent or malformed + if (b) { + auto i = b->begin(); + // layout written by encode_id: 1 version byte, 16 bytes UUID, 16 bytes MD5 of the UUID + if (b->size() == header_size && *i == version) { // length is checked first so an empty id is never dereferenced + bytes id(i + 1, i + 1 + 16); // UUID follows the version byte + bytes md(i + 1 + 16, b->end()); // trailing checksum + if (calculate_md5(id) == md) { + return id; + } + } + } + return std::nullopt; +} + +bytes replicated_key_provider::encode_id(const UUID& uuid) { + bytes b{bytes::initialized_later(), header_size}; + auto i = b.begin(); + *i++ = version; + uuid.serialize(i); + auto md = calculate_md5(b, 1, 16); + std::copy(md.begin(), md.end(), i); + return b; +} + +const utils::UUID 
replicated_key_provider::local_fallback_uuid(0u, 0u); // not valid! +const bytes replicated_key_provider::local_fallback_id = encode_id(local_fallback_uuid); +const bytes_view replicated_key_provider::local_fallback_bytes(local_fallback_id.data() + 1, 16); + +future> replicated_key_provider::key(const key_info& info, opt_bytes input) { // resolves the key for info; input, when set, is an id in the encode_id layout + opt_bytes id; + + if (input) { //reading header? + auto v = *input; + if (v.size() >= header_size && v[0] == version) { // ignore empty or truncated ids instead of reading past their end + bytes bid(v.begin() + 1, v.begin() + 1 + 16); + bytes md(v.begin() + 1 + 16, v.begin() + 1 + 32); + if (calculate_md5(bid) == md) { + id = bid; + } + } + } + + bool try_local = id == local_fallback_bytes; + + // if the id indicates the key came from local fallback, don't even + // try keyspace lookup. + if (!try_local) { + try { + auto [uuid, k] = co_await get_key(info, std::move(id)); + co_return std::make_tuple(k, encode_id(uuid)); + } catch (std::invalid_argument& e) { + std::throw_with_nested(configuration_error(e.what())); + } catch (...) { + auto ep = std::current_exception(); + log.warn("Exception looking up key {}: {}", info, ep); + if (_local_provider) { + try { + std::rethrow_exception(ep); + } catch (replica::no_such_keyspace&) { + } catch (exceptions::invalid_request_exception&) { + } catch (exceptions::read_failure_exception&) { + } catch (...) { + std::throw_with_nested(service_error(fmt::format("key: {}", std::current_exception()))); + } + if (!id) { + try_local = true; + } + } + if (!try_local) { + std::throw_with_nested(service_error(fmt::format("key: {}", std::current_exception()))); + } + } + } + + log.warn("Falling back to local key {}", info); + auto [k, nid] = co_await _local_provider->key(info, id); + if (nid && nid != id) { + // local provider does not give ids. 
+ throw malformed_response_error("Expected null id back from local provider"); + } + co_return std::make_tuple(k, local_fallback_id); +} + +future> replicated_key_provider::get_key(const key_info& info, opt_bytes opt_id) { + if (!_initialized) { + co_await maybe_initialize_tables(); + } + + key_id id(info, std::move(opt_id)); + auto i = _keys.find(id); + if (i != _keys.end()) { + co_return std::tuple(i->second.first, i->second.second); + } + + // TODO: origin does non-cql acquire of all available keys from + // replicas in the "host_ids" table iff we get here during boot. + // For now, ignore this and assume that if we have a sstable with + // key X, we should have a local replica of X as well, given + // the "everywhere strategy of the keys table. + + auto cipher = info.alg.substr(0, info.alg.find('/')); // e.g. "AES" + + UUID uuid; + shared_ptr res; + + if (id.id) { + uuid = utils::UUID_gen::get_UUID(*id.id); + log.debug("Finding key {} ({})", uuid, info); + auto s = fmt::format("SELECT * FROM {}.{} WHERE key_file=? AND cipher=? AND strength=? AND key_id=?;", KSNAME, TABLENAME); + res = co_await query(std::move(s), _system_key->name(), cipher, int32_t(id.info.len), uuid); + + // if we find nothing, and we actually queried a specific key (by uuid), we've failed. + if (res->empty()) { + log.debug("Could not find key {}", id.id); + throw std::runtime_error(fmt::format("Unable to find key for cipher={} strength={} id={}", cipher, id.info.len, uuid)); + } + } else { + log.debug("Finding key ({})", info); + auto s = fmt::format("SELECT * FROM {}.{} WHERE key_file=? AND cipher=? AND strength=? LIMIT 1;", KSNAME, TABLENAME); + res = co_await query(std::move(s), _system_key->name(), cipher, int32_t(id.info.len)); + } + + // otoh, if we don't need a specific key, we can just create a new one (writing a sstable) + if (res->empty()) { + uuid = utils::UUID_gen::get_time_UUID(); + + log.debug("No key found. 
Generating {}", uuid); + + auto k = make_shared(id.info); + store_key(id, uuid, k); + + auto b = co_await _system_key->encrypt(k->key()); + auto ks = base64_encode(b); + log.trace("Inserting generated key {}", uuid); + co_await query(fmt::format("INSERT INTO {}.{} (key_file, cipher, strength, key_id, key) VALUES (?, ?, ?, ?, ?)", + KSNAME, TABLENAME), _system_key->name(), cipher, int32_t(id.info.len), uuid, ks + ); + log.trace("Flushing key table"); + co_await force_blocking_flush(); + + co_return std::tuple(uuid, k); + } + + // found it + auto& row = res->one(); + uuid = row.get_as("key_id"); + auto ks = row.get_as("key"); + auto kb = base64_decode(ks); + auto b = co_await _system_key->decrypt(kb); + auto k = make_shared(id.info, b); + store_key(id, uuid, k); + + co_return std::tuple(uuid, k); +} + +future<> replicated_key_provider::validate() const { + try { + co_await _system_key->validate(); + } catch (...) { + std::throw_with_nested(std::invalid_argument(fmt::format("Could not validate system key: {}", _system_key->name()))); + } + if (_local_provider){ + co_await _local_provider->validate(); + } +} + +schema_ptr encrypted_keys_table() { + static thread_local auto schema = [] { + auto id = generate_legacy_id(KSNAME, TABLENAME); + return schema_builder(KSNAME, TABLENAME, std::make_optional(id)) + .with_column("key_file", utf8_type, column_kind::partition_key) + .with_column("cipher", utf8_type, column_kind::partition_key) + .with_column("strength", int32_type, column_kind::clustering_key) + .with_column("key_id", timeuuid_type, column_kind::clustering_key) + .with_column("key", utf8_type) + .with_hash_version() + .build(); + }(); + return schema; +} + +future<> replicated_key_provider::maybe_initialize_tables() { + if (!_initialized) { + co_await do_initialize_tables(_ctxt.get_database().local(), _ctxt.get_migration_manager().local()); + _initialized = true; + } +} + +future<> replicated_key_provider::do_initialize_tables(::replica::database& db, 
service::migration_manager& mm) { + if (db.has_schema(KSNAME, TABLENAME)) { + co_return; + } + + log.debug("Creating keyspace and table"); + if (!db.has_keyspace(KSNAME)) { + auto group0_guard = co_await mm.start_group0_operation(); + auto ts = group0_guard.write_timestamp(); + try { + auto ksm = keyspace_metadata::new_keyspace( + KSNAME, + "org.apache.cassandra.locator.EverywhereStrategy", + {}, + std::nullopt, + true); + co_await mm.announce(service::prepare_new_keyspace_announcement(db, ksm, ts), std::move(group0_guard), fmt::format("encryption at rest: create keyspace {}", KSNAME)); + } catch (exceptions::already_exists_exception&) { + } + } + auto group0_guard = co_await mm.start_group0_operation(); + auto ts = group0_guard.write_timestamp(); + try { + co_await mm.announce(co_await service::prepare_new_column_family_announcement(mm.get_storage_proxy(), encrypted_keys_table(), ts), std::move(group0_guard), + fmt::format("encryption at rest: create table {}.{}", KSNAME, TABLENAME)); + } catch (exceptions::already_exists_exception&) { + } + auto& ks = db.find_keyspace(KSNAME); + auto& rs = ks.get_replication_strategy(); + // should perhaps check name also.. + if (rs.get_type() != locator::replication_strategy_type::everywhere_topology) { + // TODO: reset to everywhere + repair. 
+ } +} + +const size_t replicated_key_provider::header_size; + +replicated_key_provider_factory::replicated_key_provider_factory() +{} + +replicated_key_provider_factory::~replicated_key_provider_factory() +{} + +namespace bfs = std::filesystem; + +shared_ptr replicated_key_provider_factory::get_provider(encryption_context& ctxt, const options& map) { + opt_wrapper opts(map); + auto system_key_name = opts(SYSTEM_KEY_FILE).value_or("system_key"); + if (system_key_name.find('/') != sstring::npos) { + throw std::invalid_argument("system_key cannot contain '/'"); + } + + auto system_key = ctxt.get_system_key(system_key_name); + auto local_key_file = bfs::absolute(bfs::path(opts(SECRET_KEY_FILE).value_or(default_key_file_path))); + + if (system_key->is_local() && bfs::absolute(bfs::path(system_key->name())) == local_key_file) { + throw std::invalid_argument("system key and local key cannot be the same"); + } + + auto name = system_key->name() + ":" + local_key_file.string(); + auto debug = opts("DEBUG"); + if (debug) { + name = name + ":" + *debug; + } + auto p = ctxt.get_cached_provider(name); + if (!p) { + auto rp = seastar::make_shared(ctxt, std::move(system_key), local_file_provider_factory::find(ctxt, local_key_file.string())); + ctxt.cache_provider(name, rp); + + if (debug && debug->find("nocache") != sstring::npos) { + log.debug("Turn off cache"); + rp->_use_cache = false; + } + p = std::move(rp); + } + + return p; +} + +void replicated_key_provider_factory::init(db::extensions& exts) { + exts.add_extension_internal_keyspace(KSNAME); +} + +future<> replicated_key_provider_factory::on_started(::replica::database& db, service::migration_manager& mm) { + return replicated_key_provider::do_initialize_tables(db, mm); +} + +} diff --git a/ent/encryption/replicated_key_provider.hh b/ent/encryption/replicated_key_provider.hh new file mode 100644 index 0000000000..cd20208e9e --- /dev/null +++ b/ent/encryption/replicated_key_provider.hh @@ -0,0 +1,39 @@ +/* + * Copyright 
(C) 2015 ScyllaDB + * + */ + +/* + * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 + */ + +#pragma once + +#include "encryption.hh" + +namespace db { +class extensions; +} + +namespace replica { +class database; +} + +namespace service { +class migration_manager; +} + +namespace encryption { + +class replicated_key_provider_factory : public key_provider_factory { +public: + replicated_key_provider_factory(); + ~replicated_key_provider_factory(); + + shared_ptr get_provider(encryption_context&, const options&) override; + + static void init(db::extensions&); + static future<> on_started(::replica::database&, service::migration_manager&); +}; + +} diff --git a/ent/encryption/symmetric_key.cc b/ent/encryption/symmetric_key.cc new file mode 100644 index 0000000000..7f3a267f9b --- /dev/null +++ b/ent/encryption/symmetric_key.cc @@ -0,0 +1,396 @@ +/* + * Copyright (C) 2018 ScyllaDB + * + */ + +/* + * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 + */ +#include +#include +#include + +#include +#include +#include + +#if OPENSSL_VERSION_NUMBER >= (3<<28) +# include +#endif + +#include +#include + +#include "symmetric_key.hh" +#include "utils/hash.hh" + +namespace { +struct openssl_env { + OSSL_PROVIDER* legacy_provider = nullptr; + OSSL_PROVIDER* default_provider = nullptr; + openssl_env() { + OpenSSL_add_all_ciphers(); +#if OPENSSL_VERSION_NUMBER >= (3<<28) + legacy_provider = OSSL_PROVIDER_load(NULL, "legacy"); + default_provider = OSSL_PROVIDER_load(NULL, "default"); +#endif + } + ~openssl_env() { + OSSL_PROVIDER_unload(legacy_provider); + OSSL_PROVIDER_unload(default_provider); + } +}; +static const openssl_env ossl_env; +} + +std::ostream& encryption::operator<<(std::ostream& os, const key_info& info) { + return os << info.alg << ":" << info.len; +} + +static void throw_evp_error(std::string msg) { + auto e = ERR_get_error(); + if (e != 0) { + char buf[512]; + ERR_error_string_n(e, buf, sizeof(buf)); + msg += "(" + 
std::string(buf) + ")"; + } + throw std::runtime_error(msg); +} + +bool encryption::key_info::compatible(const key_info& rhs) const { + sstring malg, halg; + std::tie(malg, std::ignore, std::ignore) = parse_key_spec(alg); + std::tie(halg, std::ignore, std::ignore) = parse_key_spec(rhs.alg); + if (malg != halg) { + return false; + } + // If lengths differ we need to actual create keys to + // check what the true lengths are. Since openssl and + // java designators count different for DES etc. + if (len != rhs.len) { + symmetric_key k1(*this); + symmetric_key k2(rhs); + if (k1.key().size() != k2.key().size()) { + return false; + } + } + return true; +} + +std::tuple +encryption::parse_key_spec(const sstring& alg) { + static const std::regex alg_exp(R"foo(^(\w+)(?:\/(\w+))?(?:\/(\w+))?$)foo"); + + std::cmatch m; + if (!std::regex_match(alg.begin(), alg.end(), m, alg_exp)) { + throw std::invalid_argument("Invalid algorithm string: " + alg); + } + + auto type = m[1].str(); + auto mode = m[2].str(); + auto padd = m[3].str(); + + std::transform(type.begin(), type.end(), type.begin(), ::tolower); + std::transform(mode.begin(), mode.end(), mode.begin(), ::tolower); + std::transform(padd.begin(), padd.end(), padd.begin(), ::tolower); + + static const std::string padding = "padding"; + if (padd.size() > padding.size() && std::equal(padding.rbegin(), padding.rend(), padd.rbegin())) { + padd.resize(padd.size() - padding.size()); + } + + return std::make_tuple(type, mode, padd); +} + +std::tuple encryption::parse_key_spec_and_validate_defaults(const sstring& alg) { + auto [type, mode, padd] = parse_key_spec(alg); + + // openssl AND kmip server(s?) does not allow missing block mode. so default one. + if (mode.empty()) { + mode = "cbc"; + } + + // OpenSSL only supports one form of padding. We used to just allow + // non-empty string -> pkcs5/pcks7. 
Better to verify + // (note: pcks5 is sortof a misnomeanor here, as in the Sun world, it + // sort of means "pkcs7 with automatic block size" - which is pretty + // much how things are in the OpenSSL universe as well) + if (padd == "no") { + padd = ""; + } + if (!padd.empty() && padd != "pkcs5" && padd != "pkcs" && padd != "pkcs7") { + throw std::invalid_argument("non-supported padding option: " + padd); + } + + return { type, mode, padd }; +} + +encryption::symmetric_key::symmetric_key(const key_info& info, const bytes& key) + : _ctxt(EVP_CIPHER_CTX_new(), &EVP_CIPHER_CTX_free) + , _info(info) + , _key(key) +{ + if (!_ctxt) { + throw std::bad_alloc(); + } + + sstring type, mode, padd; + std::tie(type, mode, padd) = parse_key_spec_and_validate_defaults(info.alg); + + // Note: we are using some types here that are explicitly marked as "unsupported - placeholder" + // in gnutls. + + // camel case vs. dash + if (type == "desede") { + type = "des-ede"; + // and 168-bits desede is ede3 in openssl... + if (info.len > 16*8) { + type = "des-ede3"; + } + } + + auto str = fmt::format("{}-{}-{}", type, info.len, mode); + auto cipher = EVP_get_cipherbyname(str.c_str()); + + if (!cipher) { + str = fmt::format("{}-{}", type, mode); + cipher = EVP_get_cipherbyname(str.c_str()); + } + if (!cipher) { + str = fmt::format("{}-{}", type, info.len); + cipher = EVP_get_cipherbyname(str.c_str()); + } + if (!cipher) { + str = type; + cipher = EVP_get_cipherbyname(str.c_str()); + } + if (!cipher) { + throw_evp_error("Invalid algorithm: " + info.alg); + } + + size_t len = EVP_CIPHER_key_length(cipher); + + if ((_info.len/8) != len) { + if (!EVP_CipherInit_ex(*this, cipher, nullptr, nullptr, nullptr, 0)) { + throw_evp_error("Could not initialize cipher"); + } + auto dlen = _info.len/8; + // Openssl describes des-56 length as 64 (counts parity), + // des-ede-112 as 128 etc... + // do some special casing... 
+ if ((type == "des" || type == "des-ede" || type == "des-ede3") && (dlen & 7) != 0) { + dlen = align_up(dlen, 8u); + } + // if we had to find a cipher without explicit key length (like rc2), + // try to set the key length to the desired strength. + if (!EVP_CIPHER_CTX_set_key_length(*this, dlen)) { + throw_evp_error(fmt::format("Invalid length {} for resolved type {} (wanted {})", len*8, str, _info.len)); + } + + len = EVP_CIPHER_key_length(cipher); + } + + + if (_key.empty()) { + _key.resize(len); + if (!RAND_bytes(reinterpret_cast(_key.data()), _key.size())) { + throw_evp_error(fmt::format("Could not generate key: {}", info.alg)); + } + } + if (_key.size() < len) { + throw std::invalid_argument(fmt::format("Invalid key data length {} for resolved type {} ({})", _key.size()*8, str, len*8)); + } + + if (!EVP_CipherInit_ex(*this, cipher, nullptr, + reinterpret_cast(_key.data()), nullptr, + 0)) { + throw_evp_error("Could not initialize cipher from key materiel"); + } + + _iv_len = EVP_CIPHER_CTX_iv_length(*this); + _block_size = EVP_CIPHER_CTX_block_size(*this); + _padding = !padd.empty(); + +} + +std::string encryption::symmetric_key::validate_exact_info_result() const { + auto [types, modes, padds] = parse_key_spec(_info.alg); + + auto cipher = EVP_CIPHER_CTX_get0_cipher(*this); + auto len = EVP_CIPHER_key_length(cipher); + auto mode = EVP_CIPHER_get_mode(cipher); + + std::ostringstream ss; + + if (unsigned(len)*8 != align_up(_info.len, 16u)) { + ss << "Length " << len*8 << " differs from requested " << _info.len << std::endl; + } + + static std::unordered_map openssl_modes({ + { EVP_CIPH_ECB_MODE, "ecb" }, + { EVP_CIPH_CBC_MODE, "cbc" }, + { EVP_CIPH_CFB_MODE, "cfb" }, + { EVP_CIPH_OFB_MODE, "ofb" }, + { EVP_CIPH_CTR_MODE, "ctr" }, + { EVP_CIPH_GCM_MODE, "cgm" }, + { EVP_CIPH_CCM_MODE, "ccm" }, + { EVP_CIPH_XTS_MODE, "xts" }, + { EVP_CIPH_WRAP_MODE, "wrap"}, + { EVP_CIPH_OCB_MODE, "ocb" }, + { EVP_CIPH_SIV_MODE, "siv" }, + }); + + auto i = 
openssl_modes.find(mode); + if (i != openssl_modes.end() && i->second != modes) { + ss << _info << ": " << "Block mode " << i->second << " differers from requested " << modes << std::endl; + } + + if ((!padds.empty() && padds != "no") != _padding) { + ss << _info << ": " << "Padding (" << bool(_padding) << " differs from requested " << padds << std::endl; + } + + return ss.str(); +} + +void encryption::symmetric_key::generate_iv_impl(uint8_t* dst, size_t s) const { + if (s < _iv_len) { + throw std::invalid_argument("Buffer underflow"); + } + if (!RAND_bytes(dst, s)) { + throw_evp_error("Could not generate initialization vector"); + } +} + +void encryption::symmetric_key::transform_unpadded_impl(const uint8_t* input, + size_t input_len, uint8_t* output, const uint8_t* iv, mode m) const { + if (!EVP_CipherInit_ex(*this, nullptr, nullptr, + reinterpret_cast(_key.data()), iv, int(m))) { + throw_evp_error("Could not initialize cipher (transform)"); + } + if (!EVP_CIPHER_CTX_set_padding(*this, 0)) { + throw_evp_error("Could not disable padding"); + } + + if (input_len & (_block_size - 1)) { + throw std::invalid_argument("Data must be aligned to 'blocksize'"); + } + + int outl = 0; + auto res = m == mode::decrypt ? + EVP_DecryptUpdate(*this, output, &outl, input, + int(input_len)) : + EVP_EncryptUpdate(*this, output, &outl, input, + int(input_len)); + + if (!res || outl != int(input_len)) { + throw std::runtime_error("transformation failed"); + } +} + +size_t encryption::symmetric_key::decrypt_impl(const uint8_t* input, + size_t input_len, uint8_t* output, size_t output_len, + const uint8_t* iv) const { + if (!EVP_CipherInit_ex(*this, nullptr, nullptr, + reinterpret_cast(_key.data()), iv, 0)) { + throw_evp_error("Could not initialize cipher (decrypt)"); + } + if (!EVP_CIPHER_CTX_set_padding(*this, int(_padding))) { + throw_evp_error("Could not initialize padding"); + } + + // normal case, caller provides output enough to deal with any padding. 
+ // in padding case, max out size is input_len - 1. + if (input_len <= output_len) { + // one go. + int outl = 0; + int finl = 0; + if (!EVP_DecryptUpdate(*this, output, &outl, input, int(input_len))) { + throw_evp_error("decryption failed"); + } + if (!EVP_DecryptFinal(*this, output + outl, &finl)) { + throw_evp_error("decryption failed"); + } + + return outl + finl; + } + + // meh. must provide block padding. + constexpr size_t local_buf_size = 1024; + + static thread_local std::vector cached_buf; + + if (cached_buf.size() < local_buf_size + _block_size) [[unlikely]] { + cached_buf.resize(local_buf_size + _block_size); + } + + auto buf = cached_buf.data(); + size_t res = 0; + while (input_len) { + auto n = std::min(input_len, local_buf_size); + int outl = 0; + if (!EVP_DecryptUpdate(*this, buf, &outl, input, int(n))) { + throw std::runtime_error("decryption failed"); + } + if (n < local_buf_size) { + // last block + int finl = 0; + if (!EVP_DecryptFinal(*this, buf + outl, &finl)) { + throw std::runtime_error("decryption failed"); + } + outl += finl; + } + if ((res + outl) > output_len) { + throw std::invalid_argument("Output buffer too small"); + } + output = std::copy(buf, buf + outl, output); + res += outl; + input_len -= n; + input += n; + } + + return res; +} + +size_t encryption::symmetric_key::encrypted_size(size_t n) const { + // encryption always adds padding. So if n is multiple of blocksize + // the size is n + blocksize. But if its not, things are "better"... 
+ return _block_size + align_down(n, _block_size); +} + +size_t encryption::symmetric_key::encrypt_impl(const uint8_t* input, + size_t input_len, uint8_t* output, size_t output_len, + const uint8_t* iv) const { + if (output_len < encrypted_size(input_len)) { + throw std::invalid_argument("Insufficient buffer"); + } + + if (!EVP_CipherInit_ex(*this, nullptr, nullptr, + reinterpret_cast(_key.data()), iv, 1)) { + throw_evp_error("Could not initialize cipher (encrypt)"); + } + if (!EVP_CIPHER_CTX_set_padding(*this, int(_padding))) { + throw_evp_error("Could not initialize padding"); + } + + int outl = 0; + int finl = 0; + if (!EVP_EncryptUpdate(*this, output, &outl, input, int(input_len))) { + throw_evp_error("encryption failed"); + } + if (!EVP_EncryptFinal(*this, output + outl, &finl)) { + throw_evp_error("encryption failed"); + } + return outl + finl; +} + +bool encryption::operator==(const key_info& k1, const key_info& k2) { + return k1.alg == k2.alg && k1.len == k2.len; +} + +bool encryption::operator!=(const key_info& k1, const key_info& k2) { + return !(k1 == k2); +} + +size_t encryption::key_info_hash::operator()(const key_info& e) const { + return utils::tuple_hash()(std::tie(e.alg, e.len)); +} diff --git a/ent/encryption/symmetric_key.hh b/ent/encryption/symmetric_key.hh new file mode 100644 index 0000000000..4a752e396f --- /dev/null +++ b/ent/encryption/symmetric_key.hh @@ -0,0 +1,154 @@ +/* + * Copyright (C) 2018 ScyllaDB + * + */ + +/* + * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 + */ + +#pragma once + +#include +#include +#include +#include +#include + +#include "../../bytes.hh" + +// forward declare openssl evp. 
+extern "C" { +struct evp_cipher_ctx_st; +} + +namespace encryption { + +struct key_info { + sstring alg; + unsigned len; + + bool compatible(const key_info&) const; +}; + +bool operator==(const key_info& k1, const key_info& k2); +bool operator!=(const key_info& k1, const key_info& k2); +std::ostream& operator<<(std::ostream&, const key_info&); + +struct key_info_hash { + size_t operator()(const key_info& e) const; +}; + +std::tuple parse_key_spec(const sstring&); + +// shared between key & kmip +std::tuple parse_key_spec_and_validate_defaults(const sstring&); + +class symmetric_key { + std::unique_ptr _ctxt; + key_info _info; + bytes _key; + unsigned _iv_len = 0; + unsigned _block_size = 0; + bool _padding = true; + + operator evp_cipher_ctx_st *() const { + return _ctxt.get(); + } + + void generate_iv_impl(uint8_t* dst, size_t) const; + size_t decrypt_impl(const uint8_t* input, size_t input_len, uint8_t* output, + size_t output_len, const uint8_t* iv) const; + size_t encrypt_impl(const uint8_t* input, size_t input_len, uint8_t* output, + size_t output_len, const uint8_t* iv) const; + +public: + symmetric_key(const key_info& info, const bytes& key = { }); + + const key_info& info() const { + return _info; + } + const bytes& key() const { + return _key; + } + size_t iv_len() const { + return _iv_len; + } + size_t block_size() const { + return _block_size; + } + + /** + * Evaluates whether or not the key info provided resulted in + * the exact same result from openssl, i.e. whether the combination + * of alg/block mode/padding etc was actually fully valid (or our + * heuristics have issues) + */ + std::string validate_exact_info_result() const; + + /** + * Write a random IV to dst. Must be iv_len() sized or larger + */ + template + void generate_iv(T* dst, size_t s) const { + static_assert(sizeof(T) == sizeof(uint8_t) && std::is_integral_v); + generate_iv_impl(reinterpret_cast(dst), s); + } + + // returns minimal buffer size required to encrypt n bytes. I.e. 
+ // block alignment + size_t encrypted_size(size_t n) const; + + template + size_t decrypt(const T* input, size_t input_len, V* output, + size_t output_len, const I* iv = nullptr) const { + static_assert(sizeof(T) == sizeof(uint8_t) && std::is_integral_v); + return decrypt_impl(reinterpret_cast(input), input_len, + reinterpret_cast(output), output_len, + reinterpret_cast(iv)); + } + template + size_t encrypt(const T* input, size_t input_len, V* output, + size_t output_len, const I* iv = nullptr) const { + static_assert(sizeof(T) == sizeof(uint8_t) && std::is_integral_v); + return encrypt_impl(reinterpret_cast(input), input_len, + reinterpret_cast(output), output_len, + reinterpret_cast(iv)); + } + + enum class mode { + decrypt, encrypt, + }; + template + void transform_unpadded(mode m, const T* input, size_t input_len, V* output, + const I* iv = nullptr) const { + static_assert(sizeof(T) == sizeof(uint8_t) && std::is_integral_v); + return transform_unpadded_impl(reinterpret_cast(input), + input_len, reinterpret_cast(output), + reinterpret_cast(iv), m); + } + template + void encrypt_unpadded(const T* input, size_t input_len, V* output, + const I* iv = nullptr) const { + static_assert(sizeof(T) == sizeof(uint8_t) && std::is_integral_v); + return transform_unpadded_impl(reinterpret_cast(input), + input_len, reinterpret_cast(output), + reinterpret_cast(iv), mode::encrypt); + } + template + void decrypt_unpadded(const T* input, size_t input_len, V* output, + const I* iv = nullptr) const { + static_assert(sizeof(T) == sizeof(uint8_t) && std::is_integral_v); + return transform_unpadded_impl(reinterpret_cast(input), + input_len, reinterpret_cast(output), + reinterpret_cast(iv), mode::decrypt); + } + +private: + void transform_unpadded_impl(const uint8_t* input, size_t input_len, + uint8_t* output, const uint8_t* iv, mode) const; +}; + +} + +template <> struct fmt::formatter : fmt::ostream_formatter {}; diff --git a/ent/encryption/system_key.cc 
b/ent/encryption/system_key.cc new file mode 100644 index 0000000000..bb8b845639 --- /dev/null +++ b/ent/encryption/system_key.cc @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2015 ScyllaDB + * + */ + +/* + * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 + */ +#include +#include +#include +#include + +#include +#include + +#include + +#include "symmetric_key.hh" +#include "system_key.hh" + +future<> encryption::system_key::validate() const { + return make_ready_future<>(); +} + +future encryption::system_key::decrypt(const sstring& s) { + auto b = base64_decode(s); + return decrypt(b).then([](bytes b) { + return make_ready_future(sstring(b.begin(), b.end())); + }); +} + +future encryption::system_key::encrypt(const sstring& s) { + return encrypt(bytes(s.begin(), s.end())).then([](bytes b) { + return make_ready_future(base64_encode(b)); + }); +} + +future encryption::system_key::encrypt(const bytes& b) { + return get_key().then([b](shared_ptr k) { + auto i = k->iv_len(); + auto n = k->encrypted_size(b.size()); + bytes res(bytes::initialized_later(), n + i); + k->generate_iv(reinterpret_cast(res.data()), i); + n = k->encrypt(reinterpret_cast(b.data()), b.size() + , reinterpret_cast(res.data()) + i, res.size() - i + , reinterpret_cast(res.data())); + res.resize(n + i); + return make_ready_future(std::move(res)); + }); + +} + +future encryption::system_key::decrypt(const bytes& b) { + return get_key().then([b](shared_ptr k) { + auto i = k->iv_len(); + bytes res(bytes::initialized_later(), b.size() - i); + auto n = k->decrypt(reinterpret_cast(b.data()) + i, + b.size() - i, reinterpret_cast(res.data()), + res.size(), reinterpret_cast(b.data())); + res.resize(n); + return make_ready_future(std::move(res)); + }); +} + diff --git a/ent/encryption/system_key.hh b/ent/encryption/system_key.hh new file mode 100644 index 0000000000..8d6ac32b26 --- /dev/null +++ b/ent/encryption/system_key.hh @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2015 ScyllaDB + * + */ + +/* + * 
SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 + */ + +#pragma once + +#include "encryption.hh" +#include "../../bytes.hh" + +namespace encryption { + +class symmetric_key; + +class system_key { +public: + virtual ~system_key() {} + virtual future> get_key() = 0; + virtual const sstring& name() const = 0; + virtual bool is_local() const = 0; + virtual future<> validate() const; + + future encrypt(const sstring&); + future decrypt(const sstring&); + future encrypt(const bytes&); + future decrypt(const bytes&); +}; + +} + From ee62b61c84ff9a43006daaba06261d1d145966fe Mon Sep 17 00:00:00 2001 From: Calle Wilund Date: Wed, 8 Jan 2025 16:03:06 +0000 Subject: [PATCH 06/10] tmpdir: shorten test tempdir path To make certain python tests work in CI --- test/lib/tmpdir.cc | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/test/lib/tmpdir.cc b/test/lib/tmpdir.cc index 738dcdf90f..fce9519dcb 100644 --- a/test/lib/tmpdir.cc +++ b/test/lib/tmpdir.cc @@ -31,9 +31,27 @@ tmpdir::sweeper::~sweeper() { } } -tmpdir::tmpdir() - : _path(fs::temp_directory_path() / fs::path(fmt::format(FMT_STRING("scylla-{}"), utils::make_random_uuid()))) { - fs::create_directories(_path); +tmpdir::tmpdir() { + auto tmp = fs::temp_directory_path(); + for (;;) { + // Reduce the path length of the created tmp dir. This might seem + // silly when running with base TMPDIR=/tmp or similar, but + // in a lot of CI testing, TMPDIR will be a loooooong path into + // jenkins workdirs or similar -> this path will be 100+ chars long. + // Again, this should most often not be a problem, _but_ if we + // for example run something like a sub process of a python server, + // which will try to create various unix sockets et al for its + // operations, the TMPDIR base for this must not exceed 107 chars. + // Note: converting UUID to string first, because for some reason + // our UUID formatter does not respect width/precision. 
Feel free to + // change once it does. + _path = tmp / fmt::format("scylla-{:.8}", fmt::to_string(utils::make_random_uuid())); + // Note: this is a slight improvement also, in that we ensure the dir + // we use is actually created by us. + if (fs::create_directories(_path)) { + break; + } + } } tmpdir::tmpdir(tmpdir&& other) noexcept : _path(std::exchange(other._path, {})) {} From c596ae6eb1ab881f80fa93a5fad1e79065a5b3c4 Mon Sep 17 00:00:00 2001 From: Calle Wilund Date: Wed, 8 Jan 2025 11:50:16 +0000 Subject: [PATCH 07/10] tests: Add EAR tests Adds the migrated EAR/encryption tests. Note: Until scylla CI is updated to provide all the proper ENV vars, some tests will not execute. --- configure.py | 3 + test/boost/CMakeLists.txt | 9 + test/boost/encrypted_file_test.cc | 265 ++++++ test/boost/encryption_at_rest_test.cc | 1098 +++++++++++++++++++++++++ test/boost/kmip_wrapper.py | 95 +++ test/boost/symmetric_key_test.cc | 221 +++++ test/resource/certs/cacert.pem | 82 ++ test/resource/certs/scylla.pem | 57 ++ 8 files changed, 1830 insertions(+) create mode 100644 test/boost/encrypted_file_test.cc create mode 100644 test/boost/encryption_at_rest_test.cc create mode 100644 test/boost/kmip_wrapper.py create mode 100644 test/boost/symmetric_key_test.cc create mode 100644 test/resource/certs/cacert.pem create mode 100644 test/resource/certs/scylla.pem diff --git a/configure.py b/configure.py index 5e930ff6db..35c1f10039 100755 --- a/configure.py +++ b/configure.py @@ -475,6 +475,8 @@ scylla_tests = set([ 'test/boost/double_decker_test', 'test/boost/duration_test', 'test/boost/dynamic_bitset_test', + 'test/boost/encrypted_file_test', + 'test/boost/encryption_at_rest_test', 'test/boost/enum_option_test', 'test/boost/enum_set_test', 'test/boost/estimated_histogram_test', @@ -554,6 +556,7 @@ scylla_tests = set([ 'test/boost/token_metadata_test', 'test/boost/top_k_test', 'test/boost/transport_test', + 'test/boost/symmetric_key_test', 'test/boost/types_test', 
'test/boost/utf8_test', 'test/boost/vint_serialization_test', diff --git a/test/boost/CMakeLists.txt b/test/boost/CMakeLists.txt index 4815188030..1d95d935a3 100644 --- a/test/boost/CMakeLists.txt +++ b/test/boost/CMakeLists.txt @@ -80,6 +80,15 @@ add_scylla_test(duration_test add_scylla_test(dynamic_bitset_test KIND BOOST LIBRARIES utils) +add_scylla_test(encrypted_file_test + KIND SEASTAR + LIBRARIES + encryption) +add_scylla_test(encryption_at_rest_test + KIND SEASTAR + LIBRARIES + Boost::filesystem + encryption) add_scylla_test(enum_option_test KIND BOOST) add_scylla_test(enum_set_test diff --git a/test/boost/encrypted_file_test.cc b/test/boost/encrypted_file_test.cc new file mode 100644 index 0000000000..62c6042e3f --- /dev/null +++ b/test/boost/encrypted_file_test.cc @@ -0,0 +1,265 @@ +/* + * Copyright (C) 2016 ScyllaDB + */ + + + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include "ent/encryption/encryption.hh" +#include "ent/encryption/symmetric_key.hh" +#include "ent/encryption/encrypted_file_impl.hh" +#include "test/lib/tmpdir.hh" +#include "test/lib/random_utils.hh" +#include "test/lib/exception_utils.hh" + +using namespace encryption; + +static tmpdir dir; + +static future>> make_file(const sstring& name, open_flags mode, ::shared_ptr k = nullptr) { + file f = co_await open_file_dma(sstring(dir.path() / std::string(name)), mode); + if (k == nullptr) { + key_info info{"AES/CBC", 256}; + k = ::make_shared(info); + } + co_return std::tuple(file(make_encrypted_file(f, k)), k); +} + +static temporary_buffer generate_random(size_t n, size_t align) { + auto tmp = temporary_buffer::aligned(align, align_up(n, align)); + auto data = tests::random::get_sstring(n); + std::copy(data.begin(), data.end(), tmp.get_write()); + return tmp; +} + +static future<> test_random_data_disk(size_t n) { + auto name = "test_rand_" + std::to_string(n); + auto t = co_await make_file(name, 
open_flags::rw|open_flags::create); + auto f = std::get<0>(t); + std::exception_ptr ex = nullptr; + + try { + auto k = std::get<1>(t); + auto a = f.memory_dma_alignment(); + auto buf = generate_random(n, a); + auto w = co_await f.dma_write(0, buf.get(), buf.size()); + + co_await f.flush(); + if (n != buf.size()) { + co_await f.truncate(n); + } + + BOOST_REQUIRE_EQUAL(w, buf.size()); + + auto k2 = ::make_shared(k->info(), k->key()); + auto f2 = std::get<0>(co_await make_file(name, open_flags::ro, k2)); + + auto tmp = temporary_buffer::aligned(a, buf.size()); + auto n2 = co_await f2.dma_read(0, tmp.get_write(), tmp.size()); + + BOOST_REQUIRE_EQUAL(n2, n); + BOOST_REQUIRE_EQUAL_COLLECTIONS(tmp.get(), tmp.get() + n2, buf.get(), buf.get() + n2); + } catch (...) { + ex = std::current_exception(); + } + + co_await f.close(); + if (ex) { + std::rethrow_exception(ex); + } +} + +static void test_random_data(size_t n) { + auto buf = generate_random(n, 8); + + + // first, verify padded. + { + key_info info{"AES/CBC/PKCSPadding", 256}; + auto k = ::make_shared(info); + + bytes b(bytes::initialized_later(), k->iv_len()); + k->generate_iv(b.data(), k->iv_len()); + + temporary_buffer tmp(n + k->block_size()); + k->encrypt(buf.get(), buf.size(), tmp.get_write(), tmp.size(), b.data()); + + auto bytes = k->key(); + auto k2 = ::make_shared(info, bytes); + + temporary_buffer tmp2(n + k->block_size()); + k2->decrypt(tmp.get(), tmp.size(), tmp2.get_write(), tmp2.size(), b.data()); + + BOOST_REQUIRE_EQUAL_COLLECTIONS(tmp2.get(), tmp2.get() + n, buf.get(), buf.get() + n); + } + + // unpadded + { + key_info info{"AES/CBC", 256}; + auto k = ::make_shared(info); + + bytes b(bytes::initialized_later(), k->iv_len()); + k->generate_iv(b.data(), k->iv_len()); + + temporary_buffer tmp(n); + k->encrypt_unpadded(buf.get(), buf.size(), tmp.get_write(), b.data()); + + auto bytes = k->key(); + auto k2 = ::make_shared(info, bytes); + + temporary_buffer tmp2(buf.size()); + k2->decrypt_unpadded(tmp.get(), 
tmp.size(), tmp2.get_write(), b.data()); + + BOOST_REQUIRE_EQUAL_COLLECTIONS(tmp2.get(), tmp2.get() + n, buf.get(), buf.get() + n); + } +} + + +BOOST_AUTO_TEST_CASE(test_encrypting_data_128) { + test_random_data(128); +} + +BOOST_AUTO_TEST_CASE(test_encrypting_data_4k) { + test_random_data(4*1024); +} + + +SEASTAR_TEST_CASE(test_encrypted_file_data_4k) { + return test_random_data_disk(4*1024); +} + +SEASTAR_TEST_CASE(test_encrypted_file_data_16k) { + return test_random_data_disk(16*1024); +} + +SEASTAR_TEST_CASE(test_encrypted_file_data_unaligned) { + return test_random_data_disk(16*1024 - 3); +} + +SEASTAR_TEST_CASE(test_encrypted_file_data_unaligned2) { + return test_random_data_disk(16*1024 - 4092); +} + +SEASTAR_TEST_CASE(test_short) { + auto name = "test_short"; + file f = co_await open_file_dma(sstring(dir.path() / name), open_flags::rw|open_flags::create); + co_await f.truncate(1); + co_await f.close(); + + auto t = co_await make_file(name, open_flags::ro); + f = std::get<0>(t); + std::exception_ptr ex = nullptr; + + try { + temporary_buffer buf(f.memory_dma_alignment()); + + BOOST_REQUIRE_EXCEPTION( + co_await f.dma_read(0, buf.get_write(), buf.size()), + std::domain_error, + exception_predicate::message_contains("file size 1, expected 0 or at least 16") + ); + } catch (...) 
{ + ex = std::current_exception(); + } + + co_await f.close(); + if (ex) { + std::rethrow_exception(ex); + } +} + +SEASTAR_TEST_CASE(test_truncating_empty) { + auto name = "test_truncating_empty"; + auto t = co_await make_file(name, open_flags::rw|open_flags::create); + auto f = std::get<0>(t); + auto k = std::get<1>(t); + auto s = 64 * f.memory_dma_alignment(); + + co_await f.truncate(s); + + temporary_buffer buf(s); + auto n = co_await f.dma_read(0, buf.get_write(), buf.size()); + + co_await f.close(); + + BOOST_REQUIRE_EQUAL(s, n); + + for (auto c : buf) { + BOOST_REQUIRE_EQUAL(c, 0); + } +} + +SEASTAR_TEST_CASE(test_truncating_extend) { + auto name = "test_truncating_extend"; + auto t = co_await make_file(name, open_flags::rw|open_flags::create); + auto f = std::get<0>(t); + auto k = std::get<1>(t); + auto a = f.memory_dma_alignment(); + auto s = 32 * a; + auto buf = generate_random(s, a); + auto w = co_await f.dma_write(0, buf.get(), buf.size()); + + co_await f.flush(); + BOOST_REQUIRE_EQUAL(s, w); + + for (size_t i = 1; i < 64; ++i) { + // truncate smaller, unaligned + auto l = w - i; + auto r = w + 8 * a; + co_await f.truncate(l); + BOOST_REQUIRE_EQUAL(l, (co_await f.stat()).st_size); + + { + auto tmp = temporary_buffer::aligned(a, align_up(l, a)); + auto n = co_await f.dma_read(0, tmp.get_write(), tmp.size()); + + BOOST_REQUIRE_EQUAL(l, n); + BOOST_REQUIRE_EQUAL_COLLECTIONS(tmp.get(), tmp.get() + l, buf.get(), buf.get() + l); + + auto k = align_down(l, a); + + while (k > 0) { + n = co_await f.dma_read(0, tmp.get_write(), k); + + BOOST_REQUIRE_EQUAL(k, n); + BOOST_REQUIRE_EQUAL_COLLECTIONS(tmp.get(), tmp.get() + k, buf.get(), buf.get() + k); + + n = co_await f.dma_read(k, tmp.get_write(), tmp.size()); + BOOST_REQUIRE_EQUAL(l - k, n); + BOOST_REQUIRE_EQUAL_COLLECTIONS(tmp.get(), tmp.get() + n, buf.get() + k, buf.get() + k + n); + + k -= a; + } + } + + co_await f.truncate(r); + BOOST_REQUIRE_EQUAL(r, (co_await f.stat()).st_size); + + auto tmp = 
temporary_buffer::aligned(a, align_up(r, a)); + auto n = co_await f.dma_read(0, tmp.get_write(), tmp.size()); + + BOOST_REQUIRE_EQUAL(r, n); + BOOST_REQUIRE_EQUAL_COLLECTIONS(tmp.get(), tmp.get() + l, buf.get(), buf.get() + l); + + while (l < r) { + BOOST_REQUIRE_EQUAL(tmp[l], 0); + ++l; + } + } + + co_await f.close(); +} + diff --git a/test/boost/encryption_at_rest_test.cc b/test/boost/encryption_at_rest_test.cc new file mode 100644 index 0000000000..acf06d3564 --- /dev/null +++ b/test/boost/encryption_at_rest_test.cc @@ -0,0 +1,1098 @@ +/* + * Copyright (C) 2016 ScyllaDB + */ + + + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include "ent/encryption/encryption.hh" +#include "ent/encryption/symmetric_key.hh" +#include "ent/encryption/local_file_provider.hh" +#include "test/lib/tmpdir.hh" +#include "test/lib/random_utils.hh" +#include "test/lib/cql_test_env.hh" +#include "test/lib/cql_assertions.hh" +#include "db/config.hh" +#include "db/extensions.hh" +#include "db/commitlog/commitlog.hh" +#include "db/commitlog/commitlog_replayer.hh" +#include "init.hh" +#include "sstables/sstables.hh" +#include "cql3/untyped_result_set.hh" +#include "utils/rjson.hh" +#include "replica/database.hh" +#include "service/client_state.hh" + +using namespace encryption; +namespace fs = std::filesystem; + +using test_hook = std::function; + +struct test_provider_args { + const tmpdir& tmp; + std::string options; + std::string extra_yaml = {}; + unsigned n_tables = 1; + unsigned n_restarts = 1; + std::string explicit_provider = {}; + + test_hook before_create_table; + test_hook after_create_table; + test_hook on_insert_exception; + + std::optional timeout; +}; + +static void do_create_and_insert(cql_test_env& env, const test_provider_args& args, const std::string& pk, const std::string& v) { + for (auto i = 0u; i < args.n_tables; ++i) { + if 
(args.before_create_table) { + args.before_create_table(env); + } + if (args.options.empty()) { + env.execute_cql(fmt::format("create table t{} (pk text primary key, v text)", i)).get(); + } else { + env.execute_cql(fmt::format("create table t{} (pk text primary key, v text) WITH scylla_encryption_options={{{}}}", i, args.options)).get(); + } + + if (args.after_create_table) { + args.after_create_table(env); + } + try { + env.execute_cql(fmt::format("insert into ks.t{} (pk, v) values ('{}', '{}')", i, pk, v)).get(); + } catch (...) { + args.on_insert_exception(env); + throw; + } + } +} + +static future<> test_provider(const test_provider_args& args) { + auto make_config = [&] { + auto ext = std::make_shared(); + auto cfg = seastar::make_shared(ext); + cfg->data_file_directories({args.tmp.path().string()}); + + // Currently the test fails with consistent_cluster_management = true. See #2995. + cfg->consistent_cluster_management(false); + + if (!args.extra_yaml.empty()) { + boost::program_options::options_description desc; + boost::program_options::options_description_easy_init init(&desc); + configurable::append_all(*cfg, init); + cfg->read_from_yaml(args.extra_yaml); + } + + return std::make_tuple(cfg, ext); + }; + + std::string pk = "apa"; + std::string v = "ko"; + + { + auto [cfg, ext] = make_config(); + + co_await do_with_cql_env_thread([&] (cql_test_env& env) { + do_create_and_insert(env, args, pk, v); + }, cfg, {}, cql_test_init_configurables{ *ext }); + } + + for (auto rs = 0u; rs < args.n_restarts; ++rs) { + auto [cfg, ext] = make_config(); + + co_await do_with_cql_env_thread([&] (cql_test_env& env) { + for (auto i = 0u; i < args.n_tables; ++i) { + require_rows(env, fmt::format("select * from ks.t{}", i), {{utf8_type->decompose(pk), utf8_type->decompose(v)}}); + + auto provider = args.explicit_provider; + + // check that all sstables have the defined provider class (i.e. 
are encrypted using correct options) + if (provider.empty() && args.options.find("'key_provider'") != std::string::npos) { + static std::regex ex(R"foo('key_provider'\s*:\s*'(\w+)')foo"); + + std::smatch m; + BOOST_REQUIRE(std::regex_search(args.options.begin(), args.options.end(), m, ex)); + provider = m[1].str(); + BOOST_REQUIRE(!provider.empty()); + } + if (!provider.empty()) { + env.db().invoke_on_all([&](replica::database& db) { + auto& cf = db.find_column_family("ks", "t" + std::to_string(i)); + auto sstables = cf.get_sstables_including_compacted_undeleted(); + + if (sstables) { + for (auto& t : *sstables) { + auto sst_provider = encryption::encryption_provider(*t); + BOOST_REQUIRE_EQUAL(provider, sst_provider); + } + } + }).get(); + } + } + }, cfg, {}, cql_test_init_configurables{ *ext }); + } +} + +static future<> test_provider(const std::string& options, const tmpdir& tmp, const std::string& extra_yaml = {}, unsigned n_tables = 1, unsigned n_restarts = 1, const std::string& explicit_provider = {}) { + test_provider_args args{ + .tmp = tmp, + .options = options, + .extra_yaml = extra_yaml, + .n_tables = n_tables, + .n_restarts = n_restarts, + .explicit_provider = explicit_provider + }; + co_await test_provider(args); +} + +SEASTAR_TEST_CASE(test_local_file_provider) { + tmpdir tmp; + auto keyfile = tmp.path() / "secret_key"; + co_await test_provider(fmt::format("'key_provider': 'LocalFileSystemKeyProviderFactory', 'secret_key_file': '{}', 'cipher_algorithm':'AES/CBC/PKCS5Padding', 'secret_key_strength': 128", keyfile.string()), tmp); +} + +static future<> create_key_file(const fs::path& path, const std::vector& key_types) { + std::ostringstream ss; + + for (auto& info : key_types) { + symmetric_key k(info); + ss << info.alg << ":" << info.len << ":" << base64_encode(k.key()) << std::endl; + } + + auto s = ss.str(); + co_await seastar::recursive_touch_directory(fs::path(path).remove_filename().string()); + co_await write_text_file_fully(path.string(), s); +} 
+ +static future<> do_test_replicated_provider(unsigned n_tables, unsigned n_restarts, const std::string& extra = {}, test_hook hook = {}) { + tmpdir tmp; + auto keyfile = tmp.path() / "secret_key"; + auto sysdir = tmp.path() / "system_keys"; + auto syskey = sysdir / "system_key"; + auto yaml = fmt::format("system_key_directory: {}", sysdir.string()); + + co_await create_key_file(syskey, { { "AES/CBC/PKCSPadding", 256 }}); + + BOOST_REQUIRE(fs::exists(syskey));; + + test_provider_args args{ + .tmp = tmp, + .options = fmt::format("'key_provider': 'ReplicatedKeyProviderFactory', 'system_key_file': 'system_key', 'secret_key_file': '{}','cipher_algorithm':'AES/CBC/PKCS5Padding', 'secret_key_strength': 128{}", keyfile.string(), extra), + .extra_yaml = yaml, + .n_tables = n_tables, + .n_restarts = n_restarts, + .explicit_provider = {}, + .after_create_table = hook + }; + + co_await test_provider(args); + + BOOST_REQUIRE(fs::exists(tmp.path())); +} + +SEASTAR_TEST_CASE(test_replicated_provider) { + co_await do_test_replicated_provider(1, 1); +} + +SEASTAR_TEST_CASE(test_replicated_provider_many_tables) { + co_await do_test_replicated_provider(100, 5); +} + +using namespace std::chrono_literals; + +static const timeout_config rkp_db_timeout_config { + 5s, 5s, 5s, 5s, 5s, 5s, 5s, +}; + +static service::query_state& rkp_db_query_state() { + static thread_local service::client_state cs(service::client_state::internal_tag{}, rkp_db_timeout_config); + static thread_local service::query_state qs(cs, empty_service_permit()); + return qs; +} + +SEASTAR_TEST_CASE(test_replicated_provider_shutdown_failure) { + co_await do_test_replicated_provider(1, 1, ", 'DEBUG': 'nocache,novalidate'", [](cql_test_env& env) { + /** + * Try to remove all keys in replicated table. Note: we can't use truncate because we + * are not running any proper remotes. 
+ */ + auto res = env.local_qp().execute_internal("select * from system_replicated_keys.encrypted_keys", + db::consistency_level::ONE, rkp_db_query_state(), {}, cql3::query_processor::cache_internal::no + ).get(); + for (auto& row : (*res)) { + auto key_file = row.get_as("key_file"); + auto cipher = row.get_as("cipher"); + auto strength = row.get_as("strength"); + auto uuid = row.get_as("key_id"); + + env.local_qp().execute_internal("delete from system_replicated_keys.encrypted_keys where key_file=? AND cipher=? AND strength=? AND key_id=?", + db::consistency_level::ONE, rkp_db_query_state(), + { key_file, cipher, strength, uuid }, + cql3::query_processor::cache_internal::no + ).get(); + } + }); +} + +static std::string get_var_or_default(const char* var, std::string_view def, bool* set) { + const char* val = std::getenv(var); + if (val == nullptr) { + *set = false; + return std::string(def); + } + *set = true; + return val; +} + +static std::string get_var_or_default(const char* var, std::string_view def) { + bool dummy; + return get_var_or_default(var, def, &dummy); +} + +static bool check_run_test(const char* var, bool defval = false) { + auto do_test = get_var_or_default(var, std::to_string(defval)); + + if (!strcasecmp(do_test.data(), "0") || !strcasecmp(do_test.data(), "false")) { + BOOST_TEST_MESSAGE(fmt::format("Skipping test. 
Set {}=1 to run", var)); + return false; + } + return true; +} + +static auto check_run_test_decorator(const char* var, bool def = false) { + return boost::unit_test::precondition(std::bind(&check_run_test, var, def)); +} + +#ifdef HAVE_KMIP + +struct kmip_test_info { + std::string host; + std::string cert; + std::string key; + std::string ca; + std::string prio; +}; + +namespace bp = boost::process; + +static future<> kmip_test_helper(const std::function(const kmip_test_info&, const tmpdir&)>& f) { + tmpdir tmp; + bool host_set = false; + bp::child python; + bp::group gp; + bp::ipstream is; + + std::future pykmip_status; + + static const char* def_resourcedir = "./test/resource/certs"; + const char* resourcedir = std::getenv("KMIP_RESOURCE_DIR"); + if (resourcedir == nullptr) { + resourcedir = def_resourcedir; + } + + kmip_test_info info { + .host = get_var_or_default("KMIP_HOST", "127.0.0.1", &host_set), + .cert = get_var_or_default("KMIP_CERT", fmt::format("{}/scylla.pem", resourcedir)), + .key = get_var_or_default("KMIP_KEY", fmt::format("{}/scylla.pem", resourcedir)), + .ca = get_var_or_default("KMIP_CA", fmt::format("{}/cacert.pem", resourcedir)), + .prio = get_var_or_default("KMIP_PRIO", "SECURE128:+RSA:-VERS-TLS1.0:-ECDHE-ECDSA") + }; + + auto cleanup = defer([&] { + if (python.running()) { + BOOST_TEST_MESSAGE("Stopping PyKMIP server"); // debug print. Why not. + gp.terminate(); + pykmip_status.get(); + } + }); + + // note: default kmip port = 5696; + + if (!host_set) { + // Note: we set `enable_tls_client_auth=False` - client cert is still validated, + // but we have not generated certs with "extended usage client OID", which + // pykmip will check for if this is true. 
+ auto cfg = fmt::format(R"foo( +[server] +hostname=127.0.0.1 +port=1 +certificate_path={} +key_path={} +ca_path={} +auth_suite=TLS1.2 +policy_path={} +enable_tls_client_auth=False +logging_level=DEBUG +database_path={}/pykmip.db + )foo", info.cert, info.key, info.ca, tmp.path().string(), tmp.path().string()); + + auto cfgfile = fmt::format("{}/pykmip.conf", tmp.path().string()); + auto log = fmt::format("{}/pykmip.log", tmp.path().string()); + + { + std::ofstream of(cfgfile); + of << cfg; + } + + auto pyexec = bp::search_path("python"); + + BOOST_TEST_MESSAGE("Starting PyKMIP server"); // debug print. Why not. + + python = bp::child(pyexec, gp, + "test/boost/kmip_wrapper.py", + "-l", log, + "-f", cfgfile, + "-v", "DEBUG", + (bp::std_out & bp::std_err) > is, bp::std_in.close(), + bp::env["TMPDIR"]=tmp.path().string() + ); + + std::promise port_promise; + auto f = port_promise.get_future(); + + pykmip_status = std::async([&] { + static std::regex port_ex("Listening on (\\d+)"); + + std::string line; + bool b = false; + + do { + while (std::getline(is, line)) { + std::cout << line << std::endl; + std::smatch m; + if (!b && std::regex_match(line, m, port_ex)) { + port_promise.set_value(std::stoi(m[1].str())); + b = true; + } + } + } while (python.running()); + + if (!b) { + port_promise.set_value(-1); + } + }); + // arbitrary timeout of 20s for the server to make some output. Very generous. + if (f.wait_for(20s) == std::future_status::timeout) { + throw std::runtime_error("Could not start pykmip"); + } + auto port = f.get(); + if (port <= 0) { + throw std::runtime_error("Invalid port"); + } + // wait for port. + for (;;) { + try { + // TODO: seastar does not have a connect with timeout. That would be helpful here. But alas... + co_await seastar::connect(socket_address(net::inet_address("127.0.0.1"), port)); + BOOST_TEST_MESSAGE("PyKMIP server up and available"); // debug print. Why not. + break; + } catch (...) 
{ + } + co_await sleep(100ms); + } + + info.host = fmt::format("127.0.0.1:{}", port); + } + + co_await f(info, tmp); +} + +SEASTAR_TEST_CASE(test_kmip_provider, *check_run_test_decorator("ENABLE_KMIP_TEST", true)) { + co_await kmip_test_helper([](const kmip_test_info& info, const tmpdir& tmp) -> future<> { + auto yaml = fmt::format(R"foo( + kmip_hosts: + kmip_test: + hosts: {0} + certificate: {1} + keyfile: {2} + truststore: {3} + priority_string: {4} + )foo" + , info.host, info.cert, info.key, info.ca, info.prio + ); + co_await test_provider("'key_provider': 'KmipKeyProviderFactory', 'kmip_host': 'kmip_test', 'cipher_algorithm':'AES/CBC/PKCS5Padding', 'secret_key_strength': 128", tmp, yaml); + }); +} + +#endif // HAVE_KMIP + +class fake_proxy { + seastar::server_socket _socket; + socket_address _address; + bool _go_on = true; + bool _do_proxy = true; + future<> _f; + + future<> run(std::string s) { + uint16_t port = 443u; + auto i = s.find_last_of(':'); + if (i != std::string::npos && i > 0 && s[i - 1] != ':') { // just check against ipv6... + port = std::stoul(s.substr(i + 1)); + s = s.substr(0, i); + } + + auto addr = co_await seastar::net::dns::resolve_name(s); + std::vector> work; + + while (_go_on) { + try { + auto client = co_await _socket.accept(); + auto dst = co_await seastar::connect(socket_address(addr, port)); + + auto f = [&]() -> future<> { + auto& s = client.connection; + auto& ldst = dst; + + auto do_io = [this](connected_socket& src, connected_socket& dst) -> future<> { + auto sin = src.input(); + auto dout = dst.output(); + // note: have to have differing conditions for proxying + // and shutdown, and need to check inside loop, because + // kmip connector caches connection -> not new socket. 
+ while (_go_on && _do_proxy && !sin.eof()) { + auto buf = co_await sin.read(); + if (_do_proxy) { + co_await dout.write(std::move(buf)); + co_await dout.flush(); + } + } + co_await dout.close(); + }; + + co_await when_all(do_io(s, ldst), do_io(ldst, s)); + }(); + + work.emplace_back(std::move(f)); + } catch (...) { + } + } + + for (auto&& f : work) { + co_await std::move(f); + } + } +public: + fake_proxy(std::string dst) + : _socket(seastar::listen(socket_address(0x7f000001, 0))) + , _address(_socket.local_address()) + , _f(run(std::move(dst))) + {} + + const socket_address& address() const { + return _address; + } + void enable(bool b) { + _do_proxy = b; + } + future<> stop() { + if (std::exchange(_go_on, false)) { + _socket.abort_accept(); + co_await std::move(_f); + } + } +}; + +#ifdef HAVE_KMIP + +SEASTAR_TEST_CASE(test_kmip_provider_multiple_hosts, *check_run_test_decorator("ENABLE_KMIP_TEST", true)) { + /** + * Tests for #3251. KMIP connector ends up in endless loop if using more than one + * failover host. This is only in initial connection (in real life only in initial connection verification). + * + * We don't have access to more than one KMIP server for testing (at a time). + * Pretend to have failover by using a local proxy. + */ + co_await kmip_test_helper([](const kmip_test_info& info, const tmpdir& tmp) -> future<> { + fake_proxy proxy(info.host); + + auto host2 = boost::lexical_cast(proxy.address()); + + auto yaml = fmt::format(R"foo( + kmip_hosts: + kmip_test: + hosts: {0}, {5} + certificate: {1} + keyfile: {2} + truststore: {3} + priority_string: {4} + )foo" + , info.host, info.cert, info.key, info.ca, info.prio, host2 + ); + + std::exception_ptr ex; + + try { + co_await test_provider("'key_provider': 'KmipKeyProviderFactory', 'kmip_host': 'kmip_test', 'cipher_algorithm':'AES/CBC/PKCS5Padding', 'secret_key_strength': 128", tmp, yaml); + } catch (...) 
{ + ex = std::current_exception(); + } + + co_await proxy.stop(); + + if (ex) { + std::rethrow_exception(ex); + } + }); +} + +#endif // HAVE_KMIP + +/* +Simple test of KMS provider. Still has some caveats: + + 1.) Uses aws CLI credentials for auth. I.e. you need to have a valid + ~/.aws/credentials for the user running the test. + 2.) I can't figure out a good way to set up a key "everyone" can access. So user needs + to have read/encrypt access to the key alias (default "alias/kms_encryption_test") + in the scylla AWS account. + + A "better" solution might be to create a dummy user only for KMS testing with only access + to a single key, and no other privileges. But that seems dangerous as well. + + For this reason, this test is parameterized with env vars: + * ENABLE_KMS_TEST - set to non-zero (1/true) to run + * KMS_KEY_ALIAS - default "alias/kms_encryption_test" - set to key alias you have access to. + * KMS_AWS_REGION - default us-east-1 - set to whatever region your key is in. + +*/ +static future<> kms_test_helper(std::function(const tmpdir&, std::string_view, std::string_view, std::string_view)> f) { + auto kms_key_alias = get_var_or_default("KMS_KEY_ALIAS", "alias/kms_encryption_test"); + auto kms_aws_region = get_var_or_default("KMS_AWS_REGION", "us-east-1"); + auto kms_aws_profile = get_var_or_default("KMS_AWS_PROFILE", "default"); + + tmpdir tmp; + + co_await f(tmp, kms_key_alias, kms_aws_region, kms_aws_profile); +} + +SEASTAR_TEST_CASE(test_kms_provider, *check_run_test_decorator("ENABLE_KMS_TEST")) { + co_await kms_test_helper([](const tmpdir& tmp, std::string_view kms_key_alias, std::string_view kms_aws_region, std::string_view kms_aws_profile) -> future<> { + /** + * Note: NOT including any auth stuff here. 
The provider will pick up AWS credentials + * from ~/.aws/credentials + */ + auto yaml = fmt::format(R"foo( + kms_hosts: + kms_test: + master_key: {0} + aws_region: {1} + aws_profile: {2} + )foo" + , kms_key_alias, kms_aws_region, kms_aws_profile + ); + + co_await test_provider("'key_provider': 'KmsKeyProviderFactory', 'kms_host': 'kms_test', 'cipher_algorithm':'AES/CBC/PKCS5Padding', 'secret_key_strength': 128", tmp, yaml); + }); +} + +SEASTAR_TEST_CASE(test_kms_provider_with_master_key_in_cf, *check_run_test_decorator("ENABLE_KMS_TEST")) { + co_await kms_test_helper([](const tmpdir& tmp, std::string_view kms_key_alias, std::string_view kms_aws_region, std::string_view kms_aws_profile) -> future<> { + /** + * Note: NOT including any auth stuff here. The provider will pick up AWS credentials + * from ~/.aws/credentials + */ + auto yaml = fmt::format(R"foo( + kms_hosts: + kms_test: + aws_region: {1} + aws_profile: {2} + )foo" + , kms_key_alias, kms_aws_region, kms_aws_profile + ); + + // should fail + BOOST_REQUIRE_THROW( + co_await test_provider("'key_provider': 'KmsKeyProviderFactory', 'kms_host': 'kms_test', 'cipher_algorithm':'AES/CBC/PKCS5Padding', 'secret_key_strength': 128", tmp, yaml) + , std::exception + ); + + // should be ok + co_await test_provider(fmt::format("'key_provider': 'KmsKeyProviderFactory', 'kms_host': 'kms_test', 'master_key': '{}', 'cipher_algorithm':'AES/CBC/PKCS5Padding', 'secret_key_strength': 128", kms_key_alias) + , tmp, yaml + ); + }); +} + + +SEASTAR_TEST_CASE(test_user_info_encryption) { + tmpdir tmp; + auto keyfile = tmp.path() / "secret_key"; + + auto yaml = fmt::format(R"foo( + user_info_encryption: + enabled: True + key_provider: LocalFileSystemKeyProviderFactory + secret_key_file: {} + cipher_algorithm: AES/CBC/PKCS5Padding + secret_key_strength: 128 + )foo" + , keyfile.string()); + + co_await test_provider({}, tmp, yaml, 4, 1, "LocalFileSystemKeyProviderFactory" /* verify encrypted even though no kp in options*/); +} + 
+SEASTAR_TEST_CASE(test_kms_provider_with_broken_algo, *check_run_test_decorator("ENABLE_KMS_TEST")) { + co_await kms_test_helper([](const tmpdir& tmp, std::string_view kms_key_alias, std::string_view kms_aws_region, std::string_view kms_aws_profile) -> future<> { + /** + * Note: NOT including any auth stuff here. The provider will pick up AWS credentials + * from ~/.aws/credentials + */ + auto yaml = fmt::format(R"foo( + kms_hosts: + kms_test: + master_key: {0} + aws_region: {1} + aws_profile: {2} + )foo" + , kms_key_alias, kms_aws_region, kms_aws_profile + ); + + try { + co_await test_provider("'key_provider': 'KmsKeyProviderFactory', 'kms_host': 'kms_test', 'cipher_algorithm':'', 'secret_key_strength': 128", tmp, yaml); + BOOST_FAIL("should not reach"); + } catch (exceptions::configuration_exception&) { + // ok + } + }); +} + +static auto make_commitlog_config(const test_provider_args& args, const std::unordered_map& scopts) { + auto ext = std::make_shared(); + auto cfg = seastar::make_shared(ext); + cfg->data_file_directories({args.tmp.path().string()}); + cfg->commitlog_sync("batch"); // just to make sure files are written + + // Currently the test fails with consistent_cluster_management = true. See #2995. 
+ cfg->consistent_cluster_management(false); + + boost::program_options::options_description desc; + boost::program_options::options_description_easy_init init(&desc); + configurable::append_all(*cfg, init); + + std::ostringstream ss; + ss << "system_info_encryption:" << std::endl + << " enabled: true" << std::endl + << " cipher_algorithm: AES/CBC/PKCS5Padding" << std::endl + << " secret_key_strength: 128" << std::endl + ; + + for (auto& [k, v] : scopts) { + ss << " " << k << ": " << v << std::endl; + } + auto str = ss.str(); + cfg->read_from_yaml(str); + + if (!args.extra_yaml.empty()) { + cfg->read_from_yaml(args.extra_yaml); + } + + return std::make_tuple(cfg, ext); +} + +static future<> test_encrypted_commitlog(const test_provider_args& args, std::unordered_map scopts = {}) { + fs::path clback = args.tmp.path() / "commitlog_back"; + + std::string pk = "apa"; + std::string v = "ko"; + + + { + auto [cfg, ext] = make_commitlog_config(args, scopts); + + cql_test_config cqlcfg(cfg); + + if (args.timeout) { + cqlcfg.query_timeout = args.timeout; + } + + co_await do_with_cql_env_thread([&] (cql_test_env& env) { + do_create_and_insert(env, args, pk, v); + fs::copy(fs::path(cfg->commitlog_directory()), clback); + }, cqlcfg, {}, cql_test_init_configurables{ *ext }); + + } + + { + auto [cfg, ext] = make_commitlog_config(args, scopts); + + cql_test_config cqlcfg(cfg); + + if (args.timeout) { + cqlcfg.query_timeout = args.timeout; + } + + co_await do_with_cql_env_thread([&] (cql_test_env& env) { + // Fake commitlog replay using the files copied. + std::vector paths; + for (auto const& dir_entry : fs::directory_iterator{clback}) { + auto p = dir_entry.path(); + try { + db::commitlog::descriptor d(p); + paths.emplace_back(std::move(p)); + } catch (...) 
{ + } + } + + BOOST_REQUIRE(!paths.empty()); + + auto rp = db::commitlog_replayer::create_replayer(env.db(), env.get_system_keyspace()).get(); + rp.recover(paths, db::commitlog::descriptor::FILENAME_PREFIX).get(); + + // not really checking anything, but make sure we did not break anything. + for (auto i = 0u; i < args.n_tables; ++i) { + require_rows(env, fmt::format("select * from ks.t{}", i), {{utf8_type->decompose(pk), utf8_type->decompose(v)}}); + } + }, cqlcfg, {}, cql_test_init_configurables{ *ext }); + } +} + +static future<> test_encrypted_commitlog(const tmpdir& tmp, std::unordered_map scopts = {}, const std::string& extra_yaml = {}, unsigned n_tables = 1) { + test_provider_args args{ + .tmp = tmp, + .extra_yaml = extra_yaml, + .n_tables = n_tables, + }; + + co_await test_encrypted_commitlog(args, std::move(scopts)); +} + +SEASTAR_TEST_CASE(test_commitlog_kms_encryption_with_slow_key_resolve, *check_run_test_decorator("ENABLE_KMS_TEST")) { + co_await kms_test_helper([](const tmpdir& tmp, std::string_view kms_key_alias, std::string_view kms_aws_region, std::string_view kms_aws_profile) -> future<> { + /** + * Note: NOT including any auth stuff here. 
The provider will pick up AWS credentials + * from ~/.aws/credentials + */ + auto yaml = fmt::format(R"foo( + kms_hosts: + kms_test: + master_key: {0} + aws_region: {1} + aws_profile: {2} + )foo" + , kms_key_alias, kms_aws_region, kms_aws_profile + ); + + co_await test_encrypted_commitlog(tmp, { { "key_provider", "KmsKeyProviderFactory" }, { "kms_host", "kms_test" } }, yaml); + }); +} + +#ifdef HAVE_KMIP + +SEASTAR_TEST_CASE(test_commitlog_kmip_encryption_with_slow_key_resolve, *check_run_test_decorator("ENABLE_KMIP_TEST")) { + co_await kmip_test_helper([](const kmip_test_info& info, const tmpdir& tmp) -> future<> { + auto yaml = fmt::format(R"foo( + kmip_hosts: + kmip_test: + hosts: {0} + certificate: {1} + keyfile: {2} + truststore: {3} + priority_string: {4} + )foo" + , info.host, info.cert, info.key, info.ca, info.prio + ); + co_await test_encrypted_commitlog(tmp, { { "key_provider", "KmipKeyProviderFactory" }, { "kmip_host", "kmip_test" } }, yaml); + }); +} + +#endif // HAVE_KMIP + +SEASTAR_TEST_CASE(test_user_info_encryption_dont_allow_per_table_encryption) { + tmpdir tmp; + auto keyfile = tmp.path() / "secret_key"; + + auto yaml = fmt::format(R"foo( + allow_per_table_encryption: false + user_info_encryption: + enabled: True + key_provider: LocalFileSystemKeyProviderFactory + secret_key_file: {} + cipher_algorithm: AES/CBC/PKCS5Padding + secret_key_strength: 128 + )foo" + , keyfile.string()); + + try { + co_await test_provider( + fmt::format("'key_provider': 'LocalFileSystemKeyProviderFactory', 'secret_key_file': '{}', 'cipher_algorithm':'AES/CBC/PKCS5Padding', 'secret_key_strength': 128", keyfile.string()) + , tmp, yaml, 4, 1 + ); + BOOST_FAIL("Should not reach"); + } catch (std::invalid_argument&) { + // Ok. + } +} + +/* + Simple test of GCP cloudkms provider. Uses scylladb GCP project "scylla-kms-test" and keys therein. + + Note: the above text blobs are service account credentials, including private keys. 
+ _Never_ give any real privileges to these accounts, as we are obviously exposing them here. + + User1 is assumed to have permissions to encrypt/decrypt using the given key + User2 is assumed to _not_ have permissions to encrypt/decrypt using the given key, but permission to + impersonate User1. + + This test is parameterized with env vars: + * ENABLE_GCP_TEST - set to non-zero (1/true) to run + * GCP_USER_1_CREDENTIALS - set to credentials file for user1 + * GCP_USER_2_CREDENTIALS - set to credentials file for user2 + * GCP_KEY_NAME - set to keyring/keyname to override. + * GCP_PROJECT_ID - set to test project + * GCP_LOCATION - set to test location +*/ + +struct gcp_test_env { + std::string key_name; + std::string location; + std::string project_id; + std::string user_1_creds; + std::string user_2_creds; +}; + +static future<> gcp_test_helper(std::function(const tmpdir&, const gcp_test_env&)> f) { + gcp_test_env env { + .key_name = get_var_or_default("GCP_KEY_NAME", "test_ring/test_key"), + .location = get_var_or_default("GCP_LOCATION", "global"), + .project_id = get_var_or_default("GCP_PROJECT_ID", "scylla-kms-test"), + .user_1_creds = get_var_or_default("GCP_USER_1_CREDENTIALS", ""), + .user_2_creds = get_var_or_default("GCP_USER_2_CREDENTIALS", ""), + }; + + tmpdir tmp; + + if (env.user_1_creds.empty()) { + BOOST_ERROR("No 'GCP_USER_1_CREDENTIALS' provided"); + } + if (env.user_2_creds.empty()) { + BOOST_ERROR("No 'GCP_USER_2_CREDENTIALS' provided"); + } + + co_await f(tmp, env); +} + +SEASTAR_TEST_CASE(test_gcp_provider, *check_run_test_decorator("ENABLE_GCP_TEST")) { + co_await gcp_test_helper([](const tmpdir& tmp, const gcp_test_env& gcp) -> future<> { + auto yaml = fmt::format(R"foo( + gcp_hosts: + gcp_test: + master_key: {0} + gcp_project_id: {1} + gcp_location: {2} + gcp_credentials_file: {3} + )foo" + , gcp.key_name, gcp.project_id, gcp.location, gcp.user_1_creds + ); + + co_await test_provider("'key_provider': 'GcpKeyProviderFactory', 'gcp_host': 'gcp_test', 
'cipher_algorithm':'AES/CBC/PKCS5Padding', 'secret_key_strength': 128", tmp, yaml); + }); +} + +SEASTAR_TEST_CASE(test_gcp_provider_with_master_key_in_cf, *check_run_test_decorator("ENABLE_GCP_TEST")) { + co_await gcp_test_helper([](const tmpdir& tmp, const gcp_test_env& gcp) -> future<> { + auto yaml = fmt::format(R"foo( + gcp_hosts: + gcp_test: + gcp_project_id: {1} + gcp_location: {2} + gcp_credentials_file: {3} + )foo" + , gcp.key_name, gcp.project_id, gcp.location, gcp.user_1_creds + ); + + // should fail + BOOST_REQUIRE_THROW( + co_await test_provider("'key_provider': 'GcpKeyProviderFactory', 'gcp_host': 'gcp_test', 'cipher_algorithm':'AES/CBC/PKCS5Padding', 'secret_key_strength': 128", tmp, yaml) + , std::exception + ); + + // should be ok + co_await test_provider(fmt::format("'key_provider': 'GcpKeyProviderFactory', 'gcp_host': 'gcp_test', 'master_key': '{}', 'cipher_algorithm':'AES/CBC/PKCS5Padding', 'secret_key_strength': 128", gcp.key_name) + , tmp, yaml + ); + }); +} + +/** + * Verify that trying to access key materials with a user w/o permissions to encrypt/decrypt using cloudkms + * fails. +*/ +SEASTAR_TEST_CASE(test_gcp_provider_with_invalid_user, *check_run_test_decorator("ENABLE_GCP_TEST")) { + co_await gcp_test_helper([](const tmpdir& tmp, const gcp_test_env& gcp) -> future<> { + auto yaml = fmt::format(R"foo( + gcp_hosts: + gcp_test: + master_key: {0} + gcp_project_id: {1} + gcp_location: {2} + gcp_credentials_file: {3} + )foo" + , gcp.key_name, gcp.project_id, gcp.location, gcp.user_2_creds + ); + + // should fail + BOOST_REQUIRE_THROW( + co_await test_provider("'key_provider': 'GcpKeyProviderFactory', 'gcp_host': 'gcp_test', 'cipher_algorithm':'AES/CBC/PKCS5Padding', 'secret_key_strength': 128", tmp, yaml) + , std::exception + ); + }); +} + +/** + * Verify that impersonation of an allowed service account works. User1 can encrypt, but we run + * as User2. However, impersonating user1 will allow us do it ourselves. 
+*/ +SEASTAR_TEST_CASE(test_gcp_provider_with_impersonated_user, *check_run_test_decorator("ENABLE_GCP_TEST")) { + co_await gcp_test_helper([](const tmpdir& tmp, const gcp_test_env& gcp) -> future<> { + auto buf = co_await read_text_file_fully(sstring(gcp.user_1_creds)); + auto json = rjson::parse(std::string_view(buf.begin(), buf.end())); + auto user1 = rjson::get(json, "client_email"); + + auto yaml = fmt::format(R"foo( + gcp_hosts: + gcp_test: + master_key: {0} + gcp_project_id: {1} + gcp_location: {2} + gcp_credentials_file: {3} + gcp_impersonate_service_account: {4} + )foo" + , gcp.key_name, gcp.project_id, gcp.location, gcp.user_2_creds, user1 + ); + + co_await test_provider("'key_provider': 'GcpKeyProviderFactory', 'gcp_host': 'gcp_test', 'cipher_algorithm':'AES/CBC/PKCS5Padding', 'secret_key_strength': 128", tmp, yaml); + }); +} + +std::string make_aws_host(std::string_view aws_region, std::string_view service); + +using scopts_map = std::unordered_map; + +static future<> test_broken_encrypted_commitlog(const test_provider_args& args, scopts_map scopts = {}) { + std::string pk = "apa"; + std::string v = "ko"; + + { + auto [cfg, ext] = make_commitlog_config(args, scopts); + + cql_test_config cqlcfg(cfg); + + if (args.timeout) { + cqlcfg.query_timeout = args.timeout; + } + + co_await do_with_cql_env_thread([&] (cql_test_env& env) { + do_create_and_insert(env, args, pk, v); + }, cqlcfg, {}, cql_test_init_configurables{ *ext }); + } +} + +/** + * Tests that a network error in key resolution (in commitlog in this case) results in a non-fatal, non-isolating + * exception, i.e. an eventual write error. + */ +static future<> network_error_test_helper(const tmpdir& tmp, const std::string& host, std::function(const fake_proxy&)> make_opts) { + fake_proxy proxy(host); + + auto [scopts, yaml] = make_opts(proxy); + + test_provider_args args{ + .tmp = tmp, + .extra_yaml = yaml, + .n_tables = 10, + .before_create_table = [&](auto& env) { + // turn off proxy. 
all key resolution after this should fail + proxy.enable(false); + // wait for key cache expiry. + seastar::sleep(10ms).get(); + // ensure commitlog will create a new segment on write -> eventual write failure + env.db().invoke_on_all([](replica::database& db) { + return db.commitlog()->force_new_active_segment(); + }).get(); + }, + .on_insert_exception = [&](auto&&) { + // once we get the exception we have to enable key resolution again, + // otherwise we can't shut down cql test env. + proxy.enable(true); + }, + .timeout = timeout_config{ + // set really low write timeouts so we get a failure (timeout) + // when we fail to write to commitlog + 100ms, 100ms, 100ms, 100ms, 100ms, 100ms, 100ms + }, + }; + + BOOST_REQUIRE_THROW( + co_await test_broken_encrypted_commitlog(args, scopts); + , std::exception + ); + + co_await proxy.stop(); +} + +SEASTAR_TEST_CASE(test_kms_network_error, *check_run_test_decorator("ENABLE_KMS_TEST")) { + co_await kms_test_helper([](const tmpdir& tmp, std::string_view kms_key_alias, std::string_view kms_aws_region, std::string_view kms_aws_profile) -> future<> { + auto host = make_aws_host(kms_aws_region, "kms"); + + co_await network_error_test_helper(tmp, host, [&](const auto& proxy) { + auto yaml = fmt::format(R"foo( + kms_hosts: + kms_test: + master_key: {0} + aws_region: {1} + aws_profile: {2} + endpoint: https://{3} + key_cache_expiry: 1ms + )foo" + , kms_key_alias, kms_aws_region, kms_aws_profile, proxy.address() + ); + return std::make_tuple(scopts_map({ { "key_provider", "KmsKeyProviderFactory" }, { "kms_host", "kms_test" } }), yaml); + }); + }); +} + +#ifdef HAVE_KMIP + +SEASTAR_TEST_CASE(test_kmip_network_error, *check_run_test_decorator("ENABLE_KMIP_TEST")) { + co_await kmip_test_helper([](const kmip_test_info& info, const tmpdir& tmp) -> future<> { + co_await network_error_test_helper(tmp, info.host, [&](const auto& proxy) { + auto yaml = fmt::format(R"foo( + kmip_hosts: + kmip_test: + hosts: {0} + certificate: {1} + keyfile: 
{2} + truststore: {3} + priority_string: {4} + key_cache_expiry: 1ms + )foo" + , proxy.address(), info.cert, info.key, info.ca, info.prio + ); + return std::make_tuple(scopts_map({ { "key_provider", "KmipKeyProviderFactory" }, { "kmip_host", "kmip_test" } }), yaml); + }); + }); +} + +#endif // HAVE_KMIP + +// Note: cannot do the above test for gcp, because we can't use false endpoints there. Could mess with address resolution, +// but there is no infrastructure for that atm. diff --git a/test/boost/kmip_wrapper.py b/test/boost/kmip_wrapper.py new file mode 100644 index 0000000000..b6deee255e --- /dev/null +++ b/test/boost/kmip_wrapper.py @@ -0,0 +1,95 @@ +import ssl +import sys + +from kmip.services import auth +from kmip.services.server.server import build_argument_parser +from kmip.services.server.server import KmipServer + +# Helper wrapper for running pykmip in scylla testing. Needed because TLS options +# (hardcoded) in pykmip are obsolete and will not work with connecting using gnutls +# of any modern variety. + +class TLS13AuthenticationSuite(auth.TLS12AuthenticationSuite): + """ + An authentication suite used to establish secure network connections. + + Supports TLS 1.3. More importantly, works with gnutls- + """ + def __init__(self, cipher_suites=None): + """ + Create a TLS13AuthenticationSuite object. + + Args: + cipher_suites (list): A list of strings representing the names of + cipher suites to use. Overrides the default set of cipher + suites. Optional, defaults to None. + """ + super().__init__(cipher_suites) + self._protocol = ssl.PROTOCOL_TLS_SERVER + +def main(): + # Build argument parser and parse command-line arguments.
+ parser = build_argument_parser() + opts, args = parser.parse_args(sys.argv[1:]) + + kwargs = {} + if opts.hostname: + kwargs['hostname'] = opts.hostname + if opts.port: + kwargs['port'] = opts.port + if opts.certificate_path: + kwargs['certificate_path'] = opts.certificate_path + if opts.key_path: + kwargs['key_path'] = opts.key_path + if opts.ca_path: + kwargs['ca_path'] = opts.ca_path + if opts.auth_suite: + kwargs['auth_suite'] = opts.auth_suite + if opts.config_path: + kwargs['config_path'] = opts.config_path + if opts.log_path: + kwargs['log_path'] = opts.log_path + if opts.policy_path: + kwargs['policy_path'] = opts.policy_path + if opts.ignore_tls_client_auth: + kwargs['enable_tls_client_auth'] = False + if opts.logging_level: + kwargs['logging_level'] = opts.logging_level + if opts.database_path: + kwargs['database_path'] = opts.database_path + + kwargs['live_policies'] = True + + # Create and start the server. + s = KmipServer(**kwargs) + # Fix TLS. Try to get this into mainline project, but that will take time... 
+ s.auth_suite = TLS13AuthenticationSuite(s.auth_suite.ciphers) + # force port to zero -> select dynamically + s.config.settings['port'] = 0 + + def fake_wrap_ssl(sock, keyfile=None, certfile=None, + server_side=False, cert_reqs=ssl.CERT_NONE, + ssl_version=ssl.PROTOCOL_TLS, ca_certs=None, + do_handshake_on_connect=True, + suppress_ragged_eofs=True, + ciphers=None): + ctxt = ssl.SSLContext(protocol = ssl_version) + ctxt.load_cert_chain(certfile=certfile, keyfile=keyfile) + ctxt.verify_mode = cert_reqs + ctxt.load_verify_locations(cafile=ca_certs) + ctxt.set_ciphers(ciphers) + return ctxt.wrap_socket(sock, server_side=server_side + , do_handshake_on_connect=do_handshake_on_connect + , suppress_ragged_eofs=suppress_ragged_eofs) + + ssl.wrap_socket = fake_wrap_ssl + + print("Starting...") + + with s: + print("Listening on {}".format(s._socket.getsockname()[1])) + sys.stdout.flush() + s.serve() + +if __name__ == '__main__': + main() diff --git a/test/boost/symmetric_key_test.cc b/test/boost/symmetric_key_test.cc new file mode 100644 index 0000000000..891f9b00e7 --- /dev/null +++ b/test/boost/symmetric_key_test.cc @@ -0,0 +1,221 @@ +/* + * Copyright (C) 2016 ScyllaDB + */ + + + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include "ent/encryption/encryption.hh" +#include "ent/encryption/symmetric_key.hh" + +using namespace encryption; + +static temporary_buffer generate_random(size_t n, size_t align) { + std::random_device r; + std::default_random_engine e1(r()); + std::uniform_int_distribution dist('0', 'z'); + + auto tmp = temporary_buffer::aligned(align, align_up(n, align)); + std::generate(tmp.get_write(), tmp.get_write() + tmp.size(), std::bind(dist, std::ref(e1))); + return tmp; +} + +static void test_random_data(const sstring& desc, unsigned int bits) { + auto buf = generate_random(128, 8); + auto n = buf.size(); + + // first, verify padded. 
+ { + key_info info{desc, bits}; + auto k = ::make_shared(info); + + bytes b(bytes::initialized_later(), k->iv_len()); + k->generate_iv(b.data(), k->iv_len()); + + temporary_buffer tmp(n + k->block_size()); + k->encrypt(buf.get(), buf.size(), tmp.get_write(), tmp.size(), b.data()); + + auto bytes = k->key(); + auto k2 = ::make_shared(info, bytes); + + temporary_buffer tmp2(n + k->block_size()); + k2->decrypt(tmp.get(), tmp.size(), tmp2.get_write(), tmp2.size(), b.data()); + + BOOST_REQUIRE_EQUAL_COLLECTIONS(tmp2.get(), tmp2.get() + n, buf.get(), buf.get() + n); + } + + // unpadded + { + auto desc2 = desc; + desc2.resize(desc.find_last_of('/')); + key_info info{desc2, bits}; + auto k = ::make_shared(info); + + bytes b(bytes::initialized_later(), k->iv_len()); + k->generate_iv(b.data(), k->iv_len()); + + temporary_buffer tmp(n); + k->encrypt_unpadded(buf.get(), buf.size(), tmp.get_write(), b.data()); + + auto bytes = k->key(); + auto k2 = ::make_shared(info, bytes); + + temporary_buffer tmp2(buf.size()); + k2->decrypt_unpadded(tmp.get(), tmp.size(), tmp2.get_write(), b.data()); + + BOOST_REQUIRE_EQUAL_COLLECTIONS(tmp2.get(), tmp2.get() + n, buf.get(), buf.get() + n); + } +} + + +SEASTAR_TEST_CASE(test_cipher_types) { + static const std::unordered_map> ciphers = { + { "AES/CBC/PKCS5Padding", { 128, 192, 256 } }, + { "AES/ECB/PKCS5Padding", { 128, 192, 256 } }, + { "DES/CBC/PKCS5Padding", { 56 } }, + { "DESede/CBC/PKCS5Padding", { 112, 168 } }, + { "Blowfish/CBC/PKCS5Padding", { 32, 64, 448 } }, + { "RC2/CBC/PKCS5Padding", { 40, 41, 64, 67, 120, 128 } }, + }; + + for (auto & p : ciphers) { + for (auto s : p.second) { + test_random_data(p.first, s); + } + } + return make_ready_future<>(); +} + +// OpenSSL only supports one form of padding. We used to just allow +// non-empty string -> pkcs5/pkcs7. We now instead verify this to be +// within the "sane" limits, i.e. pkcs, pkcs5 or pkcs7.
+// Check a non-exhaustive set of invalid padding options and verify +// we get an exception as expected. +// See below for the test of valid strings. +SEASTAR_TEST_CASE(test_invalid_padding_options) { + static const std::unordered_map ciphers = { + { "AES/CBC/PKCSU", 128 }, + { "AES/ECB/Gris", 128 }, + { "DES/CBC/PKCS12Padding", 56 }, + { "DES/CBC/KorvPadding", 56 }, + { "DES/CBC/MUPadding", 56 }, + }; + for (auto& p : ciphers) { + try { + key_info info{p.first, p.second}; + symmetric_key k(info); + BOOST_ERROR("should not reach"); + } catch (...) { + // ok. + } + } + return make_ready_future<>(); +} + +SEASTAR_TEST_CASE(test_valid_padding_options) { + static const std::unordered_map ciphers = { + { "AES/CBC/PKCS", 128 }, + { "AES/CBC/PKCSPadding", 128 }, + { "AES/ECB/PKCS7Padding", 128 }, + { "AES/ECB/PKCS7", 128 }, + { "DES/CBC/PKCS5Padding", 56 }, + { "DES/CBC/PKCS5", 56 }, + { "AES/CBC/NoPadding", 128 }, + { "AES/ECB/NoPadding", 128 }, + { "DES/CBC/NoPadding", 56 }, + { "AES/CBC/No", 128 }, + { "AES/ECB/No", 128 }, + { "DES/CBC/No", 56 }, + }; + for (auto& p : ciphers) { + key_info info{p.first, p.second}; + symmetric_key k(info); + + auto errors = k.validate_exact_info_result(); + BOOST_REQUIRE_EQUAL(errors, std::string{}); + } + return make_ready_future<>(); +} + +SEASTAR_TEST_CASE(test_warn_adjusted_options) { + static const std::unordered_map> ciphers = { + // blowfish only supports CBC and will become CBC whatever you say + { "Blowfish/CFB/PKCS5Padding", { 32, 64, 448 } }, + { "Blowfish/XTS/PKCS5Padding", { 32, 64, 448 } }, + }; + for (auto& p : ciphers) { + for (auto s : p.second) { + auto alg = p.first; + key_info info{alg, s}; + symmetric_key k(info); + + auto errors = k.validate_exact_info_result(); + BOOST_REQUIRE_NE(errors, std::string{}); + } + } + return make_ready_future<>(); +} + +/** + * Verifies that when using defaults in a key, the key info returned is still + * equal to the input one (by bit and textually) + */
+SEASTAR_TEST_CASE(test_cipher_defaults) { + static const std::unordered_map> ciphers = { + { "AES/CBC/PKCS5Padding", { 128, 192, 256 } }, + { "AES/ECB/PKCS5Padding", { 128, 192, 256 } }, + { "DES/CBC/PKCS5Padding", { 56 } }, + { "DESede/CBC/PKCS5Padding", { 112, 168 } }, + { "Blowfish/CBC/PKCS5Padding", { 32, 64, 448 } }, + { "RC2/CBC/PKCS5Padding", { 40, 41, 64, 67, 120, 128 } }, + }; + + for (auto& p : ciphers) { + for (auto s : p.second) { + auto alg = p.first; + for (;;) { + key_info info{alg, s}; + symmetric_key k(info); + + BOOST_REQUIRE_EQUAL(info, k.info()); + BOOST_REQUIRE_EQUAL(boost::lexical_cast(info), boost::lexical_cast(k.info())); + + auto i = alg.find_last_of('/'); + if (i != sstring::npos) { + alg.resize(i); + continue; + } + // also verify that whatever we say (or don't say), we get a blockmode + // -> iv len > 0 + BOOST_CHECK_GT(k.iv_len(), 0); + + auto errors = k.validate_exact_info_result(); + if (i != sstring::npos) { + BOOST_REQUIRE_EQUAL(errors, std::string{}); + } else { + // Again, if we cut out block mode (i.e. only cipher name left) + // we will still force a block mode. Thus this should warn. 
+ BOOST_REQUIRE_NE(errors, std::string{}); + } + + break; + } + } + } + return make_ready_future<>(); +} diff --git a/test/resource/certs/cacert.pem b/test/resource/certs/cacert.pem new file mode 100644 index 0000000000..9e6f920887 --- /dev/null +++ b/test/resource/certs/cacert.pem @@ -0,0 +1,82 @@ +Certificate: + Data: + Version: 3 (0x2) + Serial Number: 1572441855 (0x5db98eff) + Signature Algorithm: sha256WithRSAEncryption + Issuer: C=US, O=HyTrust Inc., CN=HyTrust KeyControl Certificate Authority + Validity + Not Before: Jun 1 00:00:00 2011 GMT + Not After : Dec 31 23:59:59 2049 GMT + Subject: C=US, O=HyTrust Inc., CN=HyTrust KeyControl Certificate Authority + Subject Public Key Info: + Public Key Algorithm: rsaEncryption + Public-Key: (2048 bit) + Modulus: + 00:ce:97:5b:a1:30:b1:26:00:31:8a:aa:bd:a2:0c: + 19:1b:24:83:05:20:7a:63:ac:6c:ab:0f:80:24:47: + 2b:03:94:86:25:a4:10:51:fb:b6:e8:5e:33:db:e2: + 91:98:f3:2a:b1:78:ef:83:a5:f2:e1:79:36:44:06: + 3b:01:cd:1a:47:c1:74:89:2b:d3:3c:8d:f1:fb:c2: + 90:88:fe:18:d9:81:7b:2a:e4:67:61:87:17:23:38: + 29:5a:66:eb:d0:01:a6:da:c3:a5:7f:f5:d1:9e:f5: + 04:dc:1c:4a:62:2e:e2:5b:5f:22:56:61:fe:ba:66: + c2:ad:a9:51:43:9c:28:e4:8f:fa:05:12:fa:0d:a5: + 35:e3:2f:99:e8:a4:98:09:f9:e7:c8:e0:6c:a9:bd: + e9:59:b0:83:07:09:10:10:5b:aa:b5:72:3b:40:e6: + 38:f4:e3:f8:9a:55:8e:5e:ae:5c:3e:c3:08:34:13: + 9c:19:fc:65:07:ac:3f:98:ae:a0:d2:f8:1d:4c:bf: + cb:93:a7:e4:d6:37:84:0a:0c:3a:1f:86:f2:35:0c: + 2e:66:b0:9b:43:8e:bc:e4:b9:b0:bf:33:67:c2:97: + df:47:6c:65:cc:55:38:70:a9:39:27:60:e1:74:14: + 34:e9:a6:a0:b6:11:de:61:94:9a:6b:83:f1:84:8d: + 27:9f + Exponent: 65537 (0x10001) + X509v3 extensions: + X509v3 Subject Key Identifier: + 66:AD:DE:45:50:5D:54:68:1F:B0:56:00:65:FB:D1:F2:97:57:EF:6E + X509v3 Authority Key Identifier: + keyid:66:AD:DE:45:50:5D:54:68:1F:B0:56:00:65:FB:D1:F2:97:57:EF:6E + DirName:/C=US/O=HyTrust Inc./CN=HyTrust KeyControl Certificate Authority + serial:5D:B9:8E:FF + + X509v3 Basic Constraints: + CA:TRUE + Signature 
Algorithm: sha256WithRSAEncryption + ab:a9:19:07:07:f0:b9:74:e1:a8:49:db:bd:c1:21:fc:38:38: + 79:dd:2f:3e:59:be:96:79:1d:18:d4:5e:1f:31:47:fb:bd:d4: + 96:d7:be:87:7e:0d:e4:9e:7c:7a:36:c9:9a:5f:e5:63:38:33: + 68:cf:b1:92:d0:b8:81:1a:6f:23:27:d8:71:50:41:63:ce:5f: + 20:69:72:4c:cd:07:ab:35:58:fe:da:d5:26:1e:44:f4:97:e3: + ff:6c:80:db:31:17:13:52:6c:fb:68:34:71:11:af:b6:84:3b: + b1:5c:d3:67:25:e1:5a:31:a6:68:83:ec:c4:3e:e8:f6:08:60: + d0:2a:26:9e:fe:07:08:57:6e:9a:dd:6e:ba:a2:10:ab:2e:fd: + cd:52:a3:2f:e0:59:6d:33:39:05:ed:fd:ed:ac:b0:e7:98:5e: + f2:51:00:12:df:4c:8a:0c:e2:11:df:43:65:d0:f3:a1:85:59: + 6d:d4:bb:a0:97:f7:c7:40:63:b3:24:cf:ec:5e:9e:42:1b:cc: + e2:36:43:d1:83:79:11:48:3b:3d:db:c3:2a:03:4f:cd:53:2d: + 07:8d:0e:28:4a:a9:58:e0:27:c3:47:f6:ab:00:cd:fc:31:ed: + 99:b9:57:2e:2d:5a:79:5f:48:14:39:8b:0e:da:1c:a0:d6:4e: + d4:81:83:49 +-----BEGIN CERTIFICATE----- +MIID4jCCAsqgAwIBAgIEXbmO/zANBgkqhkiG9w0BAQsFADBXMQswCQYDVQQGEwJV +UzEVMBMGA1UEChMMSHlUcnVzdCBJbmMuMTEwLwYDVQQDEyhIeVRydXN0IEtleUNv +bnRyb2wgQ2VydGlmaWNhdGUgQXV0aG9yaXR5MB4XDTExMDYwMTAwMDAwMFoXDTQ5 +MTIzMTIzNTk1OVowVzELMAkGA1UEBhMCVVMxFTATBgNVBAoTDEh5VHJ1c3QgSW5j +LjExMC8GA1UEAxMoSHlUcnVzdCBLZXlDb250cm9sIENlcnRpZmljYXRlIEF1dGhv +cml0eTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAM6XW6EwsSYAMYqq +vaIMGRskgwUgemOsbKsPgCRHKwOUhiWkEFH7tuheM9vikZjzKrF474Ol8uF5NkQG +OwHNGkfBdIkr0zyN8fvCkIj+GNmBeyrkZ2GHFyM4KVpm69ABptrDpX/10Z71BNwc +SmIu4ltfIlZh/rpmwq2pUUOcKOSP+gUS+g2lNeMvmeikmAn558jgbKm96VmwgwcJ +EBBbqrVyO0DmOPTj+JpVjl6uXD7DCDQTnBn8ZQesP5iuoNL4HUy/y5On5NY3hAoM +Oh+G8jUMLmawm0OOvOS5sL8zZ8KX30dsZcxVOHCpOSdg4XQUNOmmoLYR3mGUmmuD +8YSNJ58CAwEAAaOBtTCBsjAdBgNVHQ4EFgQUZq3eRVBdVGgfsFYAZfvR8pdX724w +gYIGA1UdIwR7MHmAFGat3kVQXVRoH7BWAGX70fKXV+9uoVukWTBXMQswCQYDVQQG +EwJVUzEVMBMGA1UEChMMSHlUcnVzdCBJbmMuMTEwLwYDVQQDEyhIeVRydXN0IEtl +eUNvbnRyb2wgQ2VydGlmaWNhdGUgQXV0aG9yaXR5ggRduY7/MAwGA1UdEwQFMAMB +Af8wDQYJKoZIhvcNAQELBQADggEBAKupGQcH8Ll04ahJ273BIfw4OHndLz5ZvpZ5 +HRjUXh8xR/u91JbXvod+DeSefHo2yZpf5WM4M2jPsZLQuIEabyMn2HFQQWPOXyBp 
+ckzNB6s1WP7a1SYeRPSX4/9sgNsxFxNSbPtoNHERr7aEO7Fc02cl4VoxpmiD7MQ+ +6PYIYNAqJp7+BwhXbprdbrqiEKsu/c1Soy/gWW0zOQXt/e2ssOeYXvJRABLfTIoM +4hHfQ2XQ86GFWW3Uu6CX98dAY7Mkz+xenkIbzOI2Q9GDeRFIOz3bwyoDT81TLQeN +DihKqVjgJ8NH9qsAzfwx7Zm5Vy4tWnlfSBQ5iw7aHKDWTtSBg0k= +-----END CERTIFICATE----- diff --git a/test/resource/certs/scylla.pem b/test/resource/certs/scylla.pem new file mode 100644 index 0000000000..df1825ddcb --- /dev/null +++ b/test/resource/certs/scylla.pem @@ -0,0 +1,57 @@ +Bag Attributes + localKeyID: 12 1A 34 A0 C8 58 91 A4 E3 B6 7F 16 F0 31 05 81 AA 27 82 05 +subject=/C=US/O=HyTrust Inc./CN=scylla +issuer=/C=US/O=HyTrust Inc./CN=HyTrust KeyControl Certificate Authority +-----BEGIN CERTIFICATE----- +MIIDlTCCAn2gAwIBAgIFAPm5jwcwDQYJKoZIhvcNAQELBQAwVzELMAkGA1UEBhMC +VVMxFTATBgNVBAoTDEh5VHJ1c3QgSW5jLjExMC8GA1UEAxMoSHlUcnVzdCBLZXlD +b250cm9sIENlcnRpZmljYXRlIEF1dGhvcml0eTAgFw0yMTAxMDQxMjMwMTlaGA8y +MDk5MDEwMTEyMzAxOVowNTELMAkGA1UEBhMCVVMxFTATBgNVBAoTDEh5VHJ1c3Qg +SW5jLjEPMA0GA1UEAxMGc2N5bGxhMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIB +CgKCAQEAosGginu/B4eQKyB26ar6rqg8FTMoCTf56iBwQnrOt17YN++IitlEhFyB +X0QdA0z97jLIBckTCbieTg+CdtWDeCnM8IAjoN55C0Z2zBKH6cuPTTnDu0WaZY/8 +IafGwxcWllYpgQ3AiJFNK66QRIiiX+ejrS3+Co0PYWPzmSczWoxBgFhnXnTPE4ki +MLvZ+zY1iXt83NbQIw8yMUcL+RYK4RlACf3bPztOss98LmyntIkNkZL8GblLoZbc +AZc6udnDe1GuP+NlMO+1jPmyND/xz/kK2hkU4+yotBWVxM1lwpANnElsAHaRvthP +kGsjoVvZgEgg6MQX+iaDngjmiLtUlwIDAQABo4GHMIGEMAkGA1UdEwQCMAAwLAYJ +YIZIAYb4QgENBB8WHU9wZW5TU0wgR2VuZXJhdGVkIENlcnRpZmljYXRlMB0GA1Ud +DgQWBBTAgszBvUAggtuLWf2XucTzjLQeqDAfBgNVHSMEGDAWgBRmrd5FUF1UaB+w +VgBl+9Hyl1fvbjAJBgNVHREEAjAAMA0GCSqGSIb3DQEBCwUAA4IBAQBJA9q7q5XL +H7O/EiJYagWaqrO+AGVrg3DtkP0NTaumM/zYoOq9klZNwvsGZ88+L0hBQ8fj8O/l +rTobdM7eT5p4JiwfN+MB8zXuZ+XjL+kIpqFqPJdVDtBLPGINHP7itrUo9Uk/9XcW +JqPAwufEMN5X7iwN80aUVj6/iUiQ1yqXKQmERdoIPqiyHOEBwobwIAQ1bQKtEiBT +ZE0hdDdI+ZCrtJYOES4kpR4WI3997doVhEusNGBETCWMm2HoJ8xsk7fgVgnpYlat +95tGZ/ZR1Zaa+fXSm9adxJDCviG77pTZBa7nbzPoyG3wm76cTGSeHso8rvGVHsTh +AraNJqMQkZT5 +-----END 
CERTIFICATE----- +Bag Attributes + localKeyID: 12 1A 34 A0 C8 58 91 A4 E3 B6 7F 16 F0 31 05 81 AA 27 82 05 +Key Attributes: +-----BEGIN PRIVATE KEY----- +MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCiwaCKe78Hh5Ar +IHbpqvquqDwVMygJN/nqIHBCes63Xtg374iK2USEXIFfRB0DTP3uMsgFyRMJuJ5O +D4J21YN4KczwgCOg3nkLRnbMEofpy49NOcO7RZplj/whp8bDFxaWVimBDcCIkU0r +rpBEiKJf56OtLf4KjQ9hY/OZJzNajEGAWGdedM8TiSIwu9n7NjWJe3zc1tAjDzIx +Rwv5FgrhGUAJ/ds/O06yz3wubKe0iQ2RkvwZuUuhltwBlzq52cN7Ua4/42Uw77WM ++bI0P/HP+QraGRTj7Ki0FZXEzWXCkA2cSWwAdpG+2E+QayOhW9mASCDoxBf6JoOe +COaIu1SXAgMBAAECggEAefvDdmd+6obJH/mqBkIWtpbyyTTZOeeRUM/VM45VpovY +ZDwMS3zB5K5sbFlhoVqwKzo2PlfRBAUx6PGo56XpbkNuDYcMrQJxGGlMmnD9GtZt +ZgT4VUC6kopS/2p/BzMjw7N6UfZbqj+05htkl2kMwfKb8y04bPICaA0Aw3XlAGRS +WIrfsXW32/hStXJvt8YSveIUP/WbdyEefwcKsu44LFvaq+j85KH75KLyZQre6n/A +7A67WIs1AcjiBHq/nupNF9rZUFm5YzL/kOC+g9Klq6bIIlSu+2ULIjL0WLy5be8R +OdIipE+ENa5HpC5J8z9WfGCf8yTB8/V+pt2dxZ9T6QKBgQDV2R1/9IgphiNCjPSE +M7JbY+K0XJ5Mfq64h1apLjycsq93dUX+1OI/e/qlxKS6SpqCgEajKch/x16ylZeQ +BNuG3zBuFPJH/17MfjdWFAW5ZJmbsnM05u+IU4aN0+r8laddvyNcdgLS4O6WvGrL +z5Nxio0YLkZnCFBsjdUb5Fs00wKBgQDC1mViZp4yT1b21r070sYQ1w4wgLYfVNIF +rc8AFevGh35kFRe2fFWl7T28Zqhetj7ES2LK7H93o8GdT28RuHrwst/nYL+lClmn +37iHPuHwlqDussCxg5A49HWq2qEoZ5DF379b8FJgcBYxYBsu7LSmQNEgFtCdvD7y +8C3uHieWrQKBgQCoOK0OFOxvzcc6+Or4fDpXzhFuVFVqU1Vab4xqdabUlXOWgzhW +qFx6GCsp77CtozY8ZnAqthm0+r6xuR+K+Wc/h57vWabloCuQrdEV85Y1Kr/zTMN3 +4BqZoSr9srDtlUQdaNiGSYcbIDpPdVSFk3qnHJi1ZuGW92Fco4367P4aZQKBgFyZ +4V2/P/jRVJfEv/Oq3ZArZgcPZX/GpHsmfHeh84lL5HpUvAxzU5GlC+57LBK3s2VA +HxgrBvopzl+h3Twi3euAWIJzrSIXpTzwS5eb/26FaL+KHaNA0E8BgNtPRcEaV+hz +y1M7CSvkmeelscW/oqxRqhMCROxzB8gW9v1xP4eRAoGADhZOjOmsfbBD58ijzaOJ +i2bOjCPzkRsS0cxnVlILA7kWU9bPxOkHKmkZnaYIBJhAJsW9l1XA3mlS5gqf93EW +VivIJZb10wI/MeRuITxanWiHPDcbbyOjBgaj6gNBbyrS9rDordYdLRVOsmlwvvJT +a60BCRcu0IgS6uM0sKaojoc= +-----END PRIVATE KEY----- From f901beec8780c7ed46e2797333054726e0680626 Mon Sep 17 00:00:00 2001 From: Calle Wilund Date: Wed, 8 Jan 2025 12:33:07 +0000 Subject: [PATCH 
08/10] tools: Add local-file-key-generator tool For generating key files for local provider --- configure.py | 3 +- main.cc | 1 + tools/CMakeLists.txt | 1 + tools/entry_point.hh | 1 + tools/scylla-local-file-key-generator.cc | 166 +++++++++++++++++++++++ 5 files changed, 171 insertions(+), 1 deletion(-) create mode 100644 tools/scylla-local-file-key-generator.cc diff --git a/configure.py b/configure.py index 35c1f10039..2d5b947823 100755 --- a/configure.py +++ b/configure.py @@ -1336,7 +1336,8 @@ scylla_tests_dependencies = scylla_core + alternator + idls + scylla_tests_gener scylla_raft_dependencies = scylla_raft_core + ['utils/uuid.cc', 'utils/error_injection.cc', 'utils/exceptions.cc'] -scylla_tools = ['tools/read_mutation.cc', +scylla_tools = ['tools/scylla-local-file-key-generator.cc', + 'tools/read_mutation.cc', 'tools/scylla-types.cc', 'tools/scylla-sstable.cc', 'tools/scylla-nodetool.cc', diff --git a/main.cc b/main.cc index 8b73496bfc..2eef9bf599 100644 --- a/main.cc +++ b/main.cc @@ -2393,6 +2393,7 @@ int main(int ac, char** av) { {"types", tools::scylla_types_main, "a command-line tool to examine values belonging to scylla types"}, {"sstable", tools::scylla_sstable_main, "a multifunctional command-line tool to examine the content of sstables"}, {"nodetool", tools::scylla_nodetool_main, "a command-line tool to administer local or remote ScyllaDB nodes"}, + {"local-file-key-generator", tools::scylla_local_file_key_generator_main, "a command-line tool to generate encryption at rest keys"}, {"perf-fast-forward", perf::scylla_fast_forward_main, "run performance tests by fast forwarding the reader on this server"}, {"perf-row-cache-update", perf::scylla_row_cache_update_main, "run performance tests by updating row cache on this server"}, {"perf-tablets", perf::scylla_tablets_main, "run performance tests of tablet metadata management"}, diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index b763bcc300..81ec007559 100644 --- a/tools/CMakeLists.txt +++ 
b/tools/CMakeLists.txt @@ -1,6 +1,7 @@ add_library(tools STATIC) target_sources(tools PRIVATE + scylla-local-file-key-generator.cc load_system_tablets.cc read_mutation.cc scylla-types.cc diff --git a/tools/entry_point.hh b/tools/entry_point.hh index fe2ef0ee73..682ac85844 100644 --- a/tools/entry_point.hh +++ b/tools/entry_point.hh @@ -10,6 +10,7 @@ namespace tools { +int scylla_local_file_key_generator_main(int argc, char** argv); int scylla_types_main(int argc, char** argv); int scylla_sstable_main(int argc, char** argv); int scylla_nodetool_main(int argc, char** argv); diff --git a/tools/scylla-local-file-key-generator.cc b/tools/scylla-local-file-key-generator.cc new file mode 100644 index 0000000000..5f46262b12 --- /dev/null +++ b/tools/scylla-local-file-key-generator.cc @@ -0,0 +1,166 @@ +/* + * Copyright (C) 2020-present ScyllaDB + */ + +/* + * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 + */ + +#include +#include +#include +#include + +#include +#include "compound.hh" +#include "db/marshal/type_parser.hh" +#include "schema/schema_builder.hh" +#include "tools/utils.hh" +#include "dht/i_partitioner.hh" +#include "utils/managed_bytes.hh" +#include "ent/encryption/symmetric_key.hh" +#include "ent/encryption/local_file_provider.hh" + +using namespace seastar; +using namespace tools::utils; + +namespace bpo = boost::program_options; + +namespace std { +// required by boost::lexical_cast(vector), which is in turn used +// by boost::program_option for printing out the default value of an option +static std::ostream& operator<<(std::ostream& os, const std::vector& v) { + return os << fmt::format("{}", v); +} +} + +namespace { + +const auto app_name = "local-file-key-generator"; + +const std::vector global_options{ + typed_option("alg,a", "AES", "Key algorithm (i.e. AES, 3DES)"), + typed_option("block-mode,b", "CBC", "Algorithm block mode (i.e. CBC, EBC)"), + typed_option("padding,p", "PKCS5", "Algorithm padding method (i.e. 
PKCS5)"), + typed_option("length,l", 128, "Key length in bits (i.e. 128, 256)"), +}; + +const std::vector global_positional_options{ + typed_option>("files", "key path|key name", -1), +}; + +const std::vector operations = { + {"generate", "creates a new key and stores to a new file", +R"( +Generate a key suitable for a given algorithm and key length +and store to a file readable by scylla encryption at rest +local file key provider. +)"}, + {"append", "same as generate, but appends key to existing file", +R"( +Generate a key suitable for a given algorithm and key length +and append to an existing file readable by scylla encryption at rest +local file key provider. +)"}, +}; + +} + +namespace tools { + +using namespace encryption; +using namespace std::string_literals; +namespace fs = std::filesystem; + +int scylla_local_file_key_generator_main(int argc, char** argv) { + constexpr auto description_template = +R"(scylla-{} - a command-line tool to generate file-based encryption keys. + +Usage: scylla {} [--option1] [--option2] ... [key path|key name] + +Allows creating symmetric keys for use with scylla encryption at rest +local key file provider. 
+ +Where can be one of: + +{} +)"; + + auto op_str = std::ranges::to(operations | std::views::transform([] (const operation& op) { + return fmt::format("* {} - {}\n{}", op.name(), op.summary(), op.description()); + }) | std::views::join_with('\n')); + tool_app_template::config app_cfg{ + .name = app_name, + .description = seastar::format(description_template, app_name, app_name, op_str), + .operations = std::move(operations), + .global_options = &global_options, + .global_positional_options = &global_positional_options, + }; + tool_app_template app(std::move(app_cfg)); + + return app.run_async(argc, argv, [] (const operation& op, const boost::program_options::variables_map& app_config) { + std::vector files; + + if (app_config.contains("files")) { + files = app_config["files"].as>(); + } + if (files.size() > 1) { + throw std::invalid_argument("Too many arguments"); + } + auto alg = app_config["alg"].as(); + auto mode = app_config["block-mode"].as(); + auto padd = app_config["padding"].as(); + auto len = app_config["length"].as(); + + if (!padd.ends_with("Padding")) { + padd = padd + "Padding"; + } + + auto java_sig = fmt::format("{}/{}/{}", alg, mode, padd); + + key_info info { + .alg = java_sig, .len = len + }; + + symmetric_key k(info); + auto key = k.key(); + auto hex = base64_encode(key); + auto line = fmt::format("{}:{}:{}", java_sig, len, hex); + auto key_name = "system_key"s; + + if (!files.empty()) { + fs::path f(files.front()); + if (fs::is_directory(f)) { + f = f / key_name; + } + if (!fs::exists(f)) { + auto p = f.parent_path(); + if (!p.empty()) { + fs::create_directories(p); + } + } + std::ios_base::openmode mode = std::ios_base::out; + if (op.name() == "append") { + mode |= std::ios_base::ate|std::ios_base::app; + } else { + mode |= std::ios_base::trunc; + } + + if (!fs::exists(f) || op.name() != "append") { + // create once so we can enforce proper + // permissions. 
(neither seastar or c++ io is great here) + std::ofstream os(f, mode); + } + + fs::permissions(f, fs::perms::owner_read|fs::perms::owner_write); + std::ofstream os(f, mode); + + os << line << std::endl; + } else { + std::cout << line << std::endl; + } + return 0; + }); +} + +} // namespace tools From 083f7353665e65976c52df659b7b059f8134fb94 Mon Sep 17 00:00:00 2001 From: Calle Wilund Date: Wed, 8 Jan 2025 12:33:58 +0000 Subject: [PATCH 09/10] main/build: Add p11-kit and initialize For p11 certification/validation --- CMakeLists.txt | 2 ++ cmake/Findp11-kit.cmake | 48 +++++++++++++++++++++++++++++++++++++++++ configure.py | 3 ++- main.cc | 21 ++++++++++++++++++ 4 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 cmake/Findp11-kit.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index cd32630f91..375f55e22a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -148,6 +148,7 @@ find_package(ICU COMPONENTS uc i18n REQUIRED) find_package(fmt 10.0.0 REQUIRED) find_package(libdeflate REQUIRED) find_package(libxcrypt REQUIRED) +find_package(p11-kit REQUIRED) find_package(Snappy REQUIRED) find_package(RapidJSON REQUIRED) find_package(xxHash REQUIRED) @@ -345,6 +346,7 @@ if(Scylla_ENABLE_LTO) endif() target_link_libraries(scylla PRIVATE + p11-kit::p11-kit Seastar::seastar absl::headers yaml-cpp::yaml-cpp diff --git a/cmake/Findp11-kit.cmake b/cmake/Findp11-kit.cmake new file mode 100644 index 0000000000..e9188ec1c7 --- /dev/null +++ b/cmake/Findp11-kit.cmake @@ -0,0 +1,48 @@ +# +# Copyright 2023-present ScyllaDB +# + +# +# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 +# +find_package(PkgConfig REQUIRED) + +pkg_check_modules(PC_p11_kit QUIET p11-kit-1) + +find_library(p11-kit_LIBRARY + NAMES p11-kit + PATH_SUFFIXES p11-kit-1 + HINTS + ${PC_p11_kit_LIBDIR} + ${PC_p11_kit_LIBRARY_DIRS}) + +find_path(p11-kit_INCLUDE_DIR + NAMES p11-kit/p11-kit.h + HINTS + ${PC_p11_kit_INCLUDEDIR} + ${PC_p11_kit_INCLUDE_DIRS}) + +mark_as_advanced( + 
p11-kit_LIBRARY + p11-kit_INCLUDE_DIR) + +include(FindPackageHandleStandardArgs) + +find_package_handle_standard_args(p11-kit + REQUIRED_VARS + p11-kit_LIBRARY + p11-kit_INCLUDE_DIR + VERSION_VAR PC_p11_kit_VERSION) + +if(p11-kit_FOUND) + set(p11-kit_LIBRARIES ${p11-kit_LIBRARY}) + set(p11-kit_INCLUDE_DIRS ${p11-kit_INCLUDE_DIR}) + if(NOT(TARGET p11-kit::p11-kit)) + add_library(p11-kit::p11-kit UNKNOWN IMPORTED) + + set_target_properties(p11-kit::p11-kit + PROPERTIES + IMPORTED_LOCATION ${p11-kit_LIBRARY} + INTERFACE_INCLUDE_DIRECTORIES ${p11-kit_INCLUDE_DIRS}) + endif() +endif() diff --git a/configure.py b/configure.py index 2d5b947823..956c59d970 100755 --- a/configure.py +++ b/configure.py @@ -2004,7 +2004,6 @@ def query_seastar_flags(pc_file, use_shared_libs, link_static_cxx=False): libs = f"-Wl,-rpath='{rpath}' {libs}" if link_static_cxx: libs = libs.replace('-lstdc++ ', '') - testing_libs = pkg_config(pc_file.replace('seastar.pc', 'seastar-testing.pc'), '--libs', '--static') return {'seastar_cflags': cflags, 'seastar_libs': libs, @@ -2028,6 +2027,8 @@ libs = ' '.join([maybe_static(args.staticyamlcpp, '-lyaml-cpp'), '-latomic', '-l '-ldeflate', ]) +args.user_cflags += " " + pkg_config('p11-kit-1', '--cflags') + if not args.staticboost: user_cflags += ' -DBOOST_ALL_DYN_LINK' diff --git a/main.cc b/main.cc index 2eef9bf599..6b07eb1189 100644 --- a/main.cc +++ b/main.cc @@ -118,6 +118,12 @@ #include "utils/shared_dict.hh" #include "message/dictionary_service.hh" + +#define P11_KIT_FUTURE_UNSTABLE_API +extern "C" { +#include +} + seastar::metrics::metric_groups app_metrics; using namespace std::chrono_literals; @@ -2451,5 +2457,20 @@ int main(int ac, char** av) { return 0; } + // We have to override p11-kit config path before p11-kit initialization. 
+ // And the initialization will be invoked on seastar initialization, so it has to + // be before app.run() + // #3583 - need to potentially ensure this for tools as well, since at least + // sstable* might need crypto libraries. + auto scylla_path = fs::read_symlink(fs::path("/proc/self/exe")); // could just be argv[0] I guess... + auto p11_modules = scylla_path.parent_path().parent_path().append("share/p11-kit/modules"); + // Note: must be in scope for application lifetime. p11_kit_override_system_files does _not_ + // copy input strings. + auto p11_modules_str = p11_modules.string(); + // #3392 only do this if we are actually packaged and the path exists. + if (fs::exists(p11_modules)) { + ::p11_kit_override_system_files(NULL, NULL, p11_modules_str.c_str(), NULL, NULL); + } + return main_func(ac, av); } From 8e828f608dffefae5d837f4b9a74df075f874899 Mon Sep 17 00:00:00 2001 From: Calle Wilund Date: Wed, 8 Jan 2025 13:10:12 +0000 Subject: [PATCH 10/10] docs: Add EAR docs Merge docs relating to EAR.
--- docs/dev/file_encryption.md | 76 ++ docs/operating-scylla/_common/tools_index.rst | 2 + .../nodetool-commands/upgradesstables.rst | 1 + .../procedures/backup-restore/restore.rst | 4 + .../cassandra-to-scylla-migration-process.rst | 13 + .../security/_common/security-index.rst | 1 + .../security/encryption-at-rest.rst | 859 ++++++++++++++++++ docs/operating-scylla/security/index.rst | 3 +- .../security/security-checklist.rst | 10 +- docs/reference/glossary.rst | 3 + 10 files changed, 967 insertions(+), 5 deletions(-) create mode 100644 docs/dev/file_encryption.md create mode 100644 docs/operating-scylla/security/encryption-at-rest.rst diff --git a/docs/dev/file_encryption.md b/docs/dev/file_encryption.md new file mode 100644 index 0000000000..824c9a10b3 --- /dev/null +++ b/docs/dev/file_encryption.md @@ -0,0 +1,76 @@ +File level encryption in scylla enterprise +========================================== + +File encryption in scylla enterprise is done by "block-level" encryption via a `file_impl` implementation that transparently wraps file IO transforming data +to/from encrypted state. Refer to `encrypted_file_impl` in `ent/encryption/encrypted_file_impl.cc`. + +Encryption is algorithm-agnostic in that the wrapper supports any symmetric-key algorithm (block cipher) that is available in the OpenSSL EVP (envelope) +library. + +The wrapper uses a user-provided symmetric key coupled with ESSIV block initialization vector calculation. *NOTE*: the data file itself does *not* keep track of +the key used to encrypt data, thus an external meta data provider is required to map files to their keys, and is solely the user's responsibility. + +File block encryption does not use padding, since it relies on input data size and output data size being identical. + +The file is divided in `N` blocks of 4096 bytes size. Each 4KB block is encrypted with the provided key `K`, configured block cipher `B`, and block cipher +operating mode (usually CBC). 
Because 4KB is an integral multiple of any considerable block cipher's block size, no padding is necessary within any 4KB file +block. + +The initialization vector (IV) for the block cipher operating mode (usually CBC) of each 4KB file block is derived via +[ESSIV](https://en.wikipedia.org/wiki/Disk_encryption_theory#Encrypted_salt-sector_initialization_vector_(ESSIV)): + +- The user-provided data encryption key `K` is hashed with SHA256 to value `h` (32 bytes = 256 bits). + +- An AES256 block cipher is keyed with `h`. + +- For the particular file block number, a byte array is populated with 8 `NUL` bytes, followed by the little-endian representation of the `uint64_t` block + number (16 bytes = 128 bits). + +- The 16 byte array is encrypted with a single round (i.e., one ECB mode application) of the AES256 block cipher, to value `c` (16 bytes = 128 bits). + +- `c` is the IV of the block cipher `B` (truncated or zero-padded as required by the block size of `B`). + +``` +h := SHA256(K) +IV_B(block_number) := AES256_h(uint64_t(0) ‖ block_number_le64) +``` + + +Padding/truncation +================== + +All encryption is done unpadded. To handle file sizes we use a simplified padding scheme: + +Since all writes are assumed to be done by us, and must be aligned (scylla requirement), we can assume in turn that any resizing should be made by truncation. + +If a file is truncated to a size that is not a whole multiple of the `B` block cipher's block size (which is typically 16 bytes = 128 bits), then we increment +the actual truncation size by `B`'s block size. 
+ + +``` + +----------- 16 bytes ----------+ + | | + +----------- 16 bytes ----------+ | + | | | | + v v v v ++--------------+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +| n * 16 bytes | T | P | T' | ++--------------+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + ^ ^ + | | +requested truncation offset: n * 16 + 3 | + | + actual truncation offset: (n + 1) * 16 + 3 + +``` + +- This preserves, in its entirety, for decryption's sake, the ciphertext block (`T ‖ P`) that the user expects to be truncated. + +- It records the useful byte count (`size(T)`) of the final ciphertext block (`T ‖ P`) through the trailing misalignment (`size(T')`). The contents of `T'` are + irrelevant (it's a partial cipher block, so it cannot be decrypted); only its size matters. + +When reading an encrypted file, we check the file size. If we're reading from a file with an unaligned size (i.e., `size(T')` is nonzero), we know that the size +of the padding at the end is `B`'s block size. `T'` is discarded; the last complete cipher block (`T ‖ P`) is decrypted. After decryption, `P` is discarded as +well. File size query methods adjust the returned values accordingly. + +Non-empty files that are shorter than `B`'s block size are invalid -- they can never be created by the above-described padding scheme. diff --git a/docs/operating-scylla/_common/tools_index.rst b/docs/operating-scylla/_common/tools_index.rst index d12fbb6f20..ead5410521 100644 --- a/docs/operating-scylla/_common/tools_index.rst +++ b/docs/operating-scylla/_common/tools_index.rst @@ -8,6 +8,8 @@ * :doc:`cassandra-stress ` A tool for benchmarking and load testing a ScyllaDB and Cassandra clusters. * :doc:`SSTabledump ` * :doc:`SSTableMetadata ` +* configuration_encryptor - :doc:`encrypt at rest ` sensitive scylla configuration entries using system key.
+* scylla local-file-key-generator - Generate a local file (system) key for :doc:`encryption at rest `, with the provided length, Key algorithm, Algorithm block mode and Algorithm padding method. * `scyllatop `_ - A terminal base top-like tool for scylladb collectd/prometheus metrics. * :doc:`scylla_dev_mode_setup` - run ScyllaDB in Developer Mode. * :doc:`perftune` - performance configuration. diff --git a/docs/operating-scylla/nodetool-commands/upgradesstables.rst b/docs/operating-scylla/nodetool-commands/upgradesstables.rst index e2f9b030cc..a052f59801 100644 --- a/docs/operating-scylla/nodetool-commands/upgradesstables.rst +++ b/docs/operating-scylla/nodetool-commands/upgradesstables.rst @@ -55,6 +55,7 @@ Additional References --------------------- .. include:: nodetool-index.rst +:doc:`Encryption at Rest ` diff --git a/docs/operating-scylla/procedures/backup-restore/restore.rst b/docs/operating-scylla/procedures/backup-restore/restore.rst index 71d8b92e63..61ebf7d793 100644 --- a/docs/operating-scylla/procedures/backup-restore/restore.rst +++ b/docs/operating-scylla/procedures/backup-restore/restore.rst @@ -35,6 +35,10 @@ Procedure Repeat the following steps for each node in the cluster: -------------------------------------------------------- +.. note:: + + If you are restoring :doc:`encrypted backup files `, make sure Scylla Enterprise has the same keys used by Scylla to encrypt the data before starting the restore process. + .. note:: Best practise is **not** to restore :doc:`Materialized Views (MV) ` and :doc:`Secondary Indexes (SI) ` SSTables. 
diff --git a/docs/operating-scylla/procedures/cassandra-to-scylla-migration-process.rst b/docs/operating-scylla/procedures/cassandra-to-scylla-migration-process.rst index 546653926c..2ec0f0a9a3 100644 --- a/docs/operating-scylla/procedures/cassandra-to-scylla-migration-process.rst +++ b/docs/operating-scylla/procedures/cassandra-to-scylla-migration-process.rst @@ -51,6 +51,19 @@ Procedure - Import schema to ScyllaDB: ``cqlsh [IP] --file 'adjusted_schema.cql'`` .. _`limitations and known issues section`: #notes-limitations-and-known-issues + + +.. note:: + + Scylla and Apache Cassandra :doc:`encrypted backup files ` are **not** compatible. + sstableloader does **not** support loading from encrypted files. + + If you need to migrate/restore from encrypted files: + + * Upload them to the original database + * Decrypt the table with ALTER TABLE + * Update the SSTable files with :doc:`upgradesstable ` + * Use sstableloader .. note:: diff --git a/docs/operating-scylla/security/_common/security-index.rst b/docs/operating-scylla/security/_common/security-index.rst index 33a8c81928..fae922e75a 100644 --- a/docs/operating-scylla/security/_common/security-index.rst +++ b/docs/operating-scylla/security/_common/security-index.rst @@ -8,6 +8,7 @@ * :doc:`Encryption: Data in Transit Client to Node ` * :doc:`Encryption: Data in Transit Node to Node ` * :doc:`Generating a self-signed Certificate Chain Using openssl ` +* :doc:`Encryption at Rest ` diff --git a/docs/operating-scylla/security/encryption-at-rest.rst b/docs/operating-scylla/security/encryption-at-rest.rst new file mode 100644 index 0000000000..d5ae3fb5df --- /dev/null +++ b/docs/operating-scylla/security/encryption-at-rest.rst @@ -0,0 +1,859 @@ +================== +Encryption at Rest +================== + +Introduction +---------------------- + +ScyllaDB Enterprise protects your sensitive data with data-at-rest encryption.
+It protects the privacy of your user's data, reduces the risk of data breaches, and helps meet regulatory requirements. +In particular, it provides an additional level of protection for your data persisted in storage or its backups. + +When ScyllaDB Enterprise Encryption at Rest is used together with Encryption in Transit (:doc:`Node to Node ` and :doc:`Client to Node `), you benefit from end to end data encryption. + +About Encryption at Rest +----------------------------- + +The following can be encrypted: + +* ScyllaDB persistent tables (SSTables) +* System level data, such as: + + - Commit logs + - Batches + - hints logs + - KMIP Password (part of scylla.yaml) + +Encryption at Rest works at table level granularity, so you can choose to encrypt only sensitive tables. For both system and table data, you can use different algorithms that are supported by `OpenSSL `_ in a file block encryption scheme. + +.. note:: SSTables of a particular table can have different encryption keys, use different encryption algorithms, or not be encrypted at all - at the same time. + +When is Data Encrypted? +======================== + +As SSTables are immutable, tables are encrypted only once, as a result of memtable flush, compaction, or upgrade (with :doc:`Nodetool upgradesstables `). + +Once a table is encrypted, all resulting SSTables are encrypted using the most current key and algorithm. +When you encrypt an existing table, the new SSTables are encrypted. The old SSTables which existed before the encryption are not updated. These tables are encrypted according to the same actions as described previously. + +When is Data Decrypted? +======================== + +When ScyllaDB reads an encrypted SSTable from disk, it fetches the encryption key's ID from the SSTable and uses it to extract the key and decrypt the data. +When ScyllaDB reads an encrypted system table, it fetches the system table encryption key location from the scylla.yaml file. 
It locates the key and uses it to decrypt the data. + + +Encryption Key Types +---------------------- + +Two types of encryption keys are available: System Keys and Table Keys. + +System Keys +==================== + +System keys are used for encrypting system data, such as commit logs, hints, and/or other user table keys. When a Replicated Key Provider is used for encrypting SSTables, the table keys are stored in the encrypted_keys table, and the system key is used to encrypt the encrypted_keys table. The system key is stored as the contents of a local file and is encrypted with a single key that you provide. The default location of system keys is ``/etc/scylla/resources/system_keys/`` and can be changed with the ``system_key_directory`` option in scylla.yaml file. When a Local Key Provider is used for encrypting system info, you can provide your own key, or ScyllaDB can make one for you. + +.. _Replicated: + +Table Keys +=================== +Table keys are used for encrypting SSTables. Depending on your key provider, this key is stored in different locations: + +* Replicated Key Provider - encrypted_keys table +* KMIP Key Provider - KMIP server +* KMS Key Provider - AWS +* Local Key Provider - in a local file with multiple keys. You can provide your own key or ScyllaDB can make one for you. + +.. _ear-key-providers: + +.. note:: + + Encrypted SSTables undergo a regular backup procedure. Ensure you keep your + encryption key available in case you need to restore from backup. + +Key Providers +---------------------- + +When encrypting the system tables or SSTables, you need to state which provider is holding your keys. You can use the following options: + +.. list-table:: + :widths: 33 33 33 + :header-rows: 1 + + * - Key Provider Name + - key_provider Name + - Description + * - Local Key Provider + - LocalFileSystemKeyProviderFactory (**default**) + - Stores the key on the same machine as the data.
+ * - Replicated Key Provider + - ReplicatedKeyProviderFactory + - Stores table keys in a ScyllaDB table where the table itself is encrypted using the system key (available from 2019.1.3) + * - KMIP Key Provider + - KmipKeyProviderFactory + - External key management server (available from 2019.1.3) + * - KMS Key Provider + - KmsKeyProviderFactory + - Uses key(s) provided by the AWS KMS service. + * - GCP Key Provider + - GcpKeyProviderFactory + - Uses key(s) provided by the GCP KMS service. + + +About Local Key Storage +========================== + +Local keys are used for encrypting user data, such as SSTables. +Currently, this is the only option available for user data and, as such, is the default key storage manager. +With local key storage, keys are stored locally on disk in a text file. The location of this file is specified in the scylla.yaml. + +.. caution:: Care should be taken so that no unauthorized person can access the key data from the file system. Make sure that the owner of this file is the ``scylla`` user and that the file is **not** readable by **other users** and **not** accessible by **other roles**. + +You should also consider keeping the key directory on a network drive (using TLS for the file sharing) to avoid having keys and data on the same storage media, in case your storage is stolen or discarded. + +.. _ear-cipher-algorithms: + +Cipher Algorithms +---------------------- + +The following cipher_algorithms are available for use with ScyllaDB using `OpenSSL `_. Note that the default algorithm (AES/CBC/PKCS5Padding with key strength 128) is recommended. + +.. list-table:: + :widths: 70 30 + :header-rows: 1 + + * - cipher_algorithm + - secret_key_strength + * - AES/CBC/PKCS5Padding (**default**) + - 128 (**default**), 192, or 256 + * - AES/ECB/PKCS5Padding + - 128, 192, or 256 + * - Blowfish/CBC/PKCS5Padding + - 32-448 + +..
_ear-create-encryption-key: + +Create Encryption Keys +----------------------------- + +Depending on your key provider, you will either have the option of allowing ScyllaDB to generate an encryption key, or you will have to provide one: + +* KMIP Key Provider - you don't need to generate any key yourself +* KMS Key Provider - you must generate a key yourself in AWS +* Replicated Key Provider - you must generate a system key yourself +* Local Key Provider - If you do not generate your own secret key, ScyllaDB will create one for you + +When encrypting ScyllaDB config by ``configuration_encryptor``, you also need to generate a secret key and upload the key to all nodes. + + +Use the key generator script +================================ + +The Key Generator script generates a key in the directory of your choice. + +**Procedure** + + +#. Create (if it doesn't exist) a local directory for storing the key. Make sure that the owner of the directory is ``scylla`` and not another user. Make sure that the ``scylla`` user can read, write, and execute over the parent directory. Following this procedure makes ``/etc/scylla/encryption_keys/`` the parent directory of your keys. + + For example: + + .. code-block:: none + + sudo mkdir -p /etc/scylla/encryption_keys/system_keys + sudo chown -R scylla:scylla /etc/scylla/encryption_keys + sudo chmod -R 700 /etc/scylla/encryption_keys + +#. Create a key using the local file key generator script making sure that the keyfile owner is ``scylla`` and not another user. Run the command: + + .. code-block:: none + + sudo -u scylla /usr/bin/scylla local-file-key-generator [options] [key-path] + + Where: + + * ``-a,--alg `` - the encryption algorithm (e.g., AES) you want to use to encrypt the key + * ``-h,--help`` - displays the help menu + * ``-l,--length `` - the length of the encryption key in bits (i.e. 128, 256) + * ``-b,--block-mode `` - the encryption algorithm block mode (i.e. 
CBC, ECB) + * ``-p,--padding `` - the encryption algorithm padding method (e.g. PKCS5) + * ``key-path`` - is the directory you want to place the key into (/etc/scylla/encryption_keys, for example) + + And ```` is one of ``generate`` or ``append``, the first creating a new key file with the generated key, the latter + appending a new key of the required type to an existing file. + + For Example: + + To create a secret key and a system key using other encryption settings in a different location: + + .. code-block:: none + + sudo -u scylla /usr/bin/scylla local-file-key-generator generate -a AES -b ECB -p PKCS5 -l 192 /etc/scylla/encryption_keys/secret_key + sudo -u scylla /usr/bin/scylla local-file-key-generator generate -a AES -b CBC -p PKCS5 -l 128 /etc/scylla/encryption_keys/system_keys/system_key + + To display the secret key parameters: + + .. code-block:: none + + sudo cat /etc/scylla/encryption_keys/secret_key + + Returns: + + .. code-block:: none + + AES/ECB/PKCS5Padding:192:8stVxW5ypYhNxsnRVS1A6suKhk0sG4Tj + + To display the system key parameters: + + .. code-block:: none + + sudo cat /etc/scylla/encryption_keys/system_keys/system_key + + Returns: + + .. code-block:: none + + AES/CBC/PKCS5Padding:128:GGpOSxTGhtPRPLrNPYvVMQ== + + + Once you have created a key, copy the key to each node, using the procedure described in `Copy keys to nodes`_. + +Copy keys to nodes +====================== + +Every key you generate needs to be copied to the nodes for use in local key providers. + +**Procedure** + +#. Securely copy the key file, using ``scp`` or similar, to the same path on all nodes in the cluster. Make sure the key on each target node is moved to the same location as the source directory and that the target directory has the same permissions as the source directory. + +#. Repeat for all nodes in the cluster. + +..
_encryption-at-rest-set-kmip: + +Set the KMIP Host +---------------------- + +If you are using :term:`KMIP ` to encrypt tables or system information, add the KMIP server information to the ``scylla.yaml`` configuration file. + +#. Edit the ``scylla.yaml`` file located in ``/etc/scylla/`` and add the following in KMIP host(s) section: + + .. code-block:: yaml + + # + # kmip_hosts: + # : + # hosts: [, ...] + # certificate: (optional) + # keyfile: (optional; it is required if "certificate" is set) + # truststore: (optional) + # certficate_revocation_list: (optional) + # priority_string: + # username: (optional) + # password: (optional) + # max_command_retries: (optional; default 3) + # key_cache_expiry: + # key_cache_refresh: + # : + + Where: + + * ```` - The cluster name. + * ``hosts`` - The list of hosts specified by IP and port for the KMIP server. The KMIP connection management only supports failover, so all requests go through a single KMIP server. There is no load balancing, as currently no KMIP servers support read replication or other strategies for availability. Hosts are tried in the order they appear, and the next one in the list is tried if the previous one fails. The default number of retries is three, but you can customize it with "max_command_retries". + * ``certificate`` - The name of the certificate and path used to identify yourself to the KMIP server. + * ``keyfile`` - The name of the key used to identify yourself to the KMIP server. It is generated together with the certificate. + * ``truststore`` - The location and key for the truststore to present to the KMIP server. + * ``certficate_revocation_list`` - The path to a PEM-encoded certificate revocation list (CRL) - a list of issued certificates that have been revoked before their expiration date. + * ``priority_string`` - The KMIP TLS priority string. + * ``username`` - The KMIP server user name. + * ``password`` - The KMIP server password.
+ * ``max_command_retries`` - The number of attempts to connect to the KMIP server before trying the next host in the list. + * ``key_cache_expiry`` - Key cache expiry period, after which keys will be re-requested from server. Default is 600s. + * ``key_cache_refresh`` - Key cache refresh period - the frequency at which cache is checked for expired entries. Default is 1200s. + +#. Save the file. +#. Drain the node with :doc:`nodetool drain ` +#. Restart the scylla-server service. + +.. include:: /rst_include/scylla-commands-restart-index.rst + +.. _encryption-at-rest-set-kms: + +Set the KMS Host +---------------------- + +.. note:: KMS support is available since ScyllaDB Enterprise **2023.1.1**. + +If you are using AWS KMS to encrypt tables or system information, add the KMS information to the ``scylla.yaml`` configuration file. + +#. Edit the ``scylla.yaml`` file located in ``/etc/scylla/`` to add the following in KMS host(s) section: + + .. code-block:: yaml + + kms_hosts: + : + endpoint: http(s)://(:port) (optional if `aws_region` is specified) + aws_region: (optional if `endpoint` is specified) + aws_access_key_id: (optional) + aws_secret_access_key: (optional) + aws_profile: (optional) + aws_use_ec2_credentials: (bool : default false) + aws_use_ec2_region: (bool : default false) + aws_assume_role_arn: (optional) + master_key: (required) + certificate: (optional) + keyfile: (optional) + truststore: (optional) + priority_string: (optional) + key_cache_expiry: + key_cache_refresh: + # : + + Where: + + * ```` - The name to identify the KMS host. You have to provide this name to encrypt a :ref:`new ` or :ref:`existing ` table. + * ``endpoint`` - The explicit KMS host endpoint. If not provided, ``aws_region`` is used for connection. + * ``aws_region`` - An AWS region. If not provided, ``endpoint`` is used for connection. + * ``aws_access_key_id`` - AWS access key used for authentication. If not specified, the provider reads it from your AWS credentials. 
+ * ``aws_secret_access_key`` - AWS secret access key used for authentication. If not specified, the provider reads it from your AWS credentials. + * ``aws_profile`` - AWS profile to use if reading credentials from file + * ``aws_use_ec2_credentials`` - If true, KMS queries will use the credentials provided by ec2 instance role metadata as initial access key. + * ``aws_use_ec2_region`` - If true, KMS queries will use the AWS region indicated by ec2 instance metadata. + * ``aws_assume_role_arn`` - If set, any KMS query will first attempt to assume this role. + * ``master_key`` - The ID or alias of your AWS KMS key. The key must be generated with an appropriate access policy so that the AWS user has permissions to read the key and encrypt data using that key. This parameter is required. + * ``certificate`` - The name of the certificate and the path used to identify yourself to the KMS server. + * ``keyfile`` - The name of the key for the certificate. It is generated together with the certificate. + * ``truststore`` - The location and key for the truststore to present to the KMS server. + * ``priority_string`` - The KMS TLS priority string. + * ``key_cache_expiry`` - Key cache expiry period, after which keys will be re-requested from server. Default is 600s. + * ``key_cache_refresh`` - Key cache refresh period - the frequency at which cache is checked for expired entries. Default is 1200s. + + .. note:: + + Note that either ``endpoint``, ``aws_region`` or ``aws_use_ec2_region`` must be set (one of them is required for connection). + + Example: + + .. code-block:: yaml + + kms_hosts: + my-kms1: + aws_use_ec2_credentials: true + aws_use_ec2_region: true + master_key: myorg/MyKey + +#. Save the file. +#. Drain the node with :doc:`nodetool drain ` +#. Restart the scylla-server service. + +.. include:: /rst_include/scylla-commands-restart-index.rst + +.. 
_encryption-at-rest-set-gcp: + +Set the GCP Host +---------------------- + +If you are using Google GCP KMS to encrypt tables or system information, add the GCP information to the ``scylla.yaml`` configuration file. + +#. Edit the ``scylla.yaml`` file located in ``/etc/scylla/`` to add the following in the GCP host(s) section: + + .. code-block:: yaml + + gcp_hosts: + : + gcp_project_id: + gcp_location: + gcp_credentials_file: <(service) account json key file - authentication> + gcp_impersonate_service_account: + master_key: / - named GCP key for encrypting data keys (required) + certificate: (optional) + keyfile: (optional) + truststore: (optional) + priority_string: (optional) + key_cache_expiry: + key_cache_refresh: + # : + + Where: + + * ```` - The name to identify the GCP host. You have to provide this name to encrypt a :ref:`new ` or :ref:`existing ` table. + * ``gcp_project_id`` - The GCP project from which to retrieve key information. + * ``gcp_location`` - A GCP project location. + * ``gcp_credentials_file`` - GCP credentials file used for authentication. If not specified, the provider reads it from your GCP credentials. + * ``gcp_impersonate_service_account`` - An optional service account to impersonate when issuing key query calls. + * ``master_key`` - The / of your GCP KMS key. The key must be generated with an appropriate access policy so that the GCP user has permissions to read the key and encrypt data using that key. This parameter is required. + * ``certificate`` - The name of the certificate and the path used to identify yourself to the KMS server. + * ``keyfile`` - The name of the key for the certificate. It is generated together with the certificate. + * ``truststore`` - The location and key for the truststore to present to the KMS server. + * ``priority_string`` - The KMS TLS priority string. + * ``key_cache_expiry`` - Key cache expiry period, after which keys will be re-requested from server. Default is 600s.
+ * ``key_cache_refresh`` - Key cache refresh period - the frequency at which cache is checked for expired entries. Default is 1200s. + + Example: + + .. code-block:: yaml + + gcp_hosts: + my-gcp1: + gcp_project_id: myproject + gcp_location: global + master_key: mykeyring/mykey + +#. Save the file. +#. Drain the node with :doc:`nodetool drain ` +#. Restart the scylla-server service. + +.. include:: /rst_include/scylla-commands-restart-index.rst + +Encrypt Tables +----------------------------- + +.. note:: + + This feature is available since ScyllaDB Enterprise 2023.1.2. + +ScyllaDB allows you to enable or disable default encryption of tables. +When enabled, tables will be encrypted by default using the configuration +provided for the ``user_info_encryption`` option in the ``scylla.yaml`` file. + +You can override the default configuration when you CREATE TABLE or ALTER TABLE +with ``scylla_encryption_options``. See :ref:`Encrypt a Single Table ` +for details. + +**Before you Begin** + +Ensure you have an encryption key available: + +* If you are using AWS KMS, :ref:`set the KMS Host `. +* If you are using KMIP, :ref:`set the KMIP Host `. +* If you are using Google GCP KMS, :ref:`set the GCP Host `. +* If you want to create your own key, follow the procedure in :ref:`Create Encryption Keys `. +* If you do not create your own key, use the following procedure for ScyllaDB + to create a key for you (the default location ``/etc/scylla/data_encryption_keys`` may cause + permission issues; the following example creates a key in the directory ``/etc/scylla/encryption_keys``): + + .. code-block:: none + + sudo mkdir -p /etc/scylla/encryption_keys + sudo chown -R scylla:scylla /etc/scylla/encryption_keys + sudo chmod -R 700 /etc/scylla/encryption_keys + +**Procedure** + +Edit the ``scylla.yaml`` file located in ``/etc/scylla/`` and configure +the ``user_info_encryption`` option: + +.. 
code-block:: yaml + + user_info_encryption: + enabled: + cipher_algorithm: + secret_key_strength: + key_provider: + secret_key_file: + kmip_host: + kms_host: + gcp_host: + +Where: + +* ``enabled`` - Enables or disables default table encryption. Required. +* ``cipher_algorithm`` - One of the :ref:`cipher algorithms `. + If not provided, the default will be used. +* ``secret_key_strength`` - The length of the key in bits (determined by + the :ref:`cipher algorithms ` you choose). + If not provided, the default will be used. +* ``key_provider`` - The name of the key provider. See :ref:`Key Providers `. + Required. +* ``secret_key_file`` - The location of the key created by ScyllaDB (by default ``/etc/scylla/data_encryption_keys``). + Required if you use a ScyllaDB-generated key. +* ``kmip_host`` - The name of your :ref:`kmip_host ` group. + Required if you use KMIP. +* ``kms_host`` - The name of your :ref:`kms_host ` group. + Required if you use KMS. +* ``gcp_host`` - The name of your :ref:`gcp_host ` group. + Required if you use GCP. + +**Example** + +.. code-block:: yaml + + user_info_encryption: + enabled: true + cipher_algorithm: AES + secret_key_strength: 128 + key_provider: LocalFileSystemKeyProviderFactory + secret_key_file: scylla /etc/scylla/encryption_keys + +**Examples for KMS:** + +In the following example, the ``master_key`` configured for :ref:`kms_host ` will be used. + +.. code-block:: yaml + + user_info_encryption: + enabled: true + key_provider: KmsKeyProviderFactory + kms_host: my-kms1 + + +You can specify a different ``master_key`` than the one configured for :ref:`kms_host `: + + .. code-block:: yaml + + user_info_encryption: + enabled: true + key_provider: KmsKeyProviderFactory + kms_host: my-kms1 + master_key: myorg/SomeOtherKey + +.. _ear-create-table: + +Encrypt a Single Table +----------------------------- + +This procedure demonstrates how to encrypt a new table.
+ +**Before you Begin** + +* Make sure to `Set the KMIP Host`_ if you are using KMIP, or the :ref:`KMS Host ` if you are using AWS KMS. + +* If you want to make your own key, use the procedure in `Create Encryption Keys`_ and skip to step 3. If you do not create your own key, ScyllaDB will create one for you in the ``secret_key_file`` path. If you are not creating your own key, start with step 1. + +**Procedure** + +#. By default, the encryption key is located in the ``/etc/scylla/`` directory, and the file is named ``data_encryption_keys``. If you want to save the key in a different directory, create one. This example will create encryption keys in a different directory (``/etc/scylla/encryption_keys``, for example), which ensures that the owner of this directory is ``scylla`` and not another user. + + .. note:: Using the default location results in a known permission issue (scylladb/scylla-tools-java#94), so it is recommended to use another location as described in the example. + + .. code-block:: none + + sudo mkdir -p /etc/scylla/encryption_keys + sudo chown -R scylla:scylla /etc/scylla/encryption_keys + sudo chmod -R 700 /etc/scylla/encryption_keys + +#. Create the keyspace if it doesn’t exist. + +#. Create the table using the ``CREATE TABLE`` CQL statement, adding any :ref:`additional options `. To encrypt the table, use the options for encryption below, remembering to set the ``secret_key_file `` to the same directory you created in step 1. + + .. code-block:: cql + + CREATE TABLE . (......) WITH + scylla_encryption_options = { + 'cipher_algorithm' : , + 'secret_key_strength' : , + 'key_provider': , + 'secret_key_file': + } + ; + + Where: + + * ``cipher_algorithm`` - The cipher algorithm which is to be used to create the key. See `Cipher Algorithms`_ for more information. + * ``secret_key_strength`` - The length of the key in bits. This is determined by the cipher you choose. See `Cipher Algorithms`_ for more information.
+ * ``key_provider`` is the name or type of key provider. Refer to `Key Providers`_ for more information. + * ``secret_key_file`` - the location that ScyllaDB will store the key it creates (if one does not exist in this location) or the location of the key. By default the location is ``/etc/scylla/data_encryption_keys``. + + **Example:** + + Continuing the example from above, this command will instruct ScyllaDB to encrypt the table and will save the key in the location created in step 1. + + .. code-block:: cql + + CREATE TABLE data.atrest (pk text primary key, c0 int) WITH + scylla_encryption_options = { + 'cipher_algorithm' : 'AES/ECB/PKCS5Padding', + 'secret_key_strength' : 128, + 'key_provider': 'LocalFileSystemKeyProviderFactory', + 'secret_key_file': '/etc/scylla/encryption_keys/data_encryption_keys' + } + ; + + **Example for KMS:** + + .. code-block:: cql + + CREATE TABLE myks.mytable (......) WITH + scylla_encryption_options = { + 'cipher_algorithm' : 'AES/CBC/PKCS5Padding', + 'secret_key_strength' : 128, + 'key_provider': 'KmsKeyProviderFactory', + 'kms_host': 'my-kms1' + } + ; + + You can skip ``cipher_algorithm`` and ``secret_key_strength`` (the :ref:`defaults ` will be used): + + .. code-block:: cql + + CREATE TABLE myks.mytable (......) WITH + scylla_encryption_options = { + 'key_provider': 'KmsKeyProviderFactory', + 'kms_host': 'my-kms1' + } + ; + + You can specify a different master key than the one configured for ``kms_host`` in the ``scylla.yaml`` file: + + .. code-block:: cql + + CREATE TABLE myks.mytable (......) WITH + scylla_encryption_options = { + 'key_provider': 'KmsKeyProviderFactory', + 'kms_host': 'my-kms1', + 'master_key':'myorg/SomeOtherKey' + } + ; + + +#. From this point, every new SSTable created for the ``atrest`` table is encrypted, using the ``data_encryption_keys`` key located in ``/etc/scylla/encryption_keys/``. 
This table will remain encrypted with this key until you either change the key, change the key properties, or disable encryption. + +#. To ensure all SSTables for this table on every node are encrypted, run the :doc:`Nodetool upgradesstables ` command. If not, the SSTables remain unencrypted until they are compacted or flushed from MemTables. + + For Example: + + .. code-block:: none + + nodetool upgradesstables data atrest + +#. Your SSTables are encrypted. If you want to change the key at any point, use the `Update Encryption Properties of Existing Tables`_ procedure. Always keep your key in a safe location known to you. Do not lose it. See `When a Key is Lost`_. + +.. _ear-alter-table: + +Update Encryption Properties of Existing Tables +================================================== + +You can encrypt any existing table or use this procedure to change the cipher algorithm, key location or key strength or even disable encryption on a table. + +**Procedure** + +#. Edit the table properties to enable encryption of one table of your choosing. Use the properties explained in `Encrypt a Single Table`_ if needed. + + .. code-block:: cql + + ALTER TABLE . (......) WITH + scylla_encryption_options = { + 'cipher_algorithm' : , + 'secret_key_strength' : , + 'key_provider': , + 'secret_key_file': + } + ; + + + **Example:** + + Continuing the example from above, this command will instruct ScyllaDB to encrypt the table and will save the key in the location created in step 1. + + .. code-block:: cql + + ALTER TABLE data.atrest (pk text primary key, c0 int) WITH + scylla_encryption_options = { + 'cipher_algorithm' : 'AES/ECB/PKCS5Padding', + 'secret_key_strength' : 192, + 'key_provider': 'LocalFileSystemKeyProviderFactory', + 'secret_key_file': '/etc/scylla/encryption_keys/data_encryption_keys' + } + ; + + **Example for KMS:** + + .. code-block:: cql + + ALTER TABLE myks.mytable (......) 
WITH + scylla_encryption_options = { + 'cipher_algorithm' : 'AES/CBC/PKCS5Padding', + 'secret_key_strength' : 128, + 'key_provider': 'KmsKeyProviderFactory', + 'kms_host': 'my-kms1' + } + ; + +#. If you want to make sure that SSTables that existed before this change are also encrypted, you can either upgrade them using the ``nodetool upgradesstables`` command or wait until the next compaction. If you decide to wait, ScyllaDB will still be able to read the old unencrypted SSTables. If you change the key or remove encryption, ScyllaDB will still continue to read the old SSTables as long as you still have the key. If your data is encrypted and you do not have the key, your data is unreadable. + + * If you decide to upgrade all of your old SSTables, run the :doc:`nodetool upgradesstables ` command. + + .. code-block:: none + + nodetool upgradesstables + + For example: + + .. code-block:: none + + nodetool upgradesstables ks test + + * Repeat this command on all nodes, as nodetool runs locally. + +#. If you want to change the key or disable encryption, repeat the `Update Encryption Properties of Existing Tables`_ procedure using the examples below as reference. + +**Examples** + +To encrypt an existing table named test in keyspace ks: + +.. code-block:: cql + + ALTER TABLE ks.test WITH + scylla_encryption_options = { + 'cipher_algorithm' : 'AES/ECB/PKCS5Padding', + 'secret_key_strength' : 128, + 'key_provider': 'LocalFileSystemKeyProviderFactory', + 'secret_key_file': '/etc/scylla/encryption_keys/data_encryption_keys' + } + ; + + +To change the key strength from 128 to 192 on an existing table (the cipher algorithm remains AES/ECB/PKCS5Padding): + +..
code-block:: cql + + ALTER TABLE ks.test WITH + scylla_encryption_options = { + 'cipher_algorithm' : 'AES/ECB/PKCS5Padding', + 'secret_key_strength' : 192, + 'key_provider': 'LocalFileSystemKeyProviderFactory', + 'secret_key_file': '/etc/scylla/encryption_keys/data_encryption_keys' + } + ; + +To disable encryption on an encrypted table named test in keyspace ks: + +.. code-block:: cql + + ALTER TABLE ks.test WITH + scylla_encryption_options = { 'key_provider' : 'none' }; + + +Encrypt System Resources +--------------------------- + +System encryption is applied to semi-transient on-disk data, such as commit logs, batch logs, and hinted handoff data. +This ensures that all temporarily stored data is encrypted until fully persisted to the final SSTable on disk. +Once this encryption is enabled, it is used for all system data. + + +**Procedure** + +#. Edit the scylla.yaml file, located in /etc/scylla/scylla.yaml, and add the following: + + .. code-block:: none + + system_info_encryption: + enabled: + key_provider: (optional) + system_key_directory: + + Where: + + * ``enabled`` can be true or false. True is enabled; false is disabled. + + * ``key_provider`` is the name or type of key provider. Refer to `Key Providers`_ for more information. + + * ``cipher_algorithm`` is one of the supported `Cipher Algorithms`_. + + * ``secret_key_file`` is the name of the key file containing the secret key (key.pem, for example) + + Example: + + .. code-block:: none + + system_info_encryption: + enabled: True + cipher_algorithm: AES + secret_key_strength: 128 + key_provider: LocalFileSystemKeyProviderFactory + secret_key_file: /path/to/systemKey.pem + + Example for KMIP: + + .. code-block:: none + + system_info_encryption: + enabled: True + cipher_algorithm: AES + secret_key_strength: 128 + key_provider: KmipKeyProviderFactory + kmip_host: yourkmipServerIP.com + + Where ``kmip_host`` is the address for your KMIP server. + + Example for KMS: + + ..
code-block:: none + + system_info_encryption: + enabled: True + cipher_algorithm: AES/CBC/PKCS5Padding + secret_key_strength: 128 + key_provider: KmsKeyProviderFactory + kms_host: myScylla + + Where ``kms_host`` is the unique name of the KMS host specified in the scylla.yaml file. + + Example for GCP: + + .. code-block:: none + + system_info_encryption: + enabled: True + cipher_algorithm: AES/CBC/PKCS5Padding + secret_key_strength: 128 + key_provider: GcpKeyProviderFactory + gcp_host: myScylla + + Where ``gcp_host`` is the unique name of the GCP host specified in the scylla.yaml file. + + +#. Do not close the yaml file. Change the system key directory location according to your settings. + + * ``system_key_directory`` is the location of the system key you created in `Create Encryption Keys`_. + + .. code-block:: none + + system_key_directory: /etc/scylla/encryption_keys/system_keys + +#. Save the file. +#. Drain the node with :doc:`nodetool drain ` +#. Restart the scylla-server service. + + .. include:: /rst_include/scylla-commands-restart-index.rst + + .. wasn't able to test this successfully + +.. Encrypt and Decrypt Configuration Files +.. ======================================= + +.. Using the Configuration Encryption tool, you can encrypt parts of the scylla.yaml file which contain encryption configuration settings. + +.. **Procedure** + +.. 1. Run the Configuration Encryption script: + +.. test code-block: none + +.. /bin/configuration_encryptor [options] [key-path] + +.. Where: + +.. * ``-c, --config`` - the path to the configuration file (/etc/scylla/scylla.yaml, for example) +.. * ``-d, --decrypt`` - decrypts the configuration file at the specified path +.. * ``-o, --output`` - (optional) writes the configuration file to a specified target. This can be the same location as the source file. +.. * ``-h. --help`` - help for this command + +.. For example: + +.. test code-block: none + +.. 
sudo -u scylla /bin/configuration_encryptor -c /etc/scylla/scylla.yaml /etc/scylla/encryption_keys/secret_key +.. end of test + +When a Key is Lost +---------------------- + +It is crucial to back up all of your encryption keys in a secure way. Keep a copy of all keys in a secure location. In the event that you do lose a key, your data encrypted with that key will be unreadable. + +Additional Resources +---------------------- + +* :doc:`nodetool upgradesstables ` +* :ref:`CREATE TABLE parameters ` diff --git a/docs/operating-scylla/security/index.rst b/docs/operating-scylla/security/index.rst index 6d9c4cf380..171024b7eb 100644 --- a/docs/operating-scylla/security/index.rst +++ b/docs/operating-scylla/security/index.rst @@ -18,6 +18,7 @@ Security node-node-encryption generate-certificate saslauthd + encryption-at-rest .. panel-box:: @@ -50,6 +51,6 @@ Security * :doc:`Encryption: Data in Transit Client to Node ` * :doc:`Encryption: Data in Transit Node to Node ` * :doc:`Generating a self-signed Certificate Chain Using openssl ` - * `Encryption at Rest `_ available in `ScyllaDB Enterprise `_ + * :doc:`Encryption at Rest ` Also check out the `Security Features lesson `_ on ScyllaDB University. diff --git a/docs/operating-scylla/security/security-checklist.rst b/docs/operating-scylla/security/security-checklist.rst index a71e712de4..737b07a826 100644 --- a/docs/operating-scylla/security/security-checklist.rst +++ b/docs/operating-scylla/security/security-checklist.rst @@ -41,14 +41,16 @@ Configure ScyllaDB to use TLS/SSL for all the connections. Use TLS/SSL to encryp * :doc:`Encryption Data in Transit Node to Node ` -Encryption at Rest :label-tip:`ScyllaDB Enterprise` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Encryption at Rest is available in `ScyllaDB Enterprise `_. +Encryption at Rest +~~~~~~~~~~~~~~~~~~ +Encryption at Rest is available in a Scylla Enterprise 2019.1.1. 
Encryption at Rest protects the privacy of your user's data, reduces the risk of data breaches, and helps meet regulatory requirements. In particular, it provides an additional level of protection for your data persisted in storage or backup. -See `Encryption at Rest `_ for details. +See: + +* :doc:`Encryption at Rest ` Reduce the Network Exposure ~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/reference/glossary.rst b/docs/reference/glossary.rst index ae4c93d1f0..627d2de732 100644 --- a/docs/reference/glossary.rst +++ b/docs/reference/glossary.rst @@ -58,6 +58,9 @@ Glossary Keyspace A collection of tables with attributes which define how data is replicated on nodes. See :doc:`Ring Architecture `. + Key Management Interoperability Protocol (KMIP) + :abbr:`KMIP (Key Management Interoperability Protocol)` is a communication protocol that defines message formats for storing keys on a key management server (KMIP server). You can use a KMIP server to protect your keys when using Encryption at Rest. See :doc:`Encryption at Rest`. + Leveled compaction strategy (LCS) :abbr:`LCS (Leveled compaction strategy)` uses small, fixed-size (by default 160 MB) SSTables divided into different levels. See :doc:`Compaction Strategies`.