From 73f0deef6d8dfb6d4c1a29a29cc2404d7cc017a9 Mon Sep 17 00:00:00 2001 From: Szymon Malewski Date: Wed, 21 Jan 2026 09:42:19 +0100 Subject: [PATCH] alternator: optional stripping of http response headers In Alternator's HTTP API, response headers can dominate bandwidth for small payloads. The Server, Date, and Content-Type headers were sent on every response but many clients never use them. This patch introduces three Alternator config options: - alternator_http_response_server_header, - alternator_http_response_disable_date_header, - alternator_http_response_disable_content_type_header, which allow customizing or suppressing the respective HTTP response headers. All three options support live update (no restart needed). The Server header is no longer sent by default; the Date and Content-Type defaults preserve the existing behavior. The Server and Date header suppression uses Seastar's set_server_header() and set_generate_date_header() APIs added in https://github.com/scylladb/seastar/pull/3217. This patch also fixes deprecation warnings from older Seastar HTTP APIs. Tests are in test/alternator/test_http_headers.py. 
Fixes https://scylladb.atlassian.net/browse/SCYLLADB-70 Closes scylladb/scylladb#28288 --- alternator/http_compression.cc | 16 ++- alternator/http_compression.hh | 4 +- alternator/server.cc | 76 +++++++++---- alternator/server.hh | 4 + db/config.cc | 6 + db/config.hh | 3 + docs/alternator/network.md | 20 +++- test/alternator/test_http_headers.py | 164 +++++++++++++++++++++++++++ test/alternator/test_streams.py | 2 +- 9 files changed, 260 insertions(+), 35 deletions(-) create mode 100644 test/alternator/test_http_headers.py diff --git a/alternator/http_compression.cc b/alternator/http_compression.cc index 88541248ba..e57375337f 100644 --- a/alternator/http_compression.cc +++ b/alternator/http_compression.cc @@ -217,20 +217,18 @@ static sstring flatten(chunked_content&& cc) { return result; } -future> response_compressor::generate_reply(std::unique_ptr rep, sstring accept_encoding, const char* content_type, std::string&& response_body) { +future> response_compressor::generate_reply(std::unique_ptr rep, sstring accept_encoding, std::optional content_type, std::string&& response_body) { response_compressor::compression_type ct = find_compression(accept_encoding, response_body.size()); if (ct != response_compressor::compression_type::none) { rep->add_header("Content-Encoding", get_encoding_name(ct)); - rep->set_content_type(content_type); - return compress(ct, cfg, std::move(response_body)).then([rep = std::move(rep)] (chunked_content compressed) mutable { - rep->_content = flatten(std::move(compressed)); + return compress(ct, cfg, std::move(response_body)).then([rep = std::move(rep), content_type] (chunked_content compressed) mutable { + rep->write_body(content_type, flatten(std::move(compressed))); return make_ready_future>(std::move(rep)); }); } else { - // Note that despite the move, there is a copy here - - // as str is std::string and rep->_content is sstring. 
- rep->_content = std::move(response_body); - rep->set_content_type(content_type); + // Note that despite the move, response_body (std::string) is copied + // into an sstring when passed to write_body(). + rep->write_body(content_type, std::move(response_body)); } return make_ready_future>(std::move(rep)); } @@ -287,7 +285,7 @@ body_writer compress(response_compressor::compression_type ct, const db::config& }; } -future> response_compressor::generate_reply(std::unique_ptr rep, sstring accept_encoding, const char* content_type, body_writer&& body_writer) { +future> response_compressor::generate_reply(std::unique_ptr rep, sstring accept_encoding, std::optional content_type, body_writer&& body_writer) { response_compressor::compression_type ct = find_compression(accept_encoding, std::numeric_limits::max()); if (ct != response_compressor::compression_type::none) { rep->add_header("Content-Encoding", get_encoding_name(ct)); diff --git a/alternator/http_compression.hh b/alternator/http_compression.hh index 8124151be3..f19e4a3b66 100644 --- a/alternator/http_compression.hh +++ b/alternator/http_compression.hh @@ -83,9 +83,9 @@ private: public: future> generate_reply(std::unique_ptr rep, - sstring accept_encoding, const char* content_type, std::string&& response_body); + sstring accept_encoding, std::optional content_type, std::string&& response_body); future> generate_reply(std::unique_ptr rep, - sstring accept_encoding, const char* content_type, body_writer&& body_writer); + sstring accept_encoding, std::optional content_type, body_writer&& body_writer); }; } diff --git a/alternator/server.cc b/alternator/server.cc index 68f7cacf7a..f9569d788b 100644 --- a/alternator/server.cc +++ b/alternator/server.cc @@ -28,6 +28,7 @@ #include "auth.hh" #include #include +#include #include #include "service/storage_proxy.hh" #include "gms/gossiper.hh" @@ -112,10 +113,18 @@ class api_handler : public handler_base { // "application/json". 
Some other AWS services use later versions instead // of "1.0", but DynamoDB currently uses "1.0". Note that this content // type applies to all replies, both success and error. - static constexpr const char* REPLY_CONTENT_TYPE = "application/x-amz-json-1.0"; + static constexpr std::string_view REPLY_CONTENT_TYPE = "application/x-amz-json-1.0"; public: api_handler(const std::function(std::unique_ptr req)>& _handle, - const db::config& config) : _response_compressor(config), _f_handle( + const db::config& config) : + _content_type(config.alternator_http_response_disable_content_type_header() + ? std::nullopt + : std::optional(REPLY_CONTENT_TYPE)), + _content_type_observer(config.alternator_http_response_disable_content_type_header.observe( + [this](const bool& ct) { + _content_type = ct ? std::nullopt : std::optional(REPLY_CONTENT_TYPE); + })), + _response_compressor(config), _f_handle( [this, _handle](std::unique_ptr req, std::unique_ptr rep) { sstring accept_encoding = _response_compressor.get_accepted_encoding(*req); return seastar::futurize_invoke(_handle, std::move(req)).then_wrapped( @@ -142,11 +151,11 @@ public: return std::visit(overloaded_functor { [&] (std::string&& str) { return _response_compressor.generate_reply(std::move(rep), std::move(accept_encoding), - REPLY_CONTENT_TYPE, std::move(str)); + _content_type, std::move(str)); }, [&] (body_writer&& body_writer) { return _response_compressor.generate_reply(std::move(rep), std::move(accept_encoding), - REPLY_CONTENT_TYPE, std::move(body_writer)); + _content_type, std::move(body_writer)); }, [&] (const api_error& err) { generate_error_reply(*rep, err); @@ -156,18 +165,18 @@ public: }); }) { } - api_handler(const api_handler&) = default; + api_handler(const api_handler&) = delete; future> handle(const sstring& path, std::unique_ptr req, std::unique_ptr rep) override { handle_CORS(*req, *rep, false); - return _f_handle(std::move(req), std::move(rep)).then( - [](std::unique_ptr rep) { - rep->done(); - return 
make_ready_future>(std::move(rep)); - }); + return _f_handle(std::move(req), std::move(rep)); } protected: + std::optional _content_type; + utils::observer _content_type_observer; + response_compressor _response_compressor; + future_handler_function _f_handle; void generate_error_reply(reply& rep, const api_error& err) { rjson::value results = rjson::empty_object(); if (!err._extra_fields.IsNull() && err._extra_fields.IsObject()) { @@ -175,14 +184,11 @@ protected: } rjson::add(results, "__type", rjson::from_string("com.amazonaws.dynamodb.v20120810#" + err._type)); rjson::add(results, "message", err._msg); - rep._content = rjson::print(std::move(results)); - rep._status = err._http_code; - rep.set_content_type(REPLY_CONTENT_TYPE); - slogger.trace("api_handler error case: {}", rep._content); + sstring content = rjson::print(std::move(results)); + slogger.trace("api_handler error case: {}", content); + rep.set_status(err._http_code); + rep.write_body(_content_type, std::move(content)); } - - response_compressor _response_compressor; - future_handler_function _f_handle; }; class gated_handler : public handler_base { @@ -256,8 +262,7 @@ protected: } } rep->set_status(reply::status_type::ok); - rep->set_content_type("json"); - rep->_content = rjson::print(results); + rep->write_body("json", rjson::print(results)); return make_ready_future>(std::move(rep)); } }; @@ -924,6 +929,21 @@ server::server(executor& exec, service::storage_proxy& proxy, gms::gossiper& gos } { } +// Sanitize an HTTP header value: strip control characters (RFC 7230 §3.2.6) +// and leading/trailing whitespace. Returns nullopt if the result is empty. 
+static std::optional sanitize_header_value(const sstring& v, std::string_view option_name) { + std::string sanitized(v.begin(), v.end()); + sanitized.erase(std::remove_if(sanitized.begin(), sanitized.end(), + [](unsigned char c) { return std::iscntrl(c); }), sanitized.end()); + if (sanitized.size() != v.size()) { + slogger.warn("Configuration option '{}' contained control characters, they were stripped", option_name); + } + std::string_view trimmed = sanitized; + while (!trimmed.empty() && std::isspace((unsigned char)trimmed.front())) trimmed.remove_prefix(1); + while (!trimmed.empty() && std::isspace((unsigned char)trimmed.back())) trimmed.remove_suffix(1); + return trimmed.empty() ? std::nullopt : std::optional(trimmed); +} + future<> server::init(net::inet_address addr, std::optional port, std::optional https_port, std::optional port_proxy_protocol, std::optional https_port_proxy_protocol, std::optional creds, @@ -941,6 +961,24 @@ future<> server::init(net::inet_address addr, std::optional port, std: return seastar::async([this, addr, port, https_port, port_proxy_protocol, https_port_proxy_protocol, creds] { _executor.start().get(); + // Apply current config values and register observers for live updates + // before listen() so that no responses are ever sent with stale defaults. + // Both options drive Seastar's built-in header generation directly. 
+ const db::config& cfg = _proxy.data_dictionary().get_config(); + auto apply_server_header = [this] (const sstring& v) { + auto opt = sanitize_header_value(v, "alternator_http_response_server_header"); + _http_server.set_server_header(opt); + _https_server.set_server_header(opt); + }; + auto apply_date_header = [this] (const bool& disable) { + _http_server.set_generate_date_header(!disable); + _https_server.set_generate_date_header(!disable); + }; + apply_server_header(cfg.alternator_http_response_server_header()); + apply_date_header(cfg.alternator_http_response_disable_date_header()); + _server_header_observer = cfg.alternator_http_response_server_header.observe(std::move(apply_server_header)); + _date_header_observer = cfg.alternator_http_response_disable_date_header.observe(std::move(apply_date_header)); + if (port || port_proxy_protocol) { set_routes(_http_server._routes); _http_server.set_content_streaming(true); diff --git a/alternator/server.hh b/alternator/server.hh index f105510bcc..74753dbfdb 100644 --- a/alternator/server.hh +++ b/alternator/server.hh @@ -97,6 +97,10 @@ class server : public peering_sharded_service { }; utils::scoped_item_list _ongoing_requests; + // Observers for live-update config options that drive Seastar HTTP server state. 
+ std::optional> _server_header_observer; + std::optional> _date_header_observer; + public: server(executor& executor, service::storage_proxy& proxy, gms::gossiper& gossiper, auth::service& service, qos::service_level_controller& sl_controller); diff --git a/db/config.cc b/db/config.cc index cc412c9562..bd9e3511f6 100644 --- a/db/config.cc +++ b/db/config.cc @@ -1548,6 +1548,12 @@ db::config::config(std::shared_ptr exts) "\t1-9: Compression levels (1 = fastest, 9 = best compression)") , alternator_response_compression_threshold_in_bytes(this, "alternator_response_compression_threshold_in_bytes", liveness::LiveUpdate, value_status::Used, uint64_t(4096), "When the compression is enabled, this value indicates the minimum size of data to compress. Smaller responses will not be compressed.") + , alternator_http_response_disable_content_type_header(this, "alternator_http_response_disable_content_type_header", liveness::LiveUpdate, value_status::Used, false, + "Disable the Content-Type header in HTTP responses from Alternator.") + , alternator_http_response_disable_date_header(this, "alternator_http_response_disable_date_header", liveness::LiveUpdate, value_status::Used, false, + "Disable the Date header in HTTP responses from Alternator.") + , alternator_http_response_server_header(this, "alternator_http_response_server_header", liveness::LiveUpdate, value_status::Used, "", + "Value for the Server header in HTTP responses from Alternator. An empty string (the default) omits the Server header entirely.") , abort_on_ebadf(this, "abort_on_ebadf", value_status::Used, true, "Abort the server on incorrect file descriptor access. Throws exception when disabled.") , sanitizer_report_backtrace(this, "sanitizer_report_backtrace", value_status::Used, false, "In debug mode, report log-structured allocator sanitizer violations with a backtrace. 
Slow.") diff --git a/db/config.hh b/db/config.hh index 04e080af0c..d68ee1f25e 100644 --- a/db/config.hh +++ b/db/config.hh @@ -507,6 +507,9 @@ public: named_value alternator_describe_table_info_cache_validity_in_seconds; named_value alternator_response_gzip_compression_level; named_value alternator_response_compression_threshold_in_bytes; + named_value alternator_http_response_disable_content_type_header; + named_value alternator_http_response_disable_date_header; + named_value alternator_http_response_server_header; named_value abort_on_ebadf; diff --git a/docs/alternator/network.md b/docs/alternator/network.md index aedf6faaba..e34c03c706 100644 --- a/docs/alternator/network.md +++ b/docs/alternator/network.md @@ -90,12 +90,24 @@ The Alternator server sends headers like the following in its responses: Content-Length: 2 Content-Type: application/x-amz-json-1.0 Date: Tue, 30 Dec 2025 20:00:01 GMT -Server: Seastar httpd ``` -This is a bit over 100 bytes. Most of it is necessary, but the `Date` -and `Server` headers are not strictly necessary and a future version of -Alternator will most likely make them optional (or remove them altogether). +Previously, Alternator also sent a `Server: Seastar httpd` header, bringing +the total to over 100 bytes per response — mostly unnecessary overhead. The +`Server` header is now removed by default, and Alternator allows suppressing +`Date` and `Content-Type` as well via the options below. + +All three options below support live update — no Alternator restart is needed. + +* **`alternator_http_response_server_header`** (string, default `""`): + Controls the `Server` header. By default, no `Server` header is sent. + Set to a non-empty string to restore it with a custom value. + A whitespace-only string is treated as empty (no header). +* **`alternator_http_response_disable_date_header`** (boolean, default `false`): + When `true`, the `Date` header is omitted from all responses. 
+* **`alternator_http_response_disable_content_type_header`** (boolean, default `false`): + When `true`, the `Content-Type` header is omitted from all responses. Note + that some client SDKs may rely on this header being present. The request headers add significantly larger overhead, and AWS SDKs add even more than necessary. Here is an example: diff --git a/test/alternator/test_http_headers.py b/test/alternator/test_http_headers.py new file mode 100644 index 0000000000..a042271749 --- /dev/null +++ b/test/alternator/test_http_headers.py @@ -0,0 +1,164 @@ +# Copyright 2026-present ScyllaDB +# +# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1 + +# Tests for HTTP response header configuration in Alternator. +# Tests the ability to suppress the Server, Date, and Content-Type headers, +# and to customize the Server header value, via configuration options that +# can be updated at runtime. +# +# The default-behavior assertions in each test (headers present with correct +# values) run against both Scylla and Amazon DynamoDB. The configuration- +# manipulation parts are Scylla-only and are skipped when run with --aws. + +import time +import pytest +from botocore.exceptions import ClientError +from test.alternator.util import scylla_config_temporary, is_aws + +# Test that the Server header value is controlled by alternator_http_response_server_header: +# by default the header is absent (empty config value), a non-empty value +# enables it, and a whitespace-only string suppresses it again. +def test_server_header(dynamodb, test_table_s): + # DynamoDB sends a Server header; Scylla's default is to omit it. 
+ response = test_table_s.get_item(Key={'p': 'test_key'}) + headers = response['ResponseMetadata']['HTTPHeaders'] + assert ('server' in headers) == is_aws(dynamodb) + with pytest.raises(ClientError) as exc_info: + dynamodb.meta.client.describe_table(TableName='nonexistent_table_xxxxx') + assert ('server' in exc_info.value.response['ResponseMetadata']['HTTPHeaders']) == is_aws(dynamodb) + if is_aws(dynamodb): + return + + # Setting a non-empty value enables the header with that value on both + # success and error paths. Nested inside, a whitespace-only string + # suppresses the header again (empty strings can't be stored in the + # config table as they are key attributes). + with scylla_config_temporary(dynamodb, 'alternator_http_response_server_header', 'MyCustomServer/1.0'): + response = test_table_s.get_item(Key={'p': 'test_key'}) + headers = response['ResponseMetadata']['HTTPHeaders'] + assert headers['server'] == 'MyCustomServer/1.0' + with pytest.raises(ClientError) as exc_info: + dynamodb.meta.client.describe_table(TableName='nonexistent_table_xxxxx') + assert exc_info.value.response['ResponseMetadata']['HTTPHeaders']['server'] == 'MyCustomServer/1.0' + with scylla_config_temporary(dynamodb, 'alternator_http_response_server_header', ' '): + response = test_table_s.get_item(Key={'p': 'test_key'}) + headers = response['ResponseMetadata']['HTTPHeaders'] + assert 'server' not in headers + with pytest.raises(ClientError) as exc_info: + dynamodb.meta.client.describe_table(TableName='nonexistent_table_xxxxx') + assert 'server' not in exc_info.value.response['ResponseMetadata']['HTTPHeaders'] + # sanitize_header_value strips control characters from the value. + # Here an embedded SOH (\x01) between "Server" and "Name" is removed, + # leaving the header set to "ServerName". 
+ with scylla_config_temporary(dynamodb, 'alternator_http_response_server_header', 'Server\x01Name'): + response = test_table_s.get_item(Key={'p': 'test_key'}) + headers = response['ResponseMetadata']['HTTPHeaders'] + assert headers['server'] == 'ServerName' + # A value consisting entirely of control characters becomes empty + # after sanitization, which suppresses the header. + with scylla_config_temporary(dynamodb, 'alternator_http_response_server_header', '\x01\x02\x03'): + response = test_table_s.get_item(Key={'p': 'test_key'}) + headers = response['ResponseMetadata']['HTTPHeaders'] + assert 'server' not in headers + +# Test that the Date header is present by default and can be suppressed via +# the alternator_http_response_disable_date_header live-update option. After +# re-enabling explicitly (setting to 'false'), the header is present immediately. +def test_config_date_header(dynamodb, test_table_s): + # By default the Date header should be present on both success and error paths. + response = test_table_s.get_item(Key={'p': 'test_key'}) + headers = response['ResponseMetadata']['HTTPHeaders'] + assert 'date' in headers + with pytest.raises(ClientError) as exc_info: + dynamodb.meta.client.describe_table(TableName='nonexistent_table_xxxxx') + assert 'date' in exc_info.value.response['ResponseMetadata']['HTTPHeaders'] + + if is_aws(dynamodb): + return + + # With the option set to true the Date header should be absent on both paths. + # Nested inside the suppression block, re-enable explicitly and verify the + # header is present again immediately -- no waiting required. 
+ with scylla_config_temporary(dynamodb, 'alternator_http_response_disable_date_header', 'true'): + response = test_table_s.get_item(Key={'p': 'test_key'}) + headers = response['ResponseMetadata']['HTTPHeaders'] + assert 'date' not in headers + with pytest.raises(ClientError) as exc_info: + dynamodb.meta.client.describe_table(TableName='nonexistent_table_xxxxx') + assert 'date' not in exc_info.value.response['ResponseMetadata']['HTTPHeaders'] + with scylla_config_temporary(dynamodb, 'alternator_http_response_disable_date_header', 'false'): + response = test_table_s.get_item(Key={'p': 'test_key'}) + headers = response['ResponseMetadata']['HTTPHeaders'] + assert 'date' in headers + with pytest.raises(ClientError) as exc_info: + dynamodb.meta.client.describe_table(TableName='nonexistent_table_xxxxx') + assert 'date' in exc_info.value.response['ResponseMetadata']['HTTPHeaders'] + +# Verifies that the Date header value actually changes over time under normal +# operation, and that the timer continues to run after a suppress-then-re-enable +# cycle. Uses polling rather than fixed sleeps. +@pytest.mark.veryslow +def test_date_header_updates(dynamodb, test_table_s): + def poll_for_date_change(deadline_seconds): + first_date = None + deadline = time.monotonic() + deadline_seconds + while time.monotonic() < deadline: + r = test_table_s.get_item(Key={'p': 'test_key'}) + d = r['ResponseMetadata']['HTTPHeaders'].get('date') + assert d is not None, "Date header missing" + if first_date is None: + first_date = d + elif d != first_date: + return True + time.sleep(0.1) + return False + + # By default the Date header should be present and change over time + # (Seastar's date header timer fires every ~1 s). + assert poll_for_date_change(5), "Date header did not change -- timer may not be updating" + + if is_aws(dynamodb): + return + + # Suppress, then re-enable explicitly. Inside the re-enable block the timer + # must be live again: confirm the date value changes within a deadline. 
+ with scylla_config_temporary(dynamodb, 'alternator_http_response_disable_date_header', 'true'): + response = test_table_s.get_item(Key={'p': 'test_key'}) + assert 'date' not in response['ResponseMetadata']['HTTPHeaders'] + with scylla_config_temporary(dynamodb, 'alternator_http_response_disable_date_header', 'false'): + assert poll_for_date_change(5), "Date header did not change after re-enable -- timer may not have been re-armed" + +# Test that the Content-Type header is present by default and can be suppressed +# via the alternator_http_response_disable_content_type_header live-update option. +# Also verifies the error path (generate_error_reply), which writes the body and +# Content-Type at a separate call site from the success path (generate_reply). +def test_config_content_type_header(dynamodb, test_table_s): + # By default the Content-Type header should be present on both paths. + response = test_table_s.get_item(Key={'p': 'test_key'}) + headers = response['ResponseMetadata']['HTTPHeaders'] + assert 'content-type' in headers + with pytest.raises(ClientError) as exc_info: + dynamodb.meta.client.describe_table(TableName='nonexistent_table_xxxxx') + assert 'content-type' in exc_info.value.response['ResponseMetadata']['HTTPHeaders'] + + if is_aws(dynamodb): + return + + # Setting the option to true suppresses the header on both success and error + # paths. Nested inside the suppression block, re-enable explicitly and verify + # the header is present again on both paths. 
+ with scylla_config_temporary(dynamodb, 'alternator_http_response_disable_content_type_header', 'true'): + response = test_table_s.get_item(Key={'p': 'test_key'}) + headers = response['ResponseMetadata']['HTTPHeaders'] + assert 'content-type' not in headers + with pytest.raises(ClientError) as exc_info: + dynamodb.meta.client.describe_table(TableName='nonexistent_table_xxxxx') + assert 'content-type' not in exc_info.value.response['ResponseMetadata']['HTTPHeaders'] + with scylla_config_temporary(dynamodb, 'alternator_http_response_disable_content_type_header', 'false'): + response = test_table_s.get_item(Key={'p': 'test_key'}) + headers = response['ResponseMetadata']['HTTPHeaders'] + assert 'content-type' in headers + with pytest.raises(ClientError) as exc_info: + dynamodb.meta.client.describe_table(TableName='nonexistent_table_xxxxx') + assert 'content-type' in exc_info.value.response['ResponseMetadata']['HTTPHeaders'] diff --git a/test/alternator/test_streams.py b/test/alternator/test_streams.py index bfaf675bda..af4d7d3226 100644 --- a/test/alternator/test_streams.py +++ b/test/alternator/test_streams.py @@ -134,7 +134,7 @@ def is_local_java(dynamodbstreams): urllib.request.urlopen(url) except URLError as e: if hasattr(e, 'info'): - return e.info()['Server'].startswith('Jetty') + return e.info().get('Server', '').startswith('Jetty') return False def ensure_java_server(dynamodbstreams, error='ValidationException'):