alternator: optional stripping of http response headers

In Alternator's HTTP API, response headers can dominate bandwidth for
small payloads. The Server, Date, and Content-Type headers were sent on
every response but many clients never use them.

This patch introduces three Alternator config options:
  - alternator_http_response_server_header,
  - alternator_http_response_disable_date_header,
  - alternator_http_response_disable_content_type_header,
which allow customizing or suppressing the respective HTTP response
headers. All three options support live update (no restart needed).
The Server header is no longer sent by default; the Date and
Content-Type defaults preserve the existing behavior.

The Server and Date header suppression uses Seastar's
set_server_header() and set_generate_date_header() APIs added in
https://github.com/scylladb/seastar/pull/3217. This patch also
fixes deprecation warnings from older Seastar HTTP APIs.

Tests are in test/alternator/test_http_headers.py.

Fixes https://scylladb.atlassian.net/browse/SCYLLADB-70

Closes scylladb/scylladb#28288
This commit is contained in:
Szymon Malewski
2026-01-21 09:42:19 +01:00
committed by Nadav Har'El
parent f83270df12
commit 73f0deef6d
9 changed files with 260 additions and 35 deletions

View File

@@ -217,20 +217,18 @@ static sstring flatten(chunked_content&& cc) {
return result;
}
future<std::unique_ptr<http::reply>> response_compressor::generate_reply(std::unique_ptr<http::reply> rep, sstring accept_encoding, const char* content_type, std::string&& response_body) {
future<std::unique_ptr<http::reply>> response_compressor::generate_reply(std::unique_ptr<http::reply> rep, sstring accept_encoding, std::optional<std::string_view> content_type, std::string&& response_body) {
response_compressor::compression_type ct = find_compression(accept_encoding, response_body.size());
if (ct != response_compressor::compression_type::none) {
rep->add_header("Content-Encoding", get_encoding_name(ct));
rep->set_content_type(content_type);
return compress(ct, cfg, std::move(response_body)).then([rep = std::move(rep)] (chunked_content compressed) mutable {
rep->_content = flatten(std::move(compressed));
return compress(ct, cfg, std::move(response_body)).then([rep = std::move(rep), content_type] (chunked_content compressed) mutable {
rep->write_body(content_type, flatten(std::move(compressed)));
return make_ready_future<std::unique_ptr<http::reply>>(std::move(rep));
});
} else {
// Note that despite the move, there is a copy here -
// as str is std::string and rep->_content is sstring.
rep->_content = std::move(response_body);
rep->set_content_type(content_type);
// Note that despite the move, response_body (std::string) is copied
// into an sstring when passed to write_body().
rep->write_body(content_type, std::move(response_body));
}
return make_ready_future<std::unique_ptr<http::reply>>(std::move(rep));
}
@@ -287,7 +285,7 @@ body_writer compress(response_compressor::compression_type ct, const db::config&
};
}
future<std::unique_ptr<http::reply>> response_compressor::generate_reply(std::unique_ptr<http::reply> rep, sstring accept_encoding, const char* content_type, body_writer&& body_writer) {
future<std::unique_ptr<http::reply>> response_compressor::generate_reply(std::unique_ptr<http::reply> rep, sstring accept_encoding, std::optional<std::string_view> content_type, body_writer&& body_writer) {
response_compressor::compression_type ct = find_compression(accept_encoding, std::numeric_limits<size_t>::max());
if (ct != response_compressor::compression_type::none) {
rep->add_header("Content-Encoding", get_encoding_name(ct));

View File

@@ -83,9 +83,9 @@ private:
public:
future<std::unique_ptr<http::reply>> generate_reply(std::unique_ptr<http::reply> rep,
sstring accept_encoding, const char* content_type, std::string&& response_body);
sstring accept_encoding, std::optional<std::string_view> content_type, std::string&& response_body);
future<std::unique_ptr<http::reply>> generate_reply(std::unique_ptr<http::reply> rep,
sstring accept_encoding, const char* content_type, body_writer&& body_writer);
sstring accept_encoding, std::optional<std::string_view> content_type, body_writer&& body_writer);
};
}

View File

@@ -28,6 +28,7 @@
#include "auth.hh"
#include <cctype>
#include <string_view>
#include <algorithm>
#include <utility>
#include "service/storage_proxy.hh"
#include "gms/gossiper.hh"
@@ -112,10 +113,18 @@ class api_handler : public handler_base {
// "application/json". Some other AWS services use later versions instead
// of "1.0", but DynamoDB currently uses "1.0". Note that this content
// type applies to all replies, both success and error.
static constexpr const char* REPLY_CONTENT_TYPE = "application/x-amz-json-1.0";
static constexpr std::string_view REPLY_CONTENT_TYPE = "application/x-amz-json-1.0";
public:
api_handler(const std::function<future<executor::request_return_type>(std::unique_ptr<request> req)>& _handle,
const db::config& config) : _response_compressor(config), _f_handle(
const db::config& config) :
_content_type(config.alternator_http_response_disable_content_type_header()
? std::nullopt
: std::optional<std::string_view>(REPLY_CONTENT_TYPE)),
_content_type_observer(config.alternator_http_response_disable_content_type_header.observe(
[this](const bool& ct) {
_content_type = ct ? std::nullopt : std::optional<std::string_view>(REPLY_CONTENT_TYPE);
})),
_response_compressor(config), _f_handle(
[this, _handle](std::unique_ptr<request> req, std::unique_ptr<reply> rep) {
sstring accept_encoding = _response_compressor.get_accepted_encoding(*req);
return seastar::futurize_invoke(_handle, std::move(req)).then_wrapped(
@@ -142,11 +151,11 @@ public:
return std::visit(overloaded_functor {
[&] (std::string&& str) {
return _response_compressor.generate_reply(std::move(rep), std::move(accept_encoding),
REPLY_CONTENT_TYPE, std::move(str));
_content_type, std::move(str));
},
[&] (body_writer&& body_writer) {
return _response_compressor.generate_reply(std::move(rep), std::move(accept_encoding),
REPLY_CONTENT_TYPE, std::move(body_writer));
_content_type, std::move(body_writer));
},
[&] (const api_error& err) {
generate_error_reply(*rep, err);
@@ -156,18 +165,18 @@ public:
});
}) { }
api_handler(const api_handler&) = default;
api_handler(const api_handler&) = delete;
future<std::unique_ptr<reply>> handle(const sstring& path,
std::unique_ptr<request> req, std::unique_ptr<reply> rep) override {
handle_CORS(*req, *rep, false);
return _f_handle(std::move(req), std::move(rep)).then(
[](std::unique_ptr<reply> rep) {
rep->done();
return make_ready_future<std::unique_ptr<reply>>(std::move(rep));
});
return _f_handle(std::move(req), std::move(rep));
}
protected:
std::optional<std::string_view> _content_type;
utils::observer<bool> _content_type_observer;
response_compressor _response_compressor;
future_handler_function _f_handle;
void generate_error_reply(reply& rep, const api_error& err) {
rjson::value results = rjson::empty_object();
if (!err._extra_fields.IsNull() && err._extra_fields.IsObject()) {
@@ -175,14 +184,11 @@ protected:
}
rjson::add(results, "__type", rjson::from_string("com.amazonaws.dynamodb.v20120810#" + err._type));
rjson::add(results, "message", err._msg);
rep._content = rjson::print(std::move(results));
rep._status = err._http_code;
rep.set_content_type(REPLY_CONTENT_TYPE);
slogger.trace("api_handler error case: {}", rep._content);
sstring content = rjson::print(std::move(results));
slogger.trace("api_handler error case: {}", content);
rep.set_status(err._http_code);
rep.write_body(_content_type, std::move(content));
}
response_compressor _response_compressor;
future_handler_function _f_handle;
};
class gated_handler : public handler_base {
@@ -256,8 +262,7 @@ protected:
}
}
rep->set_status(reply::status_type::ok);
rep->set_content_type("json");
rep->_content = rjson::print(results);
rep->write_body("json", rjson::print(results));
return make_ready_future<std::unique_ptr<reply>>(std::move(rep));
}
};
@@ -924,6 +929,21 @@ server::server(executor& exec, service::storage_proxy& proxy, gms::gossiper& gos
} {
}
// Sanitize an HTTP header value: strip control characters (RFC 7230 §3.2.6)
// and leading/trailing whitespace. Returns nullopt if the result is empty.
static std::optional<sstring> sanitize_header_value(const sstring& v, std::string_view option_name) {
std::string sanitized(v.begin(), v.end());
sanitized.erase(std::remove_if(sanitized.begin(), sanitized.end(),
[](unsigned char c) { return std::iscntrl(c); }), sanitized.end());
if (sanitized.size() != v.size()) {
slogger.warn("Configuration option '{}' contained control characters, they were stripped", option_name);
}
std::string_view trimmed = sanitized;
while (!trimmed.empty() && std::isspace((unsigned char)trimmed.front())) trimmed.remove_prefix(1);
while (!trimmed.empty() && std::isspace((unsigned char)trimmed.back())) trimmed.remove_suffix(1);
return trimmed.empty() ? std::nullopt : std::optional<sstring>(trimmed);
}
future<> server::init(net::inet_address addr, std::optional<uint16_t> port, std::optional<uint16_t> https_port,
std::optional<uint16_t> port_proxy_protocol, std::optional<uint16_t> https_port_proxy_protocol,
std::optional<tls::credentials_builder> creds,
@@ -941,6 +961,24 @@ future<> server::init(net::inet_address addr, std::optional<uint16_t> port, std:
return seastar::async([this, addr, port, https_port, port_proxy_protocol, https_port_proxy_protocol, creds] {
_executor.start().get();
// Apply current config values and register observers for live updates
// before listen() so that no responses are ever sent with stale defaults.
// Both options drive Seastar's built-in header generation directly.
const db::config& cfg = _proxy.data_dictionary().get_config();
auto apply_server_header = [this] (const sstring& v) {
auto opt = sanitize_header_value(v, "alternator_http_response_server_header");
_http_server.set_server_header(opt);
_https_server.set_server_header(opt);
};
auto apply_date_header = [this] (const bool& disable) {
_http_server.set_generate_date_header(!disable);
_https_server.set_generate_date_header(!disable);
};
apply_server_header(cfg.alternator_http_response_server_header());
apply_date_header(cfg.alternator_http_response_disable_date_header());
_server_header_observer = cfg.alternator_http_response_server_header.observe(std::move(apply_server_header));
_date_header_observer = cfg.alternator_http_response_disable_date_header.observe(std::move(apply_date_header));
if (port || port_proxy_protocol) {
set_routes(_http_server._routes);
_http_server.set_content_streaming(true);

View File

@@ -97,6 +97,10 @@ class server : public peering_sharded_service<server> {
};
utils::scoped_item_list<ongoing_request> _ongoing_requests;
// Observers for live-update config options that drive Seastar HTTP server state.
std::optional<utils::observer<sstring>> _server_header_observer;
std::optional<utils::observer<bool>> _date_header_observer;
public:
server(executor& executor, service::storage_proxy& proxy, gms::gossiper& gossiper, auth::service& service, qos::service_level_controller& sl_controller);

View File

@@ -1548,6 +1548,12 @@ db::config::config(std::shared_ptr<db::extensions> exts)
"\t1-9: Compression levels (1 = fastest, 9 = best compression)")
, alternator_response_compression_threshold_in_bytes(this, "alternator_response_compression_threshold_in_bytes", liveness::LiveUpdate, value_status::Used, uint64_t(4096),
"When the compression is enabled, this value indicates the minimum size of data to compress. Smaller responses will not be compressed.")
, alternator_http_response_disable_content_type_header(this, "alternator_http_response_disable_content_type_header", liveness::LiveUpdate, value_status::Used, false,
"Disable the Content-Type header in HTTP responses from Alternator.")
, alternator_http_response_disable_date_header(this, "alternator_http_response_disable_date_header", liveness::LiveUpdate, value_status::Used, false,
"Disable the Date header in HTTP responses from Alternator.")
, alternator_http_response_server_header(this, "alternator_http_response_server_header", liveness::LiveUpdate, value_status::Used, "",
"Value for the Server header in HTTP responses from Alternator. An empty string (the default) omits the Server header entirely.")
, abort_on_ebadf(this, "abort_on_ebadf", value_status::Used, true, "Abort the server on incorrect file descriptor access. Throws exception when disabled.")
, sanitizer_report_backtrace(this, "sanitizer_report_backtrace", value_status::Used, false,
"In debug mode, report log-structured allocator sanitizer violations with a backtrace. Slow.")

View File

@@ -507,6 +507,9 @@ public:
named_value<uint32_t> alternator_describe_table_info_cache_validity_in_seconds;
named_value<int> alternator_response_gzip_compression_level;
named_value<uint32_t> alternator_response_compression_threshold_in_bytes;
named_value<bool> alternator_http_response_disable_content_type_header;
named_value<bool> alternator_http_response_disable_date_header;
named_value<sstring> alternator_http_response_server_header;
named_value<bool> abort_on_ebadf;

View File

@@ -90,12 +90,24 @@ The Alternator server sends headers like the following in its responses:
Content-Length: 2
Content-Type: application/x-amz-json-1.0
Date: Tue, 30 Dec 2025 20:00:01 GMT
Server: Seastar httpd
```
This is a bit over 100 bytes. Most of it is necessary, but the `Date`
and `Server` headers are not strictly necessary and a future version of
Alternator will most likely make them optional (or remove them altogether).
Previously, Alternator also sent a `Server: Seastar httpd` header, bringing
the total to over 100 bytes per response — mostly unnecessary overhead. The
`Server` header is now removed by default, and Alternator allows suppressing
`Date` and `Content-Type` as well via the options below.
All three options below support live update — no Alternator restart is needed.
* **`alternator_http_response_server_header`** (string, default `""`):
Controls the `Server` header. By default, no `Server` header is sent.
Set to a non-empty string to restore it with a custom value.
A whitespace-only string is treated as empty (no header).
* **`alternator_http_response_disable_date_header`** (boolean, default `false`):
When `true`, the `Date` header is omitted from all responses.
* **`alternator_http_response_disable_content_type_header`** (boolean, default `false`):
When `true`, the `Content-Type` header is omitted from all responses. Note
that some client SDKs may rely on this header being present.
The request headers add significantly larger overhead, and AWS SDKs add
even more than necessary. Here is an example:

View File

@@ -0,0 +1,164 @@
# Copyright 2026-present ScyllaDB
#
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
# Tests for HTTP response header configuration in Alternator.
# Tests the ability to suppress the Server, Date, and Content-Type headers,
# and to customize the Server header value, via configuration options that
# can be updated at runtime.
#
# The default-behavior assertions at the top of each test run against both
# Scylla and Amazon DynamoDB; the expected defaults are the same on both,
# except for the Server header, which DynamoDB sends and Scylla omits by
# default. The configuration-manipulation parts are Scylla-only and are
# skipped when run with --aws.
import time
import pytest
from botocore.exceptions import ClientError
from test.alternator.util import scylla_config_temporary, is_aws
# The Server header is governed by alternator_http_response_server_header.
# With the default (empty) value Scylla sends no Server header; a non-empty
# value turns the header on with exactly that value, and a whitespace-only
# value turns it off again. Control characters in the value are stripped.
def test_server_header(dynamodb, test_table_s):
    option = 'alternator_http_response_server_header'

    def ok_headers():
        # Response headers of a successful get_item request.
        reply = test_table_s.get_item(Key={'p': 'test_key'})
        return reply['ResponseMetadata']['HTTPHeaders']

    def err_headers():
        # Response headers of a describe_table request that fails with
        # ClientError because the table does not exist.
        with pytest.raises(ClientError) as caught:
            dynamodb.meta.client.describe_table(TableName='nonexistent_table_xxxxx')
        return caught.value.response['ResponseMetadata']['HTTPHeaders']

    # Default configuration: DynamoDB sends a Server header, Scylla omits it.
    assert ('server' in ok_headers()) == is_aws(dynamodb)
    assert ('server' in err_headers()) == is_aws(dynamodb)
    if is_aws(dynamodb):
        return

    # A non-empty value enables the header, with that value, on both the
    # success and the error path. The whitespace-only case is nested inside
    # because an empty string can't be stored in the config table (it is a
    # key attribute); a whitespace-only string suppresses the header instead.
    with scylla_config_temporary(dynamodb, option, 'MyCustomServer/1.0'):
        assert ok_headers()['server'] == 'MyCustomServer/1.0'
        assert err_headers()['server'] == 'MyCustomServer/1.0'
        with scylla_config_temporary(dynamodb, option, ' '):
            assert 'server' not in ok_headers()
            assert 'server' not in err_headers()

    # NOTE(review): the two blocks below are placed at function level; the
    # flattened listing does not show whether they were nested - confirm.
    # Control characters are stripped from the value: the embedded SOH (\x01)
    # between "Server" and "Name" is removed, leaving "ServerName".
    with scylla_config_temporary(dynamodb, option, 'Server\x01Name'):
        assert ok_headers()['server'] == 'ServerName'

    # A value made only of control characters is empty after sanitization,
    # which suppresses the header.
    with scylla_config_temporary(dynamodb, option, '\x01\x02\x03'):
        assert 'server' not in ok_headers()
# The Date header is sent by default and can be switched off through the
# live-update option alternator_http_response_disable_date_header. Setting
# the option back to 'false' makes the header reappear immediately.
def test_config_date_header(dynamodb, test_table_s):
    option = 'alternator_http_response_disable_date_header'

    def ok_headers():
        # Response headers of a successful get_item request.
        reply = test_table_s.get_item(Key={'p': 'test_key'})
        return reply['ResponseMetadata']['HTTPHeaders']

    def err_headers():
        # Response headers of a describe_table request that fails with
        # ClientError because the table does not exist.
        with pytest.raises(ClientError) as caught:
            dynamodb.meta.client.describe_table(TableName='nonexistent_table_xxxxx')
        return caught.value.response['ResponseMetadata']['HTTPHeaders']

    # Default configuration: Date is present on success and error responses.
    assert 'date' in ok_headers()
    assert 'date' in err_headers()
    if is_aws(dynamodb):
        return

    # With the option set to 'true' the header disappears from both paths.
    # While still inside that block, set it to 'false' explicitly and check
    # that the header is back right away - no waiting required.
    with scylla_config_temporary(dynamodb, option, 'true'):
        assert 'date' not in ok_headers()
        assert 'date' not in err_headers()
        with scylla_config_temporary(dynamodb, option, 'false'):
            assert 'date' in ok_headers()
            assert 'date' in err_headers()
# Checks that the value of the Date header keeps advancing over time, both
# in normal operation and after the header has been suppressed and then
# re-enabled. Polls until a deadline instead of sleeping a fixed amount.
@pytest.mark.veryslow
def test_date_header_updates(dynamodb, test_table_s):
    option = 'alternator_http_response_disable_date_header'

    def poll_for_date_change(deadline_seconds):
        # Repeatedly issue get_item and return True as soon as a Date value
        # differs from the first one observed; return False if that has not
        # happened by the time deadline_seconds have elapsed.
        give_up_at = time.monotonic() + deadline_seconds
        baseline = None
        while time.monotonic() < give_up_at:
            reply = test_table_s.get_item(Key={'p': 'test_key'})
            seen = reply['ResponseMetadata']['HTTPHeaders'].get('date')
            assert seen is not None, "Date header missing"
            if baseline is not None and seen != baseline:
                return True
            if baseline is None:
                baseline = seen
            time.sleep(0.1)
        return False

    # Default configuration: the Date header is present and its value changes
    # over time (Seastar's date header timer fires every ~1 s).
    assert poll_for_date_change(5), "Date header did not change -- timer may not be updating"
    if is_aws(dynamodb):
        return

    # Suppress the header, then re-enable it explicitly. Inside the re-enable
    # block the timer must be running again, so the value has to change
    # before the deadline.
    with scylla_config_temporary(dynamodb, option, 'true'):
        suppressed = test_table_s.get_item(Key={'p': 'test_key'})
        assert 'date' not in suppressed['ResponseMetadata']['HTTPHeaders']
        with scylla_config_temporary(dynamodb, option, 'false'):
            assert poll_for_date_change(5), "Date header did not change after re-enable -- timer may not have been re-armed"
# The Content-Type header is sent by default and can be switched off through
# the live-update option alternator_http_response_disable_content_type_header.
# Both a successful and a failing request are checked, because error replies
# are built by generate_error_reply while successful ones go through
# response_compressor::generate_reply.
def test_config_content_type_header(dynamodb, test_table_s):
    option = 'alternator_http_response_disable_content_type_header'

    def ok_headers():
        # Response headers of a successful get_item request.
        reply = test_table_s.get_item(Key={'p': 'test_key'})
        return reply['ResponseMetadata']['HTTPHeaders']

    def err_headers():
        # Response headers of a describe_table request that fails with
        # ClientError because the table does not exist.
        with pytest.raises(ClientError) as caught:
            dynamodb.meta.client.describe_table(TableName='nonexistent_table_xxxxx')
        return caught.value.response['ResponseMetadata']['HTTPHeaders']

    # Default configuration: Content-Type is present on both paths.
    assert 'content-type' in ok_headers()
    assert 'content-type' in err_headers()
    if is_aws(dynamodb):
        return

    # With the option set to 'true' the header disappears from both paths.
    # While still inside that block, set it to 'false' explicitly and check
    # that the header is present again on both paths.
    with scylla_config_temporary(dynamodb, option, 'true'):
        assert 'content-type' not in ok_headers()
        assert 'content-type' not in err_headers()
        with scylla_config_temporary(dynamodb, option, 'false'):
            assert 'content-type' in ok_headers()
            assert 'content-type' in err_headers()

View File

@@ -134,7 +134,7 @@ def is_local_java(dynamodbstreams):
urllib.request.urlopen(url)
except URLError as e:
if hasattr(e, 'info'):
return e.info()['Server'].startswith('Jetty')
return e.info().get('Server', '').startswith('Jetty')
return False
def ensure_java_server(dynamodbstreams, error='ValidationException'):