Merge 'http: prepare http clients retry machinery refactoring' from Ernest Zaslavsky

Today the S3 client has a well-established and (hopefully) well-tested HTTP request retry strategy. In the rest of the clients it looks like we are trying to achieve the same thing by writing the same code over and over again, and of course missing corner cases that have already been addressed in the S3 client.
This PR aims to extract the code that can help other clients detect the retryability of an error originating from the HTTP client, reuse the built-in Seastar HTTP client retryability, and minimize the boilerplate of HTTP client exception handling.

No backport needed since it is only refactoring of the existing code

Closes scylladb/scylladb#28250

* github.com:scylladb/scylladb:
  exceptions: add helper to build a chain of error handlers
  http: extract error classification code
  aws_error: extract `retryable` from aws_error
This commit is contained in:
Pavel Emelyanov
2026-02-18 10:06:37 +03:00
10 changed files with 215 additions and 76 deletions

View File

@@ -1174,6 +1174,7 @@ scylla_core = (['message/messaging_service.cc',
'utils/gz/crc_combine.cc',
'utils/gz/crc_combine_table.cc',
'utils/http.cc',
'utils/http_client_error_processing.cc',
'utils/rest/client.cc',
'utils/s3/aws_error.cc',
'utils/s3/client.cc',

View File

@@ -51,17 +51,17 @@ BOOST_AUTO_TEST_CASE(TestXmlErrorPayload) {
auto error = aws::aws_error::parse(build_xml_response("IncompleteSignatureException", message, requestId)).value();
BOOST_REQUIRE_EQUAL(aws::aws_error_type::INCOMPLETE_SIGNATURE, error.get_error_type());
BOOST_REQUIRE_EQUAL(message, error.get_error_message());
BOOST_REQUIRE_EQUAL(error.is_retryable(), aws::retryable::no);
BOOST_REQUIRE_EQUAL(error.is_retryable(), utils::http::retryable::no);
error = aws::aws_error::parse(build_xml_response("InternalFailure", message, requestId, message_style::plural)).value();
BOOST_REQUIRE_EQUAL(aws::aws_error_type::INTERNAL_FAILURE, error.get_error_type());
BOOST_REQUIRE_EQUAL(message, error.get_error_message());
BOOST_REQUIRE_EQUAL(error.is_retryable(), aws::retryable::yes);
BOOST_REQUIRE_EQUAL(error.is_retryable(), utils::http::retryable::yes);
error = aws::aws_error::parse(build_xml_response("IDontExist", message, requestId, message_style::plural)).value();
BOOST_REQUIRE_EQUAL(aws::aws_error_type::UNKNOWN, error.get_error_type());
BOOST_REQUIRE_EQUAL(message, error.get_error_message());
BOOST_REQUIRE_EQUAL(error.is_retryable(), aws::retryable::no);
BOOST_REQUIRE_EQUAL(error.is_retryable(), utils::http::retryable::no);
auto no_error = aws::aws_error::parse("");
BOOST_REQUIRE_EQUAL(no_error.has_value(), false);
@@ -75,7 +75,7 @@ BOOST_AUTO_TEST_CASE(TestXmlErrorPayload) {
error = aws::aws_error::parse(response).value();
BOOST_REQUIRE_EQUAL(aws::aws_error_type::INTERNAL_FAILURE, error.get_error_type());
BOOST_REQUIRE_EQUAL(message, error.get_error_message());
BOOST_REQUIRE_EQUAL(error.is_retryable(), aws::retryable::yes);
BOOST_REQUIRE_EQUAL(error.is_retryable(), utils::http::retryable::yes);
}
BOOST_AUTO_TEST_CASE(TestErrorsWithPrefixParse) {
@@ -92,7 +92,7 @@ BOOST_AUTO_TEST_CASE(TestErrorsWithPrefixParse) {
auto error = aws::aws_error::parse(build_xml_response(exceptionPrefix + "IDon'tExist", "JunkMessage", requestId)).value();
BOOST_REQUIRE_EQUAL(aws::aws_error_type::UNKNOWN, error.get_error_type());
BOOST_REQUIRE_EQUAL("JunkMessage", error.get_error_message());
BOOST_REQUIRE_EQUAL(error.is_retryable(), aws::retryable::no);
BOOST_REQUIRE_EQUAL(error.is_retryable(), utils::http::retryable::no);
}
BOOST_AUTO_TEST_CASE(TestErrorsWithoutPrefixParse) {
@@ -107,7 +107,15 @@ BOOST_AUTO_TEST_CASE(TestErrorsWithoutPrefixParse) {
auto error = aws::aws_error::parse(build_xml_response("IDon'tExist", "JunkMessage", requestId)).value();
BOOST_REQUIRE_EQUAL(aws::aws_error_type::UNKNOWN, error.get_error_type());
BOOST_REQUIRE_EQUAL("JunkMessage", error.get_error_message());
BOOST_REQUIRE_EQUAL(error.is_retryable(), aws::retryable::no);
BOOST_REQUIRE_EQUAL(error.is_retryable(), utils::http::retryable::no);
}
// Checks the free classification helpers in utils::http directly, without going
// through aws_error: one retryable and one non-retryable input per helper.
BOOST_AUTO_TEST_CASE(TestHelperFunctions) {
    using utils::http::retryable;
    using status = seastar::http::reply::status_type;

    // HTTP status classification.
    const auto unavailable_verdict = utils::http::from_http_code(status::service_unavailable);
    BOOST_REQUIRE_EQUAL(unavailable_verdict, retryable::yes);
    const auto unauthorized_verdict = utils::http::from_http_code(status::unauthorized);
    BOOST_REQUIRE_EQUAL(unauthorized_verdict, retryable::no);

    // System error classification.
    const std::system_error connection_reset(ECONNRESET, std::system_category());
    BOOST_REQUIRE_EQUAL(utils::http::from_system_error(connection_reset), retryable::yes);
    const std::system_error address_in_use(EADDRINUSE, std::system_category());
    BOOST_REQUIRE_EQUAL(utils::http::from_system_error(address_in_use), retryable::no);
}
BOOST_AUTO_TEST_CASE(TestNestedException) {
@@ -126,7 +134,7 @@ BOOST_AUTO_TEST_CASE(TestNestedException) {
auto error = aws::aws_error::from_exception_ptr(std::current_exception());
BOOST_REQUIRE_EQUAL(aws::aws_error_type::NETWORK_CONNECTION, error.get_error_type());
BOOST_REQUIRE_EQUAL("Software caused connection abort", error.get_error_message());
BOOST_REQUIRE_EQUAL(error.is_retryable(), aws::retryable::yes);
BOOST_REQUIRE_EQUAL(error.is_retryable(), utils::http::retryable::yes);
}
// Test nested exceptions where the innermost is NOT a system_error
@@ -140,7 +148,7 @@ BOOST_AUTO_TEST_CASE(TestNestedException) {
auto error = aws::aws_error::from_exception_ptr(std::current_exception());
BOOST_REQUIRE_EQUAL(aws::aws_error_type::UNKNOWN, error.get_error_type());
BOOST_REQUIRE_EQUAL("Higher level runtime_error", error.get_error_message());
BOOST_REQUIRE_EQUAL(error.is_retryable(), aws::retryable::no);
BOOST_REQUIRE_EQUAL(error.is_retryable(), utils::http::retryable::no);
}
// Test single exception which is NOT a nested exception
@@ -150,7 +158,7 @@ BOOST_AUTO_TEST_CASE(TestNestedException) {
auto error = aws::aws_error::from_exception_ptr(std::current_exception());
BOOST_REQUIRE_EQUAL(aws::aws_error_type::UNKNOWN, error.get_error_type());
BOOST_REQUIRE_EQUAL("Something bad happened", error.get_error_message());
BOOST_REQUIRE_EQUAL(error.is_retryable(), aws::retryable::no);
BOOST_REQUIRE_EQUAL(error.is_retryable(), utils::http::retryable::no);
}
// Test with non-std::exception
@@ -160,7 +168,7 @@ BOOST_AUTO_TEST_CASE(TestNestedException) {
auto error = aws::aws_error::from_exception_ptr(std::current_exception());
BOOST_REQUIRE_EQUAL(aws::aws_error_type::UNKNOWN, error.get_error_type());
BOOST_REQUIRE_EQUAL("No error message was provided, exception content: char const*", error.get_error_message());
BOOST_REQUIRE_EQUAL(error.is_retryable(), aws::retryable::no);
BOOST_REQUIRE_EQUAL(error.is_retryable(), utils::http::retryable::no);
}
// Test system_error
@@ -170,7 +178,7 @@ BOOST_AUTO_TEST_CASE(TestNestedException) {
auto error = aws::aws_error::from_exception_ptr(std::current_exception());
BOOST_REQUIRE_EQUAL(aws::aws_error_type::NETWORK_CONNECTION, error.get_error_type());
BOOST_REQUIRE_EQUAL("Software caused connection abort", error.get_error_message());
BOOST_REQUIRE_EQUAL(error.is_retryable(), aws::retryable::yes);
BOOST_REQUIRE_EQUAL(error.is_retryable(), utils::http::retryable::yes);
}
// Test aws_exception
@@ -180,7 +188,7 @@ BOOST_AUTO_TEST_CASE(TestNestedException) {
auto error = aws::aws_error::from_exception_ptr(std::current_exception());
BOOST_REQUIRE_EQUAL(aws::aws_error_type::HTTP_TOO_MANY_REQUESTS, error.get_error_type());
BOOST_REQUIRE_EQUAL("", error.get_error_message());
BOOST_REQUIRE_EQUAL(error.is_retryable(), aws::retryable::yes);
BOOST_REQUIRE_EQUAL(error.is_retryable(), utils::http::retryable::yes);
}
// Test httpd::unexpected_status_error
@@ -190,6 +198,6 @@ BOOST_AUTO_TEST_CASE(TestNestedException) {
auto error = aws::aws_error::from_exception_ptr(std::current_exception());
BOOST_REQUIRE_EQUAL(aws::aws_error_type::HTTP_NETWORK_CONNECT_TIMEOUT, error.get_error_type());
BOOST_REQUIRE_EQUAL(" HTTP code: 599 Network Connect Timeout", error.get_error_message());
BOOST_REQUIRE_EQUAL(error.is_retryable(), aws::retryable::yes);
BOOST_REQUIRE_EQUAL(error.is_retryable(), utils::http::retryable::yes);
}
}

View File

@@ -27,6 +27,7 @@ target_sources(utils
hashers.cc
histogram_metrics_helper.cc
http.cc
http_client_error_processing.cc
human_readable.cc
i_filter.cc
io-wrappers.cc

View File

@@ -26,6 +26,7 @@
#include <seastar/core/align.hh>
#include <functional>
#include <optional>
#include <system_error>
#include <type_traits>
@@ -211,3 +212,75 @@ inline std::exception_ptr make_nested_exception_ptr(Ex&& ex, std::exception_ptr
}
#endif
}
namespace exception::internal {

// Primary template, intentionally left undefined: only pointers to a unary,
// const-qualified call operator are supported (see the specialization below).
template <typename F>
struct lambda_arg;

// Extracts the single parameter type `Arg` from a member-function pointer of
// the form `R (C::*)(Arg) const`.
template <typename R, typename C, typename Arg>
struct lambda_arg<R (C::*)(Arg) const> {
    using type = Arg;
};

// T with references and cv-qualifiers removed.
template <typename T>
using bare_t = std::remove_cv_t<std::remove_reference_t<T>>;

// The parameter type of F's call operator, with references and cv-qualifiers
// removed (e.g. `const std::system_error&` -> `std::system_error`).
//
// F itself is stripped before `&F::operator()` is formed: a member pointer
// cannot be taken through a reference type, and F is typically deduced from a
// forwarding reference, so it may be `Lambda&` or `const Lambda&` when the
// caller passes a named handler.
template <typename F>
using lambda_arg_t = bare_t<typename lambda_arg<decltype(&bare_t<F>::operator())>::type>;

} // namespace exception::internal
// dispatch_exception: walks a (possibly nested) exception chain and routes it
// to the first matching typed handler.
//
// Parameters:
//   eptr            - the exception to classify; may be null.
//   default_handler - invoked as default_handler(exception_ptr, std::string&&)
//                     when no typed handler produced a result.
//   handlers        - callables taking a single exception type by reference.
//                     Only handlers whose argument type derives from
//                     std::exception are ever consulted.
//
// All handlers (including the default one) must return the same type R.
//
// Behaviour, per level of the chain:
//   * The what() text of the first std::exception seen is remembered and is
//     the message later handed to the default handler.
//   * Handlers are probed in the order given; the first one whose argument
//     type matches the current exception (via dynamic_cast) is invoked and its
//     result is returned immediately.
//   * If nothing matched and the exception carries a nested one, the nested
//     exception becomes the current one and the procedure repeats.
//   * Otherwise, or if the current exception is not a std::exception, the
//     default handler is called with the current exception pointer.
// A null `eptr` goes straight to the default handler with an empty message.
template <typename R, typename DefaultHandler, typename... Handlers>
requires std::is_same_v<R, std::invoke_result_t<DefaultHandler, std::exception_ptr, std::string&&>> &&
         (std::is_same_v<R, std::invoke_result_t<Handlers, const exception::internal::lambda_arg_t<Handlers>&>> && ...)
R dispatch_exception(std::exception_ptr eptr, DefaultHandler&& default_handler, Handlers&&... handlers) {
    std::string first_message;

    while (eptr) {
        try {
            std::rethrow_exception(eptr);
        } catch (const std::exception& current) {
            if (first_message.empty()) {
                first_message = current.what();
            }

            std::optional<R> outcome;
            // Probes a single handler; does nothing once an earlier handler
            // has already produced a result.
            auto probe = [&](auto& handler) {
                using Arg = exception::internal::lambda_arg_t<std::decay_t<decltype(handler)>>;
                if constexpr (std::is_base_of_v<std::exception, Arg>) {
                    if (outcome) {
                        return;
                    }
                    if (const auto* matched = dynamic_cast<const Arg*>(&current)) {
                        outcome = handler(*matched);
                    }
                }
            };
            (probe(handlers), ...);

            if (outcome) {
                return *outcome;
            }

            // No handler matched at this level: descend into the nested
            // exception, if there is one.
            try {
                std::rethrow_if_nested(current);
            } catch (...) {
                eptr = std::current_exception();
                continue;
            }

            return default_handler(eptr, std::move(first_message));
        } catch (...) {
            // Not derived from std::exception: typed handlers cannot match.
            return default_handler(eptr, std::move(first_message));
        }
    }

    return default_handler(eptr, std::move(first_message));
}

View File

@@ -0,0 +1,66 @@
/*
* Copyright (C) 2026-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#include "http_client_error_processing.hh"
#include <seastar/http/exception.hh>
#include <gnutls/gnutls.h>
namespace utils::http {
// Classifies an HTTP reply status as retryable or not.
//
// A fixed set of statuses is never retried, another fixed set is always
// retried; any other status is retried only when seastar classifies it as a
// server error.
retryable from_http_code(seastar::http::reply::status_type http_code) {
    using status = seastar::http::reply::status_type;

    switch (http_code) {
    // Explicitly non-retryable.
    case status::unauthorized:
    case status::forbidden:
    case status::not_found:
        return retryable::no;

    // Explicitly retryable.
    case status::too_many_requests:
    case status::internal_server_error:
    case status::bandwidth_limit_exceeded:
    case status::service_unavailable:
    case status::request_timeout:
    case status::page_expired:
    case status::login_timeout:
    case status::gateway_timeout:
    case status::network_connect_timeout:
    case status::network_read_timeout:
        return retryable::yes;

    default:
        break;
    }

    // Everything not listed above: decide by the status class.
    const auto code_class = seastar::http::reply::classify_status(http_code);
    return retryable{code_class == seastar::http::reply::status_class::server_error};
}
// Classifies a std::system_error as retryable or not, based solely on the
// numeric value of its error code: the error is retryable if and only if the
// value appears in the table below.
retryable from_system_error(const std::system_error& system_error) {
    static constexpr int retryable_codes[] = {
        static_cast<int>(std::errc::interrupted),
        static_cast<int>(std::errc::resource_unavailable_try_again),
        static_cast<int>(std::errc::timed_out),
        static_cast<int>(std::errc::connection_aborted),
        static_cast<int>(std::errc::connection_reset),
        static_cast<int>(std::errc::connection_refused),
        static_cast<int>(std::errc::broken_pipe),
        static_cast<int>(std::errc::network_unreachable),
        static_cast<int>(std::errc::host_unreachable),
        static_cast<int>(std::errc::network_down),
        static_cast<int>(std::errc::network_reset),
        static_cast<int>(std::errc::no_buffer_space),
        // GNU TLS section. Since we pack gnutls error codes in std::system_error and rethrow it as std::nested_exception we have to handle them here.
        GNUTLS_E_PREMATURE_TERMINATION,
        GNUTLS_E_AGAIN,
        GNUTLS_E_INTERRUPTED,
        GNUTLS_E_PUSH_ERROR,
        GNUTLS_E_PULL_ERROR,
        GNUTLS_E_TIMEDOUT,
        GNUTLS_E_SESSION_EOF,
        GNUTLS_E_BAD_COOKIE, // as per RFC6347 section-4.2.1 client should retry
    };

    const int code_value = system_error.code().value();
    for (const int candidate : retryable_codes) {
        if (candidate == code_value) {
            return retryable::yes;
        }
    }
    return retryable::no;
}
} // namespace utils::http

View File

@@ -0,0 +1,20 @@
/*
* Copyright (C) 2026-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#pragma once
#include <seastar/http/reply.hh>
#include <seastar/util/bool_class.hh>
// Helpers for deciding whether a failed HTTP client request is worth retrying.
// NOTE(review): std::system_error is used below, but this header does not
// include <system_error> directly — confirm it is reliably pulled in by the
// seastar headers included above.
namespace utils::http {
// Strongly typed yes/no flag: retryable::yes / retryable::no.
using retryable = seastar::bool_class<struct is_retryable>;
// Classifies an HTTP reply status code as retryable or not.
retryable from_http_code(seastar::http::reply::status_type http_code);
// Classifies a std::system_error as retryable or not.
retryable from_system_error(const std::system_error& system_error);
} // namespace utils::http

View File

@@ -13,13 +13,15 @@
#endif
#include "aws_error.hh"
#include "utils/exceptions.hh"
#include <seastar/util/log.hh>
#include <seastar/http/exception.hh>
#include <gnutls/gnutls.h>
#include <memory>
namespace aws {
using namespace utils::http;
aws_error::aws_error(aws_error_type error_type, retryable is_retryable) : _type(error_type), _is_retryable(is_retryable) {
}
@@ -130,64 +132,32 @@ aws_error aws_error::from_http_code(seastar::http::reply::status_type http_code)
}
aws_error aws_error::from_system_error(const std::system_error& system_error) {
switch (system_error.code().value()) {
case static_cast<int>(std::errc::interrupted):
case static_cast<int>(std::errc::resource_unavailable_try_again):
case static_cast<int>(std::errc::timed_out):
case static_cast<int>(std::errc::connection_aborted):
case static_cast<int>(std::errc::connection_reset):
case static_cast<int>(std::errc::connection_refused):
case static_cast<int>(std::errc::broken_pipe):
case static_cast<int>(std::errc::network_unreachable):
case static_cast<int>(std::errc::host_unreachable):
case static_cast<int>(std::errc::network_down):
case static_cast<int>(std::errc::network_reset):
case static_cast<int>(std::errc::no_buffer_space):
// GNU TLS section. Since we pack gnutls error codes in std::system_error and rethrow it as std::nested_exception we have to handle them here.
case GNUTLS_E_PREMATURE_TERMINATION:
case GNUTLS_E_AGAIN:
case GNUTLS_E_INTERRUPTED:
case GNUTLS_E_PUSH_ERROR:
case GNUTLS_E_PULL_ERROR:
case GNUTLS_E_TIMEDOUT:
case GNUTLS_E_SESSION_EOF:
case GNUTLS_E_BAD_COOKIE: // as per RFC6347 section-4.2.1 client should retry
return {aws_error_type::NETWORK_CONNECTION, system_error.code().message(), retryable::yes};
default:
return {aws_error_type::UNKNOWN,
format("Non-retryable system error occurred. Message: {}, code: {}", system_error.code().message(), system_error.code().value()),
retryable::no};
auto is_retryable = utils::http::from_system_error(system_error);
if (is_retryable == retryable::yes) {
return {aws_error_type::NETWORK_CONNECTION, system_error.code().message(), is_retryable};
}
return {aws_error_type::UNKNOWN,
format("Non-retryable system error occurred. Message: {}, code: {}", system_error.code().message(), system_error.code().value()),
is_retryable};
}
aws_error aws_error::from_exception_ptr(std::exception_ptr exception) {
std::string original_message;
while (exception) {
try {
std::rethrow_exception(exception);
} catch (const aws_exception& ex) {
return ex.error();
} catch (const seastar::httpd::unexpected_status_error& ex) {
return from_http_code(ex.status());
} catch (const std::system_error& ex) {
return from_system_error(ex);
} catch (const std::exception& ex) {
if (original_message.empty()) {
original_message = ex.what();
return dispatch_exception<aws_error>(
std::move(exception),
[](std::exception_ptr eptr, std::string&& original_message) {
if (!original_message.empty()) {
return aws_error{aws_error_type::UNKNOWN, std::move(original_message), retryable::no};
}
try {
std::rethrow_if_nested(ex);
} catch (...) {
exception = std::current_exception();
continue;
if (!eptr) {
return aws_error{aws_error_type::UNKNOWN, "No exception was provided to `aws_error::from_exception_ptr` function call", retryable::no};
}
return aws_error{aws_error_type::UNKNOWN, std::move(original_message), retryable::no};
} catch (...) {
return aws_error{aws_error_type::UNKNOWN, seastar::format("No error message was provided, exception content: {}", std::current_exception()), retryable::no};
}
}
return aws_error{aws_error_type::UNKNOWN, "No exception was provided to `aws_error::from_exception_ptr` function call", retryable::no};
return aws_error{
aws_error_type::UNKNOWN, seastar::format("No error message was provided, exception content: {}", eptr), retryable::no};
},
[](const aws_exception& ex) { return ex.error(); },
[](const seastar::httpd::unexpected_status_error& ex) { return from_http_code(ex.status()); },
[](const std::system_error& ex) { return from_system_error(ex); });
}
const aws_errors& aws_error::get_errors() {

View File

@@ -14,6 +14,7 @@
#include <string>
#include <string_view>
#include <unordered_map>
#include "utils/http_client_error_processing.hh"
namespace aws {
@@ -88,21 +89,20 @@ enum class aws_error_type : uint8_t {
};
class aws_error;
using retryable = seastar::bool_class<struct is_retryable>;
using aws_errors = std::unordered_map<std::string_view, const aws_error>;
class aws_error {
aws_error_type _type{aws_error_type::OK};
std::string _message;
retryable _is_retryable{retryable::no};
utils::http::retryable _is_retryable{utils::http::retryable::no};
public:
aws_error() = default;
aws_error(aws_error_type error_type, retryable is_retryable);
aws_error(aws_error_type error_type, std::string&& error_message, retryable is_retryable);
aws_error(aws_error_type error_type, utils::http::retryable is_retryable);
aws_error(aws_error_type error_type, std::string&& error_message, utils::http::retryable is_retryable);
[[nodiscard]] const std::string& get_error_message() const { return _message; }
[[nodiscard]] aws_error_type get_error_type() const { return _type; }
[[nodiscard]] retryable is_retryable() const { return _is_retryable; }
[[nodiscard]] utils::http::retryable is_retryable() const { return _is_retryable; }
static std::optional<aws_error> parse(seastar::sstring&& body);
static aws_error from_http_code(seastar::http::reply::status_type http_code);
static aws_error from_system_error(const std::system_error& system_error);

View File

@@ -338,13 +338,13 @@ http::experimental::client::reply_handler client::wrap_handler(http::request& re
s3l.warn("Request failed with REQUEST_TIME_TOO_SKEWED. Machine time: {}, request timestamp: {}",
utils::aws::format_time_point(db_clock::now()),
request.get_header("x-amz-date"));
should_retry = aws::retryable::yes;
should_retry = utils::http::retryable::yes;
co_await authorize(request);
}
if (possible_error->get_error_type() == aws::aws_error_type::EXPIRED_TOKEN) {
s3l.warn("Request failed with EXPIRED_TOKEN. Resetting credentials");
_credentials = {};
should_retry = aws::retryable::yes;
should_retry = utils::http::retryable::yes;
co_await authorize(request);
}
co_await coroutine::return_exception_ptr(std::make_exception_ptr(
@@ -359,7 +359,7 @@ http::experimental::client::reply_handler client::wrap_handler(http::request& re
// We need to be able to simulate a retry in s3 tests
if (utils::get_local_injector().enter("s3_client_fail_authorization")) {
throw aws::aws_exception(
aws::aws_error{aws::aws_error_type::HTTP_UNAUTHORIZED, "EACCESS fault injected to simulate authorization failure", aws::retryable::no});
aws::aws_error{aws::aws_error_type::HTTP_UNAUTHORIZED, "EACCESS fault injected to simulate authorization failure", utils::http::retryable::no});
}
co_return co_await handler(rep, std::move(_in));
} catch (...) {
@@ -1289,7 +1289,7 @@ class client::chunked_download_source final : public seastar::data_source_impl {
while (_buffers_size < _max_buffers_size && !_is_finished) {
utils::get_local_injector().inject("kill_s3_inflight_req", [] {
// Inject non-retryable error to emulate source failure
throw aws::aws_exception(aws::aws_error(aws::aws_error_type::RESOURCE_NOT_FOUND, "Injected ResourceNotFound", aws::retryable::no));
throw aws::aws_exception(aws::aws_error(aws::aws_error_type::RESOURCE_NOT_FOUND, "Injected ResourceNotFound", utils::http::retryable::no));
});
s3l.trace("Fiber for object '{}' will try to read within range {}", _object_name, _range);

View File

@@ -39,7 +39,7 @@ seastar::future<bool> default_aws_retry_strategy::should_retry(std::exception_pt
co_return false;
}
auto err = aws_error::from_exception_ptr(error);
bool should_retry = err.is_retryable() == retryable::yes;
bool should_retry = err.is_retryable() == utils::http::retryable::yes;
if (should_retry) {
rs_logger.debug("AWS HTTP client request failed. Reason: {}. Retry# {}", err.get_error_message(), attempted_retries);
co_await sleep_before_retry(attempted_retries);