Unverified Commit e6177b3d authored by John DiSanti's avatar John DiSanti Committed by GitHub
Browse files

Fix native Smithy client retry and retry response errors (#1717)



* Fix retry for native Smithy clients
* Treat `SdkError::ResponseError` as a retryable transient failure
* Rename `ClassifyResponse` to `ClassifyRetry`
* Rename 'policy' to 'classifier' in AWS SDK public API
* Rename `AwsResponseClassifier` to `AwsResponseRetryClassifier`

Co-authored-by: default avatarZelda Hessler <zhessler@amazon.com>
parent 5335e27f
Loading
Loading
Loading
Loading
+44 −0
Original line number Diff line number Diff line
@@ -192,3 +192,47 @@ message = "The AWS STS SDK now automatically retries `IDPCommunicationError` whe
references = ["smithy-rs#966", "smithy-rs#1718"]
meta = { "breaking" = false, "tada" = false, "bug" = true }
author = "jdisanti"

[[smithy-rs]]
message = "Generated clients now retry transient errors without replacing the retry policy."
references = ["smithy-rs#1715", "smithy-rs#1717"]
meta = { "breaking" = false, "tada" = false, "bug" = true, "target" = "client" }
author = "jdisanti"

[[smithy-rs]]
message = "`ClassifyResponse` was renamed to `ClassifyRetry` and is no longer implemented for the unit type."
references = ["smithy-rs#1715", "smithy-rs#1717"]
meta = { "breaking" = true, "tada" = false, "bug" = false, "target" = "client" }
author = "jdisanti"

[[smithy-rs]]
message = """
The `with_retry_policy` and `retry_policy` functions on `aws_smithy_http::operation::Operation` have been
renamed to `with_retry_classifier` and `retry_classifier` respectively. Public member `retry_policy` on
`aws_smithy_http::operation::Parts` has been renamed to `retry_classifier`.
"""
references = ["smithy-rs#1715", "smithy-rs#1717"]
meta = { "breaking" = true, "tada" = false, "bug" = false, "target" = "client" }
author = "jdisanti"

[[aws-sdk-rust]]
message = "The `SdkError::ResponseError`, typically caused by a connection terminating before the full response is received, is now treated as a transient failure and retried."
references = ["aws-sdk-rust#303", "smithy-rs#1717"]
meta = { "breaking" = false, "tada" = false, "bug" = true }
author = "jdisanti"

[[aws-sdk-rust]]
message = "`ClassifyResponse` was renamed to `ClassifyRetry` and is no longer implemented for the unit type."
references = ["smithy-rs#1715", "smithy-rs#1717"]
meta = { "breaking" = true, "tada" = false, "bug" = false }
author = "jdisanti"

[[aws-sdk-rust]]
message = """
The `with_retry_policy` and `retry_policy` functions on `aws_smithy_http::operation::Operation` have been
renamed to `with_retry_classifier` and `retry_classifier` respectively. Public member `retry_policy` on
`aws_smithy_http::operation::Parts` has been renamed to `retry_classifier`.
"""
references = ["smithy-rs#1715", "smithy-rs#1717"]
meta = { "breaking" = true, "tada" = false, "bug" = false }
author = "jdisanti"
+14 −12
Original line number Diff line number Diff line
@@ -14,7 +14,7 @@ use aws_smithy_http::body::SdkBody;
use aws_smithy_http::operation::{Operation, Request};
use aws_smithy_http::response::ParseStrictResponse;
use aws_smithy_http::result::{SdkError, SdkSuccess};
use aws_smithy_http::retry::ClassifyResponse;
use aws_smithy_http::retry::ClassifyRetry;
use aws_smithy_types::retry::{ErrorKind, RetryKind};
use aws_smithy_types::timeout;
use aws_smithy_types::tristate::TriState;
@@ -58,7 +58,7 @@ impl HttpCredentialProvider {
    fn operation(
        &self,
        auth: Option<HeaderValue>,
    ) -> Operation<CredentialsResponseParser, HttpCredentialRetryPolicy> {
    ) -> Operation<CredentialsResponseParser, HttpCredentialRetryClassifier> {
        let mut http_req = http::Request::builder()
            .uri(&self.uri)
            .header(ACCEPT, "application/json");
@@ -73,7 +73,7 @@ impl HttpCredentialProvider {
                provider_name: self.provider_name,
            },
        )
        .with_retry_policy(HttpCredentialRetryPolicy)
        .with_retry_classifier(HttpCredentialRetryClassifier)
    }
}

@@ -165,12 +165,12 @@ impl ParseStrictResponse for CredentialsResponseParser {
}

#[derive(Clone, Debug)]
struct HttpCredentialRetryPolicy;
struct HttpCredentialRetryClassifier;

impl ClassifyResponse<SdkSuccess<Credentials>, SdkError<CredentialsError>>
    for HttpCredentialRetryPolicy
impl ClassifyRetry<SdkSuccess<Credentials>, SdkError<CredentialsError>>
    for HttpCredentialRetryClassifier
{
    fn classify(
    fn classify_retry(
        &self,
        response: Result<&SdkSuccess<credentials::Credentials>, &SdkError<CredentialsError>>,
    ) -> RetryKind {
@@ -206,12 +206,14 @@ impl ClassifyResponse<SdkSuccess<Credentials>, SdkError<CredentialsError>>

#[cfg(test)]
mod test {
    use crate::http_credential_provider::{CredentialsResponseParser, HttpCredentialRetryPolicy};
    use crate::http_credential_provider::{
        CredentialsResponseParser, HttpCredentialRetryClassifier,
    };
    use aws_smithy_http::body::SdkBody;
    use aws_smithy_http::operation;
    use aws_smithy_http::response::ParseStrictResponse;
    use aws_smithy_http::result::{SdkError, SdkSuccess};
    use aws_smithy_http::retry::ClassifyResponse;
    use aws_smithy_http::retry::ClassifyRetry;
    use aws_smithy_types::retry::{ErrorKind, RetryKind};
    use aws_types::credentials::CredentialsError;
    use aws_types::Credentials;
@@ -245,7 +247,7 @@ mod test {
            .unwrap();

        assert_eq!(
            HttpCredentialRetryPolicy.classify(sdk_resp(bad_response).as_ref()),
            HttpCredentialRetryClassifier.classify_retry(sdk_resp(bad_response).as_ref()),
            RetryKind::Error(ErrorKind::ServerError)
        );
    }
@@ -266,7 +268,7 @@ mod test {
        let sdk_result = sdk_resp(ok_response);

        assert_eq!(
            HttpCredentialRetryPolicy.classify(sdk_result.as_ref()),
            HttpCredentialRetryClassifier.classify_retry(sdk_result.as_ref()),
            RetryKind::Unnecessary
        );

@@ -281,7 +283,7 @@ mod test {
            .unwrap();
        let sdk_result = sdk_resp(error_response);
        assert_eq!(
            HttpCredentialRetryPolicy.classify(sdk_result.as_ref()),
            HttpCredentialRetryClassifier.classify_retry(sdk_result.as_ref()),
            RetryKind::UnretryableFailure
        );
        let sdk_error = sdk_result.expect_err("should be error");
+14 −14
Original line number Diff line number Diff line
@@ -23,7 +23,7 @@ use aws_smithy_http::endpoint::Endpoint;
use aws_smithy_http::operation;
use aws_smithy_http::operation::{Metadata, Operation};
use aws_smithy_http::response::ParseStrictResponse;
use aws_smithy_http::retry::ClassifyResponse;
use aws_smithy_http::retry::ClassifyRetry;
use aws_smithy_http_tower::map_request::{
    AsyncMapRequestLayer, AsyncMapRequestService, MapRequestLayer, MapRequestService,
};
@@ -232,7 +232,7 @@ impl Client {
    fn make_operation(
        &self,
        path: &str,
    ) -> Result<Operation<ImdsGetResponseHandler, ImdsErrorPolicy>, ImdsError> {
    ) -> Result<Operation<ImdsGetResponseHandler, ImdsResponseRetryClassifier>, ImdsError> {
        let mut base_uri: Uri = path.parse().map_err(|_| ImdsError::InvalidPath)?;
        self.inner.endpoint.set_endpoint(&mut base_uri, None);
        let request = http::Request::builder()
@@ -243,7 +243,7 @@ impl Client {
        request.properties_mut().insert(user_agent());
        Ok(Operation::new(request, ImdsGetResponseHandler)
            .with_metadata(Metadata::new("get", "imds"))
            .with_retry_policy(ImdsErrorPolicy))
            .with_retry_classifier(ImdsResponseRetryClassifier))
    }
}

@@ -706,9 +706,9 @@ impl Display for TokenError {
impl Error for TokenError {}

#[derive(Clone)]
struct ImdsErrorPolicy;
struct ImdsResponseRetryClassifier;

impl ImdsErrorPolicy {
impl ImdsResponseRetryClassifier {
    fn classify(response: &operation::Response) -> RetryKind {
        let status = response.http().status();
        match status {
@@ -721,7 +721,7 @@ impl ImdsErrorPolicy {
    }
}

/// IMDS Retry Policy
/// IMDS Response Retry Classifier
///
/// Possible status codes:
/// - 200 (OK)
@@ -730,12 +730,12 @@ impl ImdsErrorPolicy {
/// - 403 (IMDS disabled): **Not Retryable**
/// - 404 (Not found): **Not Retryable**
/// - >=500 (server error): **Retryable**
impl<T, E> ClassifyResponse<SdkSuccess<T>, SdkError<E>> for ImdsErrorPolicy {
    fn classify(&self, response: Result<&SdkSuccess<T>, &SdkError<E>>) -> RetryKind {
impl<T, E> ClassifyRetry<SdkSuccess<T>, SdkError<E>> for ImdsResponseRetryClassifier {
    fn classify_retry(&self, response: Result<&SdkSuccess<T>, &SdkError<E>>) -> RetryKind {
        match response {
            Ok(_) => RetryKind::Unnecessary,
            Err(SdkError::ResponseError { raw, .. }) | Err(SdkError::ServiceError { raw, .. }) => {
                ImdsErrorPolicy::classify(raw)
                Self::classify(raw)
            }
            _ => RetryKind::UnretryableFailure,
        }
@@ -744,7 +744,7 @@ impl<T, E> ClassifyResponse<SdkSuccess<T>, SdkError<E>> for ImdsErrorPolicy {

#[cfg(test)]
pub(crate) mod test {
    use crate::imds::client::{Client, EndpointMode, ImdsErrorPolicy};
    use crate::imds::client::{Client, EndpointMode, ImdsResponseRetryClassifier};
    use crate::provider_config::ProviderConfig;
    use aws_smithy_async::rt::sleep::TokioSleep;
    use aws_smithy_client::erase::DynConnector;
@@ -1033,9 +1033,9 @@ pub(crate) mod test {
    /// Successful responses should classify as `RetryKind::Unnecessary`
    #[test]
    fn successful_response_properly_classified() {
        use aws_smithy_http::retry::ClassifyResponse;
        use aws_smithy_http::retry::ClassifyRetry;

        let policy = ImdsErrorPolicy;
        let classifier = ImdsResponseRetryClassifier;
        fn response_200() -> operation::Response {
            operation::Response::new(imds_response("").map(|_| SdkBody::empty()))
        }
@@ -1045,7 +1045,7 @@ pub(crate) mod test {
        };
        assert_eq!(
            RetryKind::Unnecessary,
            policy.classify(Ok::<_, &SdkError<()>>(&success))
            classifier.classify_retry(Ok::<_, &SdkError<()>>(&success))
        );

        // Emulate a failure to parse the response body (using an io error since it's easy to construct in a test)
@@ -1055,7 +1055,7 @@ pub(crate) mod test {
        };
        assert_eq!(
            RetryKind::UnretryableFailure,
            policy.classify(Err::<&SdkSuccess<()>, _>(&failure))
            classifier.classify_retry(Err::<&SdkSuccess<()>, _>(&failure))
        );
    }

+2 −2
Original line number Diff line number Diff line
@@ -38,7 +38,7 @@ use aws_types::os_shim_internal::TimeSource;
use http::{HeaderValue, Uri};

use crate::cache::ExpiringCache;
use crate::imds::client::{ImdsError, ImdsErrorPolicy, TokenError};
use crate::imds::client::{ImdsError, ImdsResponseRetryClassifier, TokenError};

/// Token Refresh Buffer
///
@@ -143,7 +143,7 @@ impl TokenMiddleware {
        request.properties_mut().insert(super::user_agent());

        let operation = Operation::new(request, self.token_parser.clone())
            .with_retry_policy(ImdsErrorPolicy)
            .with_retry_classifier(ImdsResponseRetryClassifier)
            .with_metadata(Metadata::new("get-token", "imds"));
        let response = self
            .client
+71 −54
Original line number Diff line number Diff line
@@ -5,21 +5,10 @@
//! AWS-specific retry logic

use aws_smithy_http::result::SdkError;
use aws_smithy_http::retry::ClassifyResponse;
use aws_smithy_http::retry::{ClassifyRetry, DefaultResponseRetryClassifier};
use aws_smithy_types::retry::{ErrorKind, ProvideErrorKind, RetryKind};
use std::time::Duration;

/// A retry policy that models AWS error codes as outlined in the SEP
///
/// In order of priority:
/// 1. The `x-amz-retry-after` header is checked
/// 2. The modeled error retry mode is checked
/// 3. The code is checked against a predetermined list of throttling errors & transient error codes
/// 4. The status code is checked against a predetermined list of status codes
#[non_exhaustive]
#[derive(Clone, Debug)]
pub struct AwsErrorRetryPolicy;

const TRANSIENT_ERROR_STATUS_CODES: &[u16] = &[500, 502, 503, 504];
const THROTTLING_ERRORS: &[&str] = &[
    "Throttling",
@@ -39,41 +28,42 @@ const THROTTLING_ERRORS: &[&str] = &[
];
const TRANSIENT_ERRORS: &[&str] = &["RequestTimeout", "RequestTimeoutException"];

impl AwsErrorRetryPolicy {
    /// Create an `AwsErrorRetryPolicy` with the default set of known error & status codes
/// Implementation of [`ClassifyRetry`] that classifies AWS error codes.
///
/// In order of priority:
/// 1. The `x-amz-retry-after` header is checked
/// 2. The modeled error retry mode is checked
/// 3. The code is checked against a predetermined list of throttling errors & transient error codes
/// 4. The status code is checked against a predetermined list of status codes
#[non_exhaustive]
#[derive(Clone, Debug)]
pub struct AwsResponseRetryClassifier;

impl AwsResponseRetryClassifier {
    /// Create an `AwsResponseRetryClassifier` with the default set of known error & status codes
    pub fn new() -> Self {
        AwsErrorRetryPolicy
        Self
    }
}

impl Default for AwsErrorRetryPolicy {
impl Default for AwsResponseRetryClassifier {
    fn default() -> Self {
        Self::new()
    }
}

impl<T, E> ClassifyResponse<T, SdkError<E>> for AwsErrorRetryPolicy
impl<T, E> ClassifyRetry<T, SdkError<E>> for AwsResponseRetryClassifier
where
    E: ProvideErrorKind,
{
    fn classify(&self, err: Result<&T, &SdkError<E>>) -> RetryKind {
        let (err, response) = match err {
            Ok(_) => return RetryKind::Unnecessary,
            Err(SdkError::ServiceError { err, raw }) => (err, raw),
            Err(SdkError::TimeoutError(_err)) => {
                return RetryKind::Error(ErrorKind::TransientError)
            }

            Err(SdkError::DispatchFailure(err)) => {
                return if err.is_timeout() || err.is_io() {
                    RetryKind::Error(ErrorKind::TransientError)
                } else if let Some(ek) = err.is_other() {
                    RetryKind::Error(ek)
                } else {
                    RetryKind::UnretryableFailure
                };
            }
            Err(_) => return RetryKind::UnretryableFailure,
    fn classify_retry(&self, result: Result<&T, &SdkError<E>>) -> RetryKind {
        // Run common retry classification logic from aws-smithy-http, and if it yields
        // a `RetryKind`, then return that immediately. Otherwise, continue on to run some
        // AWS SDK specific classification logic.
        let (err, response) = match DefaultResponseRetryClassifier::try_extract_err_response(result)
        {
            Ok(extracted) => extracted,
            Err(retry_kind) => return retry_kind,
        };
        if let Some(retry_after_delay) = response
            .http()
@@ -105,15 +95,23 @@ where

#[cfg(test)]
mod test {
    use crate::retry::AwsErrorRetryPolicy;
    use crate::retry::AwsResponseRetryClassifier;
    use aws_smithy_http::body::SdkBody;
    use aws_smithy_http::operation;
    use aws_smithy_http::result::{SdkError, SdkSuccess};
    use aws_smithy_http::retry::ClassifyResponse;
    use aws_smithy_http::retry::ClassifyRetry;
    use aws_smithy_types::retry::{ErrorKind, ProvideErrorKind, RetryKind};
    use std::fmt;
    use std::time::Duration;

    #[derive(Debug)]
    struct UnmodeledError;
    impl fmt::Display for UnmodeledError {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            write!(f, "UnmodeledError")
        }
    }
    impl std::error::Error for UnmodeledError {}

    struct CodedError {
        code: &'static str,
@@ -151,36 +149,36 @@ mod test {

    #[test]
    fn not_an_error() {
        let policy = AwsErrorRetryPolicy::new();
        let policy = AwsResponseRetryClassifier::new();
        let test_response = http::Response::new("OK");
        assert_eq!(
            policy.classify(make_err(UnmodeledError, test_response).as_ref()),
            policy.classify_retry(make_err(UnmodeledError, test_response).as_ref()),
            RetryKind::UnretryableFailure
        );
    }

    #[test]
    fn classify_by_response_status() {
        let policy = AwsErrorRetryPolicy::new();
        let policy = AwsResponseRetryClassifier::new();
        let test_resp = http::Response::builder()
            .status(500)
            .body("error!")
            .unwrap();
        assert_eq!(
            policy.classify(make_err(UnmodeledError, test_resp).as_ref()),
            policy.classify_retry(make_err(UnmodeledError, test_resp).as_ref()),
            RetryKind::Error(ErrorKind::TransientError)
        );
    }

    #[test]
    fn classify_by_response_status_not_retryable() {
        let policy = AwsErrorRetryPolicy::new();
        let policy = AwsResponseRetryClassifier::new();
        let test_resp = http::Response::builder()
            .status(408)
            .body("error!")
            .unwrap();
        assert_eq!(
            policy.classify(make_err(UnmodeledError, test_resp).as_ref()),
            policy.classify_retry(make_err(UnmodeledError, test_resp).as_ref()),
            RetryKind::UnretryableFailure
        );
    }
@@ -188,16 +186,18 @@ mod test {
    #[test]
    fn classify_by_error_code() {
        let test_response = http::Response::new("OK");
        let policy = AwsErrorRetryPolicy::new();
        let policy = AwsResponseRetryClassifier::new();

        assert_eq!(
            policy.classify(make_err(CodedError { code: "Throttling" }, test_response).as_ref()),
            policy.classify_retry(
                make_err(CodedError { code: "Throttling" }, test_response).as_ref()
            ),
            RetryKind::Error(ErrorKind::ThrottlingError)
        );

        let test_response = http::Response::new("OK");
        assert_eq!(
            policy.classify(
            policy.classify_retry(
                make_err(
                    CodedError {
                        code: "RequestTimeout"
@@ -214,9 +214,9 @@ mod test {
    fn classify_generic() {
        let err = aws_smithy_types::Error::builder().code("SlowDown").build();
        let test_response = http::Response::new("OK");
        let policy = AwsErrorRetryPolicy::new();
        let policy = AwsResponseRetryClassifier::new();
        assert_eq!(
            policy.classify(make_err(err, test_response).as_ref()),
            policy.classify_retry(make_err(err, test_response).as_ref()),
            RetryKind::Error(ErrorKind::ThrottlingError)
        );
    }
@@ -236,34 +236,51 @@ mod test {
            }
        }

        let policy = AwsErrorRetryPolicy::new();
        let policy = AwsResponseRetryClassifier::new();

        assert_eq!(
            policy.classify(make_err(ModeledRetries, test_response).as_ref()),
            policy.classify_retry(make_err(ModeledRetries, test_response).as_ref()),
            RetryKind::Error(ErrorKind::ClientError)
        );
    }

    #[test]
    fn test_retry_after_header() {
        let policy = AwsErrorRetryPolicy::new();
        let policy = AwsResponseRetryClassifier::new();
        let test_response = http::Response::builder()
            .header("x-amz-retry-after", "5000")
            .body("retry later")
            .unwrap();

        assert_eq!(
            policy.classify(make_err(UnmodeledError, test_response).as_ref()),
            policy.classify_retry(make_err(UnmodeledError, test_response).as_ref()),
            RetryKind::Explicit(Duration::from_millis(5000))
        );
    }

    #[test]
    fn classify_response_error() {
        let policy = AwsResponseRetryClassifier::new();
        assert_eq!(
            policy.classify_retry(
                Result::<SdkSuccess<()>, SdkError<UnmodeledError>>::Err(SdkError::ResponseError {
                    err: Box::new(UnmodeledError),
                    raw: operation::Response::new(
                        http::Response::new("OK").map(|b| SdkBody::from(b))
                    ),
                })
                .as_ref()
            ),
            RetryKind::Error(ErrorKind::TransientError)
        );
    }

    #[test]
    fn test_timeout_error() {
        let policy = AwsErrorRetryPolicy::new();
        let policy = AwsResponseRetryClassifier::new();
        let err: Result<(), SdkError<UnmodeledError>> = Err(SdkError::TimeoutError("blah".into()));
        assert_eq!(
            policy.classify(err.as_ref()),
            policy.classify_retry(err.as_ref()),
            RetryKind::Error(ErrorKind::TransientError)
        );
    }
Loading