Unverified Commit b0d16a00 authored by Copilot's avatar Copilot Committed by GitHub
Browse files

fix(s3s): fix unbounded memory allocation in http::body (#407)



* Fix unbounded memory allocation in http::body and signature verification

- Treat missing x-amz-content-sha256 header as unsigned payload to avoid loading full body for signature verification
- Add store_all_limited() method with configurable size limit to Body
- Use bounded body collection (20MB limit) for XML payload operations
- Add tests for the new bounded body collection function
- Mark hex_sha256_string as test-only since it's no longer used in production code

Co-authored-by: default avatarNugine <30099658+Nugine@users.noreply.github.com>

* Add comment explaining Bytes::clone is O(1) due to reference counting

Co-authored-by: default avatarNugine <30099658+Nugine@users.noreply.github.com>

* Reject requests with missing x-amz-content-sha256 header per S3 protocol

Instead of treating missing x-amz-content-sha256 header as unsigned payload,
reject the request per AWS S3 protocol which requires this header for all
requests authenticated with Signature V4.

Co-authored-by: default avatarNugine <30099658+Nugine@users.noreply.github.com>

* fix

* fix

* Add tests for x-amz-content-sha256 header validation

Add unit tests to verify that:
- extract_amz_content_sha256 returns None when header is missing
- extract_amz_content_sha256 returns proper value when header is present
- extract_amz_content_sha256 returns error for invalid header values

Co-authored-by: default avatarNugine <30099658+Nugine@users.noreply.github.com>

* fix

* fix

---------

Co-authored-by: default avatarcopilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: default avatarNugine <30099658+Nugine@users.noreply.github.com>
Co-authored-by: default avatarNugine <nugine@foxmail.com>
parent 4752860f
Loading
Loading
Loading
Loading
+57 −6
Original line number Diff line number Diff line
@@ -255,19 +255,40 @@ impl fmt::Debug for Body {
    }
}

/// Maximum size for XML body payloads (20 MB).
/// This limit prevents unbounded memory allocation for operations that require
/// the full body in memory (e.g., XML parsing).
pub const MAX_XML_BODY_SIZE: usize = 20 * 1024 * 1024;

/// Error returned when body size exceeds the limit.
#[derive(Debug, Clone, thiserror::Error)]
#[error("body size {size} exceeds limit {limit}")]
pub struct BodySizeLimitExceeded {
    /// The actual body size.
    pub size: usize,
    /// The maximum allowed size.
    pub limit: usize,
}

impl Body {
    /// Stores all bytes in memory.
    ///
    /// WARNING: This function may cause **unbounded memory allocation**.
    /// Stores all bytes in memory with a size limit.
    ///
    /// # Errors
    /// Returns an error if `hyper` fails to read the body.
    pub async fn store_all_unlimited(&mut self) -> Result<Bytes, StdError> {
    /// Returns an error if the body exceeds `limit` bytes or if reading fails.
    pub async fn store_all_limited(&mut self, limit: usize) -> Result<Bytes, StdError> {
        if let Some(bytes) = self.bytes() {
            if bytes.len() > limit {
                return Err(Box::new(BodySizeLimitExceeded {
                    size: bytes.len(),
                    limit,
                }));
            }
            return Ok(bytes);
        }
        let body = mem::take(self);
        let bytes = http_body_util::BodyExt::collect(body).await?.to_bytes();
        let limited = http_body_util::Limited::new(body, limit);
        let bytes: Bytes = http_body_util::BodyExt::collect(limited).await?.to_bytes();
        // Store bytes in self (Bytes::clone is O(1) due to reference counting)
        *self = Self::from(bytes.clone());
        Ok(bytes)
    }
@@ -288,3 +309,33 @@ impl Body {
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[tokio::test]
    async fn test_store_all_limited_success() {
        let data = b"hello world";
        let mut body = Body::from(Bytes::from_static(data));
        let result = body.store_all_limited(20).await;
        assert!(result.is_ok());
        assert_eq!(result.unwrap().as_ref(), data);
    }

    #[tokio::test]
    async fn test_store_all_limited_exceeds() {
        let data = b"hello world";
        let mut body = Body::from(Bytes::from_static(data));
        let result = body.store_all_limited(5).await;
        assert!(result.is_err());
    }

    #[tokio::test]
    async fn test_store_all_limited_empty() {
        let mut body = Body::empty();
        let result = body.store_all_limited(10).await;
        assert!(result.is_ok());
        assert!(result.unwrap().is_empty());
    }
}
+8 −5
Original line number Diff line number Diff line
@@ -23,8 +23,8 @@ use crate::auth::{Credentials, S3Auth};
use crate::error::*;
use crate::header;
use crate::host::S3Host;
use crate::http;
use crate::http::Body;
use crate::http::{self, BodySizeLimitExceeded};
use crate::http::{OrderedHeaders, OrderedQs};
use crate::http::{Request, Response};
use crate::path::{ParseS3PathError, S3Path};
@@ -172,10 +172,13 @@ async fn extract_full_body(content_length: Option<u64>, body: &mut Body) -> S3Re
        return Ok(bytes);
    }

    let bytes = body
        .store_all_unlimited()
        .await
        .map_err(|e| S3Error::with_source(S3ErrorCode::InternalError, e))?;
    let bytes = body.store_all_limited(http::MAX_XML_BODY_SIZE).await.map_err(|e| {
        if e.is::<BodySizeLimitExceeded>() {
            S3Error::with_source(S3ErrorCode::MaxMessageLengthExceeded, e)
        } else {
            S3Error::with_source(S3ErrorCode::InternalError, e)
        }
    })?;

    if bytes.is_empty().not() {
        let content_length = content_length.ok_or(S3ErrorCode::MissingContentLength)?;
+44 −18
Original line number Diff line number Diff line
@@ -13,7 +13,6 @@ use crate::sig_v4::AmzDate;
use crate::sig_v4::UploadStream;
use crate::sig_v4::{AuthorizationV4, CredentialV4, PostSignatureV4, PresignedUrlV4};
use crate::stream::ByteStream as _;
use crate::utils::crypto::hex_sha256_string;
use crate::utils::is_base64_encoded;

use std::mem;
@@ -386,23 +385,9 @@ impl SignatureContext<'_> {
                    return Err(s3_error!(NotImplemented, "AWS4-ECDSA-P256-SHA256 signing method is not implemented yet"));
                }
                None => {
                    if matches!(*self.req_method, Method::GET | Method::HEAD) {
                        sig_v4::create_canonical_request(method, uri_path, query_strings, &headers, sig_v4::Payload::Empty)
                    } else {
                        let bytes = super::extract_full_body(self.content_length, self.req_body).await?;
                        if bytes.is_empty() {
                            sig_v4::create_canonical_request(method, uri_path, query_strings, &headers, sig_v4::Payload::Empty)
                        } else {
                            let payload_checksum = hex_sha256_string(&bytes);
                            sig_v4::create_canonical_request(
                                method,
                                uri_path,
                                query_strings,
                                &headers,
                                sig_v4::Payload::SingleChunk(&payload_checksum),
                            )
                        }
                    }
                    // According to AWS S3 protocol, x-amz-content-sha256 header is required for
                    // all requests authenticated with Signature V4. Reject if missing.
                    return Err(invalid_request!("missing header: x-amz-content-sha256"));
                }
            };
            let string_to_sign = sig_v4::create_string_to_sign(&canonical_request, &amz_date, region, service);
@@ -594,3 +579,44 @@ impl SignatureContext<'_> {
        })
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_extract_amz_content_sha256_missing() {
        // Test that extract_amz_content_sha256 returns None when header is missing
        let headers =
            OrderedHeaders::from_slice_unchecked(&[("host", "example.s3.amazonaws.com"), ("x-amz-date", "20130524T000000Z")]);
        let result = extract_amz_content_sha256(&headers).unwrap();
        assert!(result.is_none());
    }

    #[test]
    fn test_extract_amz_content_sha256_present() {
        // Test that extract_amz_content_sha256 returns Some when header is present
        let headers = OrderedHeaders::from_slice_unchecked(&[
            ("host", "example.s3.amazonaws.com"),
            ("x-amz-content-sha256", "UNSIGNED-PAYLOAD"),
            ("x-amz-date", "20130524T000000Z"),
        ]);
        let result = extract_amz_content_sha256(&headers).unwrap();
        assert!(result.is_some());
        assert!(matches!(result.unwrap(), AmzContentSha256::UnsignedPayload));
    }

    #[test]
    fn test_extract_amz_content_sha256_invalid() {
        // Test that extract_amz_content_sha256 returns error for invalid header value
        let headers = OrderedHeaders::from_slice_unchecked(&[
            ("host", "example.s3.amazonaws.com"),
            ("x-amz-content-sha256", "INVALID-VALUE"),
            ("x-amz-date", "20130524T000000Z"),
        ]);
        let result = extract_amz_content_sha256(&headers);
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(err.message().unwrap().contains("x-amz-content-sha256"));
    }
}
+13 −9
Original line number Diff line number Diff line
@@ -98,8 +98,6 @@ const EMPTY_STRING_SHA256_HASH: &str = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4
pub enum Payload<'a> {
    /// unsigned
    Unsigned,
    /// empty
    Empty,
    /// single chunk
    SingleChunk(&'a str),
    /// multiple chunks
@@ -110,6 +108,13 @@ pub enum Payload<'a> {
    UnsignedMultipleChunksWithTrailer,
}

#[cfg(test)]
impl Payload<'_> {
    pub fn empty() -> Self {
        Payload::SingleChunk(EMPTY_STRING_SHA256_HASH)
    }
}

/// create canonical request
#[must_use]
pub fn create_canonical_request(
@@ -231,7 +236,6 @@ pub fn create_canonical_request(
        // <HashedPayload>
        match payload {
            Payload::Unsigned => ans.push_str("UNSIGNED-PAYLOAD"),
            Payload::Empty => ans.push_str(EMPTY_STRING_SHA256_HASH),
            Payload::SingleChunk(checksum) => ans.push_str(checksum),
            Payload::MultipleChunks => ans.push_str("STREAMING-AWS4-HMAC-SHA256-PAYLOAD"),
            Payload::MultipleChunksWithTrailer => ans.push_str("STREAMING-AWS4-HMAC-SHA256-PAYLOAD-TRAILER"),
@@ -539,7 +543,7 @@ mod tests {
        let method = Method::GET;
        let qs: &[(String, String)] = &[];

        let canonical_request = create_canonical_request(&method, path, qs, &headers, Payload::Empty);
        let canonical_request = create_canonical_request(&method, path, qs, &headers, Payload::empty());

        assert_eq!(
            canonical_request,
@@ -976,7 +980,7 @@ mod tests {

        let method = Method::GET;

        let canonical_request = create_canonical_request(&method, path, query_strings, &headers, Payload::Empty);
        let canonical_request = create_canonical_request(&method, path, query_strings, &headers, Payload::empty());
        assert_eq!(
            canonical_request,
            concat!(
@@ -1028,7 +1032,7 @@ mod tests {

        let method = Method::GET;

        let canonical_request = create_canonical_request(&method, path, query_strings, &headers, Payload::Empty);
        let canonical_request = create_canonical_request(&method, path, query_strings, &headers, Payload::empty());

        assert_eq!(
            canonical_request,
@@ -1163,7 +1167,7 @@ mod tests {

        let signed_header_names = &["content-md5", "host", "x-amz-content-sha256", "x-amz-date"];

        let payload = Payload::Empty;
        let payload = Payload::empty();
        let date = AmzDate::parse(x_amz_date).unwrap();
        let region = "us-east-1";
        let service = "s3";
@@ -1305,7 +1309,7 @@ mod tests {
        let method = Method::GET;
        let qs: &[(String, String)] = &[];

        let canonical_request = create_canonical_request(&method, "/bucket/key", qs, &headers, Payload::Empty);
        let canonical_request = create_canonical_request(&method, "/bucket/key", qs, &headers, Payload::empty());

        // According to AWS SigV4 spec:
        // - Multiple headers with the same name should be combined with comma-separated values
@@ -1369,7 +1373,7 @@ mod tests {
        let method = Method::GET;
        let qs: &[(String, String)] = &[];

        let canonical_request = create_canonical_request(&method, "/bucket/key", qs, &headers, Payload::Empty);
        let canonical_request = create_canonical_request(&method, "/bucket/key", qs, &headers, Payload::empty());

        // Both values should be normalized and combined with comma
        assert!(canonical_request.contains("x-amz-meta-custom:value1,value2 with spaces\n"));
+1 −0
Original line number Diff line number Diff line
@@ -85,6 +85,7 @@ pub fn hex_sha256_chunk<R>(chunk: &[Bytes], f: impl FnOnce(&str) -> R) -> R {
    hex_bytes32(sha256_chunk(chunk).as_ref(), f)
}

#[cfg(test)]
pub fn hex_sha256_string(data: &[u8]) -> String {
    hex_sha256(data, str::to_owned)
}
Loading