Unverified Commit f76013a5 authored by Nugine's avatar Nugine Committed by GitHub
Browse files

feat(s3s): typed etag (#349)

parent c34135ff
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -43,6 +43,7 @@ pub fn collect_rust_types(model: &smithy::Model, ops: &Operations) -> RustTypes
            "ContentType",   //
            "Event",         //
            "CachedTags",    //
            "ETag",          //
        ];

        if provided_types.contains(&rs_shape_name.as_str()) {
+9 −3
Original line number Diff line number Diff line
@@ -154,9 +154,15 @@ fn collect_xml_types<'a>(
                    q.push_back(variant.type_.as_str());
                }
            }
            rust::Type::Provided(ty) => {
                assert!(matches!(ty.name.as_str(), "Body" | "Event"));
            rust::Type::Provided(ty) => match ty.name.as_str() {
                "Body" | "Event" => {
                    // ignore
                }
                "ETag" => {
                    field_type_names.insert(ty.name.as_str());
                }
                _ => panic!(),
            },
            rust::Type::Map(_) => unimplemented!(),
            rust::Type::Timestamp(_) => {}
        }
+14 −0
Original line number Diff line number Diff line
@@ -211,3 +211,17 @@ impl AwsConversion for s3s::dto::Tag {
        y.build().map_err(S3Error::internal_error)
    }
}

impl AwsConversion for s3s::dto::ETag {
    type Target = String;

    type Error = S3Error;

    fn try_from_aws(x: Self::Target) -> S3Result<Self> {
        Self::parse_http_header(x.as_bytes()).map_err(S3Error::internal_error)
    }

    fn try_into_aws(x: Self) -> S3Result<Self::Target> {
        Ok(format!("\"{}\"", x.value()))
    }
}
+6 −9
Original line number Diff line number Diff line
@@ -109,7 +109,7 @@ impl S3 for FileSystem {
        let md5_sum = self.get_md5_sum(bucket, key).await?;

        let copy_object_result = CopyObjectResult {
            e_tag: Some(format!("\"{md5_sum}\"")),
            e_tag: Some(ETag::Strong(md5_sum)),
            last_modified: Some(last_modified),
            ..Default::default()
        };
@@ -235,7 +235,6 @@ impl S3 for FileSystem {
        let object_metadata = self.load_metadata(&input.bucket, &input.key, None).await?;

        let md5_sum = self.get_md5_sum(&input.bucket, &input.key).await?;
        let e_tag = format!("\"{md5_sum}\"");

        let info = self.load_internal_info(&input.bucket, &input.key).await?;
        let checksum = match &info {
@@ -251,7 +250,7 @@ impl S3 for FileSystem {
            content_range,
            last_modified: Some(last_modified),
            metadata: object_metadata,
            e_tag: Some(e_tag),
            e_tag: Some(ETag::Strong(md5_sum)),
            checksum_crc32: checksum.checksum_crc32,
            checksum_crc32c: checksum.checksum_crc32c,
            checksum_sha1: checksum.checksum_sha1,
@@ -584,10 +583,8 @@ impl S3 for FileSystem {
        crate::checksum::modify_internal_info(&mut info, &checksum);
        self.save_internal_info(&bucket, &key, &info).await?;

        let e_tag = format!("\"{md5_sum}\"");

        let output = PutObjectOutput {
            e_tag: Some(e_tag),
            e_tag: Some(ETag::Strong(md5_sum)),
            checksum_crc32: checksum.checksum_crc32,
            checksum_crc32c: checksum.checksum_crc32c,
            checksum_sha1: checksum.checksum_sha1,
@@ -657,7 +654,7 @@ impl S3 for FileSystem {
        debug!(path = %file_path.display(), ?size, %md5_sum, "write file");

        let output = UploadPartOutput {
            e_tag: Some(format!("\"{md5_sum}\"")),
            e_tag: Some(ETag::Strong(md5_sum)),
            ..Default::default()
        };
        Ok(S3Response::new(output))
@@ -722,7 +719,7 @@ impl S3 for FileSystem {

        let output = UploadPartCopyOutput {
            copy_part_result: Some(CopyPartResult {
                e_tag: Some(format!("\"{md5_sum}\"")),
                e_tag: Some(ETag::Strong(md5_sum)),
                ..Default::default()
            }),
            ..Default::default()
@@ -849,7 +846,7 @@ impl S3 for FileSystem {
                Ok(CompleteMultipartUploadOutput {
                    bucket: Some(bucket),
                    key: Some(key),
                    e_tag: Some(format!("\"{md5_sum}\"")),
                    e_tag: Some(ETag::Strong(md5_sum)),
                    ..Default::default()
                })
            })),
+258 −0
Original line number Diff line number Diff line
use bytes::Bytes;
use http::HeaderValue;
use http::header::InvalidHeaderValue;
use stdx::str::StrExt;

/// Entity Tag for the HTTP `ETag` header.
///
/// Strong: "value"; Weak: W/"value".
///
/// See RFC 7232 §2.3 and MDN:
/// + <https://www.rfc-editor.org/rfc/rfc7232#section-2.3>
/// + <https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/ETag>
#[derive(Debug, Clone, PartialEq)]
pub enum ETag {
    /// Strong validator: "value"
    Strong(String),
    /// Weak validator: W/"value"
    Weak(String),
}

/// Errors returned when parsing an `ETag` header.
#[derive(Debug, thiserror::Error)]
pub enum ParseETagError {
    /// The bytes do not match the `ETag` syntax.
    #[error("ParseETagError: InvalidFormat")]
    InvalidFormat,
    /// Contains invalid characters (control chars, DEL 0x7f, or non-ASCII).
    #[error("ParseETagError: InvalidChar")]
    InvalidChar,
}

impl ETag {
    /// Returns the value if this is a strong `ETag`; otherwise None.
    #[must_use]
    pub fn as_strong(&self) -> Option<&str> {
        match self {
            ETag::Strong(s) => Some(s),
            ETag::Weak(_) => None,
        }
    }

    /// Returns the value if this is a weak `ETag`; otherwise None.
    #[must_use]
    pub fn as_weak(&self) -> Option<&str> {
        match self {
            ETag::Weak(s) => Some(s),
            ETag::Strong(_) => None,
        }
    }

    /// Returns the raw value without strength information.
    #[must_use]
    pub fn value(&self) -> &str {
        match self {
            ETag::Strong(s) | ETag::Weak(s) => s,
        }
    }

    fn check_header_value(s: &[u8]) -> bool {
        s.iter().all(|&b| b >= 32 && b != 127 || b == b'\t')
    }

    /// Parses an `ETag` from header bytes.
    ///
    /// # Errors
    /// + Returns `ParseETagError::InvalidFormat` if the bytes do not match the `ETag` syntax.
    /// + Returns `ParseETagError::InvalidChar` if the value contains invalid characters
    pub fn parse_http_header(src: &[u8]) -> Result<Self, ParseETagError> {
        // FIXME: this impl is not optimal unless `unsafe` is used
        match src {
            [b'"', val @ .., b'"'] => {
                if !Self::check_header_value(val) {
                    return Err(ParseETagError::InvalidChar);
                }
                let val = str::from_ascii_simd(val).map_err(|_| ParseETagError::InvalidChar)?;
                Ok(ETag::Strong(val.to_owned()))
            }
            [b'W', b'/', b'"', val @ .., b'"'] => {
                if !Self::check_header_value(val) {
                    return Err(ParseETagError::InvalidChar);
                }
                let val = str::from_ascii_simd(val).map_err(|_| ParseETagError::InvalidChar)?;
                Ok(ETag::Weak(val.to_owned()))
            }
            _ => Err(ParseETagError::InvalidFormat),
        }
    }

    /// Encodes this `ETag` as an HTTP header value.
    ///
    /// # Errors
    /// Returns `InvalidHeaderValue` if the `ETag` value contains invalid characters for HTTP headers.
    pub fn to_http_header(&self) -> Result<HeaderValue, InvalidHeaderValue> {
        let buf = match self {
            ETag::Strong(s) => {
                let mut buf = Vec::with_capacity(s.len() + 2);
                buf.push(b'"');
                buf.extend_from_slice(s.as_bytes());
                buf.push(b'"');
                buf
            }
            ETag::Weak(s) => {
                let mut buf = Vec::with_capacity(s.len() + 4);
                buf.extend_from_slice(b"W/\"");
                buf.extend_from_slice(s.as_bytes());
                buf.push(b'"');
                buf
            }
        };
        HeaderValue::from_maybe_shared(Bytes::from(buf))
    }
}

#[cfg(test)]
mod tests {
    use super::{ETag, ParseETagError};

    #[test]
    fn strong_value_and_header_ok() {
        let etag = ETag::Strong("abc123".to_string());
        assert_eq!(etag.value(), "abc123");
        let hv = etag.to_http_header().expect("strong etag header");
        assert_eq!(hv.as_bytes(), b"\"abc123\"");
    }

    #[test]
    fn weak_value_and_header_ok() {
        let etag = ETag::Weak("xyz".to_string());
        assert_eq!(etag.value(), "xyz");
        // 标准:弱 ETag 前缀为 W/"
        let hv = etag.to_http_header().expect("weak etag header");
        assert_eq!(hv.as_bytes(), b"W/\"xyz\"");
    }

    #[test]
    fn strong_empty_header_ok() {
        let etag = ETag::Strong(String::new());
        let hv = etag.to_http_header().expect("empty strong etag");
        assert_eq!(hv.as_bytes(), b"\"\"");
    }

    #[test]
    fn weak_empty_header_ok() {
        let etag = ETag::Weak(String::new());
        let hv = etag.to_http_header().expect("empty weak etag");
        assert_eq!(hv.as_bytes(), b"W/\"\"");
    }

    #[test]
    fn header_invalid_when_contains_newline() {
        // 含有换行等控制字符应返回错误
        let strong_bad = ETag::Strong("a\nb".to_string());
        assert!(strong_bad.to_http_header().is_err());

        let weak_bad = ETag::Weak("a\r\nb".to_string());
        assert!(weak_bad.to_http_header().is_err());
    }

    #[test]
    fn parse_strong_ok() {
        let etag = ETag::parse_http_header(b"\"abc123\"").expect("parse strong");
        assert_eq!(etag.as_strong(), Some("abc123"));
    }

    #[test]
    fn parse_weak_ok() {
        let etag = ETag::parse_http_header(b"W/\"xyz\"").expect("parse weak");
        assert_eq!(etag.as_weak(), Some("xyz"));
    }

    #[test]
    fn parse_empty_ok() {
        let s = ETag::parse_http_header(b"\"\"").expect("parse empty strong");
        assert_eq!(s.as_strong(), Some(""));

        let w = ETag::parse_http_header(b"W/\"\"").expect("parse empty weak");
        assert_eq!(w.as_weak(), Some(""));
    }

    #[test]
    fn parse_allows_tab() {
        let s = ETag::parse_http_header(b"\"a\tb\"").expect("tab in strong");
        assert_eq!(s.as_strong(), Some("a\tb"));

        let w = ETag::parse_http_header(b"W/\"a\tb\"").expect("tab in weak");
        assert_eq!(w.as_weak(), Some("a\tb"));
    }

    #[test]
    fn parse_invalid_format_cases() {
        let cases: &[&[u8]] = &[
            b"",
            b"abc",
            b"\"unclosed",
            b"W/\"unclosed",
            b"W/xyz",      // 缺少引号
            b"\"abc\"x",   // 尾随字符
            b"W/\"abc\"x", // 尾随字符
        ];
        for &c in cases {
            let err = ETag::parse_http_header(c).unwrap_err();
            assert!(matches!(err, ParseETagError::InvalidFormat), "case={c:?}");
        }
    }

    #[test]
    fn parse_invalid_char_cases() {
        // 含有换行/回车
        let err = ETag::parse_http_header(b"\"a\nb\"").unwrap_err();
        assert!(matches!(err, ParseETagError::InvalidChar));

        let err = ETag::parse_http_header(b"W/\"a\rb\"").unwrap_err();
        assert!(matches!(err, ParseETagError::InvalidChar));

        // 含有 DEL(0x7f)
        let err = ETag::parse_http_header(b"\"a\x7fb\"").unwrap_err();
        assert!(matches!(err, ParseETagError::InvalidChar));

        let err = ETag::parse_http_header(b"W/\"a\x7fb\"").unwrap_err();
        assert!(matches!(err, ParseETagError::InvalidChar));

        // 含有非 ASCII(触发 from_ascii_simd 错误)
        let err = ETag::parse_http_header(b"\"a\xc2\xb5b\"").unwrap_err(); // µ
        assert!(matches!(err, ParseETagError::InvalidChar));
    }

    #[test]
    fn to_header_allows_tab() {
        let etag = ETag::Strong("a\tb".to_string());
        let hv = etag.to_http_header().expect("header with tab");
        assert_eq!(hv.as_bytes(), b"\"a\tb\"");
    }

    #[test]
    fn header_invalid_when_contains_del_127() {
        let s = String::from_utf8(vec![b'a', 0x7f, b'b']).unwrap();
        assert!(ETag::Strong(s.clone()).to_http_header().is_err());
        assert!(ETag::Weak(s).to_http_header().is_err());
    }

    #[test]
    fn parse_and_header_roundtrip() {
        let values = ["", "abc", "a\tb", " !#$%&()*+,-./:;<=>?@[]^_`{|}~"];
        for v in values {
            // strong
            let e = ETag::Strong(v.to_string());
            let hv = e.to_http_header().expect("strong header");
            let p = ETag::parse_http_header(hv.as_bytes()).expect("parse strong back");
            assert_eq!(p.as_strong(), Some(v));

            // weak
            let e = ETag::Weak(v.to_string());
            let hv = e.to_http_header().expect("weak header");
            let p = ETag::parse_http_header(hv.as_bytes()).expect("parse weak back");
            assert_eq!(p.as_weak(), Some(v));
        }
    }
}
Loading