Unverified commit b18cfbaa, authored by Copilot and committed by GitHub
Browse files

Allow RFC2047-encoded metadata values (#434)



* Initial plan

* fix: support RFC2047 metadata values

Co-authored-by: Nugine <30099658+Nugine@users.noreply.github.com>

* chore: refine metadata test assertion

Co-authored-by: Nugine <30099658+Nugine@users.noreply.github.com>

* test: enforce metadata round-trip assertion

Co-authored-by: Nugine <30099658+Nugine@users.noreply.github.com>

* fix

* fix

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: Nugine <30099658+Nugine@users.noreply.github.com>
Co-authored-by: Nugine <nugine@foxmail.com>
parent 7d38b516
Loading
Loading
Loading
Loading
+38 −6
Original line number Diff line number Diff line
@@ -24,6 +24,7 @@ pub fn register(tcx: &mut TestContext) {
    case!(tcx, Basic, Essential, test_head_operations);
    case!(tcx, Basic, Put, test_put_object_tiny);
    case!(tcx, Basic, Put, test_put_object_with_metadata);
    case!(tcx, Basic, Put, test_put_object_with_non_ascii_metadata);
    case!(tcx, Basic, Put, test_put_object_larger);
    case!(tcx, Basic, Put, test_put_object_with_checksum_algorithm);
    case!(tcx, Basic, Put, test_put_object_with_content_checksums);
@@ -271,9 +272,7 @@ impl TestFixture<Basic> for Put {
        let bucket = "test-put";
        let key = "file";

        delete_object_loose(s3, bucket, key).await?;
        delete_bucket_loose(s3, bucket).await?;

        delete_bucket_all(s3, bucket).await?;
        create_bucket(s3, bucket).await?;

        Ok(Self {
@@ -285,10 +284,9 @@ impl TestFixture<Basic> for Put {

    /// Tears down the `Put` fixture by cleaning up the test object and bucket.
    ///
    /// The cleanup helpers are awaited one after another; the first error is
    /// propagated to the caller via `?`.
    #[tracing::instrument(skip_all)]
    async fn teardown(self) -> Result {
        // NOTE(review): this view appears to interleave the pre- and post-change
        // lines of a diff. The second destructuring and the `delete_bucket_all`
        // call look like the replacements for the lines next to them — confirm
        // against the real file.
        let Self { s3, bucket, key } = &self;
        let Self { s3, bucket, .. } = &self;

        delete_object_loose(s3, bucket, key).await?;
        delete_bucket_loose(s3, bucket).await?;
        delete_bucket_all(s3, bucket).await?;

        Ok(())
    }
@@ -359,6 +357,40 @@ impl Put {
        Ok(())
    }

    /// Checks that a non-ASCII metadata value stored with `PutObject` comes back
    /// from both `HeadObject` and `GetObject` as an RFC 2047 encoded-word.
    ///
    /// The accepted encodings are the `q` and `B` forms of the UTF-8 bytes of the
    /// metadata value. Note that the third character of the value is
    /// U+FF0C FULLWIDTH COMMA (bytes `EF BC 8C`), not an ASCII comma; the
    /// expected encodings only match that exact code point.
    async fn test_put_object_with_non_ascii_metadata(self: Arc<Self>) -> Result {
        let s3 = &self.s3;
        let bucket = self.bucket.as_str();
        let key = "file-with-non-ascii-metadata";

        let content = "object with unicode metadata";
        let metadata_key = "greeting";
        // The fullwidth comma is spelled as an escape so that editors or tools
        // which normalize punctuation cannot silently turn it into `,` and
        // desynchronize the literal from the encodings below.
        let metadata_value = "你好\u{FF0C}世界";
        let metadata_value_rfc2047 = [
            "=?UTF-8?q?=E4=BD=A0=E5=A5=BD=EF=BC=8C=E4=B8=96=E7=95=8C?=",
            "=?UTF-8?B?5L2g5aW977yM5LiW55WM?=",
        ];

        s3.put_object()
            .bucket(bucket)
            .key(key)
            .body(ByteStream::from_static(content.as_bytes()))
            .metadata(metadata_key, metadata_value)
            .send()
            .await?;

        // HEAD must report the value in one of the accepted encoded forms.
        let head_resp = s3.head_object().bucket(bucket).key(key).send().await?;
        let head_value = head_resp
            .metadata()
            .and_then(|m| m.get(metadata_key))
            .map(String::as_str);
        assert!(
            head_value.is_some_and(|v| metadata_value_rfc2047.contains(&v)),
            "unexpected HEAD metadata value: {head_value:?}"
        );

        // GET must agree with HEAD.
        let get_resp = s3.get_object().bucket(bucket).key(key).send().await?;
        let get_value = get_resp
            .metadata()
            .and_then(|m| m.get(metadata_key))
            .map(String::as_str);
        assert!(
            get_value.is_some_and(|v| metadata_value_rfc2047.contains(&v)),
            "unexpected GET metadata value: {get_value:?}"
        );

        Ok(())
    }

    async fn test_put_object_larger(self: Arc<Self>) -> Result {
        let s3 = &self.s3;
        let bucket = self.bucket.as_str();
+31 −0
Original line number Diff line number Diff line
@@ -55,6 +55,37 @@ pub async fn delete_bucket_strict(s3: &aws_sdk_s3::Client, bucket: &str) -> Resu
    Ok(())
}

/// Deletes every object listed in `bucket`, then removes the bucket itself.
///
/// Objects are listed page by page with `ListObjectsV2`; each listed key is
/// deleted before the next page is requested. `NoSuchBucket` is passed to
/// `check` as a tolerated error code, and when `check` yields no listing the
/// function returns `Ok(())` without doing anything else.
///
/// # Errors
/// Propagates errors from `check`, from any `DeleteObject` call, and from
/// `delete_bucket_loose`.
#[tracing::instrument(skip(s3))]
pub async fn delete_bucket_all(s3: &aws_sdk_s3::Client, bucket: &str) -> Result {
    let mut next_token = None;

    loop {
        let listing = s3
            .list_objects_v2()
            .bucket(bucket)
            .set_continuation_token(next_token)
            .send()
            .await;

        let page = match check(listing, &["NoSuchBucket"])? {
            Some(page) => page,
            None => return Ok(()),
        };

        // Entries without a key are skipped.
        for key in page.contents().iter().filter_map(|object| object.key()) {
            s3.delete_object().bucket(bucket).key(key).send().await?;
        }

        // Only keep paging while the service says the listing was truncated.
        if page.is_truncated() != Some(true) {
            break;
        }
        next_token = page.next_continuation_token().map(String::from);
    }

    delete_bucket_loose(s3, bucket).await?;
    Ok(())
}

#[tracing::instrument(skip(s3))]
pub async fn delete_object_loose(s3: &aws_sdk_s3::Client, bucket: &str, key: &str) -> Result {
    let result = s3.delete_object().bucket(bucket).key(key).send().await;
+4 −2
Original line number Diff line number Diff line
@@ -4,6 +4,7 @@ use crate::dto::{List, Metadata, StreamingBlob, Timestamp, TimestampFormat};
use crate::error::*;
use crate::http::{HeaderName, HeaderValue};
use crate::path::S3Path;
use crate::utils::rfc2047;
use crate::xml;

use std::fmt;
@@ -284,8 +285,9 @@ pub fn parse_opt_metadata(req: &Request) -> S3Result<Option<Metadata>> {
        let val = iter.next().unwrap();
        let None = iter.next() else { return Err(duplicate_header(name)) };

        let val = val.to_str().map_err(|err| invalid_header(err, name, val))?;
        metadata.insert(key.into(), val.into());
        let raw = std::str::from_utf8(val.as_bytes()).map_err(|err| invalid_header(err, name, val))?;
        let val = rfc2047::decode(raw).map_err(|err| invalid_header(err, name, val))?;
        metadata.insert(key.into(), val.into_owned());
    }
    if metadata.is_empty() {
        return Ok(None);
+5 −3
Original line number Diff line number Diff line
//! Ordered headers

use std::str::Utf8Error;

use hyper::HeaderMap;
use hyper::header::ToStrError;

use crate::utils::stable_sort_by_first;

@@ -34,11 +35,12 @@ impl<'a> OrderedHeaders<'a> {
    ///
    /// # Errors
    /// Returns [`Utf8Error`] if a header value is not valid UTF-8
    pub fn from_headers(map: &'a HeaderMap) -> Result<Self, ToStrError> {
    pub fn from_headers(map: &'a HeaderMap) -> Result<Self, Utf8Error> {
        let mut headers: Vec<(&'a str, &'a str)> = Vec::with_capacity(map.len());

        for (name, value) in map {
            headers.push((name.as_str(), value.to_str()?));
            let value = std::str::from_utf8(value.as_bytes())?;
            headers.push((name.as_str(), value));
        }
        stable_sort_by_first(&mut headers);

+4 −1
Original line number Diff line number Diff line
@@ -8,6 +8,7 @@ use crate::error::{S3Error, S3Result};
use crate::http::KeepAliveBody;
use crate::http::{HeaderName, HeaderValue};
use crate::utils::format::fmt_timestamp;
use crate::utils::rfc2047;
use crate::xml;

use std::convert::Infallible;
@@ -151,7 +152,9 @@ pub fn add_opt_metadata(res: &mut Response, metadata: Option<Metadata>) -> S3Res
        for (key, val) in map {
            write!(&mut buf, "x-amz-meta-{key}").unwrap();
            let name = HeaderName::from_bytes(buf.as_bytes()).map_err(S3Error::internal_error)?;
            let value = HeaderValue::try_from(val).map_err(S3Error::internal_error)?;
            let value = rfc2047::encode(&val)
                .map_err(S3Error::internal_error)
                .and_then(|s| HeaderValue::try_from(s.as_ref()).map_err(S3Error::internal_error))?;
            res.headers.insert(name, value);
            buf.clear();
        }