Unverified Commit b86c15cf authored by Copilot's avatar Copilot Committed by GitHub
Browse files

Fix list operations to properly show common prefixes when delimiter is specified (#302)



* Fix list operations to properly show common prefixes when delimiter is specified

Co-authored-by: default avatarNugine <30099658+Nugine@users.noreply.github.com>

* Run cargo fmt to fix code formatting

Co-authored-by: default avatarNugine <30099658+Nugine@users.noreply.github.com>

* Fix clippy lint warnings - use as_deref, reverse boolean condition, use inclusive range

Co-authored-by: default avatarNugine <30099658+Nugine@users.noreply.github.com>

* Update crates/s3s-fs/src/s3.rs

Co-authored-by: default avatarCopilot <175728472+Copilot@users.noreply.github.com>

* Optimize prefix checks in list operations to avoid redundant evaluations

Co-authored-by: default avatarNugine <30099658+Nugine@users.noreply.github.com>

---------

Co-authored-by: default avatarcopilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: default avatarNugine <30099658+Nugine@users.noreply.github.com>
Co-authored-by: default avatarNugine <nugine@foxmail.com>
Co-authored-by: default avatarCopilot <175728472+Copilot@users.noreply.github.com>
parent 0d0ba3e2
Loading
Loading
Loading
Loading
+128 −34
Original line number Diff line number Diff line
@@ -345,6 +345,7 @@ impl S3 for FileSystem {

        Ok(v2_resp.map_output(|v2| ListObjectsOutput {
            contents: v2.contents,
            common_prefixes: v2.common_prefixes,
            delimiter: v2.delimiter,
            encoding_type: v2.encoding_type,
            name: v2.name,
@@ -363,9 +364,21 @@ impl S3 for FileSystem {
            return Err(s3_error!(NoSuchBucket));
        }

        let delimiter = input.delimiter.as_deref();
        let prefix = input.prefix.as_deref().unwrap_or("").trim_start_matches('/');

        let mut objects: Vec<Object> = default();
        let mut common_prefixes = std::collections::BTreeSet::new();

        if delimiter.is_some() {
            // When delimiter is provided, only list immediate contents (non-recursive)
            self.list_objects_with_delimiter(&path, prefix, delimiter.unwrap(), &mut objects, &mut common_prefixes)
                .await?;
        } else {
            // When no delimiter, do recursive listing (current behavior)
            let mut dir_queue: VecDeque<PathBuf> = default();
            dir_queue.push_back(path.clone());
            let prefix_is_empty = prefix.is_empty();

            while let Some(dir) = dir_queue.pop_front() {
                let mut iter = try_!(fs::read_dir(dir).await);
@@ -376,21 +389,13 @@ impl S3 for FileSystem {
                    } else {
                        let file_path = entry.path();
                        let key = try_!(file_path.strip_prefix(&path));
                    let delimiter = input.delimiter.as_ref().map_or("/", |d| d.as_str());
                    let Some(key_str) = normalize_path(key, delimiter) else {
                        let Some(key_str) = normalize_path(key, "/") else {
                            continue;
                        };

                    if let Some(ref prefix) = input.prefix {
                        let prefix_path: PathBuf = prefix.split(delimiter).collect();

                        let key_s = format!("{}", key.display());
                        let prefix_path_s = format!("{}", prefix_path.display());

                        if !key_s.starts_with(&prefix_path_s) {
                        if !prefix_is_empty && !key_str.starts_with(prefix) {
                            continue;
                        }
                    }

                        let metadata = try_!(entry.metadata().await);
                        let last_modified = Timestamp::from(try_!(metadata.modified()));
@@ -406,6 +411,7 @@ impl S3 for FileSystem {
                    }
                }
            }
        }

        objects.sort_by(|lhs, rhs| {
            let lhs_key = lhs.key.as_deref().unwrap_or("");
@@ -422,12 +428,24 @@ impl S3 for FileSystem {
            objects
        };

        let common_prefixes_list = if common_prefixes.is_empty() {
            None
        } else {
            Some(
                common_prefixes
                    .into_iter()
                    .map(|prefix| CommonPrefix { prefix: Some(prefix) })
                    .collect(),
            )
        };

        let key_count = try_!(i32::try_from(objects.len()));

        let output = ListObjectsV2Output {
            key_count: Some(key_count),
            max_keys: Some(key_count),
            contents: Some(objects),
            common_prefixes: common_prefixes_list,
            delimiter: input.delimiter,
            encoding_type: input.encoding_type,
            name: Some(input.bucket),
@@ -829,3 +847,79 @@ impl S3 for FileSystem {
        Ok(S3Response::new(AbortMultipartUploadOutput { ..Default::default() }))
    }
}

impl FileSystem {
    async fn list_objects_with_delimiter(
        &self,
        bucket_root: &Path,
        prefix: &str,
        delimiter: &str,
        objects: &mut Vec<Object>,
        common_prefixes: &mut std::collections::BTreeSet<String>,
    ) -> S3Result<()> {
        // For delimiter-based listing, we need to recursively scan all files
        // but group them according to the delimiter rules
        let mut dir_queue: VecDeque<PathBuf> = default();
        dir_queue.push_back(bucket_root.to_owned());
        let prefix_is_empty = prefix.is_empty();

        while let Some(dir) = dir_queue.pop_front() {
            let mut iter = try_!(fs::read_dir(dir).await);

            while let Some(entry) = try_!(iter.next_entry().await) {
                let file_type = try_!(entry.file_type().await);
                let entry_path = entry.path();

                // Calculate the key relative to the bucket root
                let key = try_!(entry_path.strip_prefix(bucket_root));
                let Some(key_str) = normalize_path(key, "/") else {
                    continue;
                };

                // Skip if doesn't match prefix
                if !prefix_is_empty && !key_str.starts_with(prefix) {
                    // For directories, also skip if they don't have potential to contain matching files
                    if file_type.is_dir() && !prefix.starts_with(&key_str) && !key_str.starts_with(prefix) {
                        continue;
                    }
                    if file_type.is_file() {
                        continue;
                    }
                }

                if file_type.is_dir() {
                    // Continue scanning this directory
                    dir_queue.push_back(entry_path);
                } else {
                    // For files, determine if they should be listed directly or as common prefixes
                    let remaining = &key_str[prefix.len()..];

                    if remaining.contains(delimiter) {
                        // File is in a subdirectory, add the subdirectory as common prefix
                        if let Some(delimiter_pos) = remaining.find(delimiter) {
                            let mut next_prefix = String::with_capacity(prefix.len() + delimiter_pos + 1);
                            next_prefix.push_str(prefix);
                            next_prefix.push_str(&remaining[..=delimiter_pos]);
                            common_prefixes.insert(next_prefix);
                        }
                    } else {
                        // File is at the current level, include it in objects
                        let metadata = try_!(entry.metadata().await);
                        let last_modified = Timestamp::from(try_!(metadata.modified()));
                        let size = metadata.len();

                        let object = Object {
                            key: Some(key_str),
                            last_modified: Some(last_modified),
                            size: Some(try_!(i64::try_from(size))),
                            ..Default::default()
                        };
                        objects.push(object);
                    }
                }
            }
        }

        Ok(())
    }
}
+123 −0
Original line number Diff line number Diff line
@@ -188,6 +188,129 @@ async fn test_list_objects_v2() -> Result<()> {
    Ok(())
}

#[tokio::test]
#[tracing::instrument]
async fn test_list_objects_v2_with_prefixes() -> Result<()> {
    let c = Client::new(config());
    let bucket = format!("test-list-prefixes-{}", Uuid::new_v4());
    let bucket_str = bucket.as_str();
    create_bucket(&c, bucket_str).await?;

    // Create files in nested directory structure
    let content = "hello world\n";
    let files = [
        "README.md",                   // Root level file
        "test/subdirectory/README.md", // Nested file
        "test/file.txt",               // File in test/ directory
        "other/dir/file.txt",          // File in other/dir/ directory
    ];

    for key in &files {
        c.put_object()
            .bucket(bucket_str)
            .key(*key)
            .body(ByteStream::from_static(content.as_bytes()))
            .send()
            .await?;
    }

    // List without delimiter - should return all files recursively
    let result = c.list_objects_v2().bucket(bucket_str).send().await;

    let response = log_and_unwrap!(result);
    let contents: Vec<_> = response.contents().iter().filter_map(|obj| obj.key()).collect();

    debug!("List without delimiter - objects: {:?}", contents);
    assert_eq!(contents.len(), 4);
    for key in &files {
        assert!(contents.contains(key), "Missing key: {key}");
    }

    // List with delimiter "/" - should return root files and common prefixes
    let result = c.list_objects_v2().bucket(bucket_str).delimiter("/").send().await;

    let response = log_and_unwrap!(result);

    // Should have one file at root level
    let contents: Vec<_> = response.contents().iter().filter_map(|obj| obj.key()).collect();
    debug!("List with delimiter - objects: {:?}", contents);
    assert_eq!(contents.len(), 1);
    assert!(contents.contains(&"README.md"));

    // Should have two common prefixes: "test/" and "other/"
    let prefixes: Vec<_> = response.common_prefixes().iter().filter_map(|cp| cp.prefix()).collect();
    debug!("List with delimiter - prefixes: {:?}", prefixes);
    assert_eq!(prefixes.len(), 2);
    assert!(prefixes.contains(&"test/"));
    assert!(prefixes.contains(&"other/"));

    // List with prefix "test/" and delimiter "/" - should return files in test/ and subdirectories
    let result = c
        .list_objects_v2()
        .bucket(bucket_str)
        .prefix("test/")
        .delimiter("/")
        .send()
        .await;

    let response = log_and_unwrap!(result);

    // Should have one file in test/ directory
    let contents: Vec<_> = response.contents().iter().filter_map(|obj| obj.key()).collect();
    debug!("List with prefix test/ - objects: {:?}", contents);
    assert_eq!(contents.len(), 1);
    assert!(contents.contains(&"test/file.txt"));

    // Should have one common prefix: "test/subdirectory/"
    let prefixes: Vec<_> = response.common_prefixes().iter().filter_map(|cp| cp.prefix()).collect();
    debug!("List with prefix test/ - prefixes: {:?}", prefixes);
    assert_eq!(prefixes.len(), 1);
    assert!(prefixes.contains(&"test/subdirectory/"));

    Ok(())
}

#[tokio::test]
#[tracing::instrument]
async fn test_list_objects_v1_with_prefixes() -> Result<()> {
    let c = Client::new(config());
    let bucket = format!("test-list-v1-prefixes-{}", Uuid::new_v4());
    let bucket_str = bucket.as_str();
    create_bucket(&c, bucket_str).await?;

    // Create a simple structure
    let content = "hello world\n";
    let files = ["README.md", "dir/file.txt"];

    for key in &files {
        c.put_object()
            .bucket(bucket_str)
            .key(*key)
            .body(ByteStream::from_static(content.as_bytes()))
            .send()
            .await?;
    }

    // Test list_objects (v1) with delimiter
    let result = c.list_objects().bucket(bucket_str).delimiter("/").send().await;

    let response = log_and_unwrap!(result);

    // Should have one file at root level
    let contents: Vec<_> = response.contents().iter().filter_map(|obj| obj.key()).collect();
    debug!("ListObjects v1 with delimiter - objects: {:?}", contents);
    assert_eq!(contents.len(), 1);
    assert!(contents.contains(&"README.md"));

    // Should have one common prefix: "dir/"
    let prefixes: Vec<_> = response.common_prefixes().iter().filter_map(|cp| cp.prefix()).collect();
    debug!("ListObjects v1 with delimiter - prefixes: {:?}", prefixes);
    assert_eq!(prefixes.len(), 1);
    assert!(prefixes.contains(&"dir/"));

    Ok(())
}

#[tokio::test]
#[tracing::instrument]
async fn test_single_object() -> Result<()> {