Unverified Commit 34369e2d authored by Zelda Hessler's avatar Zelda Hessler Committed by GitHub
Browse files

Update `FsBuilder`-based `ByteStreams` to support file offsets (#1361)

* add: offset setter to FsBuilder
remove: file_size setter from FsBuilder
add: length setter to FsBuilder
add: test for ByteStream w/ offset
add: test for ByteStream w/ length less than file size
update: path_based_bytestreams_with_builder test

* add: test ensuring offset and length work correctly together
add: test ensuring that no data is returned when offset is greater than file size
add: test ensuring that nothing breaks if length to read is larger than file size

* add: chunking test
add: CHANGELOG.next.toml entry
refactor: new code based on PR comments

* remove: unused use statement

* refactor: length API

* update: prefer unwrap() to returning error in tests

* update: prefer unwrap() to returning error in tests
add: test for Length::Exact behavior

* update: error if offset is greater than file length

* fix: test to work with new offset error

* update: error test to be more specific
parent 45323f74
Loading
Loading
Loading
Loading
+47 −0
Original line number Diff line number Diff line
@@ -22,3 +22,50 @@ message = "Log a debug event when a retry is going to be peformed"
references = ["smithy-rs#1352"]
meta = { "breaking" = false, "tada" = false, "bug" = false }
author = "jdisanti"

[[smithy-rs]]
message = """
The `aws_smithy_http::byte_stream::bytestream_util::FsBuilder` has been updated to allow for easier creation of
multi-part requests.

- `FsBuilder::offset` is a new method allowing users to specify an offset to start reading a file from.
- `FsBuilder::file_size` has been reworked into `FsBuilder::length` and is now used to specify the amount of data to read.

With these two methods, it's now simple to create a `ByteStream` that will read a single "chunk" of a file. The example
below demonstrates how you could divide a single `File` into consecutive chunks to create multiple `ByteStream`s.

```rust
let file_path = Path::new("/example.txt");
let file_size = tokio::fs::metadata(&file_path).await.unwrap().len();
let chunks = 6;
let chunk_size = file_size / chunks;
let mut byte_streams = Vec::new();

for i in 0..chunks {
    let length = if i == chunks - 1 {
        // If we're on the last chunk, the length to read might be less than a whole chunk.
        // We subtract the size of all previous chunks from the total file size to get the
        // size of the final chunk.
        file_size - (i * chunk_size)
    } else {
        chunk_size
    };

    let byte_stream = ByteStream::read_from()
        .path(&file_path)
        .offset(i * chunk_size)
        .length(Length::Exact(length))
        .build()
        .await?;

    byte_streams.push(byte_stream);
}

for chunk in byte_streams {
    // Make requests to a service
}
```
"""
references = ["aws-sdk-rust#494", "aws-sdk-rust#519"]
meta = { "breaking" = true, "tada" = true, "bug" = false }
author = "Velfi"
+1 −1
Original line number Diff line number Diff line
@@ -11,7 +11,7 @@ license = "Apache-2.0"
repository = "https://github.com/awslabs/smithy-rs"

[features]
rt-tokio = ["tokio/rt", "tokio/fs", "tokio-util/io"]
rt-tokio = ["tokio/rt", "tokio/fs", "tokio/io-util", "tokio-util/io"]
event-stream = ["aws-smithy-eventstream"]

[dependencies]
+6 −47
Original line number Diff line number Diff line
@@ -102,7 +102,7 @@
//! ```no_run
//! # #[cfg(feature = "rt-tokio")]
//! # {
//! use aws_smithy_http::byte_stream::ByteStream;
//! use aws_smithy_http::byte_stream::{ByteStream, Length};
//! use std::path::Path;
//! struct GetObjectInput {
//!     body: ByteStream
@@ -111,7 +111,7 @@
//! async fn bytestream_from_file() -> GetObjectInput {
//!     let bytestream = ByteStream::read_from().path("docs/some-large-file.csv")
//!         .buffer_size(32_784)
//!         .file_size(123_456)
//!         .length(Length::Exact(123_456))
//!         .build()
//!         .await
//!         .expect("valid path");
@@ -135,6 +135,8 @@ use std::task::{Context, Poll};

#[cfg(feature = "rt-tokio")]
mod bytestream_util;
#[cfg(feature = "rt-tokio")]
pub use bytestream_util::Length;

#[cfg(feature = "rt-tokio")]
pub use self::bytestream_util::FsBuilder;
@@ -272,7 +274,7 @@ impl ByteStream {
    /// ```no_run
    /// # #[cfg(feature = "rt-tokio")]
    /// # {
    /// use aws_smithy_http::byte_stream::ByteStream;
    /// use aws_smithy_http::byte_stream::{ByteStream, Length};
    ///
    /// async fn bytestream_from_file() -> ByteStream {
    ///     let bytestream = ByteStream::read_from()
@@ -280,7 +282,7 @@ impl ByteStream {
    ///         // Specify the size of the buffer used to read the file (in bytes, default is 4096)
    ///         .buffer_size(32_784)
    ///         // Specify the length of the file used (skips an additional call to retrieve the size)
    ///         .file_size(123_456)
    ///         .length(Length::Exact(123_456))
    ///         .build()
    ///         .await
    ///         .expect("valid path");
@@ -595,47 +597,4 @@ mod tests {

        Ok(())
    }

    #[cfg(feature = "rt-tokio")]
    #[tokio::test]
    async fn path_based_bytestreams_with_builder() -> Result<(), Box<dyn std::error::Error>> {
        use super::ByteStream;
        use bytes::Buf;
        use http_body::Body;
        use std::io::Write;
        use tempfile::NamedTempFile;
        let mut file = NamedTempFile::new()?;

        for i in 0..10000 {
            writeln!(file, "Brian was here. Briefly. {}", i)?;
        }
        let body = ByteStream::read_from()
            .path(&file)
            .buffer_size(16384)
            // This isn't the right file length - one shouldn't do this in real code
            .file_size(200)
            .build()
            .await?
            .into_inner();

        // assert that the file length specified size is used as size hint
        assert_eq!(body.size_hint().exact(), Some(200));

        let mut body1 = body.try_clone().expect("retryable bodies are cloneable");
        // read a little bit from one of the clones
        let some_data = body1
            .data()
            .await
            .expect("should have some data")
            .expect("read should not fail");
        // The size of one read should be equal to that of the buffer size
        assert_eq!(some_data.len(), 16384);

        assert_eq!(
            ByteStream::new(body1).collect().await?.remaining(),
            298890 - some_data.len()
        );

        Ok(())
    }
}
+421 −48

File changed.

Preview size limit exceeded, changes collapsed.