From 72a5c5b3e8bd6f722aade6aafb180b51d5f36ac4 Mon Sep 17 00:00:00 2001 From: Russell Cohen Date: Thu, 6 May 2021 18:59:12 -0400 Subject: [PATCH] Add initial set of docs (#313) * Add initial set of docs * More design documentation updates * Updates to the docs * Update tenets again to remove the dependeny tenet * Remove `!` * Add more design documentation * More updates to tenets * More tenets updates * tenets typo * Rephrase as 'AWS SDK for Rust' * rephrase tenets --- README.md | 7 + design/README.md | 8 + design/src/SUMMARY.md | 14 +- design/src/endpoint.md | 50 ----- design/src/faq.md | 10 + design/src/middleware.md | 6 - design/src/operation.md | 65 +------ design/src/overview.md | 51 +++++ design/src/smithy/aggregate_shapes.md | 191 +++++++++++++++++++ design/src/smithy/endpoint.md | 51 +++++ design/src/smithy/overview.md | 27 +++ design/src/smithy/recursive_shapes.md | 57 ++++++ design/src/smithy/simple_shapes.md | 58 ++++++ design/src/tenets.md | 27 +++ design/src/transport/middleware.md | 7 + design/src/transport/operation.md | 64 +++++++ design/src/transport/overview.md | 11 ++ rust-runtime/smithy-types/src/instant/mod.rs | 4 + rust-runtime/smithy-types/src/lib.rs | 10 + 19 files changed, 595 insertions(+), 123 deletions(-) create mode 100644 design/README.md create mode 100644 design/src/faq.md create mode 100644 design/src/overview.md create mode 100644 design/src/smithy/aggregate_shapes.md create mode 100644 design/src/smithy/endpoint.md create mode 100644 design/src/smithy/overview.md create mode 100644 design/src/smithy/recursive_shapes.md create mode 100644 design/src/smithy/simple_shapes.md create mode 100644 design/src/tenets.md create mode 100644 design/src/transport/middleware.md create mode 100644 design/src/transport/operation.md create mode 100644 design/src/transport/overview.md diff --git a/README.md b/README.md index a37698da8..66b420576 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,8 @@ Smithy code generators for Rust The nightly SDK 
build can be found under `Actions -> CI (take latest run) -> Artifacts` +[Design documentation (WIP)](https://ubiquitous-robot-7dc8d16d.pages.github.io/) + **All internal and external interfaces are considered unstable and subject to change without notice.** ## Setup @@ -31,5 +33,10 @@ pre-commit install ### Project Layout * `aws`: AWS specific codegen & Rust code (signing, endpoints, customizations, etc.) + Common commands: + * `./gradlew :aws:sdk:assemble`: Generate (but do not test / compile etc.) a fresh SDK into `sdk/build/aws-sdk` + * `./gradlew :aws:sdk:test`: Generate & run all tests for a fresh SDK + * `./gradlew :aws:sdk:{cargoCheck, cargoTest, cargoDocs, cargoClippy}`: Generate & run specified cargo command. * `codegen`: Whitelabel Smithy code generation * `codegen-test`: Smithy protocol test generation & integration tests for Smithy whitelabel code +* [`design`](design): Design documentation. See the [design/README.md](design/README.md) for details about building / viewing. diff --git a/design/README.md b/design/README.md new file mode 100644 index 000000000..dcf41f612 --- /dev/null +++ b/design/README.md @@ -0,0 +1,8 @@ +Design docs are hosted [here](https://ubiquitous-robot-7dc8d16d.pages.github.io/). 
+ +To render design docs locally: +``` +cargo install mdbook +mdbook serve & +open http://localhost:3000 +``` diff --git a/design/src/SUMMARY.md b/design/src/SUMMARY.md index 1db8af1da..f855a9359 100644 --- a/design/src/SUMMARY.md +++ b/design/src/SUMMARY.md @@ -1,5 +1,13 @@ # Summary +- [Design Overview](./overview.md) +- [Tenets](./tenets.md) +- [Design FAQ](./faq.md) +- [Transport](transport/overview.md) + - [Http Operations](transport/operation.md) + - [HTTP middleware](transport/middleware.md) -- [Http Operations](./operation.md) -- [Endpoint Resolution](./endpoint.md) -- [HTTP middleware](./middleware.md) +- [Smithy](./smithy/overview.md) + - [Simple Shapes](./smithy/simple_shapes.md) + - [Recursive Shapes](./smithy/recursive_shapes.md) + - [Aggregate Shapes](./smithy/aggregate_shapes.md) + - [Endpoint Resolution](smithy/endpoint.md) diff --git a/design/src/endpoint.md b/design/src/endpoint.md index 9aa4e8d78..ce7992de9 100644 --- a/design/src/endpoint.md +++ b/design/src/endpoint.md @@ -1,51 +1 @@ # Endpoint Resolution - -## Requirements -The core codegen generates HTTP requests that do not contain an authority, scheme or post. These properties must be set later based on configuration. Existing AWS services have a number of requirements that increase the complexity: - -1. Endpoints must support manual configuration by end users: -```rust -let config = dynamodb::Config::builder() - .endpoint(StaticEndpoint::for_uri("http://localhost:8000")) -``` - -When a user specifies a custom endpoint URI, _typically_ they will want to avoid having this URI mutated by other endpoint discovery machinery. - -2. Endpoints must support being customized on a per-operation basis by the endpoint trait. This will prefix the base endpoint, potentially driven by fields of the operation. [Docs](https://awslabs.github.io/smithy/1.0/spec/core/endpoint-traits.html#endpoint-trait) - -3. 
Endpoints must support being customized by [endpoint discovery](https://awslabs.github.io/smithy/1.0/spec/aws/aws-core.html#client-endpoint-discovery). A request, customized by a predefined set of fields from the input operation is dispatched to a specific URI. That operation returns the endpoint that should be used. Endpoints must be cached by a cache key containing: -``` -(access_key_id, [all input fields], operation) -``` -Endpoints retrieved in this way specify a TTL. - -4. Endpoints must be able to customize the signing (and other phases of the operation). For example, requests sent to a global region will have a region set by the endpoint provider. - - -## Design - -Configuration objects for services _must_ contain an `Endpoint`. This endpoint may be set by a user or it will default to the `endpointPrefix` from the service definition. In the case of endpoint discovery, _this_ is the endpoint that we will start with. - -During operation construction (see [Operation Construction](operation.md#operation-construction)) an `EndpointPrefix` may be set on the property bag. The eventual endpoint middleware will search for this in the property bag and (depending on the URI mutability) utilize this prefix when setting the endpoint. - -In the case of endpoint discovery, we envision a different pattern: -```rust -// EndpointClient manages the endpoint cache -let (tx, rx) = dynamodb::EndpointClient::new(); -let client = aws_hyper::Client::new(); -// `endpoint_req` is an operation that can be dispatched to retrieve endpoints -// During operation construction, the endpoint resolver is configured to be `rx` instead static endpoint -// resolver provided by the service. -let (endpoint_req, req) = GetRecord::builder().endpoint_disco(rx).build_with_endpoint(); -// depending on the duration of endpoint expiration, this may be spawned into a separate task to continuously -// refresh endpoints. -if tx.needs(endpoint_req) { - let new_endpoint = client. 
- call(endpoint_req) - .await; - tx.send(new_endpoint) -} -let rsp = client.call(req).await?; -``` - -We believe that this design results in an SDK that both offers customers more control & reduces the likelihood of bugs from nested operation dispatch. Endpoint resolution is currently extremely rare in AWS services so this design may remain a prototype while we solidify other behaviors. diff --git a/design/src/faq.md b/design/src/faq.md new file mode 100644 index 000000000..da4dd5be3 --- /dev/null +++ b/design/src/faq.md @@ -0,0 +1,10 @@ +# Design FAQ +### What is Smithy? +Smithy is the interface design language used by AWS services. `smithy-rs` allows users to generate a Rust client for any Smithy based service (pending protocol support), including those outside of AWS. + +### Why is there one crate per service? +1. Compilation time: Although it's possible to use cargo features to conditionally compile individual services, we decided that this added significant complexity to the generated code. In Rust the "unit of compilation" is a Crate, so by using smaller crates we can get better compilation parallelism. + +2. Versioning: It is expected that over time we may major-version-bump individual services. New updates will be pushed for _some_ AWS service nearly every day. Maintaining separate crates allows us to only increment versions for the relevant pieces that change. + +It is worth noting that this isn't a set-in-stone design decision. A parent crate may even be created at some point! diff --git a/design/src/middleware.md b/design/src/middleware.md index 07a1e4738..8573d5e17 100644 --- a/design/src/middleware.md +++ b/design/src/middleware.md @@ -1,7 +1 @@ # HTTP middleware - -Signing, endpoint specification, and logging are all handled as middleware. The Rust SDK takes a minimalist approach to middleware: - -Middleware is defined as minimally as possible, then adapted into the middleware system used by the IO layer. 
Tower is the de facto standard for HTTP middleware in Rust—we will probably use it. But we also want to make our middleware usable for users who aren't using Tower (or if we decide to not use Tower in the long run). - -Because of this, rather than implementing all our middleware as "Tower Middleware", we implement it narrowly (eg. as a function that operates on `operation::Request`), then define optional adapters to make our middleware tower compatible. diff --git a/design/src/operation.md b/design/src/operation.md index 92bc37ee1..a345e7c25 100644 --- a/design/src/operation.md +++ b/design/src/operation.md @@ -1,64 +1 @@ -# HTTP-based Operations -The Smithy code generator for Rust (and by extension), the AWS SDK use an `Operation` abstraction to provide a unified -interface for dispatching requests. `Operation`s contain: -* A base HTTP request (with a potentially streaming body) -* A typed property bag of configuration options -* A fully generic response handler - -In the typical case, these configuration options include things like a `CredentialsProvider`, however, they can also be -full middleware layers that will get added by the dispatch stack. - -## Operation Phases -This section details the flow of a request through the SDK until a response is returned to the user. - -### Input Construction - -A customer interacts with the SDK builders to construct an input. The `build()` method on an input returns -an `Operation`. This codifies the base HTTP request & all the configuration and middleware layers required to modify and dispatch the request. - -```rust,ignore -pub struct Operation { - request: Request, - response_handler: H, - _retry_policy: R, -} - -pub struct Request { - base: http::Request, - configuration: PropertyBag, -} -``` - -For most requests, `.build()` will NOT consume the input. A user can call `.build()` multiple times to produce multiple operations from the same input. - -By using a property bag, we can define the `Operation` in Smithy core. 
AWS specific configuration can be added later in the stack. - -### Operation Construction -In order to construct an operation, the generated code injects appropriate middleware & configuration via the configuration property bag. It does this by reading the configuration properties out of the service -config, copying them as necessary, and loading them into the `Request`: - -```rust,ignore -// This is approximately the generated code, I've cleaned a few things up for readability. -pub fn build(self, config: &dynamodb::config::Config) -> Operation { - let op = BatchExecuteStatement::new(BatchExecuteStatementInput { - statements: self.statements, - }); - let req = op.build_http_request().map(SdkBody::from); - - let mut req = operation::Request::new(req); - let mut conf = req.config_mut(); - conf.insert_signing_config(config.signing_service()); - conf.insert_endpoint_resolver(config.endpoint_resolver.clone()); - Operation::new(req) -} -``` - -### Operation Dispatch and Middleware - -The Rust SDK endeavors to behave as predictably as possible. This means that if at all possible we will not dispatch extra HTTP requests during the dispatch of normal operation. Making this work is covered in more detail in the design of credentials providers & endpoint resolution. - -The upshot is that we will always prefer a design where the user has explicit control of when credentials are loaded and endpoints are resolved. This doesn't mean that users can't use easy-to-use options (We will provide an automatically refreshing credentials provider), however, the credential provider won't load requests during the dispatch of an individual request. 
- -## Operation Parsing and Response Loading - -The fundamental trait for HTTP-based protocols is `ParseHttpResponse` +# Http Operations diff --git a/design/src/overview.md b/design/src/overview.md new file mode 100644 index 000000000..975532c61 --- /dev/null +++ b/design/src/overview.md @@ -0,0 +1,51 @@ +# Design Overview + +The AWS Rust SDK aims to provide an official, high quality & complete interface to AWS services. We plan to eventually use the CRT to provide signing & credential management. The Rust SDK will provide first-class support for the CRT as well as [Tokio ](https://tokio.rs/) & [Hyper](https://hyper.rs). The Rust SDK empowers advanced customers to bring their own HTTP/IO implementations. + +Our design choices are guided by our [Tenets](./tenets.md). + +## Acknowledgments + +The design builds on the learnings, ideas, hard work, and GitHub issues of the 142 Rusoto contributors & thousands of users who built this first and learned the hard way. + +## External API Overview + +The Rust SDK is "modular" meaning that each AWS service is its own crate. Each crate provides two layers to access the service: +1. The "fluent" API. For most use cases, a high level API that ties together connection management and serialization will be the quickest path to success. + +```rust +#[tokio::main] +async fn main() { + let client = dynamodb::Client::from_env(); + let tables = client + .list_tables() + .limit(10) + .send() + .await.expect("failed to load tables"); +} +``` + +2. The "low-level" API: It is also possible for customers to assemble the pieces themselves. 
This offers more control over operation construction & dispatch semantics: + +```rust +#[tokio::main] +async fn main() { + let conf = dynamodb::Config::builder().build(); + let conn = aws_hyper::Client::https(); + let operation = dynamodb::ListTables::builder() + .limit(10) + .build(&conf) + .expect("invalid operation"); + let tables = conn.call(operation).await.expect("failed to list tables"); +} +``` + +The Fluent API is implemented as a thin wrapper around the core API to improve ergonomics. + +## Internals +The Rust SDK is built on Tower Middleware, Tokio & Hyper. We're continuing to iterate on the internals to enable running the AWS SDK in other executors & HTTP stacks. As an example, you can see a demo of adding `reqwest` as a custom HTTP stack to gain access to its HTTP Proxy support! + +For more details about the SDK internals see [Operation Design](transport/operation.md) + +## Code Generation +The Rust SDK is code generated from Smithy models, using Smithy codegeneration utilities. The Code generation is written in Kotlin. More details can be found in the [Smithy](./smithy/overview.md) section. diff --git a/design/src/smithy/aggregate_shapes.md b/design/src/smithy/aggregate_shapes.md new file mode 100644 index 000000000..0a9bdcbdb --- /dev/null +++ b/design/src/smithy/aggregate_shapes.md @@ -0,0 +1,191 @@ +# Aggregate Shapes + +| Smithy Type | Rust Type | +| ----------- | ----------- | +| [List](#list) | `Vec` | +| [Set](#set) | `Vec` | +| [Map](#map) | `HashMap` | +| [Structure](#structure) | `struct` | +| [Union](#union) | `enum` | + +Most generated types are controlled by [SymbolVisitor](https://github.com/awslabs/smithy-rs/blob/main/codegen/src/main/kotlin/software/amazon/smithy/rust/codegen/smithy/SymbolVisitor.kt). + +## List +List objects in Smithy are transformed into vectors in Rust. 
Based on the output of the [NullableIndex](https://awslabs.github.io/smithy/javadoc/1.5.1/software/amazon/smithy/model/knowledge/NullableIndex.html), the generated list may be `Vec<T>` or `Vec<Option<T>>`. + +## Set +Because floats are not Hashable in Rust, for simplicity smithy-rs translates all sets into `Vec<T>` instead of `HashSet<T>`. In the future, a breaking change may be made to introduce a library-provided wrapper type for Sets. + +## Map +Because `key` MUST be a string in Smithy maps, we avoid the hashability issue encountered with `Set`. There are optimizations that could be considered (eg. since these maps will probably never be modified), however, pending customer feedback, Smithy Maps become `HashMap<String, V>` in Rust. + +## Structure +> See `StructureGenerator.kt` for more details + +Smithy `structure` becomes a `struct` in Rust. Backwards compatibility & usability concerns lead to a few design choices: + + 1. As specified by `NullableIndex`, fields are `Option<T>` when Smithy models them as nullable. + 2. All structs are marked `#[non_exhaustive]` + 3. All structs derive `Debug` & `PartialEq`. Structs **do not** derive `Eq` because a `float` member may be added in the future. + 4. Struct fields are public. Public struct fields allow for [split borrows](https://doc.rust-lang.org/nomicon/borrow-splitting.html). When working with output objects this significantly improves ergonomics, especially with optional fields. + ```rust,ignore + let out = dynamo::ListTablesOutput::new(); + out.some_field.unwrap(); // <- partial move, impossible with an accessor + ``` + 5. Builders are generated for structs that provide ergonomic and backwards compatible constructors. A builder for a struct is always available via the convenience method `SomeStruct::builder()` + 6. 
Structures manually implement debug: In order to support the [sensitive trait](https://awslabs.github.io/smithy/1.0/spec/core/documentation-traits.html#sensitive-trait), a `Debug` implementation for structures is manually generated. + +### Example Structure Output +**Smithy Input**: + +```java +@documentation("

Contains I/O usage metrics...") +structure IOUsage { + @documentation("... elided") + ReadIOs: ReadIOs, + @documentation("... elided") + WriteIOs: WriteIOs +} + +long ReadIOs + +long WriteIOs +``` +**Rust Output**: +```rust,ignore +///

Contains I/O usage metrics for a command that was invoked.

+#[non_exhaustive] +#[derive(serde::Deserialize, serde::Serialize, std::clone::Clone, std::cmp::PartialEq)] +pub struct IOUsage { + ///

The number of read I/O requests that the command made.

+ #[serde(rename = "ReadIOs")] + pub read_i_os: i64, + ///

The number of write I/O requests that the command made.

+ #[serde(rename = "WriteIOs")] + pub write_i_os: i64, +} +impl std::fmt::Debug for IOUsage { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut formatter = f.debug_struct("IOUsage"); + formatter.field("read_i_os", &self.read_i_os); + formatter.field("write_i_os", &self.write_i_os); + formatter.finish() + } +} +/// See [`IOUsage`](crate::model::IOUsage) +pub mod io_usage { + /// A builder for [`IOUsage`](crate::model::IOUsage) + #[non_exhaustive] + #[derive(Debug, Clone, Default)] + pub struct Builder { + read_i_os: std::option::Option, + write_i_os: std::option::Option, + } + impl Builder { + ///

The number of read I/O requests that the command made.

+ pub fn read_i_os(mut self, inp: i64) -> Self { + self.read_i_os = Some(inp); + self + } + pub fn set_read_i_os(mut self, inp: i64) -> Self { + self.read_i_os = Some(inp); + self + } + ///

The number of write I/O requests that the command made.

+ pub fn write_i_os(mut self, inp: i64) -> Self { + self.write_i_os = Some(inp); + self + } + pub fn set_write_i_os(mut self, inp: i64) -> Self { + self.write_i_os = Some(inp); + self + } + /// Consumes the builder and constructs a [`IOUsage`](crate::model::IOUsage) + pub fn build(self) -> crate::model::IOUsage { + crate::model::IOUsage { + read_i_os: self.read_i_os.unwrap_or_default(), + write_i_os: self.write_i_os.unwrap_or_default(), + } + } + } +} +impl IOUsage { + /// Creates a new builder-style object to manufacture [`IOUsage`](crate::model::IOUsage) + pub fn builder() -> crate::model::io_usage::Builder { + crate::model::io_usage::Builder::default() + } +} +``` + +## Union +Smithy `Union` is modeled as `enum` in Rust. + + 1. Generated `enum`s must be marked `#[non_exhaustive]`. + 2. Generated `enum`s must provide an `Unknown` variant. If parsing receives an unknown input that doesn't match any of the given union variants, `Unknown` should be constructed. [Tracking Issue](https://github.com/awslabs/smithy-rs/issues/185). + 1. Union members (enum variants) are **not** nullable, because Smithy union members cannot contain null values. + 2. When union members contain references to other shapes, we generate a wrapping variant (see below). + 3. Union members do not require `#[non_exhaustive]`, because changing the shape targeted by a union member is not backwards compatible. + 4. `is_variant` and `as_variant` helper functions are generated to improve ergonomics. 
+ +### Generated Union Example +The union generated for a simplified `dynamodb::AttributeValue` +**Smithy**: +```java +namespace test + +union AttributeValue { + @documentation("A string value") + string: String, + bool: Boolean, + bools: BoolList, + map: ValueMap +} + +map ValueMap { + key: String, + value: AttributeValue +} + +list BoolList { + member: Boolean +} +``` +**Rust**: +```rust,ignore +#[non_exhaustive] +#[derive(std::clone::Clone, std::cmp::PartialEq, std::fmt::Debug)] +pub enum AttributeValue { + /// a string value + String(std::string::String), + Bool(bool), + Bools(std::vec::Vec), + Map(std::collections::HashMap), +} + +impl AttributeValue { + pub fn as_bool(&self) -> Option<&bool> { + if let AttributeValue::Bool(val) = &self { Some(&val) } else { None } + } + pub fn is_bool(&self) -> bool { + self.as_bool().is_some() + } + pub fn as_bools(&self) -> Option<&std::vec::Vec> { + if let AttributeValue::Bools(val) = &self { Some(&val) } else { None } + } + pub fn is_bools(&self) -> bool { + self.as_bools().is_some() + } + pub fn as_map(&self) -> Option<&std::collections::HashMap> { + if let AttributeValue::Map(val) = &self { Some(&val) } else { None } + } + pub fn is_map(&self) -> bool { + self.as_map().is_some() + } + pub fn as_string(&self) -> Option<&std::string::String> { + if let AttributeValue::String(val) = &self { Some(&val) } else { None } + } + pub fn is_string(&self) -> bool { + self.as_string().is_some() + } +} +``` diff --git a/design/src/smithy/endpoint.md b/design/src/smithy/endpoint.md new file mode 100644 index 000000000..699f17020 --- /dev/null +++ b/design/src/smithy/endpoint.md @@ -0,0 +1,51 @@ +# Endpoint Resolution + +## Requirements +The core codegen generates HTTP requests that do not contain an authority, scheme or port. These properties must be set later based on configuration. Existing AWS services have a number of requirements that increase the complexity: + +1. 
Endpoints must support manual configuration by end users: +```rust +let config = dynamodb::Config::builder() + .endpoint(StaticEndpoint::for_uri("http://localhost:8000")) +``` + +When a user specifies a custom endpoint URI, _typically_ they will want to avoid having this URI mutated by other endpoint discovery machinery. + +2. Endpoints must support being customized on a per-operation basis by the endpoint trait. This will prefix the base endpoint, potentially driven by fields of the operation. [Docs](https://awslabs.github.io/smithy/1.0/spec/core/endpoint-traits.html#endpoint-trait) + +3. Endpoints must support being customized by [endpoint discovery](https://awslabs.github.io/smithy/1.0/spec/aws/aws-core.html#client-endpoint-discovery). A request, customized by a predefined set of fields from the input operation is dispatched to a specific URI. That operation returns the endpoint that should be used. Endpoints must be cached by a cache key containing: +``` +(access_key_id, [all input fields], operation) +``` +Endpoints retrieved in this way specify a TTL. + +4. Endpoints must be able to customize the signing (and other phases of the operation). For example, requests sent to a global region will have a region set by the endpoint provider. + + +## Design + +Configuration objects for services _must_ contain an `Endpoint`. This endpoint may be set by a user or it will default to the `endpointPrefix` from the service definition. In the case of endpoint discovery, _this_ is the endpoint that we will start with. + +During operation construction (see [Operation Construction](../transport/operation.md#operation-construction)) an `EndpointPrefix` may be set on the property bag. The eventual endpoint middleware will search for this in the property bag and (depending on the URI mutability) utilize this prefix when setting the endpoint. 
+ +In the case of endpoint discovery, we envision a different pattern: +```rust +// EndpointClient manages the endpoint cache +let (tx, rx) = dynamodb::EndpointClient::new(); +let client = aws_hyper::Client::new(); +// `endpoint_req` is an operation that can be dispatched to retrieve endpoints +// During operation construction, the endpoint resolver is configured to be `rx` instead of the static endpoint +// resolver provided by the service. +let (endpoint_req, req) = GetRecord::builder().endpoint_disco(rx).build_with_endpoint(); +// depending on the duration of endpoint expiration, this may be spawned into a separate task to continuously +// refresh endpoints. +if tx.needs(endpoint_req) { + let new_endpoint = client. + call(endpoint_req) + .await; + tx.send(new_endpoint) +} +let rsp = client.call(req).await?; +``` + +We believe that this design results in an SDK that both offers customers more control & reduces the likelihood of bugs from nested operation dispatch. Endpoint resolution is currently extremely rare in AWS services so this design may remain a prototype while we solidify other behaviors. diff --git a/design/src/smithy/overview.md b/design/src/smithy/overview.md new file mode 100644 index 000000000..bf90fa92b --- /dev/null +++ b/design/src/smithy/overview.md @@ -0,0 +1,27 @@ +# Smithy +The Rust SDK uses Smithy models and code generation tooling to generate an SDK. Smithy is an open source IDL (interface design language) developed by Amazon. Although the Rust SDK uses Smithy models for AWS services, smithy-rs and Smithy models in general are not AWS specific. + +Design documentation here covers both our implementation of Smithy Primitives (eg. simple shape) as well as more complex Smithy traits like [Endpoint](./endpoint.md). + +## Internals +Smithy introduces a few concepts that are defined here: + +1. Shape: The core Smithy primitive. A Smithy model is composed of nested shapes defining an API. +1. 
`Symbol`: A representation of a type including namespaces & any dependencies required to use a type. A shape can be converted into a symbol by a `SymbolVisitor`. A `SymbolVisitor` maps shapes to types in your programming language (eg. Rust). In the Rust SDK, see [SymbolVisitor.kt](https://github.com/awslabs/smithy-rs/blob/c049a37f8cba5f9bec2e96c28db83e7efb2edc53/codegen/src/main/kotlin/software/amazon/smithy/rust/codegen/smithy/SymbolVisitor.kt). Symbol visitors are composable—many specific behaviors are mixed in via small & focused symbol providers, eg. support for the streaming trait is mixed in separately. +2. `Writer`: Writers are code generation primitives that collect code prior to being written to a file. Writers enable language specific helpers to be added to simplify codegen for a given language. For example, `smithy-rs` adds `rustBlock` to [`RustWriter`](https://github.com/awslabs/smithy-rs/blob/908dec558e26bbae6fe4b7d9d1c221dd81699b59/codegen/src/main/kotlin/software/amazon/smithy/rust/codegen/rustlang/RustWriter.kt) to create a "Rust block" of code. + ```kotlin + writer.rustBlock("struct Model") { + model.fields.forEach { + write("${field.name}: #T", field.symbol) + } + } + ``` + This would produce something like: + ```rust + struct Model { + field1: u32, + field2: String + } + ``` + +3. Generators: A Generator, eg. `StructureGenerator`, `UnionGenerator` generates more complex Rust code from a Smithy model. Protocol generators pull these individual tools together to generate code for an entire service / protocol. diff --git a/design/src/smithy/recursive_shapes.md b/design/src/smithy/recursive_shapes.md new file mode 100644 index 000000000..b1344c954 --- /dev/null +++ b/design/src/smithy/recursive_shapes.md @@ -0,0 +1,57 @@ +# Recursive Shapes +> NB: Throughout this document, the word "box" always refers to a Rust [`Box<T>`](https://doc.rust-lang.org/std/boxed/struct.Box.html), a heap allocated pointer to T, and not the Smithy concept of boxed vs. 
unboxed. + +Recursive shapes pose a problem for Rust, because the following Rust code will not compile: + +```rust,compile_fail +struct TopStructure { + intermediate: IntermediateStructure +} + +struct IntermediateStructure { + top: Option<TopStructure> +} +``` + +```rust,ignore + | +3 | struct TopStructure { + | ^^^^^^^^^^^^^^^^^^^ recursive type has infinite size +4 | intermediate: IntermediateStructure + | ----------------------------------- recursive without indirection + | + = help: insert indirection (e.g., a `Box`, `Rc`, or `&`) at some point to make `main::TopStructure` representable +``` + +This occurs because Rust types must have a size known at compile time. The way around this, as the message suggests, is to Box the offending type. `smithy-rs` implements this design in [RecursiveShapeBoxer.kt](https://github.com/awslabs/smithy-rs/blob/main/codegen/src/main/kotlin/software/amazon/smithy/rust/codegen/smithy/transformers/RecursiveShapeBoxer.kt) + +To support this, as the message suggests, we must "`Box`" the offending type. There is a touch of trickiness—only one element in the cycle needs to be boxed, but we need to select it deterministically such that we always pick the same element between multiple codegen runs. To do this the Rust SDK will: + +1. Topologically sort the graph of shapes. +2. Identify cycles that do not pass through an existing `Box<T>`, List, Set, or Map +3. For each cycle, select the earliest shape alphabetically & mark it as `Box<T>` in the Smithy model by attaching the custom `RustBoxTrait` to the member. +4. Go back to step 1. + +This would produce valid Rust: + +```rust +struct TopStructure { + intermediate: IntermediateStructure +} + +struct IntermediateStructure { + top: Box<Option<TopStructure>> +} +``` + +**Backwards Compatibility Note!** + +`Box<T>` is not generally compatible with `T` in Rust. There are several unlikely but valid model changes that will cause the SDK to produce code that may break customers. If these are problematic, all are avoidable with customizations. + +1. 
A recursive link is added to an existing structure. This causes a member that was not boxed before to become `Box<T>`. + + > **Workaround**: Mark the new member as `Box<T>` in a customization. + +1. A field is removed from a structure that removes the recursive dependency. The SDK would generate `T` instead of `Box<T>`. + + > **Workaround**: Mark the member that used to be boxed as `Box<T>` in a customization. The Box will be unnecessary, but we will keep it for backwards compatibility. diff --git a/design/src/smithy/simple_shapes.md b/design/src/smithy/simple_shapes.md new file mode 100644 index 000000000..13ecbd8aa --- /dev/null +++ b/design/src/smithy/simple_shapes.md @@ -0,0 +1,58 @@ +# Simple Shapes +| Smithy Type (links to design discussions) | Rust Type (links to Rust documentation) | +| ----------- | ----------- | +| blob | `Vec<u8>` | +| boolean | [`bool`](https://doc.rust-lang.org/std/primitive.bool.html) | +| [string](#strings) | [`String`](https://doc.rust-lang.org/std/string/struct.String.html) | +| byte | `i8` | +| short | `i16` | +| integer | `i32` | +| long | `i64` | +| float | `f32` | +| double | `f64` | +| [bigInteger](#big-numbers) | `BigInteger` (Not implemented yet) | +| [bigDecimal](#big-numbers) | `BigDecimal` (Not implemented yet) | +| [timestamp](#timestamps) | [`Instant`](https://github.com/awslabs/smithy-rs/blob/main/rust-runtime/smithy-types/src/instant/mod.rs) | +| [document](#document-types) | [`Document`](https://github.com/awslabs/smithy-rs/blob/v0.6-rc.1/rust-runtime/smithy-types/src/lib.rs#L33-L41) | + +### Big Numbers +Rust currently has no standard library or universally accepted large-number crate. Until one is stabilized, a string representation is a reasonable compromise: + +```rust +pub struct BigInteger(String); +pub struct BigDecimal(String); +``` + +This will enable us to add helpers over time as requested. Users will also be able to define their own conversions into their preferred large-number libraries. 
+ +As of 4/17/2021 BigInteger / BigDecimal are not included in AWS models. Implementation is tracked [here](https://github.com/awslabs/smithy-rs/issues/312). +### Timestamps +[chrono](https://github.com/chronotope/chrono) is the current de facto library for datetime in Rust, but it is pre-1.0. Instants are represented by an SDK-defined structure modeled on `std::time::Duration` from the Rust standard library. + +```rust +{{#include ../../../rust-runtime/smithy-types/src/instant/mod.rs:instant}} +``` + +A `to_chrono()` method on `Instant` enables conversion from SDK instants to `chrono` dates. + +### Strings +Rust has two different String representations: +* `String`, an owned, heap-allocated string. +* `&str`, a reference to a string, owned elsewhere. + +In an ideal world, input shapes, where there is no reason for the strings to be owned, would use `&'a str`. Outputs would likely use `String`. However, Smithy does not provide a distinction between input and output shapes. + +A third, compromise option could be storing `Arc<String>`, an atomically reference-counted pointer to a `String`. This may be ideal for certain advanced users, but is likely to confuse most users and produces worse ergonomics. _This is an open design area where we will seek user feedback._ Rusoto uses `String` and there has been [one feature request](https://github.com/rusoto/rusoto/issues/1806) to date to change that. + +Current models represent strings as `String`. + +### Document Types + +Smithy defines the concept of "Document Types": +> [Documents represent] protocol-agnostic open content that is accessed like JSON data. Open content is useful for modeling unstructured data that has no schema, data that can't be modeled using rigid types, or data that has a schema that evolves outside of the purview of a model. The serialization format of a document is an implementation detail of a protocol and MUST NOT have any effect on the types exposed by tooling to represent a document value. 
+ +```rust +{{#include ../../../rust-runtime/smithy-types/src/lib.rs:document}} +``` + +Individual protocols define their own document serialization behavior, typically by creating a newtype around `Document` that implements `serde::Serialize/serde::Deserialize`. See [Document Json Serialization](https://github.com/awslabs/smithy-rs/blob/138320e99e6c7aaf14217d07cf996ba07349dd5e/rust-runtime/inlineable/src/doc_json.rs). diff --git a/design/src/tenets.md b/design/src/tenets.md new file mode 100644 index 000000000..b904e25bc --- /dev/null +++ b/design/src/tenets.md @@ -0,0 +1,27 @@ +# Rust SDK Design Tenets +> Unless you know better ones! These are our tenets today, but we'd love your thoughts. Do you wish we had different priorities? Let us know by opening an issue or starting a discussion. +1. [**Batteries included, but replaceable.**](#batteries-included-but-replaceable) The AWS SDK for Rust should provide a best-in-class experience for many use cases, **but** customers will use the SDK in unique and unexpected ways. **Meet customers where they are;** strive to be compatible with their tools. Provide mechanisms to allow customers to make different choices. +2. [**Make common problems easy to solve.**](#make-common-problems-easy-to-solve) The AWS SDK for Rust should make uncommon problems solvable. Guide customers to patterns that set them up for long-term success. +3. [**Design for the Future.**](#design-for-the-future) The AWS SDK for Rust should evolve with AWS without breaking existing customers. APIs will evolve in unpredictable directions, new protocols will gain adoption, and new services will be created that we never could have imagined. Don’t simplify or unify code today that prevents evolution tomorrow. + +## Details, Justifications, and Ramifications + +### Batteries included, but replaceable. + +Some customers will use the Rust SDK as their first experience with async Rust, potentially **any** Rust. 
They may not be familiar with Tokio or the concept of an async executor. We are not afraid to have an opinion about the best solution for most customers. + +Other customers will come to the SDK with specific requirements. Perhaps they're integrating the SDK into a much larger project that uses `async_std`. Maybe they need to set custom headers, modify the user agent, or audit every request. They should be able to use the Rust SDK without forking it to meet their needs. + +### Make common problems easy to solve + +If solving a common problem isn’t obvious from the API, it should be obvious from the documentation. The SDK should guide users towards the best solutions for common tasks, **first** with well-named methods, **second** with documentation, and **third** with real-world usage examples. Provide misuse-resistant APIs. Async Rust has the potential to introduce subtle bugs; the Rust SDK should help customers avoid them. + +### Design for the Future + +APIs evolve in unpredictable ways, and it's crucial that the SDK can evolve without breaking existing customers. This means designing the SDK so that fundamental changes to the internals can be made without altering the external interface we surface to customers: + +* Keep the shared core as small & opaque as possible. +* Don’t leak our internal dependencies to customers. +* With every design choice, consider, "Can I reverse this choice in the future?" + +This may not result in DRY code, and that’s OK! Code that is auto-generated has different goals and tradeoffs than code that has been written by hand. diff --git a/design/src/transport/middleware.md b/design/src/transport/middleware.md new file mode 100644 index 000000000..07a1e4738 --- /dev/null +++ b/design/src/transport/middleware.md @@ -0,0 +1,7 @@ +# HTTP middleware + +Signing, endpoint specification, and logging are all handled as middleware. 
The Rust SDK takes a minimalist approach to middleware: + +Middleware is defined as minimally as possible, then adapted into the middleware system used by the IO layer. Tower is the de facto standard for HTTP middleware in Rust—we will probably use it. But we also want to make our middleware usable for users who aren't using Tower (or if we decide to not use Tower in the long run). + +Because of this, rather than implementing all our middleware as "Tower Middleware", we implement it narrowly (e.g., as a function that operates on `operation::Request`), then define optional adapters to make our middleware Tower-compatible. diff --git a/design/src/transport/operation.md b/design/src/transport/operation.md new file mode 100644 index 000000000..92bc37ee1 --- /dev/null +++ b/design/src/transport/operation.md @@ -0,0 +1,64 @@ +# HTTP-based Operations +The Smithy code generator for Rust (and, by extension, the AWS SDK) uses an `Operation` abstraction to provide a unified +interface for dispatching requests. `Operation`s contain: +* A base HTTP request (with a potentially streaming body) +* A typed property bag of configuration options +* A fully generic response handler + +In the typical case, these configuration options include things like a `CredentialsProvider`; however, they can also be +full middleware layers that will get added by the dispatch stack. + +## Operation Phases +This section details the flow of a request through the SDK until a response is returned to the user. + +### Input Construction + +A customer interacts with the SDK builders to construct an input. The `build()` method on an input returns +an `Operation`. This codifies the base HTTP request & all the configuration and middleware layers required to modify and dispatch the request. 
+ +```rust,ignore +pub struct Operation<H, R> { + request: Request, + response_handler: H, + _retry_policy: R, +} + +pub struct Request { + base: http::Request<SdkBody>, + configuration: PropertyBag, +} +``` + +For most requests, `.build()` will NOT consume the input. A user can call `.build()` multiple times to produce multiple operations from the same input. + +By using a property bag, we can define the `Operation` in Smithy core. AWS-specific configuration can be added later in the stack. + +### Operation Construction +In order to construct an operation, the generated code injects appropriate middleware & configuration via the configuration property bag. It does this by reading the configuration properties out of the service +config, copying them as necessary, and loading them into the `Request`: + +```rust,ignore +// This is approximately the generated code; I've cleaned a few things up for readability. +pub fn build(self, config: &dynamodb::config::Config) -> Operation<BatchExecuteStatement> { + let op = BatchExecuteStatement::new(BatchExecuteStatementInput { + statements: self.statements, + }); + let req = op.build_http_request().map(SdkBody::from); + + let mut req = operation::Request::new(req); + let mut conf = req.config_mut(); + conf.insert_signing_config(config.signing_service()); + conf.insert_endpoint_resolver(config.endpoint_resolver.clone()); + Operation::new(req) +} +``` + +### Operation Dispatch and Middleware + +The Rust SDK endeavors to behave as predictably as possible. This means that if at all possible we will not dispatch extra HTTP requests during the dispatch of a normal operation. Making this work is covered in more detail in the design of credentials providers & endpoint resolution. + +The upshot is that we will always prefer a design where the user has explicit control of when credentials are loaded and endpoints are resolved. 
This doesn't mean that users can't use easy-to-use options (we will provide an automatically refreshing credentials provider); however, the credentials provider won't load credentials during the dispatch of an individual request. + +## Operation Parsing and Response Loading + +The fundamental trait for HTTP-based protocols is `ParseHttpResponse`. diff --git a/design/src/transport/overview.md b/design/src/transport/overview.md new file mode 100644 index 000000000..42bf31881 --- /dev/null +++ b/design/src/transport/overview.md @@ -0,0 +1,11 @@ +# Transport +The transport layer of smithy-rs and the Rust SDK. Our goal is to support customers bringing their own HTTP stack and runtime. + +## Where we are today +`aws-hyper` assembles a middleware stack with `tower`. It provides a way to use an HTTP client other than Hyper; however, it currently has a hard dependency on Hyper & Tokio. `hyper::Body` is being used directly as the body implementation for responses. + +## Where we want to go +1. Extend `HttpService` to add a `sleep` method. This is required to enable runtimes other than Tokio to define how they should sleep. +2. Replace `hyper::Body` in responses with `SdkBody`. For now, `SdkBody` will probably privately wrap `hyper::Body`. +3. Merge `aws-hyper` into `aws-http`. Tokio becomes an optional feature—when the Tokio feature is opted out, the "fast path" variants for the connection variants are `cfg`'d out. +4. By default, customers get a fully baked HTTP stack, but they can opt out of certain features and bring their own implementation of `HttpService`. 
diff --git a/rust-runtime/smithy-types/src/instant/mod.rs b/rust-runtime/smithy-types/src/instant/mod.rs index 079dcd0da..776d5fd5b 100644 --- a/rust-runtime/smithy-types/src/instant/mod.rs +++ b/rust-runtime/smithy-types/src/instant/mod.rs @@ -10,12 +10,16 @@ use std::time::{SystemTime, UNIX_EPOCH}; mod format; +/* ANCHOR: instant */ + #[derive(Debug, PartialEq, Clone, Copy)] pub struct Instant { seconds: i64, subsecond_nanos: u32, } +/* ANCHOR_END: instant */ + impl Instant { pub fn from_epoch_seconds(epoch_seconds: i64) -> Self { Instant { diff --git a/rust-runtime/smithy-types/src/lib.rs b/rust-runtime/smithy-types/src/lib.rs index 22c8ea81d..08f83659b 100644 --- a/rust-runtime/smithy-types/src/lib.rs +++ b/rust-runtime/smithy-types/src/lib.rs @@ -30,6 +30,14 @@ impl AsRef<[u8]> for Blob { } } +/* ANCHOR: document */ + +/// Document Type +/// +/// Document types represent protocol-agnostic open content that is accessed like JSON data. +/// Open content is useful for modeling unstructured data that has no schema, data that can't be +/// modeled using rigid types, or data that has a schema that evolves outside of the purview of a model. +/// The serialization format of a document is an implementation detail of a protocol. #[derive(Debug, Clone, PartialEq)] pub enum Document { Object(HashMap<String, Document>), @@ -49,6 +57,8 @@ pub enum Number { Float(f64), } +/* ANCHOR_END: document */ + /// Generic Error type /// /// For many services, Errors are modeled. However, many services only partially model errors or don't -- GitLab