Update Transcribe code example (#713) (e3e6e0d2) · Commits · Public Repositories / Smithy Rust

aws/SDK_CHANGELOG.md

+1 −1

Original line number	Diff line number	Diff line
		@@ -9,6 +9,7 @@ vNext (Month Day, Year)
		New This Week

		- :tada: Add presigned request support and examples for S3 GetObject and PutObject (smithy-rs#731)
		- Updated Transcribe code example to take an audio file as a command-line option and added readme.

		v0.0.19-alpha (September 24th, 2021)
		====================================
		@@ -33,7 +34,6 @@ Thank you for your contributions! :heart:

		- @jonhoo (smithy-rs#703)


		v0.0.18-alpha (September 14th, 2021)
		=======================

aws/sdk/examples/transcribestreaming/Cargo.toml

+3 −4

Original line number	Diff line number	Diff line
		[package]
		name = "transcribestreaming"
		name = "transcribestreaming_code_example"
		version = "0.1.0"
		authors = ["John DiSanti <jdisanti@amazon.com>"]
		authors = ["John DiSanti <jdisanti@amazon.com>", "Doug Schwartz <dougsch@amazon.com>"]
		edition = "2018"

		# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
		@@ -9,10 +9,9 @@ edition = "2018"
		[dependencies]
		aws-config = { path = "../../build/aws-sdk/aws-config" }
		aws-sdk-transcribestreaming = { package = "aws-sdk-transcribestreaming", path = "../../build/aws-sdk/transcribestreaming" }
		aws-types = { path = "../../build/aws-sdk/aws-types" }

		async-stream = "0.3"
		bytes = "1"
		hound = "3.4"
		structopt = { version = "0.3", default-features = false }
		tokio = { version = "1", features = ["full"] }
		tracing-subscriber = "0.2.18"

aws/sdk/examples/transcribestreaming/README.md

0 → 100644

+47 −0

Original line number	Diff line number	Diff line
		# AWS SDK for Rust code examples for Amazon Transcribe

		Amazon Transcribe provides transcription services for audio files.

		## Purpose

		This example demonstrates how to perform an Amazon Transcribe operation using the alpha version of the AWS SDK for Rust.

		## Prerequisites

		You must have an AWS account, and have configured your default credentials and AWS Region as described in [https://github.com/awslabs/aws-sdk-rust](https://github.com/awslabs/aws-sdk-rust).

		## Running the code

		### transcribestreaming

		This example displays a transcription of a WAV audio file.

		`cargo run -- -a AUDIO-FILE [-r REGION] [-v]`

		- _AUDIO-FILE_ is the name of the audio file to transcribe. It must be in WAV format; the example converts the WAV file content to __pcm__ format for Amazon Transcribe.
		Note that Amazon Transcribe supports encoding in __pcm__, __ogg-opus__, and __flac__ formats.
		- _REGION_ is the Region in which the client is created.
		If not supplied, uses the value of the __AWS_REGION__ environment variable.
		If the environment variable is not set, defaults to __us-west-2__.
		- __-v__ displays additional information.

		If you run it with the WAV file in __audio/hello-transcribe-8000.wav__, you should see the following transcribed text:

		```
		Good day to you transcribe.
		This is Polly talking to you from the Rust ST K.
		```

		### Notes

		- We recommend that you grant this code least privilege,
		or at most the minimum permissions required to perform the task.
		For more information, see
		[Grant Least Privilege](https://docs.aws.amazon.com/IAM/latest/UserGuide/best-practices.html#grant-least-privilege)
		in the AWS Identity and Access Management User Guide.
		- This code has not been tested in all AWS Regions.
		Some AWS services are available only in specific
		[Regions](https://aws.amazon.com/about-aws/global-infrastructure/regional-product-services).
		- Running this code might result in charges to your AWS account.

		Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. SPDX-License-Identifier: Apache-2.0

aws/sdk/examples/transcribestreaming/src/main.rs

+55 −9

Original line number	Diff line number	Diff line
		@@ -4,27 +4,73 @@
		*/

		use async_stream::stream;

		use aws_config::meta::region::RegionProviderChain;
		use aws_sdk_transcribestreaming::model::{
		AudioEvent, AudioStream, LanguageCode, MediaEncoding, TranscriptResultStream,
		};
		use aws_sdk_transcribestreaming::{Blob, Client, Error, Region};
		use aws_sdk_transcribestreaming::{Blob, Client, Error, Region, PKG_VERSION};
		use bytes::BufMut;
		use std::time::Duration;
		use structopt::StructOpt;

		// Command-line options for this example; `main` obtains them with `Opt::from_args()`.
		// NOTE(review): reviewer notes here use plain `//` comments on purpose, because
		// structopt turns `///` doc comments into the generated help text.
		#[derive(Debug, StructOpt)]
		struct Opt {
		// Optional (`-r`). When absent, `main` falls back to the default Region provider
		// chain and finally to "us-west-2".
		/// The AWS Region.
		#[structopt(short, long)]
		region: Option<String>,

		// Required (`-a`). `main` passes this to `pcm_data`, which opens it as a WAV file.
		/// The name of the audio file.
		#[structopt(short, long)]
		audio_file: String,

		// Flag (`-v`). When set, `main` prints the client version, Region, audio file name,
		// and partial transcription results.
		/// Whether to display additional information.
		#[structopt(short, long)]
		verbose: bool,
		}

		const CHUNK_SIZE: usize = 8192;

		/// Transcribes an audio file to text.
		/// # Arguments
		///
		/// * `-a AUDIO_FILE` - The name of the audio file.
		/// It must be a WAV file, which is converted to __pcm__ format for Amazon Transcribe.
		/// Amazon Transcribe also supports __ogg-opus__ and __flac__ formats.
		/// * `[-r REGION]` - The Region in which the client is created.
		/// If not supplied, uses the value of the AWS_REGION environment variable.
		/// If the environment variable is not set, defaults to us-west-2.
		/// * `[-v]` - Whether to display additional information.
		#[tokio::main]
		async fn main() -> Result<(), Error> {
		tracing_subscriber::fmt::init();

		let region_provider = RegionProviderChain::default_provider().or_else(Region::new("us-west-2"));
		let Opt {
		region,
		audio_file,
		verbose,
		} = Opt::from_args();

		let region_provider = RegionProviderChain::first_try(region.map(Region::new))
		.or_default_provider()
		.or_else(Region::new("us-west-2"));

		println!();

		if verbose {
		println!("Transcribe client version: {}", PKG_VERSION);
		println!(
		"Region: {}",
		region_provider.region().await.unwrap().as_ref()
		);
		println!("Audio filename: {}", &audio_file);
		println!();
		}

		let shared_config = aws_config::from_env().region(region_provider).load().await;
		let client = Client::new(&shared_config);

		let input_stream = stream! {
		let pcm = pcm_data();
		let pcm = pcm_data(&*audio_file);
		for chunk in pcm.chunks(CHUNK_SIZE) {
		// Sleeping isn't necessary, but emphasizes the streaming aspect of this
		tokio::time::sleep(Duration::from_millis(100)).await;
		@@ -48,7 +94,9 @@ async fn main() -> Result<(), Error> {
		let transcript = transcript_event.transcript.unwrap();
		for result in transcript.results.unwrap_or_else(\|\| Vec::new()) {
		if result.is_partial {
		if verbose {
		println!("Partial: {:?}", result);
		}
		} else {
		let first_alternative = &result.alternatives.as_ref().unwrap()[0];
		full_message += first_alternative.transcript.as_ref().unwrap();
		@@ -60,14 +108,12 @@ async fn main() -> Result<(), Error> {
		}
		}
		println!("\nFully transcribed message:\n\n{}", full_message);
		println!("Done.");

		Ok(())
		}

		fn pcm_data() -> Vec<u8> {
		let audio = include_bytes!("../audio/hello-transcribe-8000.wav");
		let reader = hound::WavReader::new(&audio[..]).unwrap();
		fn pcm_data(audio_file: &str) -> Vec<u8> {
		let reader = hound::WavReader::open(audio_file).unwrap();
		let samples_result: hound::Result<Vec<i16>> = reader.into_samples::<i16>().collect();

		let mut pcm: Vec<u8> = Vec::new();

Admin message