Unverified commit c43fc71c, authored by Russell Cohen and committed by GitHub
Browse files

Fix the canary by using edit distance (#3406)

## Testing
- Deployed to my personal account.

----

_By submitting this pull request, I confirm that you can use, modify,
copy, and redistribute this contribution, under the terms of your
choice._
parent f573cc22
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -21,7 +21,7 @@ From there, you can just point the `canary-runner` to the `cdk-outputs.json` to

```bash
cd canary-runner
cargo run -- run --sdk-release-tag <version> --musl --cdk-outputs ../cdk-outputs.json
cargo run -- run --sdk-release-tag <version> --musl --cdk-output ../cdk-outputs.json
```

__NOTE:__ You may want to add a `--profile` to the `deploy` command to select a specific credential
+1 −1
Original line number Diff line number Diff line
@@ -84,7 +84,7 @@ impl CanaryEnv {
        // Amazon Transcribe starts returning different output for the same audio.
        let expected_transcribe_result = env::var("CANARY_EXPECTED_TRANSCRIBE_RESULT")
            .unwrap_or_else(|_| {
                "Good day to you transcribe. This is Polly talking to you from the Rust S. D. K."
                "Good day to you transcribe. This is Polly talking to you from the Rust SDK."
                    .to_string()
            });

+12 −9
Original line number Diff line number Diff line
@@ -3,12 +3,13 @@
 * SPDX-License-Identifier: Apache-2.0
 */

use crate::canary::CanaryError;
use crate::mk_canary;
use anyhow::bail;
use async_stream::stream;
use aws_config::SdkConfig;
use aws_sdk_transcribestreaming as transcribe;
use bytes::BufMut;
use edit_distance::edit_distance;
use transcribe::primitives::Blob;
use transcribe::types::{
    AudioEvent, AudioStream, LanguageCode, MediaEncoding, TranscriptResultStream,
@@ -64,18 +65,20 @@ pub async fn transcribe_canary(
        }
    }

    if expected_transcribe_result != full_message.trim() {
        Err(CanaryError(format!(
    let dist = edit_distance(&expected_transcribe_result, full_message.trim());
    let max_edit_distance = 10;
    if dist > max_edit_distance {
        bail!(
            "Transcription from Transcribe doesn't look right:\n\
            Expected: `{}`\n\
            Actual:   `{}`\n",
            Actual:   `{}`\n. The maximum allowed edit distance is {}. This had an edit distance of {}",
            expected_transcribe_result,
            full_message.trim()
        ))
        .into())
    } else {
        Ok(())
            full_message.trim(),
            max_edit_distance,
            dist
        )
    }
    Ok(())
}

fn pcm_data() -> Vec<u8> {
+3 −0
Original line number Diff line number Diff line
@@ -54,6 +54,7 @@ uuid = { version = "0.8", features = ["v4"] }
tokio-stream = "0"
tracing-texray = "0.1.1"
reqwest = { version = "0.11.14", features = ["rustls-tls"], default-features = false }
edit-distance = "2"
"#;

const REQUIRED_SDK_CRATES: &[&str] = &[
@@ -451,6 +452,7 @@ uuid = { version = "0.8", features = ["v4"] }
tokio-stream = "0"
tracing-texray = "0.1.1"
reqwest = { version = "0.11.14", features = ["rustls-tls"], default-features = false }
edit-distance = "2"
aws-config = { path = "some/sdk/path/aws-config", features = ["behavior-version-latest"] }
aws-sdk-s3 = { path = "some/sdk/path/s3" }
aws-sdk-ec2 = { path = "some/sdk/path/ec2" }
@@ -515,6 +517,7 @@ uuid = { version = "0.8", features = ["v4"] }
tokio-stream = "0"
tracing-texray = "0.1.1"
reqwest = { version = "0.11.14", features = ["rustls-tls"], default-features = false }
edit-distance = "2"
aws-config = { version = "0.46.0", features = ["behavior-version-latest"] }
aws-sdk-s3 = "0.20.0"
aws-sdk-ec2 = "0.19.0"