Unverified Commit dc8f1743 authored by John DiSanti's avatar John DiSanti Committed by GitHub
Browse files

Retry Gradle daemon startup failures in `sdk-sync` (#1504)

parent 149b92b3
Loading
Loading
Loading
Loading
+25 −16
Original line number Diff line number Diff line
@@ -11,7 +11,7 @@ use smithy_rs_tool_common::shell::handle_failure;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::sync::Arc;
use tracing::{error, info, instrument};
use tracing::{error, info, instrument, warn};

#[derive(Clone, Debug)]
pub struct CodeGenSettings {
@@ -129,7 +129,7 @@ impl DefaultSdkGenerator {
        Ok(())
    }

    fn do_aws_sdk_assemble(&self) -> Result<()> {
    fn do_aws_sdk_assemble(&self, attempt: u32) -> Result<()> {
        let mut command = Command::new("./gradlew");
        command.arg("--no-daemon"); // Don't let Gradle continue running after the build
        command.arg("--no-parallel"); // Disable Gradle parallelism
@@ -185,6 +185,8 @@ impl DefaultSdkGenerator {
            "-Paws.sdk.examples.revision={}",
            &self.aws_doc_sdk_examples_revision
        ));
        // This property doesn't affect the build at all, but allows us to reliably test retry with `fake-sdk-assemble`
        command.arg(format!("-Paws.sdk.sync.attempt={}", attempt));
        command.arg("aws:sdk:assemble");
        command.current_dir(self.smithy_rs.path());

@@ -198,22 +200,29 @@ impl DefaultSdkGenerator {
    /// Runs `aws:sdk:assemble` target with property `aws.fullsdk=true` set
    ///
    /// The retry loop below makes at most 3 attempts. A failed attempt is retried only when
    /// the formatted error contains "Timeout waiting to connect to the Gradle daemon"; any
    /// other error, or a timeout on the final attempt, is logged with `error!` and returned
    /// to the caller. The current attempt number (starting at 1) is passed to
    /// `do_aws_sdk_assemble` on every call.
    #[instrument(skip(self))]
    fn aws_sdk_assemble(&self) -> Result<()> {
        // NOTE(review): the statements from here down to the `ps -ef` match look like the
        // earlier single-attempt implementation shown alongside the retry loop (diff view
        // without +/- markers). They call `do_aws_sdk_assemble` without the `attempt`
        // argument and leave the `if let` unclosed — confirm which body is current.
        let result = self.do_aws_sdk_assemble();
        if let Err(err) = &result {
            error!("Codegen failed: {}", err);
            // On failure, do a dump of running processes to give more insight into if there is a process leak going on
            match Command::new("ps").arg("-ef").output() {
                Ok(output) => info!(
                    "Running processes shortly after failure:\n---\n{}---\n",
                    String::from_utf8_lossy(&output.stdout)
                ),
                Err(err) => info!(
                    "Failed to get running processes shortly after failure: {}",
                    err
                ),
            }
        // Retry Gradle daemon startup failures, making at most 3 attempts in total
        let (mut attempt, max_attempts) = (1, 3);
        // The loop has no `break`; it is left only through the `return`s below.
        loop {
            match self.do_aws_sdk_assemble(attempt) {
                Ok(_) => return Ok(()),
                Err(err) => {
                    let error_message = format!("{}", err);
                    // Retryable only while attempts remain AND the failure is the daemon
                    // connection timeout, recognized by a substring of the error text.
                    let should_retry = attempt < max_attempts
                        && error_message
                            .contains("Timeout waiting to connect to the Gradle daemon");
                    if !should_retry {
                        error!("Codegen failed after {} attempt(s): {}", attempt, err);
                        return Err(err);
                    } else {
                        warn!(
                            "Gradle daemon start failed. Will retry. Full error: {}",
                            error_message
                        );
                    }
                }
            }
            attempt += 1;
        }
        // NOTE(review): unreachable after the loop above; presumably the tail expression of
        // the earlier single-attempt body — confirm and drop if so.
        result
    }
}

+5 −0
Original line number Diff line number Diff line
@@ -38,6 +38,11 @@ def get_models_path():
    return get_property("aws.sdk.models.path")


# Fail on the first few attempts to test retry: any attempt number below 3 exits
# with the Gradle daemon timeout message that sdk-sync treats as retryable, so
# only the third attempt proceeds to the checks below.
attempt_property = get_property("aws.sdk.sync.attempt")
try:
    attempt = int(attempt_property)
except (TypeError, ValueError):
    # Missing (None) or non-numeric property: fail with a clear message that is
    # NOT the retryable timeout text, instead of an uncaught traceback from int().
    print(
        f"aws.sdk.sync.attempt must be set to an integer, got: {attempt_property!r}",
        file=sys.stderr,
    )
    sys.exit(1)
if attempt < 3:
    print("Timeout waiting to connect to the Gradle daemon")
    sys.exit(1)

# Verify the models path was set correctly
models_path = get_models_path()
if models_path is None or not os.path.isfile(f"{models_path}/s3.json"):