Commit 9156aca9 (unverified), authored by John DiSanti, committed by GitHub
Browse files

Add observability and more configuration to `sdk-sync` (#1452)

* Make `sdk-sync` Gradle heap/metaspace constraints configurable
* Dump process info on `sdk-sync` codegen failure
* Periodically log progress information in `sdk-sync`
* Enable verbose GC for codegen and use serial GC
parent ea2ae7bf
Loading
Loading
Loading
Loading
+47 −18
Original line number Diff line number Diff line
@@ -60,12 +60,30 @@ version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"

[[package]]
name = "bytesize"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c58ec36aac5066d5ca17df51b3e70279f5670a72102f5752cb7e7c856adfc70"

[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"

[[package]]
name = "chrono"
version = "0.4.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73"
dependencies = [
 "libc",
 "num-integer",
 "num-traits",
 "winapi",
]

[[package]]
name = "clap"
version = "3.1.8"
@@ -96,12 +114,6 @@ dependencies = [
 "syn",
]

[[package]]
name = "core-foundation-sys"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc"

[[package]]
name = "crossbeam-channel"
version = "0.5.4"
@@ -314,6 +326,12 @@ dependencies = [
 "autocfg",
]

[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"

[[package]]
name = "mockall"
version = "0.11.0"
@@ -341,6 +359,16 @@ dependencies = [
 "syn",
]

[[package]]
name = "nom"
version = "7.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8903e5a29a317527874d0402f867152a3d21c908bb0b933e416c65e301d4c36"
dependencies = [
 "memchr",
 "minimal-lexical",
]

[[package]]
name = "normalize-line-endings"
version = "0.3.0"
@@ -348,12 +376,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be"

[[package]]
name = "ntapi"
version = "0.3.7"
name = "num-integer"
version = "0.1.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c28774a7fd2fbb4f0babd8237ce554b73af68021b5f695a3cebd6c59bac0980f"
checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9"
dependencies = [
 "winapi",
 "autocfg",
 "num-traits",
]

[[package]]
@@ -578,7 +607,7 @@ dependencies = [
 "regex",
 "serde",
 "smithy-rs-tool-common",
 "sysinfo",
 "systemstat",
 "tempfile",
 "toml",
 "tracing",
@@ -650,16 +679,16 @@ dependencies = [
]

[[package]]
name = "sysinfo"
version = "0.23.11"
name = "systemstat"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3bf915673a340ee41f2fc24ad1286c75ea92026f04b65a0d0e5132d80b95fc61"
checksum = "5f5dc96f7634f46ac7e485b8c051f5b89ec8ee5cc023236dd12fe4ae2fb52f80"
dependencies = [
 "cfg-if",
 "core-foundation-sys",
 "bytesize",
 "chrono",
 "lazy_static",
 "libc",
 "ntapi",
 "once_cell",
 "nom",
 "winapi",
]

+1 −1
Original line number Diff line number Diff line
@@ -22,7 +22,7 @@ num_cpus = "1.13.1"
rayon = "1.5.2"
serde = { version = "1.0.136", features = ["derive"] }
smithy-rs-tool-common = { version = "0.1", path = "../smithy-rs-tool-common" }
sysinfo = { version = "0.23.11", default-features = false }
systemstat = "0.1.11"
tempfile = "3.3.0"
toml = "0.5.9"
tracing = "0.1.34"
+33 −5
Original line number Diff line number Diff line
@@ -6,10 +6,11 @@
use anyhow::{Context, Result};
use clap::Parser;
use sdk_sync::init_tracing;
use sdk_sync::sync::gen::CodeGenSettings;
use sdk_sync::sync::Sync;
use smithy_rs_tool_common::macros::here;
use std::path::PathBuf;
use sysinfo::{System, SystemExt};
use systemstat::{Platform, System};
use tracing::info;

const CODEGEN_MIN_RAM_REQUIRED_GB: usize = 2;
@@ -36,6 +37,29 @@ struct Args {
    /// system property) to use for Smithy codegen. Defaults to 1.
    #[clap(long)]
    smithy_parallelism: Option<usize>,
    /// The maximum Java heap space (in megabytes) that the Gradle daemon is allowed to use during code generation.
    #[clap(long)]
    max_gradle_heap_megabytes: Option<usize>,
    /// The maximum Java metaspace (in megabytes) that the Gradle daemon is allowed to use during code generation.
    #[clap(long)]
    max_gradle_metaspace_megabytes: Option<usize>,
}

impl Args {
    /// Builds the code generation settings from the command-line arguments.
    ///
    /// Any option that was not passed on the command line takes its value
    /// from `CodeGenSettings::default()`.
    fn codegen_settings(&self) -> CodeGenSettings {
        let CodeGenSettings {
            smithy_parallelism: fallback_parallelism,
            max_gradle_heap_megabytes: fallback_heap,
            max_gradle_metaspace_megabytes: fallback_metaspace,
        } = CodeGenSettings::default();

        let smithy_parallelism = self.smithy_parallelism.unwrap_or(fallback_parallelism);
        let max_gradle_heap_megabytes = self.max_gradle_heap_megabytes.unwrap_or(fallback_heap);
        let max_gradle_metaspace_megabytes = self
            .max_gradle_metaspace_megabytes
            .unwrap_or(fallback_metaspace);

        CodeGenSettings {
            smithy_parallelism,
            max_gradle_heap_megabytes,
            max_gradle_metaspace_megabytes,
        }
    }
}

/// This tool syncs codegen changes from smithy-rs, examples changes from aws-doc-sdk-examples,
@@ -59,13 +83,11 @@ fn main() -> Result<()> {
    init_tracing();
    let args = Args::parse();

    let sys = System::new_all();
    let available_ram_gb = (sys.available_memory() / 1024 / 1024) as usize;
    let available_ram_gb = available_ram_gb();
    let num_cpus = num_cpus::get_physical();
    info!("Available RAM (GB): {available_ram_gb}");
    info!("Num physical CPUs: {num_cpus}");

    let smithy_parallelism = args.smithy_parallelism.unwrap_or(1);
    let sync_threads = if let Some(sync_threads) = args.sync_threads {
        sync_threads
    } else {
@@ -84,8 +106,14 @@ fn main() -> Result<()> {
        &args.aws_doc_sdk_examples.canonicalize().context(here!())?,
        &args.aws_sdk_rust.canonicalize().context(here!())?,
        &args.smithy_rs.canonicalize().context(here!())?,
        smithy_parallelism,
        args.codegen_settings(),
    )?;

    sync.sync().map_err(|e| e.context("The sync failed"))
}

/// Returns the amount of free system memory, in whole gigabytes (rounded down).
///
/// # Panics
///
/// Panics if the memory statistics cannot be read from the platform.
fn available_ram_gb() -> usize {
    const BYTES_PER_GB: u64 = 1024 * 1024 * 1024;

    let free_bytes = System::new()
        .memory()
        .expect("determine free memory")
        .free
        .as_u64();
    (free_bytes / BYTES_PER_GB) as usize
}
+91 −9
Original line number Diff line number Diff line
@@ -3,7 +3,7 @@
 * SPDX-License-Identifier: Apache-2.0
 */

use self::gen::{DefaultSdkGenerator, SdkGenerator};
use self::gen::{CodeGenSettings, DefaultSdkGenerator, SdkGenerator};
use crate::fs::{DefaultFs, Fs};
use crate::git::{Commit, Git, GitCLI};
use crate::versions::{DefaultVersions, Versions, VersionsManifest};
@@ -11,7 +11,12 @@ use anyhow::{bail, Context, Result};
use smithy_rs_tool_common::macros::here;
use std::collections::BTreeSet;
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::mpsc::{Sender, TryRecvError};
use std::sync::Arc;
use std::thread;
use std::time::Duration;
use systemstat::{ByteSize, Platform, System};
use tracing::{debug, info, info_span};
use tracing_attributes::instrument;

@@ -21,6 +26,73 @@ pub const BOT_NAME: &str = "AWS SDK Rust Bot";
pub const BOT_EMAIL: &str = "aws-sdk-rust-primary@amazon.com";
pub const MODEL_STASH_BRANCH_NAME: &str = "__sdk_sync__models_";

/// Shared counters describing how far the sync has progressed.
///
/// Both counters start at zero (via `Default`) and are read by the progress
/// logging thread while the sync updates them.
#[derive(Default)]
struct SyncProgress {
    /// Number of smithy-rs commits whose SDK generation has finished so far
    commits_completed: AtomicUsize,
    /// Total number of smithy-rs commits being synced
    total_commits: AtomicUsize,
}

/// Owner of the background thread that periodically logs sync progress.
///
/// Dropping this value signals the thread to stop and waits for it to exit.
struct ProgressThread {
    /// Join handle of the logging thread; taken (leaving `None`) when the value is dropped
    handle: Option<thread::JoinHandle<()>>,
    /// Channel used to tell the logging thread to stop
    tx: Sender<bool>,
}

impl ProgressThread {
    /// Spawns a background thread that logs sync progress and system resource usage.
    ///
    /// Roughly every 15 seconds the thread logs the completed/total commit counters
    /// from `progress`, the aggregate CPU use measured over that interval, and the
    /// used/total memory and swap. The thread stops once a message is received on the
    /// channel held by the returned `ProgressThread`, or once that channel disconnects;
    /// it logs one final time before exiting.
    ///
    /// Failing to read a system statistic never panics the thread: the affected
    /// value is reported as `error` in the log line instead.
    pub fn spawn(progress: Arc<SyncProgress>) -> ProgressThread {
        let (tx, rx) = std::sync::mpsc::channel();
        let handle = thread::spawn(move || {
            let mut done = false;
            let system = System::new();
            while !done {
                // Start a CPU measurement; it is completed after the wait below so that
                // the reported load covers the whole interval
                let cpu = system.cpu_load_aggregate().ok();
                // Sleep in one second steps so that a stop request is noticed promptly
                for _ in 0..15 {
                    thread::sleep(Duration::from_secs(1));
                    // Anything other than "no message yet" (a message or a disconnect) means stop
                    if !matches!(rx.try_recv(), Err(TryRecvError::Empty)) {
                        done = true;
                        break;
                    }
                }
                let cpu = if let Some(Ok(cpu)) = cpu.map(|cpu| cpu.done()) {
                    format!("{:.1}", 100.0 - cpu.idle * 100.0)
                } else {
                    "error".to_string()
                };
                // Progress logging is best-effort: report a failed memory read the same
                // way as a failed CPU read rather than panicking the logging thread
                let (memory_used, swap_used) = match system.memory_and_swap() {
                    Ok((memory, swap)) => (
                        Self::format_memory(memory.free, memory.total),
                        Self::format_memory(swap.free, swap.total),
                    ),
                    Err(_) => ("error".to_string(), "error".to_string()),
                };
                info!(
                    "Progress: smithy-rs commit {}/{}, cpu use: {}, memory used: {}, swap used: {}",
                    progress.commits_completed.load(Ordering::Relaxed),
                    progress.total_commits.load(Ordering::Relaxed),
                    cpu,
                    memory_used,
                    swap_used,
                );
            }
        });
        ProgressThread {
            handle: Some(handle),
            tx,
        }
    }

    /// Formats memory usage as `<used>GB/<total>GB` with three decimal places.
    ///
    /// The used amount is `total - free`, clamped at zero if `free` exceeds `total`.
    fn format_memory(free: ByteSize, total: ByteSize) -> String {
        let (free, total) = (free.as_u64(), total.as_u64());
        let format_part = |val: u64| format!("{:.3}GB", val as f64 / 1024.0 / 1024.0 / 1024.0);
        format!(
            "{}/{}",
            format_part(total.saturating_sub(free)),
            format_part(total)
        )
    }
}

impl Drop for ProgressThread {
    /// Asks the logging thread to stop and waits for it to finish.
    fn drop(&mut self) {
        // Best-effort stop request; a failed send is ignored
        let _ = self.tx.send(true);
        // Wait for the thread to exit, disregarding the join result
        if let Some(worker) = self.handle.take() {
            let _ = worker.join();
        }
    }
}

pub struct Sync {
    aws_doc_sdk_examples: Arc<dyn Git>,
    aws_sdk_rust: Arc<dyn Git>,
@@ -28,7 +100,8 @@ pub struct Sync {
    fs: Arc<dyn Fs>,
    versions: Arc<dyn Versions>,
    previous_versions_manifest: Arc<PathBuf>,
    smithy_parallelism: usize,
    codegen_settings: CodeGenSettings,
    progress: Arc<SyncProgress>,
    // Keep a reference to the temp directory so that it doesn't get cleaned up until the sync is complete
    _temp_dir: Arc<tempfile::TempDir>,
}
@@ -38,7 +111,7 @@ impl Sync {
        aws_doc_sdk_examples_path: &Path,
        aws_sdk_rust_path: &Path,
        smithy_rs_path: &Path,
        smithy_parallelism: usize,
        codegen_settings: CodeGenSettings,
    ) -> Result<Self> {
        let _temp_dir = Arc::new(tempfile::tempdir().context(here!("create temp dir"))?);
        let aws_sdk_rust = Arc::new(GitCLI::new(aws_sdk_rust_path)?);
@@ -58,7 +131,8 @@ impl Sync {
            fs,
            versions: Arc::new(DefaultVersions::new()),
            previous_versions_manifest,
            smithy_parallelism,
            codegen_settings,
            progress: Default::default(),
            _temp_dir,
        })
    }
@@ -78,13 +152,16 @@ impl Sync {
            fs: Arc::new(fs),
            versions: Arc::new(versions),
            previous_versions_manifest: Arc::new(PathBuf::from("doesnt-matter-for-tests")),
            smithy_parallelism: 1,
            codegen_settings: Default::default(),
            progress: Default::default(),
            _temp_dir: Arc::new(tempfile::tempdir().unwrap()),
        }
    }

    #[instrument(skip(self))]
    pub fn sync(&self) -> Result<()> {
        let _progress_thread = ProgressThread::spawn(self.progress.clone());

        info!("Loading versions.toml...");
        let versions = self
            .versions
@@ -162,7 +239,7 @@ impl Sync {
            self.fs.clone(),
            None,
            self.smithy_rs.path(),
            self.smithy_parallelism,
            &self.codegen_settings,
        )
        .context(here!())?;
        let generated_sdk = sdk_gen.generate_sdk().context(here!())?;
@@ -203,6 +280,9 @@ impl Sync {
        }

        info!("Syncing {} commit(s)...", commits.len());
        self.progress
            .total_commits
            .store(commits.len(), Ordering::Relaxed);

        // Generate code in parallel for each individual commit
        let code_gen_paths = {
@@ -211,7 +291,8 @@ impl Sync {
            let examples_revision = versions.aws_doc_sdk_examples_revision.clone();
            let examples_path = self.aws_sdk_rust.path().join("examples");
            let fs = self.fs.clone();
            let smithy_parallelism = self.smithy_parallelism;
            let codegen_settings = self.codegen_settings.clone();
            let progress = self.progress.clone();

            commits
                .par_iter()
@@ -235,10 +316,11 @@ impl Sync {
                        fs.clone(),
                        Some(commit.hash.clone()),
                        smithy_rs.path(),
                        smithy_parallelism,
                        &codegen_settings,
                    )
                    .context(here!())?;
                    let sdk_path = sdk_gen.generate_sdk().context(here!())?;
                    progress.commits_completed.fetch_add(1, Ordering::Relaxed);
                    Ok((commit, sdk_path))
                })
                .collect::<Result<Vec<_>>>()?
@@ -287,7 +369,7 @@ impl Sync {
            self.fs.clone(),
            None,
            self.smithy_rs.path(),
            self.smithy_parallelism,
            &self.codegen_settings,
        )
        .context(here!())?;
        let generated_sdk = sdk_gen.generate_sdk().context(here!())?;
+53 −13
Original line number Diff line number Diff line
@@ -13,6 +13,23 @@ use std::process::Command;
use std::sync::Arc;
use tracing::{info, instrument};

/// Tunable settings for running SDK code generation.
#[derive(Clone, Debug)]
pub struct CodeGenSettings {
    /// Parallelism value handed to Smithy codegen
    pub smithy_parallelism: usize,
    /// Maximum Java heap size, in megabytes, for the Gradle JVM
    pub max_gradle_heap_megabytes: usize,
    /// Maximum Java metaspace size, in megabytes, for the Gradle JVM
    pub max_gradle_metaspace_megabytes: usize,
}

impl Default for CodeGenSettings {
    /// Single-threaded Smithy codegen with 512 MB each of Gradle heap and metaspace.
    fn default() -> Self {
        const DEFAULT_SMITHY_PARALLELISM: usize = 1;
        const DEFAULT_MAX_HEAP_MEGABYTES: usize = 512;
        const DEFAULT_MAX_METASPACE_MEGABYTES: usize = 512;

        CodeGenSettings {
            smithy_parallelism: DEFAULT_SMITHY_PARALLELISM,
            max_gradle_heap_megabytes: DEFAULT_MAX_HEAP_MEGABYTES,
            max_gradle_metaspace_megabytes: DEFAULT_MAX_METASPACE_MEGABYTES,
        }
    }
}

pub struct GeneratedSdk {
    path: PathBuf,
    // Keep a reference to the temp directory so that it doesn't get cleaned up
@@ -48,7 +65,7 @@ pub struct DefaultSdkGenerator {
    examples_path: PathBuf,
    fs: Arc<dyn Fs>,
    smithy_rs: Box<dyn Git>,
    smithy_parallelism: usize,
    settings: CodeGenSettings,
    temp_dir: Arc<tempfile::TempDir>,
}

@@ -61,7 +78,7 @@ impl DefaultSdkGenerator {
        fs: Arc<dyn Fs>,
        reset_to_commit: Option<CommitHash>,
        original_smithy_rs_path: &Path,
        smithy_parallelism: usize,
        settings: &CodeGenSettings,
    ) -> Result<Self> {
        let temp_dir = tempfile::tempdir().context(here!("create temp dir"))?;
        GitCLI::new(original_smithy_rs_path)
@@ -82,7 +99,7 @@ impl DefaultSdkGenerator {
            examples_path: examples_path.into(),
            fs,
            smithy_rs: Box::new(smithy_rs) as Box<dyn Git>,
            smithy_parallelism,
            settings: settings.clone(),
            temp_dir: Arc::new(temp_dir),
        })
    }
@@ -110,9 +127,7 @@ impl DefaultSdkGenerator {
        Ok(())
    }

    /// Runs `aws:sdk:assemble` target with property `aws.fullsdk=true` set
    #[instrument(skip(self))]
    fn aws_sdk_assemble(&self) -> Result<()> {
    fn do_aws_sdk_assemble(&self) -> Result<()> {
        info!("Generating the SDK...");

        let mut command = Command::new("./gradlew");
@@ -126,14 +141,19 @@ impl DefaultSdkGenerator {
        command.arg(format!(
            "-Dorg.gradle.jvmargs={}",
            [
                // Retain default Gradle JVM args
                "-Xmx512m",
                "-XX:MaxMetaspaceSize=256m",
                // Configure Gradle JVM memory settings
                format!("-Xmx{}m", self.settings.max_gradle_heap_megabytes),
                format!(
                    "-XX:MaxMetaspaceSize={}m",
                    self.settings.max_gradle_metaspace_megabytes
                ),
                "-XX:+UseSerialGC".to_string(),
                "-verbose:gc".to_string(),
                // Disable incremental compilation and caching since we're compiling exactly once per commit
                "-Dkotlin.incremental=false",
                "-Dkotlin.caching.enabled=false",
                "-Dkotlin.incremental=false".to_string(),
                "-Dkotlin.caching.enabled=false".to_string(),
                // Run the compiler in the gradle daemon process to avoid more forking thrash
                "-Dkotlin.compiler.execution.strategy=in-process"
                "-Dkotlin.compiler.execution.strategy=in-process".to_string()
            ]
            .join(" ")
        ));
@@ -141,7 +161,7 @@ impl DefaultSdkGenerator {
        // Disable Smithy's codegen parallelism in favor of sdk-sync parallelism
        command.arg(format!(
            "-Djava.util.concurrent.ForkJoinPool.common.parallelism={}",
            self.smithy_parallelism
            self.settings.smithy_parallelism
        ));

        command.arg("-Paws.fullsdk=true");
@@ -162,6 +182,26 @@ impl DefaultSdkGenerator {
        handle_failure("aws_sdk_assemble", &output)?;
        Ok(())
    }

    /// Runs `aws:sdk:assemble` target with property `aws.fullsdk=true` set
    ///
    /// When the build fails, the output of `ps -ef` is logged before the original
    /// error is returned, to help diagnose a possible process leak.
    #[instrument(skip(self))]
    fn aws_sdk_assemble(&self) -> Result<()> {
        let outcome = self.do_aws_sdk_assemble();
        if outcome.is_ok() {
            return outcome;
        }

        // The build failed: snapshot the process table to give more insight into
        // whether there is a process leak going on
        match Command::new("ps").arg("-ef").output() {
            Err(err) => info!(
                "Failed to get running processes shortly after failure: {}",
                err
            ),
            Ok(output) => {
                let process_listing = String::from_utf8_lossy(&output.stdout);
                info!(
                    "Running processes shortly after failure:\n---\n{}---\n",
                    process_listing
                )
            }
        }
        outcome
    }
}

impl SdkGenerator for DefaultSdkGenerator {
Loading