Unverified Commit 357415f3 authored by ysaito1001's avatar ysaito1001 Committed by GitHub
Browse files

Fix CI step for `cargo semver checks` (#4033)

## Motivation and Context
Fixes CI step for running `cargo semver checks`

## Description
The said CI step has been failing for the past three months. The most
recent failure observed in CI looks like the following, since we
[upgraded the version of `cargo-semver-checks` to 0.36
](https://github.com/smithy-lang/smithy-rs/pull/3936)
```
error: package `aws-runtime` is ambiguous: it is defined by in multiple manifests within the root path /home/build/workspace/smithy-rs/target/semver-checks/git-base/be980cd7049d9acdb38f68dd11c465b3a242733a

defined in:
  /home/build/workspace/smithy-rs/target/semver-checks/git-base/be980cd7049d9acdb38f68dd11c465b3a242733a/aws/rust-runtime/aws-runtime/Cargo.toml
  /home/build/workspace/smithy-rs/target/semver-checks/git-base/be980cd7049d9acdb38f68dd11c465b3a242733a/tmp-codegen-diff/aws-sdk/sdk/aws-runtime/Cargo.toml
```
Referring to the diagram in [a previous relevant
PR](https://github.com/smithy-lang/smithy-rs/pull/3272), we see that the
`tmp-codegen-diff` directory is used as [the output
directory](https://github.com/smithy-lang/smithy-rs/blob/e394ad8b099ea638e0f9c7549295aaebccc36a61/tools/ci-scripts/codegen-diff/diff_lib.py#L11)
by `diff_lib.py`. However, the current version of `cargo semver checks`
no longer supports duplicated crates within the same root directory
([PR](https://github.com/obi1kenobi/cargo-semver-checks/pull/887)).

This requires a solution where we maintain a single source of crate
layout to validate against `cargo-semver-checks` under `smithy-rs`. To
address this, this PR implements the following approach:
- (Bonus point) Reverts the changes made in [the previous relevant
PR](https://github.com/smithy-lang/smithy-rs/pull/3272).
- Updates `semver-checks.py` to stop using
`checkout_commit_and_generate` in `diff_lib.py`, which automatically
places both the runtime crates and the generated SDK crates into
`tmp-codegen-diff`, leading to conflicts with the crates in
`rust-runtime` and in `aws/rust-runtime`.
- When `semver-checks.py` creates the `base` and `current` branches, we
remove runtime crates from both `rust-runtime` and `aws/rust-runtime` to
uniquify the runtime crates in the `aws/sdk/build/aws-sdk/sdk`
directory. Note that the removal of crates only occurs in the `base` and
`current` branches and does not impact the main branch.
- Moves the `aws/sdk/build/aws-sdk` directory to the root of `smithy-rs`
for a reason explained in dd021d1.

## Testing
- CI passed `cargo semver checks`
- Removed [this enum
variant](https://github.com/smithy-lang/smithy-rs/blob/e394ad8b099ea638e0f9c7549295aaebccc36a61/rust-runtime/aws-smithy-runtime/src/client/sdk_feature.rs#L13)
intentionally to cause an API breakage, the step correctly detected it
```
--- failure enum_variant_missing: pub enum variant removed or renamed ---

Description:
A publicly-visible enum has at least one variant that is no longer available under its prior name. It may have been renamed or removed entirely.
        ref: https://doc.rust-lang.org/cargo/reference/semver.html#item-remove
       impl: https://github.com/obi1kenobi/cargo-semver-checks/tree/v0.36.0/src/lints/enum_variant_missing.ron

Failed in:
  variant BusinessMetric::GzipRequestCompression, previously in file /Users/awsaito/src/smithy-rs/target/semver-checks/git-base/d87d9ee34a13da0ad4c0cdcbfa2c74b7f98d278c/aws-sdk/sdk/aws-runtime/src/user_agent/metrics.rs:108
```

----

_By submitting this pull request, I confirm that you can use, modify,
copy, and redistribute this contribution, under the terms of your
choice._
parent 0f215bb7
Loading
Loading
Loading
Loading
+12 −15
Original line number Diff line number Diff line
@@ -26,7 +26,14 @@ def running_in_docker_build():
    return os.environ.get("SMITHY_RS_DOCKER_BUILD_IMAGE") == "1"


def checkout_commit_and_generate(revision_sha, branch_name, targets=None, preserve_aws_sdk_build=False):
def run_git_commit_as_github_action(revision_sha):
    get_cmd_output(f"git -c 'user.name=GitHub Action (generated code preview)' "
                   f"-c 'user.name={COMMIT_AUTHOR_NAME}' "
                   f"-c 'user.email={COMMIT_AUTHOR_EMAIL}' "
                   f"commit --no-verify -m 'Generated code for {revision_sha}' --allow-empty")


def checkout_commit_and_generate(revision_sha, branch_name, targets=None):
    if running_in_docker_build():
        eprint(f"Fetching base revision {revision_sha} from GitHub...")
        run(f"git fetch --no-tags --progress --no-recurse-submodules --depth=1 origin {revision_sha}")
@@ -34,10 +41,10 @@ def checkout_commit_and_generate(revision_sha, branch_name, targets=None, preser
    # Generate code for HEAD
    eprint(f"Creating temporary branch {branch_name} with generated code for {revision_sha}")
    run(f"git checkout {revision_sha} -B {branch_name}")
    generate_and_commit_generated_code(revision_sha, targets, preserve_aws_sdk_build)
    generate_and_commit_generated_code(revision_sha, targets)


def generate_and_commit_generated_code(revision_sha, targets=None, preserve_aws_sdk_build=False):
def generate_and_commit_generated_code(revision_sha, targets=None):
    targets = targets or [
        target_codegen_client,
        target_codegen_server,
@@ -56,10 +63,6 @@ def generate_and_commit_generated_code(revision_sha, targets=None, preserve_aws_
    get_cmd_output(f"rm -rf {OUTPUT_PATH}")
    get_cmd_output(f"mkdir {OUTPUT_PATH}")
    if target_aws_sdk in targets:
        # Compiling aws-config for semver checks baseline requires build artifacts to exist under aws/sdk/build
        if preserve_aws_sdk_build:
            get_cmd_output(f"cp -r aws/sdk/build/aws-sdk {OUTPUT_PATH}/")
        else:
        get_cmd_output(f"mv aws/sdk/build/aws-sdk {OUTPUT_PATH}/")
    for target in [target_codegen_client, target_codegen_server]:
        if target in targets:
@@ -93,13 +96,7 @@ def generate_and_commit_generated_code(revision_sha, targets=None, preserve_aws_
        f"xargs rm -f", shell=True)

    get_cmd_output(f"git add -f {OUTPUT_PATH}")
    if preserve_aws_sdk_build:
        get_cmd_output(f"git add -f aws/sdk/build")

    get_cmd_output(f"git -c 'user.name=GitHub Action (generated code preview)' "
                   f"-c 'user.name={COMMIT_AUTHOR_NAME}' "
                   f"-c 'user.email={COMMIT_AUTHOR_EMAIL}' "
                   f"commit --no-verify -m 'Generated code for {revision_sha}' --allow-empty")
    run_git_commit_as_github_action(revision_sha)


def make_diff(title, path_to_diff, base_commit_sha, head_commit_sha, suffix, whitespace):
+40 −7
Original line number Diff line number Diff line
@@ -4,12 +4,45 @@
#  SPDX-License-Identifier: Apache-2.0
import sys
import os
from diff_lib import get_cmd_output, get_cmd_status, eprint, running_in_docker_build, checkout_commit_and_generate, \
    OUTPUT_PATH

from diff_lib import get_cmd_output, get_cmd_status, eprint, run, run_git_commit_as_github_action, running_in_docker_build

CURRENT_BRANCH = 'current'
BASE_BRANCH = 'base'


def checkout_commit_and_generate(revision_sha, branch_name, repository_root):
    if running_in_docker_build():
        eprint(f"Fetching base revision {revision_sha} from GitHub...")
        run(f"git fetch --no-tags --progress --no-recurse-submodules --depth=1 origin {revision_sha}")

    # Generate code for HEAD
    eprint(f"Creating temporary branch {branch_name} with generated code for {revision_sha}")
    run(f"git checkout {revision_sha} -B {branch_name}")
    _generate_and_commit(revision_sha, repository_root)


def _generate_and_commit(revision_sha, repository_root):
    get_cmd_output(f"./gradlew --rerun-tasks aws:sdk:clean")
    get_cmd_output(f"./gradlew --rerun-tasks aws:sdk:assemble")

    # Remove runtime crates from the repository root to prevent `cargo-semver-checks` from failing
    # due to duplicate crates under the same root. See https://github.com/obi1kenobi/cargo-semver-checks/pull/887
    get_cmd_output(f"git rm -r {repository_root}/aws/rust-runtime")
    get_cmd_output(f"git rm -r {repository_root}/rust-runtime")

    # From this point forward, the single source of truth for the crate layout in `cargo-semver-checks`
    # is the `aws-sdk` located under the SDK's build directory.
    # However, if we commit `aws/sdk/build/aws-sdk` directly to the branch, path entries under `aws/sdk/build/`
    # will be ignored by `cargo-semver-checks`, as it uses `Walk` from the `ignore` crate.
    # https://github.com/obi1kenobi/cargo-semver-checks/blob/f55934264edbd4808fc8a7bdb9bc0350b1cc33db/src/rustdoc_gen.rs#L359
    # To address this, we need to relocate the build artifact to a directory not included in `.gitignore`,
    # and we’ve chosen the repository root arbitrarily.
    get_cmd_output(f"mv {repository_root}/aws/sdk/build/aws-sdk {repository_root}")
    get_cmd_output(f"git add -f {repository_root}/aws-sdk")

    run_git_commit_as_github_action(revision_sha)


# This script runs `cargo semver-checks` against a previous version of codegen
def main(skip_generation=False):
    if len(sys.argv) != 3:
@@ -27,10 +60,10 @@ def main(skip_generation=False):
        sys.exit(1)

    if not skip_generation:
        checkout_commit_and_generate(head_commit_sha, CURRENT_BRANCH, targets=['aws:sdk'])
        checkout_commit_and_generate(base_commit_sha, BASE_BRANCH, targets=['aws:sdk'], preserve_aws_sdk_build=True)
        checkout_commit_and_generate(head_commit_sha, CURRENT_BRANCH, repository_root)
        checkout_commit_and_generate(base_commit_sha, BASE_BRANCH, repository_root)
    get_cmd_output(f'git checkout {CURRENT_BRANCH}')
    sdk_directory = os.path.join(OUTPUT_PATH, 'aws-sdk', 'sdk')
    sdk_directory = os.path.join(repository_root, 'aws-sdk', 'sdk')
    os.chdir(sdk_directory)

    failures = []
@@ -41,7 +74,7 @@ def main(skip_generation=False):
        eprint(f'checking {path}...', end='')
        if path in deny_list:
            eprint(f"skipping {path} because it is in 'deny_list'")
        elif get_cmd_status(f'git cat-file -e base:{sdk_directory}/{path}/Cargo.toml') != 0:
        elif get_cmd_status(f'git cat-file -e base:./{path}/Cargo.toml') != 0:
            eprint(f'skipping {path} because it does not exist in base')
        else:
            (_, out, _) = get_cmd_output('cargo pkgid', cwd=path, quiet=True)