Unverified Commit e78d40f0 authored by John DiSanti's avatar John DiSanti Committed by GitHub
Browse files

Add throttling retry to Docker build image pull (#1297)

parent 3aa1e36b
Loading
Loading
Loading
Loading
+337 −48
Original line number Diff line number Diff line
#!/bin/bash
#!/usr/bin/env python3
#
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0.
#

set -uexo pipefail
from enum import Enum
from unittest.mock import call, MagicMock
import os
import shlex
import subprocess
import sys
import time
import unittest

REMOTE_BASE_IMAGE_NAME = "public.ecr.aws/w0m4q9l7/github-awslabs-smithy-rs-ci"
LOCAL_BASE_IMAGE_NAME = "smithy-rs-base-image"
BUILD_IMAGE_NAME = "smithy-rs-build-image"
LOCAL_TAG = "local"

C_YELLOW = '\033[1;33m'
C_RESET = '\033[0m'

START_PATH="$(pwd)"
SCRIPT_PATH="$(realpath "$(dirname "$0")")"
cd "${SCRIPT_PATH}"

TOOLS_PATH="$(git rev-parse --show-toplevel)/tools"
def announce(message):
    print(f"{C_YELLOW}{message}{C_RESET}")


class DockerPullResult(Enum):
    SUCCESS = 1
    ERROR_THROTTLED = 2
    NOT_FOUND = 3


# Script context
class Context:
    def __init__(self, start_path, script_path, tools_path, user_id, image_tag, allow_local_build, github_actions):
        self.start_path = start_path
        self.script_path = script_path
        self.tools_path = tools_path
        self.user_id = user_id
        self.image_tag = image_tag
        self.allow_local_build = allow_local_build
        self.github_actions = github_actions

    @staticmethod
    def default():
        start_path = os.path.realpath(os.curdir)
        script_path = os.path.dirname(os.path.realpath(__file__))
        tools_path = get_cmd_output("git rev-parse --show-toplevel", cwd=script_path)[1] + "/tools"
        user_id = get_cmd_output("id -u")[1]
        image_tag = get_cmd_output("./ci-build/tools-hash", cwd=tools_path)[1]
        allow_local_build = os.getenv("ALLOW_LOCAL_BUILD") != "false"
        github_actions = os.getenv("GITHUB_ACTIONS") == "true"
        print(f"Start path: {start_path}")
        print(f"Script path: {script_path}")
        print(f"Tools path: {tools_path}")
        print(f"User ID: {user_id}")
        print(f"Required base image tag: {image_tag}")
        print(f"Allow local build: {allow_local_build}")
        print(f"Running in GitHub Actions: {github_actions}")
        return Context(start_path, script_path, tools_path, user_id, image_tag, allow_local_build, github_actions)


# Mockable shell commands
class Shell:
    # Returns True if the given `image_name` and `image_tag` exist locally
    def docker_image_exists_locally(self, image_name, image_tag):
        (status, _) = get_cmd_output(f"docker inspect \"{image_name}:{image_tag}\"", check=False)
        return status == 0

    # Pulls the requested `image_name` with `image_tag`. Returns `DockerPullResult`.
    def docker_pull(self, image_name, image_tag):
        (status, output) = get_cmd_output(f"docker pull \"{image_name}:{image_tag}\"", check=False)
        if status == 0:
            return DockerPullResult.SUCCESS
        elif "toomanyrequests: Rate exceeded" in output:
            return DockerPullResult.ERROR_THROTTLED
        else:
            return DockerPullResult.NOT_FOUND

    # Builds the base image with the Dockerfile in `path` and tags with with `image_tag`
    def docker_build_base_image(self, image_tag, path):
        run(f"docker build -t \"smithy-rs-base-image:{image_tag}\" .", cwd=path)

# The image tag is the SHA-1 hash of the `tools` directory when taking the `.gitignore` into consideration
IMAGE_TAG=$("${SCRIPT_PATH}/tools-hash")
IMAGE_NAME="public.ecr.aws/w0m4q9l7/github-awslabs-smithy-rs-ci"
    # Builds the local build image
    def docker_build_build_image(self, user_id, script_path):
        run(
            f"docker build -t smithy-rs-build-image --file add-local-user.dockerfile --build-arg=USER_ID={user_id} .",
            cwd=script_path
        )

    # Saves the Docker image named `image_name` with `image_tag` to `output_path`
    def docker_save(self, image_name, image_tag, output_path):
        run(f"docker save -o \"{output_path}\" \"{image_name}:{image_tag}\"")

    # Tags an image with a new image name and tag
    def docker_tag(self, image_name, image_tag, new_image_name, new_image_tag):
        run(f"docker tag \"{image_name}:{image_tag}\" \"{new_image_name}:{new_image_tag}\"")


# Pulls a Docker image and retries if it gets throttled
def docker_pull_with_retry(shell, image_name, image_tag, sleep_time=30):
    for attempt in range(1, 5):
        announce(f"Attempting to pull remote image {image_name}:{image_tag} (attempt {attempt})...")
        result = shell.docker_pull(image_name, image_tag)
        if result == DockerPullResult.ERROR_THROTTLED:
            announce("Docker pull failed due to throttling. Waiting and trying again...")
            time.sleep(sleep_time)
        else:
            return result
    # Hit max retries; the image probably exists, but we are getting throttled hard. Fail.
    announce("Image pulling throttled for too many attempts. The remote image might exist, but we can't get it.")
    return DockerPullResult.ERROR_THROTTLED


# Runs a shell command
def run(command, cwd=None):
    subprocess.run(shlex.split(command), stdout=sys.stderr, stderr=sys.stderr, cwd=cwd, check=True)


# Returns (status, output) from a shell command
def get_cmd_output(command, cwd=None, check=True):
    result = subprocess.run(
        shlex.split(command),
        capture_output=True,
        check=check,
        cwd=cwd
    )
    return (result.returncode, result.stdout.decode("utf-8").strip())


def acquire_build_image(context=Context.default(), shell=Shell()):
    # If the image doesn't already exist locally, then look remotely
if ! docker inspect "smithy-rs-base-image:${IMAGE_TAG}" >/dev/null; then
  echo -e "${C_YELLOW}Attempting to pull remote image ${IMAGE_NAME}:${IMAGE_TAG}...${C_RESET}"
  if ! docker pull "${IMAGE_NAME}:${IMAGE_TAG}" >/dev/null; then
      # If there is no remote image with the matching IMAGE_TAG, then build one locally
      if [[ "${ALLOW_LOCAL_BUILD:-}" == "false" ]]; then
          echo -e "${C_YELLOW}Local build turned off by ALLOW_LOCAL_BUILD env var. Aborting.${C_RESET}"
          exit 1
      fi

      echo -e "${C_YELLOW}Failed to pull remote image, which can happen if it doesn't exist. Building a new image locally...${C_RESET}"

      pushd "${TOOLS_PATH}" &>/dev/null
      docker build -t "smithy-rs-base-image:${IMAGE_TAG}" .
      popd  &>/dev/null

      if [[ "${GITHUB_ACTIONS:-}" == "true" ]]; then
          echo -e "${C_YELLOW}Saving base image for use in later jobs...${C_RESET}"
          docker save -o "${START_PATH}/smithy-rs-base-image" "smithy-rs-base-image:${IMAGE_TAG}"
      fi
  else
      # Otherwise, we have successfully pulled the remote image
      echo -e "${C_YELLOW}Successfully pulled remote image!${C_RESET}"
      docker tag "${IMAGE_NAME}:${IMAGE_TAG}" "smithy-rs-base-image:${IMAGE_TAG}"
  fi
else
  echo -e "${C_YELLOW}Base image found locally! No retrieval or rebuild necessary.${C_RESET}"
fi

echo -e "${C_YELLOW}Creating local build image...${C_RESET}"
docker tag "smithy-rs-base-image:${IMAGE_TAG}" smithy-rs-base-image:local
docker build -t smithy-rs-build-image --file add-local-user.dockerfile --build-arg=USER_ID="$(id -u)" .
    if not shell.docker_image_exists_locally(LOCAL_BASE_IMAGE_NAME, context.image_tag):
        announce("Base image not found locally.")
        if docker_pull_with_retry(shell, REMOTE_BASE_IMAGE_NAME, context.image_tag) != DockerPullResult.SUCCESS:
            if not context.allow_local_build:
                announce("Local build turned off by ALLOW_LOCAL_BUILD env var. Aborting.")
                return 1

            announce("Failed to pull remote image, which can happen if it doesn't exist. Building a new image locally.")
            shell.docker_build_base_image(context.image_tag, context.tools_path)

            if context.github_actions:
                announce("Saving base image for use in later jobs...")
                shell.docker_save(
                    LOCAL_BASE_IMAGE_NAME,
                    context.image_tag,
                    context.start_path + "/smithy-rs-base-image"
                )
        else:
            announce("Successfully pulled remote image!")
            shell.docker_tag(REMOTE_BASE_IMAGE_NAME, context.image_tag, LOCAL_BASE_IMAGE_NAME, context.image_tag)
    else:
        announce("Base image found locally! No retrieval or rebuild necessary.")

    announce("Creating local build image...")
    shell.docker_tag(LOCAL_BASE_IMAGE_NAME, context.image_tag, LOCAL_BASE_IMAGE_NAME, LOCAL_TAG)
    shell.docker_build_build_image(context.user_id, context.script_path)
    return 0


class SelfTest(unittest.TestCase):
    def test_context(self, allow_local_build=False, github_actions=False):
        return Context(
            start_path="/tmp/test/start-path",
            script_path="/tmp/test/script-path",
            tools_path="/tmp/test/tools-path",
            user_id="123",
            image_tag="someimagetag",
            allow_local_build=allow_local_build,
            github_actions=github_actions
        )

    def mock_shell(self):
        shell = Shell()
        shell.docker_build_base_image = MagicMock()
        shell.docker_build_build_image = MagicMock()
        shell.docker_image_exists_locally = MagicMock()
        shell.docker_pull = MagicMock()
        shell.docker_save = MagicMock()
        shell.docker_tag = MagicMock()
        return shell

    def test_retry_immediate_success(self):
        shell = self.mock_shell()
        shell.docker_pull.side_effect = [DockerPullResult.SUCCESS]
        self.assertEqual(
            DockerPullResult.SUCCESS,
            docker_pull_with_retry(shell, "test-image", "test-image-tag", sleep_time=0)
        )

    def test_retry_immediate_not_found(self):
        shell = self.mock_shell()
        shell.docker_pull.side_effect = [DockerPullResult.NOT_FOUND]
        self.assertEqual(
            DockerPullResult.NOT_FOUND,
            docker_pull_with_retry(shell, "test-image", "test-image-tag", sleep_time=0)
        )

    def test_retry_throttling_then_success(self):
        shell = self.mock_shell()
        shell.docker_pull.side_effect = [
            DockerPullResult.ERROR_THROTTLED,
            DockerPullResult.ERROR_THROTTLED,
            DockerPullResult.SUCCESS
        ]
        self.assertEqual(
            DockerPullResult.SUCCESS,
            docker_pull_with_retry(shell, "test-image", "test-image-tag", sleep_time=0)
        )

    def test_retry_throttling_then_not_found(self):
        shell = self.mock_shell()
        shell.docker_pull.side_effect = [
            DockerPullResult.ERROR_THROTTLED,
            DockerPullResult.NOT_FOUND
        ]
        self.assertEqual(
            DockerPullResult.NOT_FOUND,
            docker_pull_with_retry(shell, "test-image", "test-image-tag", sleep_time=0)
        )

    def test_retry_max_attempts(self):
        shell = self.mock_shell()
        shell.docker_pull.side_effect = [
            DockerPullResult.ERROR_THROTTLED,
            DockerPullResult.ERROR_THROTTLED,
            DockerPullResult.ERROR_THROTTLED,
            DockerPullResult.ERROR_THROTTLED,
            DockerPullResult.ERROR_THROTTLED,
        ]
        self.assertEqual(
            DockerPullResult.ERROR_THROTTLED,
            docker_pull_with_retry(shell, "test-image", "test-image-tag", sleep_time=0)
        )

    # When: the base image already exists locally with the right image tag
    # It should: build a local build image using that local base image
    def test_image_exists_locally_already(self):
        shell = self.mock_shell()
        shell.docker_image_exists_locally.side_effect = [True]

        self.assertEqual(0, acquire_build_image(self.test_context(), shell))

        shell.docker_image_exists_locally.assert_called_once()
        shell.docker_tag.assert_called_with(LOCAL_BASE_IMAGE_NAME, "someimagetag", LOCAL_BASE_IMAGE_NAME, LOCAL_TAG)
        shell.docker_build_build_image.assert_called_with("123", "/tmp/test/script-path")

    # When:
    #  - the base image doesn't exist locally
    #  - the base image doesn't exist remotely
    #  - local builds are allowed
    #  - NOT running in GitHub Actions
    # It should: build a local image from scratch and NOT save it to file
    def test_image_local_build(self):
        context = self.test_context(allow_local_build=True)
        shell = self.mock_shell()
        shell.docker_image_exists_locally.side_effect = [False]
        shell.docker_pull.side_effect = [DockerPullResult.NOT_FOUND]

        self.assertEqual(0, acquire_build_image(context, shell))
        shell.docker_image_exists_locally.assert_called_once()
        shell.docker_build_base_image.assert_called_with("someimagetag", "/tmp/test/tools-path")
        shell.docker_save.assert_not_called()
        shell.docker_tag.assert_called_with(LOCAL_BASE_IMAGE_NAME, "someimagetag", LOCAL_BASE_IMAGE_NAME, LOCAL_TAG)
        shell.docker_build_build_image.assert_called_with("123", "/tmp/test/script-path")

    # When:
    #  - the base image doesn't exist locally
    #  - the base image doesn't exist remotely
    #  - local builds are allowed
    #  - running in GitHub Actions
    # It should: build a local image from scratch and save it to file
    def test_image_local_build_github_actions(self):
        context = self.test_context(allow_local_build=True, github_actions=True)
        shell = self.mock_shell()
        shell.docker_image_exists_locally.side_effect = [False]
        shell.docker_pull.side_effect = [DockerPullResult.NOT_FOUND]

        self.assertEqual(0, acquire_build_image(context, shell))
        shell.docker_image_exists_locally.assert_called_once()
        shell.docker_build_base_image.assert_called_with("someimagetag", "/tmp/test/tools-path")
        shell.docker_save.assert_called_with(
            LOCAL_BASE_IMAGE_NAME,
            "someimagetag",
            "/tmp/test/start-path/smithy-rs-base-image"
        )
        shell.docker_tag.assert_called_with(LOCAL_BASE_IMAGE_NAME, "someimagetag", LOCAL_BASE_IMAGE_NAME, LOCAL_TAG)
        shell.docker_build_build_image.assert_called_with("123", "/tmp/test/script-path")

    # When:
    #  - the base image doesn't exist locally
    #  - the base image doesn't exist remotely
    #  - local builds are NOT allowed
    # It should: fail since local builds are not allowed
    def test_image_fail_local_build_disabled(self):
        context = self.test_context(allow_local_build=False)
        shell = self.mock_shell()
        shell.docker_image_exists_locally.side_effect = [False]
        shell.docker_pull.side_effect = [DockerPullResult.NOT_FOUND]

        self.assertEqual(1, acquire_build_image(context, shell))
        shell.docker_image_exists_locally.assert_called_once()
        shell.docker_build_base_image.assert_not_called()
        shell.docker_save.assert_not_called()
        shell.docker_tag.assert_not_called()
        shell.docker_build_build_image.assert_not_called()

    # When:
    #  - the base image doesn't exist locally
    #  - the base image exists remotely
    # It should: pull the remote image and tag it
    def test_pull_remote_image(self):
        context = self.test_context(allow_local_build=False)
        shell = self.mock_shell()
        shell.docker_image_exists_locally.side_effect = [False]
        shell.docker_pull.side_effect = [DockerPullResult.SUCCESS]

        self.assertEqual(0, acquire_build_image(context, shell))
        shell.docker_image_exists_locally.assert_called_once()
        shell.docker_build_base_image.assert_not_called()
        shell.docker_save.assert_not_called()
        shell.docker_tag.assert_has_calls([
            call(REMOTE_BASE_IMAGE_NAME, "someimagetag", LOCAL_BASE_IMAGE_NAME, "someimagetag"),
            call(LOCAL_BASE_IMAGE_NAME, "someimagetag", LOCAL_BASE_IMAGE_NAME, LOCAL_TAG)
        ])
        shell.docker_build_build_image.assert_called_with("123", "/tmp/test/script-path")


def main():
    # Run unit tests if given `--self-test` argument
    if len(sys.argv) > 1 and sys.argv[1] == "--self-test":
        sys.argv.pop()
        unittest.main()
    else:
        sys.exit(acquire_build_image())


if __name__ == "__main__":
    main()