#!/usr/bin/env bash
# Build + push the baked SageMaker RL image to this account's private ECR (F3 §3.2).
#
# One-time (Admin). The one-shot smoke does NOT need this — use
#     python examples/gsm8k_grpo/run_sagemaker_launch.py --image dlc
# which runs on the stock DLC + source_dir with no local build. Use this script
# for the repeatable path (no per-job pip-install) and the DiLoCo N-replica
# executor (which passes ContainerEntrypoint and wants the framework baked in).
#
# Environment overrides (all optional):
#   REGION   AWS region of both registries      (default: us-west-2)
#   ACCOUNT  AWS account that owns the ECR repo  (default: 386931836011)
#   REPO     ECR repository name                 (default: composer-rl)
#   TAG      image tag to build and push         (default: smoke)
#
# NOTE: the DLC base is GPU/linux-amd64 (~15 GB). On an Apple-Silicon host you
# must cross-build: pass --platform linux/amd64 (set below). This is slow under
# emulation; prefer building on a linux/amd64 host or CodeBuild for real use.
set -euo pipefail

REGION="${REGION:-us-west-2}"
ACCOUNT="${ACCOUNT:-386931836011}"
REPO="${REPO:-composer-rl}"
TAG="${TAG:-smoke}"

# Account that hosts the DLC base image; we log in to it only to pull.
DLC_ACCOUNT="763104351884"
REGISTRY="${ACCOUNT}.dkr.ecr.${REGION}.amazonaws.com"
IMAGE="${REGISTRY}/${REPO}:${TAG}"

# The build context is the parent of the directory holding this script.
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

# Print a message to stderr and abort.
die() {
  printf '[ecr] error: %s\n' "$*" >&2
  exit 1
}

# Fail early with a clear message instead of a bare "command not found".
for required_tool in aws docker; do
  command -v "${required_tool}" >/dev/null 2>&1 \
    || die "required tool not found on PATH: ${required_tool}"
done

echo "[ecr] region=${REGION} image=${IMAGE}"

# 1. Ensure the ECR repo exists (idempotent).
# Only a "repository not found" answer triggers creation; any other describe
# failure (credentials, region, permissions) is reported rather than being
# hidden behind a create attempt.
if ! describe_err="$(aws ecr describe-repositories \
  --repository-names "${REPO}" --region "${REGION}" 2>&1 >/dev/null)"; then
  if [[ "${describe_err}" == *RepositoryNotFoundException* ]]; then
    aws ecr create-repository --repository-name "${REPO}" --region "${REGION}" >/dev/null
  else
    printf '%s\n' "${describe_err}" >&2
    die "could not describe ECR repository '${REPO}' in ${REGION}"
  fi
fi

# 2. Log in to BOTH the DLC registry (to pull the base) and our own (to push).
# The password travels over stdin so it never appears in argv.
aws ecr get-login-password --region "${REGION}" \
  | docker login --username AWS --password-stdin "${DLC_ACCOUNT}.dkr.ecr.${REGION}.amazonaws.com"
aws ecr get-login-password --region "${REGION}" \
  | docker login --username AWS --password-stdin "${REGISTRY}"

# 3. Build (cross-arch on Apple Silicon) + push.
docker build --platform linux/amd64 \
  -f "${REPO_ROOT}/docker/Dockerfile.sagemaker" \
  -t "${IMAGE}" "${REPO_ROOT}"
docker push "${IMAGE}"

echo "[ecr] pushed ${IMAGE}"
echo "[ecr] launch with: python examples/gsm8k_grpo/run_sagemaker_launch.py --image ${IMAGE}"