#!/usr/bin/env bash
# (Removed HuggingFace Spaces page scrape residue; this file is a bash script.)
# FoundationPose deployment script (optimized for HuggingFace)
set -e

# This script relies on bash-only features (process substitution, [[ ]],
# arrays); re-exec under bash when started from a plain POSIX shell.
case "${BASH_VERSION:-}" in
  "") exec /bin/bash "$0" "$@" ;;
esac

# Deployment configuration.
IMAGE_NAME_L1="gpue/foundationpose-base-l1"
IMAGE_NAME_L2="gpue/foundationpose-base-l2"
TAG="latest"
PLATFORM="linux/amd64"
HF_SPACE="gpue/foundationpose"
ENV_FILE=".env"

# Mirror all stdout/stderr into deploy.logs while still printing live.
exec > >(tee -a deploy.logs) 2>&1
printf '%s\n' \
  "===================================" \
  "FoundationPose Deployment" \
  "===================================" \
  ""

# Load tokens (HF credentials, etc.) from .env, auto-exporting every
# variable the file defines; warn and continue when the file is absent.
if [ ! -f "${ENV_FILE}" ]; then
  echo "Warning: ${ENV_FILE} not found"
else
  set -a
  # shellcheck disable=SC1090
  source "${ENV_FILE}"
  set +a
fi
# Bootstrap a local virtualenv that provides huggingface_hub (and the hf CLI)
# without polluting the system Python.
VENV_DIR=".deploy-venv"
PY_BIN="${VENV_DIR}/bin/python3"
HF_BIN="${VENV_DIR}/bin/hf"

# Create the venv only when its interpreter is missing.
[ -x "${PY_BIN}" ] || {
  echo "Creating deploy venv at ${VENV_DIR}..."
  python3 -m venv "${VENV_DIR}"
}

# Install huggingface_hub only when it is not importable yet.
"${PY_BIN}" -c "import huggingface_hub" >/dev/null 2>&1 || {
  echo "Installing huggingface_hub in deploy venv..."
  "${PY_BIN}" -m pip install --quiet huggingface_hub
}
# Hash helper for build gating: produce one SHA-256 digest over a list of
# files (path + content length + contents). Identical inputs => identical
# digest => the corresponding image job can be skipped.
#
# Fix: the previous version separated fields with b"\\0" — inside the quoted
# heredoc that is a literal backslash + '0', not a NUL byte, giving weak and
# ambiguous framing between path and contents. We now use a real NUL plus a
# length prefix, which is unambiguous. (Digest values change once, forcing a
# single rebuild on the next run.)
hash_files() {
"${PY_BIN}" - <<'PY' "$@"
import hashlib
import sys
from pathlib import Path

hasher = hashlib.sha256()
for arg in sys.argv[1:]:
    path = Path(arg)
    data = path.read_bytes()
    # NUL-separated, length-prefixed framing: path\0len\0contents.
    hasher.update(path.as_posix().encode("utf-8"))
    hasher.update(b"\0")
    hasher.update(str(len(data)).encode("ascii"))
    hasher.update(b"\0")
    hasher.update(data)
print(hasher.hexdigest())
PY
}
# Build gating: compare the current input hashes against those recorded on
# the last successful build and skip image jobs whose inputs are unchanged.
mkdir -p .deploy
L1_INPUTS=(Dockerfile.base)
L2_INPUTS=(Dockerfile.base download_weights.py)
L1_HASH=$(hash_files "${L1_INPUTS[@]}")
L2_HASH=$(hash_files "${L2_INPUTS[@]}")
LAST_L1_HASH_FILE=".deploy/last_l1.sha"
LAST_L2_HASH_FILE=".deploy/last_l2.sha"

SKIP_L1=0
SKIP_L2=0
if [ -f "${LAST_L1_HASH_FILE}" ]; then
  if [ "$(cat "${LAST_L1_HASH_FILE}")" = "${L1_HASH}" ]; then
    SKIP_L1=1
    echo "L1 inputs unchanged; skipping L1 image job."
  fi
fi
if [ -f "${LAST_L2_HASH_FILE}" ]; then
  if [ "$(cat "${LAST_L2_HASH_FILE}")" = "${L2_HASH}" ]; then
    SKIP_L2=1
    echo "L2 inputs unchanged; skipping L2 image job."
  fi
fi
# Make sure a git repository exists so a ref can be pushed for the HF job
# to build from.
if ! [ -d .git ]; then
  echo "Initializing git repository..."
  git init
  git remote add origin "https://huggingface.co/spaces/${HF_SPACE}"
  echo "✓ Git repository initialized"
  echo ""
fi
# Commit local changes before launching jobs so the pushed ref reflects the
# working tree. Only a fixed allow-list of build-relevant files is staged.
WORKTREE_STATUS=$(git status -s)
if [ -n "${WORKTREE_STATUS}" ]; then
  echo "Committing changes for job context..."
  git add Dockerfile Dockerfile.base requirements.txt deploy.sh app.py client.py estimator.py masks.py scripts/run_hf_image_job.py download_weights.py
  if ! git diff --cached --quiet; then
    git commit -m "Prepare job build context"
    echo "✓ Job context committed"
  else
    echo "No staged changes for job context"
  fi
fi
# When at least one image must be rebuilt, push HEAD to a timestamped
# throwaway ref the HF job can clone and build from.
JOB_REF=""
if [ "${SKIP_L1}" -ne 1 ] || [ "${SKIP_L2}" -ne 1 ]; then
  JOB_REF="job-build-$(date +%Y%m%d-%H%M%S)"
  echo "Pushing job ref: ${JOB_REF}"
  git push "https://huggingface.co/spaces/${HF_SPACE}" "HEAD:${JOB_REF}" --force
  echo "✓ Job ref pushed"
  echo ""
fi
# ---------------------------------------------------------------------------
# Stage 1: build the L1 base image on HuggingFace Jobs infrastructure.
# Launches scripts/run_hf_image_job.py against the pushed JOB_REF, streams
# the job's logs, then polls its stage until it reaches a terminal state.
# ---------------------------------------------------------------------------
if [ "${SKIP_L1}" -eq 0 ]; then
  echo "Stage 1: Building L1 base image via HF Job"
  echo "Platform: ${PLATFORM}"
  echo "Image: ${IMAGE_NAME_L1}:${TAG}"
  echo ""
  # NOTE(review): with `set -e` but no `pipefail`, a launcher failure is
  # masked by `tee`; it is only caught indirectly below when no job id can
  # be parsed from the output.
  JOB_OUTPUT=$("${PY_BIN}" scripts/run_hf_image_job.py \
    --image-name "${IMAGE_NAME_L1}" \
    --tag "${TAG}" \
    --platform "${PLATFORM}" \
    --dockerfile "Dockerfile.base" \
    --target "foundationpose-base-l1" \
    --flavor "l40sx1" \
    --git-repo "https://huggingface.co/spaces/${HF_SPACE}" \
    --git-ref "${JOB_REF}" 2>&1 | tee /tmp/hf_image_job.log)
  # The launcher is expected to print a line like "Job ID: <id>"; awk grabs
  # the third whitespace-separated field.
  JOB_ID=$(echo "${JOB_OUTPUT}" | awk '/Job ID:/ {print $3}')
  if [ -z "${JOB_ID}" ]; then
    echo "Warning: Could not parse HF job id. See /tmp/hf_image_job.log"
  else
    echo "Following job logs until completion..."
    if [ -x "${HF_BIN}" ]; then
      # Stream `hf jobs logs` in the background while polling
      # `hf jobs inspect` every 15 s; stop once the job is terminal.
      HF_BIN_PATH="${HF_BIN}" JOB_ID="${JOB_ID}" "${PY_BIN}" - <<'PY'
import json
import os
import subprocess
import sys
import time

hf = os.environ["HF_BIN_PATH"]
job_id = os.environ["JOB_ID"]
# Background log streamer; inherits our stdout/stderr.
log_proc = subprocess.Popen([hf, "jobs", "logs", job_id], stdout=sys.stdout, stderr=sys.stderr)
try:
    while True:
        inspect = subprocess.run([hf, "jobs", "inspect", job_id], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True)
        if inspect.returncode == 0:
            try:
                # `hf jobs inspect` appears to emit a JSON array; read
                # status.stage from the first entry.
                data = json.loads(inspect.stdout)[0]
                stage = (data.get("status") or {}).get("stage", "UNKNOWN")
            except Exception:
                stage = "UNKNOWN"
        else:
            stage = "UNKNOWN"
        # Accept several spellings of terminal stages across CLI versions.
        if stage in {"SUCCESS","SUCCEEDED","COMPLETED","DONE","FAILED","ERROR","CANCELED","CANCELLED"}:
            break
        time.sleep(15)
finally:
    # Always stop the log streamer, escalating terminate -> kill.
    log_proc.terminate()
    try:
        log_proc.wait(timeout=5)
    except Exception:
        log_proc.kill()
PY
      echo ""
      echo "Job status:"
      "${HF_BIN}" jobs inspect "${JOB_ID}" || true
    else
      echo "hf CLI not available; job logs skipped"
    fi
  fi
  if [ -n "${JOB_ID}" ] && [ -x "${HF_BIN}" ]; then
    echo ""
    echo "Waiting for L1 image build job to complete..."
    # Poll up to 40 x 30 s (~20 min). On success, record the input hash so
    # the next run can skip this stage; on failure, abort the deploy.
    # NOTE(review): if the loop exhausts without reaching a terminal stage,
    # the script falls through and continues without error.
    for i in $(seq 1 40); do
      JOB_STAGE=$("${HF_BIN}" jobs inspect "${JOB_ID}" | python3 -c "import sys, json; data=json.load(sys.stdin)[0]; print(data.get('status', {}).get('stage', 'UNKNOWN'))" 2>/dev/null || echo "UNKNOWN")
      echo " Job stage: ${JOB_STAGE}"
      if [[ "${JOB_STAGE}" =~ ^(SUCCESS|SUCCEEDED|COMPLETED|DONE)$ ]]; then
        echo "✓ L1 image build job completed"
        echo "${L1_HASH}" > "${LAST_L1_HASH_FILE}"
        break
      elif [[ "${JOB_STAGE}" =~ ^(FAILED|ERROR|CANCELED|CANCELLED)$ ]]; then
        echo "✗ Image build job failed: ${JOB_STAGE}"
        exit 1
      else
        sleep 30
      fi
    done
  fi
fi
echo ""
# ---------------------------------------------------------------------------
# Stage 2: build the L2 base image via an HF Job. Same launch / log-follow /
# poll structure as Stage 1, with a longer wait (60 x 30 s, ~30 min).
# ---------------------------------------------------------------------------
if [ "${SKIP_L2}" -eq 0 ]; then
  echo "Stage 2: Building L2 base image via HF Job"
  echo ""
  # NOTE(review): as in Stage 1, `tee` masks the launcher's exit status.
  JOB_OUTPUT_L2=$("${PY_BIN}" scripts/run_hf_image_job.py \
    --image-name "${IMAGE_NAME_L2}" \
    --tag "${TAG}" \
    --platform "${PLATFORM}" \
    --dockerfile "Dockerfile.base" \
    --target "foundationpose-base-l2" \
    --flavor "l40sx1" \
    --git-repo "https://huggingface.co/spaces/${HF_SPACE}" \
    --git-ref "${JOB_REF}" 2>&1 | tee /tmp/hf_image_job_l2.log)
  # Parse "Job ID: <id>" out of the launcher output.
  JOB_ID_L2=$(echo "${JOB_OUTPUT_L2}" | awk '/Job ID:/ {print $3}')
  if [ -z "${JOB_ID_L2}" ]; then
    echo "Warning: Could not parse HF job id for L2. See /tmp/hf_image_job_l2.log"
  else
    echo "Following L2 job logs until completion..."
    if [ -x "${HF_BIN}" ]; then
      # Stream logs while polling job stage every 15 s until terminal.
      HF_BIN_PATH="${HF_BIN}" JOB_ID="${JOB_ID_L2}" "${PY_BIN}" - <<'PY'
import json
import os
import subprocess
import sys
import time

hf = os.environ["HF_BIN_PATH"]
job_id = os.environ["JOB_ID"]
# Background log streamer; inherits our stdout/stderr.
log_proc = subprocess.Popen([hf, "jobs", "logs", job_id], stdout=sys.stdout, stderr=sys.stderr)
try:
    while True:
        inspect = subprocess.run([hf, "jobs", "inspect", job_id], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True)
        if inspect.returncode == 0:
            try:
                # First entry of the JSON array carries status.stage.
                data = json.loads(inspect.stdout)[0]
                stage = (data.get("status") or {}).get("stage", "UNKNOWN")
            except Exception:
                stage = "UNKNOWN"
        else:
            stage = "UNKNOWN"
        # Accept several spellings of terminal stages across CLI versions.
        if stage in {"SUCCESS","SUCCEEDED","COMPLETED","DONE","FAILED","ERROR","CANCELED","CANCELLED"}:
            break
        time.sleep(15)
finally:
    # Always stop the log streamer, escalating terminate -> kill.
    log_proc.terminate()
    try:
        log_proc.wait(timeout=5)
    except Exception:
        log_proc.kill()
PY
      echo ""
      echo "L2 job status:"
      "${HF_BIN}" jobs inspect "${JOB_ID_L2}" || true
    else
      echo "hf CLI not available; job logs skipped"
    fi
  fi
  if [ -n "${JOB_ID_L2}" ] && [ -x "${HF_BIN}" ]; then
    echo ""
    echo "Waiting for L2 image build job to complete..."
    # Poll up to 60 x 30 s. Record the L2 input hash on success; abort the
    # deploy on a failed/cancelled job.
    # NOTE(review): loop exhaustion falls through silently, as in Stage 1.
    for i in $(seq 1 60); do
      JOB_STAGE=$("${HF_BIN}" jobs inspect "${JOB_ID_L2}" | python3 -c "import sys, json; data=json.load(sys.stdin)[0]; print(data.get('status', {}).get('stage', 'UNKNOWN'))" 2>/dev/null || echo "UNKNOWN")
      echo " Job stage: ${JOB_STAGE}"
      if [[ "${JOB_STAGE}" =~ ^(SUCCESS|SUCCEEDED|COMPLETED|DONE)$ ]]; then
        echo "✓ L2 image build job completed"
        echo "${L2_HASH}" > "${LAST_L2_HASH_FILE}"
        break
      elif [[ "${JOB_STAGE}" =~ ^(FAILED|ERROR|CANCELED|CANCELLED)$ ]]; then
        echo "✗ L2 image build job failed: ${JOB_STAGE}"
        exit 1
      else
        sleep 30
      fi
    done
  fi
fi
echo ""
echo "Stage 3: Deploying to HuggingFace Space"
echo ""
# Defensive re-check that a git repo exists (normally already ensured above).
if ! [ -d .git ]; then
  echo "Initializing git repository..."
  git init
  git remote add origin "https://huggingface.co/spaces/${HF_SPACE}"
  echo "✓ Git repository initialized"
  echo ""
fi
# Bump the build number so the Space always sees a content change and
# triggers a rebuild on push.
# Fix: BUILDNUM_FILE was previously defined but unused — the path was
# hard-coded again inside the Python heredoc. It is now exported and read
# from the environment, so the path lives in one place. Printed output is
# unchanged for the default path.
BUILDNUM_FILE="buildnum.txt"
export BUILDNUM_FILE
"${PY_BIN}" - <<'PY'
import os
from pathlib import Path

path = Path(os.environ.get("BUILDNUM_FILE", "buildnum.txt"))
try:
    current = int(path.read_text().strip())
except Exception:
    # Missing or malformed counter file: restart from zero.
    current = 0
path.write_text(f"{current + 1}\n")
print(f"Updated {path} -> {current + 1}")
PY
# Stage and commit everything (including the bumped build number) before
# pushing to the Space.
if [ -z "$(git status -s)" ]; then
  echo "No changes to commit"
else
  echo "Committing changes..."
  git add .
  git commit -m "Update base image build and deps"
  echo "✓ Changes committed"
fi
# Push main to the Space; HuggingFace rebuilds the Space from repo contents.
echo ""
echo "Pushing to HuggingFace Space: ${HF_SPACE}"
git push "https://huggingface.co/spaces/${HF_SPACE}" main --force
cat <<EOF

✓ Pushed to HuggingFace

HuggingFace will now:
 1. Pull base image from DockerHub (${IMAGE_NAME_L2}:${TAG})
 2. Start the Gradio app

EOF
# Follow build logs
cat <<'EOF'
Following build logs...
Press Ctrl+C to stop watching

EOF
# ---------------------------------------------------------------------------
# Post-push monitoring: stream the Space build log, report the final runtime
# stage, then tail the application log for one minute. Needs an HF token.
# ---------------------------------------------------------------------------
# Prefer HUGGINGFACE_TOKEN, fall back to an already-set HF_TOKEN.
HF_TOKEN="${HUGGINGFACE_TOKEN:-${HF_TOKEN:-}}"
export HF_TOKEN
if [ -n "${HF_TOKEN}" ]; then
  # Stream the build-log endpoint; each line carries a JSON payload whose
  # "data" field is extracted and its \n escapes expanded for display.
  curl -N -H "Authorization: Bearer ${HF_TOKEN}" \
    "https://huggingface.co/api/spaces/${HF_SPACE}/logs/build" 2>/dev/null | \
    while IFS= read -r line; do
      echo "$line" | grep -o '"data":"[^"]*"' | sed 's/"data":"//;s/"$//' | sed 's/\\n/\n/g'
    done
  echo ""
  echo "===================================="
  echo "Build Status Check"
  echo "===================================="
  echo ""
  # Wait a moment for status to update
  sleep 2
  # Check final build status via the Space API; runtime.stage distinguishes
  # RUNNING / BUILD_ERROR / other stages.
  STATUS_JSON=$(curl -s -H "Authorization: Bearer ${HF_TOKEN}" \
    "https://huggingface.co/api/spaces/${HF_SPACE}")
  STAGE=$(echo "$STATUS_JSON" | python3 -c "import sys, json; data=json.load(sys.stdin); print(data.get('runtime', {}).get('stage', 'UNKNOWN'))" 2>/dev/null)
  ERROR_MSG=$(echo "$STATUS_JSON" | python3 -c "import sys, json; data=json.load(sys.stdin); print(data.get('runtime', {}).get('errorMessage', ''))" 2>/dev/null)
  echo "Final Status: ${STAGE}"
  if [ "${STAGE}" = "RUNNING" ]; then
    echo "✓ Deployment successful!"
    echo ""
    # ${HF_SPACE/\//-} rewrites "owner/name" into the "owner-name" subdomain.
    echo "Space URL: https://${HF_SPACE/\//-}.hf.space"
    echo "API URL: https://${HF_SPACE/\//-}.hf.space/gradio_api/info"
    echo ""
    echo "Test with: cd ../training && make test-perception-api"
  elif [ "${STAGE}" = "BUILD_ERROR" ]; then
    echo "✗ Build failed!"
    if [ -n "${ERROR_MSG}" ]; then
      echo "Error: ${ERROR_MSG}"
    fi
    echo ""
    echo "If still getting OOM errors, consider:"
    echo " - Moving weights to runtime download (not build time)"
    echo " - Requesting larger build instance from HuggingFace"
    echo " - Using only CUDA arch 7.5 (T4 only)"
    exit 1
  else
    # Any other stage (e.g. still building): report without failing.
    echo "Status: ${STAGE}"
    if [ -n "${ERROR_MSG}" ]; then
      echo "Message: ${ERROR_MSG}"
    fi
  fi
  echo ""
  echo "Following application logs for 1 minute..."
  LOG_URL="https://huggingface.co/api/spaces/${HF_SPACE}/logs/run"
  export LOG_URL
  # Tail the run-log endpoint via curl for 60 seconds, then terminate.
  python3 - <<'PY'
import os
import subprocess
import sys
import time

log_url = os.environ.get("LOG_URL")
token = os.environ.get("HF_TOKEN")
if not log_url or not token:
    print("Skipping app logs: missing LOG_URL or HF_TOKEN")
    raise SystemExit(0)
# Stream logs directly to our stdout; curl's own stderr chatter is dropped.
proc = subprocess.Popen(
    ["curl", "-N", "-H", f"Authorization: Bearer {token}", log_url],
    stdout=sys.stdout,
    stderr=subprocess.DEVNULL,
)
try:
    time.sleep(60)
finally:
    # Stop the streamer, escalating terminate -> kill.
    proc.terminate()
    try:
        proc.wait(timeout=5)
    except Exception:
        proc.kill()
PY
else
  echo "Warning: HF token not available; cannot follow logs"
  echo "To follow logs manually:"
  echo " curl -N -H \"Authorization: Bearer \$HF_TOKEN\" \"https://huggingface.co/api/spaces/${HF_SPACE}/logs/build\""
fi