Spaces:
Running on T4
Running on T4
Georg committed on
Commit ·
22ad055
1
Parent(s): bd9a893
Prepare job build context
Browse files
deploy.sh
CHANGED
|
@@ -3,7 +3,8 @@
|
|
| 3 |
|
| 4 |
set -e
|
| 5 |
|
| 6 |
-
|
|
|
|
| 7 |
TAG="latest"
|
| 8 |
PLATFORM="linux/amd64"
|
| 9 |
HF_SPACE="gpue/foundationpose"
|
|
@@ -65,17 +66,17 @@ git push "https://huggingface.co/spaces/${HF_SPACE}" "HEAD:${JOB_REF}" --force
|
|
| 65 |
echo "✓ Job ref pushed"
|
| 66 |
echo ""
|
| 67 |
|
| 68 |
-
echo "Stage 1: Building base image via HF Job"
|
| 69 |
echo "Platform: ${PLATFORM}"
|
| 70 |
-
echo "Image: ${
|
| 71 |
echo ""
|
| 72 |
|
| 73 |
JOB_OUTPUT=$("${PY_BIN}" scripts/run_hf_image_job.py \
|
| 74 |
-
--image-name "${
|
| 75 |
--tag "${TAG}" \
|
| 76 |
--platform "${PLATFORM}" \
|
| 77 |
--dockerfile "Dockerfile.base" \
|
| 78 |
-
--target "foundationpose-base-
|
| 79 |
--flavor "l40sx1" \
|
| 80 |
--git-repo "https://huggingface.co/spaces/${HF_SPACE}" \
|
| 81 |
--git-ref "${JOB_REF}" 2>&1 | tee /tmp/hf_image_job.log)
|
|
@@ -115,7 +116,7 @@ fi
|
|
| 115 |
|
| 116 |
if [ -n "${JOB_ID}" ] && [ -x "${HF_BIN}" ]; then
|
| 117 |
echo ""
|
| 118 |
-
echo "Waiting for image build job to complete..."
|
| 119 |
for i in $(seq 1 40); do
|
| 120 |
JOB_STAGE=$("${HF_BIN}" jobs inspect "${JOB_ID}" | python3 -c "import sys, json; data=json.load(sys.stdin)[0]; print(data.get('status', {}).get('stage', 'UNKNOWN'))" 2>/dev/null || echo "UNKNOWN")
|
| 121 |
echo " Job stage: ${JOB_STAGE}"
|
|
@@ -136,7 +137,76 @@ if [ -n "${JOB_ID}" ] && [ -x "${HF_BIN}" ]; then
|
|
| 136 |
fi
|
| 137 |
|
| 138 |
echo ""
|
| 139 |
-
echo "Stage 2:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
echo ""
|
| 141 |
|
| 142 |
# Initialize git repo if needed
|
|
@@ -167,7 +237,7 @@ echo ""
|
|
| 167 |
echo "✓ Pushed to HuggingFace"
|
| 168 |
echo ""
|
| 169 |
echo "HuggingFace will now:"
|
| 170 |
-
echo " 1. Pull base image from DockerHub (${
|
| 171 |
echo " 2. Build CUDA extensions"
|
| 172 |
echo " 3. Download model weights"
|
| 173 |
echo " 4. Start the Gradio app"
|
|
|
|
| 3 |
|
| 4 |
set -e
|
| 5 |
|
| 6 |
+
IMAGE_NAME_L1="gpue/foundationpose-base-l1"
|
| 7 |
+
IMAGE_NAME_L2="gpue/foundationpose-base-l2"
|
| 8 |
TAG="latest"
|
| 9 |
PLATFORM="linux/amd64"
|
| 10 |
HF_SPACE="gpue/foundationpose"
|
|
|
|
| 66 |
echo "✓ Job ref pushed"
|
| 67 |
echo ""
|
| 68 |
|
| 69 |
+
echo "Stage 1: Building L1 base image via HF Job"
|
| 70 |
echo "Platform: ${PLATFORM}"
|
| 71 |
+
echo "Image: ${IMAGE_NAME_L1}:${TAG}"
|
| 72 |
echo ""
|
| 73 |
|
| 74 |
JOB_OUTPUT=$("${PY_BIN}" scripts/run_hf_image_job.py \
|
| 75 |
+
--image-name "${IMAGE_NAME_L1}" \
|
| 76 |
--tag "${TAG}" \
|
| 77 |
--platform "${PLATFORM}" \
|
| 78 |
--dockerfile "Dockerfile.base" \
|
| 79 |
+
--target "foundationpose-base-l1" \
|
| 80 |
--flavor "l40sx1" \
|
| 81 |
--git-repo "https://huggingface.co/spaces/${HF_SPACE}" \
|
| 82 |
--git-ref "${JOB_REF}" 2>&1 | tee /tmp/hf_image_job.log)
|
|
|
|
| 116 |
|
| 117 |
if [ -n "${JOB_ID}" ] && [ -x "${HF_BIN}" ]; then
|
| 118 |
echo ""
|
| 119 |
+
echo "Waiting for L1 image build job to complete..."
|
| 120 |
for i in $(seq 1 40); do
|
| 121 |
JOB_STAGE=$("${HF_BIN}" jobs inspect "${JOB_ID}" | python3 -c "import sys, json; data=json.load(sys.stdin)[0]; print(data.get('status', {}).get('stage', 'UNKNOWN'))" 2>/dev/null || echo "UNKNOWN")
|
| 122 |
echo " Job stage: ${JOB_STAGE}"
|
|
|
|
| 137 |
fi
|
| 138 |
|
| 139 |
echo ""
|
| 140 |
+
echo "Stage 2: Building L2 base image via HF Job"
|
| 141 |
+
echo ""
|
| 142 |
+
|
| 143 |
+
JOB_OUTPUT_L2=$("${PY_BIN}" scripts/run_hf_image_job.py \
|
| 144 |
+
--image-name "${IMAGE_NAME_L2}" \
|
| 145 |
+
--tag "${TAG}" \
|
| 146 |
+
--platform "${PLATFORM}" \
|
| 147 |
+
--dockerfile "Dockerfile.base" \
|
| 148 |
+
--target "foundationpose-base-l2" \
|
| 149 |
+
--flavor "l40sx1" \
|
| 150 |
+
--git-repo "https://huggingface.co/spaces/${HF_SPACE}" \
|
| 151 |
+
--git-ref "${JOB_REF}" 2>&1 | tee /tmp/hf_image_job_l2.log)
|
| 152 |
+
|
| 153 |
+
JOB_ID_L2=$(echo "${JOB_OUTPUT_L2}" | awk '/Job ID:/ {print $3}')
|
| 154 |
+
if [ -z "${JOB_ID_L2}" ]; then
|
| 155 |
+
echo "Warning: Could not parse HF job id for L2. See /tmp/hf_image_job_l2.log"
|
| 156 |
+
else
|
| 157 |
+
echo "Following L2 job logs for 1 minute..."
|
| 158 |
+
if [ -x "${HF_BIN}" ]; then
|
| 159 |
+
HF_BIN_PATH="${HF_BIN}" JOB_ID="${JOB_ID_L2}" "${PY_BIN}" - <<'PY'
|
| 160 |
+
import os
|
| 161 |
+
import subprocess
|
| 162 |
+
import sys
|
| 163 |
+
import time
|
| 164 |
+
|
| 165 |
+
hf = os.environ["HF_BIN_PATH"]
|
| 166 |
+
job_id = os.environ["JOB_ID"]
|
| 167 |
+
|
| 168 |
+
proc = subprocess.Popen([hf, "jobs", "logs", job_id], stdout=sys.stdout, stderr=sys.stderr)
|
| 169 |
+
try:
|
| 170 |
+
time.sleep(60)
|
| 171 |
+
finally:
|
| 172 |
+
proc.terminate()
|
| 173 |
+
try:
|
| 174 |
+
proc.wait(timeout=5)
|
| 175 |
+
except Exception:
|
| 176 |
+
proc.kill()
|
| 177 |
+
PY
|
| 178 |
+
echo ""
|
| 179 |
+
echo "L2 job status:"
|
| 180 |
+
"${HF_BIN}" jobs inspect "${JOB_ID_L2}" || true
|
| 181 |
+
else
|
| 182 |
+
echo "hf CLI not available; job logs skipped"
|
| 183 |
+
fi
|
| 184 |
+
fi
|
| 185 |
+
|
| 186 |
+
if [ -n "${JOB_ID_L2}" ] && [ -x "${HF_BIN}" ]; then
|
| 187 |
+
echo ""
|
| 188 |
+
echo "Waiting for L2 image build job to complete..."
|
| 189 |
+
for i in $(seq 1 60); do
|
| 190 |
+
JOB_STAGE=$("${HF_BIN}" jobs inspect "${JOB_ID_L2}" | python3 -c "import sys, json; data=json.load(sys.stdin)[0]; print(data.get('status', {}).get('stage', 'UNKNOWN'))" 2>/dev/null || echo "UNKNOWN")
|
| 191 |
+
echo " Job stage: ${JOB_STAGE}"
|
| 192 |
+
case "${JOB_STAGE}" in
|
| 193 |
+
SUCCESS|SUCCEEDED|COMPLETED|DONE)
|
| 194 |
+
echo "✓ L2 image build job completed"
|
| 195 |
+
break
|
| 196 |
+
;;
|
| 197 |
+
FAILED|ERROR|CANCELED|CANCELLED)
|
| 198 |
+
echo "✗ L2 image build job failed: ${JOB_STAGE}"
|
| 199 |
+
exit 1
|
| 200 |
+
;;
|
| 201 |
+
*)
|
| 202 |
+
sleep 30
|
| 203 |
+
;;
|
| 204 |
+
esac
|
| 205 |
+
done
|
| 206 |
+
fi
|
| 207 |
+
|
| 208 |
+
echo ""
|
| 209 |
+
echo "Stage 3: Deploying to HuggingFace Space"
|
| 210 |
echo ""
|
| 211 |
|
| 212 |
# Initialize git repo if needed
|
|
|
|
| 237 |
echo "✓ Pushed to HuggingFace"
|
| 238 |
echo ""
|
| 239 |
echo "HuggingFace will now:"
|
| 240 |
+
echo " 1. Pull base image from DockerHub (${IMAGE_NAME_L2}:${TAG})"
|
| 241 |
echo " 2. Build CUDA extensions"
|
| 242 |
echo " 3. Download model weights"
|
| 243 |
echo " 4. Start the Gradio app"
|