Georg committed on
Commit
22ad055
·
1 Parent(s): bd9a893

Prepare job build context

Browse files
Files changed (1) hide show
  1. deploy.sh +78 -8
deploy.sh CHANGED
@@ -3,7 +3,8 @@
3
 
4
  set -e
5
 
6
- IMAGE_NAME="gpue/foundationpose-base-l2"
 
7
  TAG="latest"
8
  PLATFORM="linux/amd64"
9
  HF_SPACE="gpue/foundationpose"
@@ -65,17 +66,17 @@ git push "https://huggingface.co/spaces/${HF_SPACE}" "HEAD:${JOB_REF}" --force
65
  echo "✓ Job ref pushed"
66
  echo ""
67
 
68
- echo "Stage 1: Building base image via HF Job"
69
  echo "Platform: ${PLATFORM}"
70
- echo "Image: ${IMAGE_NAME}:${TAG}"
71
  echo ""
72
 
73
  JOB_OUTPUT=$("${PY_BIN}" scripts/run_hf_image_job.py \
74
- --image-name "${IMAGE_NAME}" \
75
  --tag "${TAG}" \
76
  --platform "${PLATFORM}" \
77
  --dockerfile "Dockerfile.base" \
78
- --target "foundationpose-base-l2" \
79
  --flavor "l40sx1" \
80
  --git-repo "https://huggingface.co/spaces/${HF_SPACE}" \
81
  --git-ref "${JOB_REF}" 2>&1 | tee /tmp/hf_image_job.log)
@@ -115,7 +116,7 @@ fi
115
 
116
  if [ -n "${JOB_ID}" ] && [ -x "${HF_BIN}" ]; then
117
  echo ""
118
- echo "Waiting for image build job to complete..."
119
  for i in $(seq 1 40); do
120
  JOB_STAGE=$("${HF_BIN}" jobs inspect "${JOB_ID}" | python3 -c "import sys, json; data=json.load(sys.stdin)[0]; print(data.get('status', {}).get('stage', 'UNKNOWN'))" 2>/dev/null || echo "UNKNOWN")
121
  echo " Job stage: ${JOB_STAGE}"
@@ -136,7 +137,76 @@ if [ -n "${JOB_ID}" ] && [ -x "${HF_BIN}" ]; then
136
  fi
137
 
138
  echo ""
139
- echo "Stage 2: Deploying to HuggingFace Space"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  echo ""
141
 
142
  # Initialize git repo if needed
@@ -167,7 +237,7 @@ echo ""
167
  echo "✓ Pushed to HuggingFace"
168
  echo ""
169
  echo "HuggingFace will now:"
170
- echo " 1. Pull base image from DockerHub (${IMAGE_NAME}:${TAG})"
171
  echo " 2. Build CUDA extensions"
172
  echo " 3. Download model weights"
173
  echo " 4. Start the Gradio app"
 
3
 
4
  set -e
5
 
6
+ IMAGE_NAME_L1="gpue/foundationpose-base-l1"
7
+ IMAGE_NAME_L2="gpue/foundationpose-base-l2"
8
  TAG="latest"
9
  PLATFORM="linux/amd64"
10
  HF_SPACE="gpue/foundationpose"
 
66
  echo "✓ Job ref pushed"
67
  echo ""
68
 
69
+ echo "Stage 1: Building L1 base image via HF Job"
70
  echo "Platform: ${PLATFORM}"
71
+ echo "Image: ${IMAGE_NAME_L1}:${TAG}"
72
  echo ""
73
 
74
  JOB_OUTPUT=$("${PY_BIN}" scripts/run_hf_image_job.py \
75
+ --image-name "${IMAGE_NAME_L1}" \
76
  --tag "${TAG}" \
77
  --platform "${PLATFORM}" \
78
  --dockerfile "Dockerfile.base" \
79
+ --target "foundationpose-base-l1" \
80
  --flavor "l40sx1" \
81
  --git-repo "https://huggingface.co/spaces/${HF_SPACE}" \
82
  --git-ref "${JOB_REF}" 2>&1 | tee /tmp/hf_image_job.log)
 
116
 
117
  if [ -n "${JOB_ID}" ] && [ -x "${HF_BIN}" ]; then
118
  echo ""
119
+ echo "Waiting for L1 image build job to complete..."
120
  for i in $(seq 1 40); do
121
  JOB_STAGE=$("${HF_BIN}" jobs inspect "${JOB_ID}" | python3 -c "import sys, json; data=json.load(sys.stdin)[0]; print(data.get('status', {}).get('stage', 'UNKNOWN'))" 2>/dev/null || echo "UNKNOWN")
122
  echo " Job stage: ${JOB_STAGE}"
 
137
  fi
138
 
139
  echo ""
140
+ echo "Stage 2: Building L2 base image via HF Job"
141
+ echo ""
142
+
143
+ JOB_OUTPUT_L2=$("${PY_BIN}" scripts/run_hf_image_job.py \
144
+ --image-name "${IMAGE_NAME_L2}" \
145
+ --tag "${TAG}" \
146
+ --platform "${PLATFORM}" \
147
+ --dockerfile "Dockerfile.base" \
148
+ --target "foundationpose-base-l2" \
149
+ --flavor "l40sx1" \
150
+ --git-repo "https://huggingface.co/spaces/${HF_SPACE}" \
151
+ --git-ref "${JOB_REF}" 2>&1 | tee /tmp/hf_image_job_l2.log)
152
+
153
+ JOB_ID_L2=$(echo "${JOB_OUTPUT_L2}" | awk '/Job ID:/ {print $3}')
154
+ if [ -z "${JOB_ID_L2}" ]; then
155
+ echo "Warning: Could not parse HF job id for L2. See /tmp/hf_image_job_l2.log"
156
+ else
157
+ echo "Following L2 job logs for 1 minute..."
158
+ if [ -x "${HF_BIN}" ]; then
159
+ HF_BIN_PATH="${HF_BIN}" JOB_ID="${JOB_ID_L2}" "${PY_BIN}" - <<'PY'
160
+ import os
161
+ import subprocess
162
+ import sys
163
+ import time
164
+
165
+ hf = os.environ["HF_BIN_PATH"]
166
+ job_id = os.environ["JOB_ID"]
167
+
168
+ proc = subprocess.Popen([hf, "jobs", "logs", job_id], stdout=sys.stdout, stderr=sys.stderr)
169
+ try:
170
+ time.sleep(60)
171
+ finally:
172
+ proc.terminate()
173
+ try:
174
+ proc.wait(timeout=5)
175
+ except Exception:
176
+ proc.kill()
177
+ PY
178
+ echo ""
179
+ echo "L2 job status:"
180
+ "${HF_BIN}" jobs inspect "${JOB_ID_L2}" || true
181
+ else
182
+ echo "hf CLI not available; job logs skipped"
183
+ fi
184
+ fi
185
+
186
+ if [ -n "${JOB_ID_L2}" ] && [ -x "${HF_BIN}" ]; then
187
+ echo ""
188
+ echo "Waiting for L2 image build job to complete..."
189
+ for i in $(seq 1 60); do
190
+ JOB_STAGE=$("${HF_BIN}" jobs inspect "${JOB_ID_L2}" | python3 -c "import sys, json; data=json.load(sys.stdin)[0]; print(data.get('status', {}).get('stage', 'UNKNOWN'))" 2>/dev/null || echo "UNKNOWN")
191
+ echo " Job stage: ${JOB_STAGE}"
192
+ case "${JOB_STAGE}" in
193
+ SUCCESS|SUCCEEDED|COMPLETED|DONE)
194
+ echo "✓ L2 image build job completed"
195
+ break
196
+ ;;
197
+ FAILED|ERROR|CANCELED|CANCELLED)
198
+ echo "✗ L2 image build job failed: ${JOB_STAGE}"
199
+ exit 1
200
+ ;;
201
+ *)
202
+ sleep 30
203
+ ;;
204
+ esac
205
+ done
206
+ fi
207
+
208
+ echo ""
209
+ echo "Stage 3: Deploying to HuggingFace Space"
210
  echo ""
211
 
212
  # Initialize git repo if needed
 
237
  echo "✓ Pushed to HuggingFace"
238
  echo ""
239
  echo "HuggingFace will now:"
240
+ echo " 1. Pull base image from DockerHub (${IMAGE_NAME_L2}:${TAG})"
241
  echo " 2. Build CUDA extensions"
242
  echo " 3. Download model weights"
243
  echo " 4. Start the Gradio app"