File size: 12,965 Bytes
bbc3fdc
3968781
bbc3fdc
 
 
da62a99
 
 
 
22ad055
 
bbc3fdc
 
 
c58f0bb
bbc3fdc
738da22
 
bbc3fdc
3968781
bbc3fdc
 
 
c58f0bb
 
 
 
 
 
 
 
 
 
738da22
 
 
 
 
 
 
 
 
 
 
 
 
c58f0bb
 
da62a99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dde1d40
da62a99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bd9a893
 
 
 
 
 
 
 
 
 
 
 
 
297bffe
 
 
 
 
 
bd9a893
 
da62a99
 
 
 
 
 
 
 
 
bd9a893
da62a99
22ad055
bbc3fdc
22ad055
bbc3fdc
 
738da22
22ad055
c58f0bb
 
 
22ad055
8d70077
bd9a893
 
c58f0bb
 
 
 
a0f9c96
75b68fd
738da22
bd9a893
75b68fd
bd9a893
 
 
 
 
 
 
 
75b68fd
bd9a893
75b68fd
 
 
 
 
 
 
 
 
 
 
 
 
bd9a893
75b68fd
bd9a893
75b68fd
bd9a893
75b68fd
bd9a893
c58f0bb
 
bd9a893
c58f0bb
 
a0f9c96
bbc3fdc
 
bd9a893
 
22ad055
bd9a893
 
 
74ae2d1
 
 
 
 
 
 
 
 
 
bd9a893
 
da62a99
bd9a893
bbc3fdc
da62a99
22ad055
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75b68fd
22ad055
 
75b68fd
22ad055
 
 
 
 
 
 
 
75b68fd
22ad055
75b68fd
 
 
 
 
 
 
 
 
 
 
 
 
22ad055
75b68fd
22ad055
75b68fd
22ad055
75b68fd
22ad055
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74ae2d1
 
 
 
 
 
 
 
 
 
22ad055
 
da62a99
22ad055
 
 
bbc3fdc
 
 
 
 
 
c58f0bb
bbc3fdc
 
 
 
c10959e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bbc3fdc
 
 
c7f9209
c58f0bb
bbc3fdc
 
 
 
 
 
 
 
c58f0bb
bbc3fdc
 
 
 
 
22ad055
92da831
bbc3fdc
 
 
 
 
 
 
c58f0bb
738da22
c58f0bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bbc3fdc
c58f0bb
 
3968781
c58f0bb
 
 
 
3968781
c58f0bb
 
3968781
c58f0bb
 
 
 
 
3968781
c58f0bb
 
 
 
 
 
bbc3fdc
c58f0bb
 
 
 
bbc3fdc
c58f0bb
 
 
92da831
738da22
c58f0bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bbc3fdc
c58f0bb
bbc3fdc
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
#!/bin/bash
# FoundationPose deployment script (optimized for HuggingFace)

# Abort on the first unhandled command failure.
set -e

# BASH_VERSION is only set by bash. When the script was started by another
# shell, replace this process with bash running the same script and arguments.
# This guard deliberately uses POSIX `[ ]`, because it may run outside bash.
[ -n "${BASH_VERSION:-}" ] || exec /bin/bash "$0" "$@"

# Deployment coordinates used by the later stages.
HF_SPACE="gpue/foundationpose"
PLATFORM="linux/amd64"
TAG="latest"
IMAGE_NAME_L1="gpue/foundationpose-base-l1"
IMAGE_NAME_L2="gpue/foundationpose-base-l2"
ENV_FILE=".env"

# From here on, stdout goes through `tee -a deploy.logs` (append) and stderr
# is redirected onto the same stream.
exec > >(tee -a deploy.logs) 2>&1

printf '%s\n' \
    "===================================" \
    "FoundationPose Deployment" \
    "===================================" \
    ""

# Source the env file with auto-export enabled so every variable it assigns
# is exported to child processes; only warn when the file is absent.
if [[ ! -f "${ENV_FILE}" ]]; then
    echo "Warning: ${ENV_FILE} not found"
else
    set -a
    # shellcheck disable=SC1090
    . "${ENV_FILE}"
    set +a
fi

# Local virtualenv: PY_BIN runs the embedded Python helpers, HF_BIN is the
# path where the hf CLI is expected inside that venv.
VENV_DIR=".deploy-venv"
PY_BIN="${VENV_DIR}/bin/python3"
HF_BIN="${VENV_DIR}/bin/hf"

# Create the venv only when its interpreter is missing or not executable.
if [[ ! -x "${PY_BIN}" ]]; then
    echo "Creating deploy venv at ${VENV_DIR}..."
    python3 -m venv "${VENV_DIR}"
fi

# Install huggingface_hub only when the venv interpreter cannot import it.
if ! "${PY_BIN}" -c "import huggingface_hub" &>/dev/null; then
    echo "Installing huggingface_hub in deploy venv..."
    "${PY_BIN}" -m pip install --quiet huggingface_hub
fi

#######################################
# Print one SHA-256 hex digest covering the given files (build gating).
# For every argument, in order, the digest absorbs the path in as_posix()
# form and then the file's raw bytes, each followed by a separator.
# The heredoc delimiter is quoted, so Python receives b"\\0" verbatim:
# the separator is the two bytes backslash + "0", not a NUL byte.
# Globals:   PY_BIN (read) - interpreter used to run the helper
# Arguments: paths of the files to hash
# Outputs:   the hex digest on stdout
#######################################
hash_files() {
    "${PY_BIN}" - "$@" <<'PY'
import hashlib
import sys
from pathlib import Path

SEPARATOR = b"\\0"

digest = hashlib.sha256()
for arg in sys.argv[1:]:
    entry = Path(arg)
    for chunk in (entry.as_posix().encode("utf-8"), entry.read_bytes()):
        digest.update(chunk)
        digest.update(SEPARATOR)
print(digest.hexdigest())
PY
}

# Directory holding the hashes recorded after successful image builds.
mkdir -p .deploy

LAST_L1_HASH_FILE=".deploy/last_l1.sha"
LAST_L2_HASH_FILE=".deploy/last_l2.sha"

# Files whose contents decide whether each image job is run again.
L1_INPUTS=(Dockerfile.base)
L2_INPUTS=(Dockerfile.base download_weights.py)

L1_HASH=$(hash_files "${L1_INPUTS[@]}")
L2_HASH=$(hash_files "${L2_INPUTS[@]}")

# A stage is skipped only when its recorded hash exists and equals the
# hash of the current inputs.
SKIP_L1=0
SKIP_L2=0

if [[ -f "${LAST_L1_HASH_FILE}" && "$(<"${LAST_L1_HASH_FILE}")" == "${L1_HASH}" ]]; then
    SKIP_L1=1
    echo "L1 inputs unchanged; skipping L1 image job."
fi

if [[ -f "${LAST_L2_HASH_FILE}" && "$(<"${LAST_L2_HASH_FILE}")" == "${L2_HASH}" ]]; then
    SKIP_L2=1
    echo "L2 inputs unchanged; skipping L2 image job."
fi

# The image jobs are pointed at a git repo/ref, so make sure a local
# repository exists before committing and pushing the build context.
if [[ ! -d .git ]]; then
    echo "Initializing git repository..."
    git init
    git remote add origin "https://huggingface.co/spaces/${HF_SPACE}"
    echo "✓ Git repository initialized"
    echo ""
fi

# Only these files are staged for the job-context commit.
JOB_CONTEXT_FILES=(
    Dockerfile Dockerfile.base requirements.txt deploy.sh app.py client.py
    estimator.py masks.py scripts/run_hf_image_job.py download_weights.py
)

# Commit pending work first so the pushed ref contains what the job should build.
if [[ "$(git status -s)" != "" ]]; then
    echo "Committing changes for job context..."
    git add "${JOB_CONTEXT_FILES[@]}"
    # `git diff --cached --quiet` succeeds when nothing is staged.
    if ! git diff --cached --quiet; then
        git commit -m "Prepare job build context"
        echo "✓ Job context committed"
    else
        echo "No staged changes for job context"
    fi
fi

# JOB_REF stays empty when both image stages are skipped.
JOB_REF=""
if [[ "${SKIP_L1}" -eq 0 || "${SKIP_L2}" -eq 0 ]]; then
    # Push the current HEAD under a timestamped, temporary ref name.
    job_stamp=$(date +%Y%m%d-%H%M%S)
    JOB_REF="job-build-${job_stamp}"
    echo "Pushing job ref: ${JOB_REF}"
    git push "https://huggingface.co/spaces/${HF_SPACE}" "HEAD:${JOB_REF}" --force
    echo "✓ Job ref pushed"
    echo ""
fi

# --- Helpers shared by the L1 and L2 image build stages ----------------------

#######################################
# Submit one image build via scripts/run_hf_image_job.py.
# Globals:   PY_BIN, TAG, PLATFORM, HF_SPACE, JOB_REF (read)
# Arguments: $1 - image name
#            $2 - build target inside Dockerfile.base
#            $3 - file that receives a copy of the output
# Outputs:   the submitter's combined stdout/stderr
# Returns:   tee's status; a failed submission therefore shows up as a
#            missing "Job ID:" line rather than as a non-zero status
#######################################
submit_image_job() {
    local image_name=$1 target=$2 log_file=$3
    "${PY_BIN}" scripts/run_hf_image_job.py \
        --image-name "${image_name}" \
        --tag "${TAG}" \
        --platform "${PLATFORM}" \
        --dockerfile "Dockerfile.base" \
        --target "${target}" \
        --flavor "l40sx1" \
        --git-repo "https://huggingface.co/spaces/${HF_SPACE}" \
        --git-ref "${JOB_REF}" 2>&1 | tee "${log_file}"
}

#######################################
# Extract the job id from submitter output.
# Arguments: $1 - text to search
# Outputs:   third whitespace-separated field of each line containing
#            "Job ID:" (nothing when no such line exists)
#######################################
parse_job_id() {
    printf '%s\n' "$1" | awk '/Job ID:/ {print $3}'
}

#######################################
# Stream `hf jobs logs` for a job until `hf jobs inspect` reports a terminal
# stage, then stop the log process (terminate, kill after 5 s).
# Globals:   HF_BIN, PY_BIN (read)
# Arguments: $1 - job id
#######################################
follow_job_logs() {
    HF_BIN_PATH="${HF_BIN}" JOB_ID="$1" "${PY_BIN}" - <<'PY'
import json
import os
import subprocess
import sys
import time

TERMINAL_STAGES = {
    "SUCCESS", "SUCCEEDED", "COMPLETED", "DONE",
    "FAILED", "ERROR", "CANCELED", "CANCELLED",
}

hf = os.environ["HF_BIN_PATH"]
job_id = os.environ["JOB_ID"]

log_proc = subprocess.Popen([hf, "jobs", "logs", job_id], stdout=sys.stdout, stderr=sys.stderr)
try:
    while True:
        inspect = subprocess.run([hf, "jobs", "inspect", job_id], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True)
        # Any inspect or parse failure counts as "not finished yet".
        stage = "UNKNOWN"
        if inspect.returncode == 0:
            try:
                data = json.loads(inspect.stdout)[0]
                stage = (data.get("status") or {}).get("stage", "UNKNOWN")
            except Exception:
                stage = "UNKNOWN"
        if stage in TERMINAL_STAGES:
            break
        time.sleep(15)
finally:
    log_proc.terminate()
    try:
        log_proc.wait(timeout=5)
    except Exception:
        log_proc.kill()
PY
}

#######################################
# Print the current stage of a job.
# Globals:   HF_BIN, PY_BIN (read)
# Arguments: $1 - job id
# Outputs:   status.stage from the first element of `hf jobs inspect`
#            output, or UNKNOWN when it cannot be read or parsed
#######################################
job_stage() {
    "${HF_BIN}" jobs inspect "$1" \
        | "${PY_BIN}" -c 'import json, sys; data = json.load(sys.stdin)[0]; print((data.get("status") or {}).get("stage", "UNKNOWN"))' \
            2>/dev/null \
        || echo "UNKNOWN"
}

#######################################
# Poll a job every 30 s until it reaches a terminal stage.
# Globals:   JOB_STAGE (written) - last stage observed
# Arguments: $1 - job id, $2 - maximum number of polls
# Outputs:   one "  Job stage: ..." line per poll
# Returns:   0 job succeeded, 1 job failed/cancelled, 2 polls exhausted
#######################################
poll_job_until_done() {
    local job_id=$1 max_polls=$2 attempt
    for ((attempt = 1; attempt <= max_polls; attempt++)); do
        JOB_STAGE=$(job_stage "${job_id}")
        echo "  Job stage: ${JOB_STAGE}"
        case "${JOB_STAGE}" in
            SUCCESS | SUCCEEDED | COMPLETED | DONE) return 0 ;;
            FAILED | ERROR | CANCELED | CANCELLED) return 1 ;;
        esac
        sleep 30
    done
    return 2
}

# --- Stage 1: L1 base image --------------------------------------------------

if [ "${SKIP_L1}" -eq 0 ]; then
    echo "Stage 1: Building L1 base image via HF Job"
    echo "Platform: ${PLATFORM}"
    echo "Image: ${IMAGE_NAME_L1}:${TAG}"
    echo ""

    JOB_OUTPUT=$(submit_image_job "${IMAGE_NAME_L1}" "foundationpose-base-l1" /tmp/hf_image_job.log)
    JOB_ID=$(parse_job_id "${JOB_OUTPUT}")

    if [ -z "${JOB_ID}" ]; then
        echo "Warning: Could not parse HF job id. See /tmp/hf_image_job.log"
    else
        echo "Following job logs until completion..."
        if [ -x "${HF_BIN}" ]; then
            follow_job_logs "${JOB_ID}"
            echo ""
            echo "Job status:"
            "${HF_BIN}" jobs inspect "${JOB_ID}" || true

            echo ""
            echo "Waiting for L1 image build job to complete..."
            # Capture the status explicitly so `set -e` does not abort on 1/2.
            poll_rc=0
            poll_job_until_done "${JOB_ID}" 40 || poll_rc=$?
            case "${poll_rc}" in
                0)
                    echo "✓ L1 image build job completed"
                    # Record the input hash only after a confirmed success.
                    echo "${L1_HASH}" > "${LAST_L1_HASH_FILE}"
                    ;;
                1)
                    echo "✗ Image build job failed: ${JOB_STAGE}"
                    exit 1
                    ;;
                *)
                    # Previously silent: the loop simply ran out of attempts.
                    echo "Warning: L1 image build job did not reach a final stage after 40 polls (last stage: ${JOB_STAGE:-UNKNOWN}); hash not recorded" >&2
                    ;;
            esac
        else
            echo "hf CLI not available; job logs skipped"
        fi
    fi
fi

echo ""

# --- Stage 2: L2 base image --------------------------------------------------

if [ "${SKIP_L2}" -eq 0 ]; then
    echo "Stage 2: Building L2 base image via HF Job"
    echo ""

    JOB_OUTPUT_L2=$(submit_image_job "${IMAGE_NAME_L2}" "foundationpose-base-l2" /tmp/hf_image_job_l2.log)
    JOB_ID_L2=$(parse_job_id "${JOB_OUTPUT_L2}")

    if [ -z "${JOB_ID_L2}" ]; then
        echo "Warning: Could not parse HF job id for L2. See /tmp/hf_image_job_l2.log"
    else
        echo "Following L2 job logs until completion..."
        if [ -x "${HF_BIN}" ]; then
            follow_job_logs "${JOB_ID_L2}"
            echo ""
            echo "L2 job status:"
            "${HF_BIN}" jobs inspect "${JOB_ID_L2}" || true

            echo ""
            echo "Waiting for L2 image build job to complete..."
            poll_rc=0
            poll_job_until_done "${JOB_ID_L2}" 60 || poll_rc=$?
            case "${poll_rc}" in
                0)
                    echo "✓ L2 image build job completed"
                    echo "${L2_HASH}" > "${LAST_L2_HASH_FILE}"
                    ;;
                1)
                    echo "✗ L2 image build job failed: ${JOB_STAGE}"
                    exit 1
                    ;;
                *)
                    echo "Warning: L2 image build job did not reach a final stage after 60 polls (last stage: ${JOB_STAGE:-UNKNOWN}); hash not recorded" >&2
                    ;;
            esac
        else
            echo "hf CLI not available; job logs skipped"
        fi
    fi
fi

echo ""
echo "Stage 3: Deploying to HuggingFace Space"
echo ""

# Initialize git repo if needed
if [ ! -d .git ]; then
    echo "Initializing git repository..."
    git init
    git remote add origin "https://huggingface.co/spaces/${HF_SPACE}"
    echo "✓ Git repository initialized"
    echo ""
fi

# Bump build number to force Space rebuild. The file name is passed as an
# argument so BUILDNUM_FILE is the single source of truth; a missing or
# non-numeric file restarts the counter from 0.
BUILDNUM_FILE="buildnum.txt"
"${PY_BIN}" - "${BUILDNUM_FILE}" <<'PY'
import sys
from pathlib import Path

path = Path(sys.argv[1])
try:
    current = int(path.read_text().strip())
except (OSError, ValueError):
    current = 0

path.write_text(f"{current + 1}\n")
print(f"Updated {path} -> {current + 1}")
PY

# Check if there are changes to commit
if [[ -n $(git status -s) ]]; then
    echo "Committing changes..."
    # NOTE(review): `git add .` stages every path that is not ignored. Confirm
    # that .gitignore covers the env file with tokens, the deploy venv,
    # deploy.logs and .deploy/ so they are never pushed to the Space.
    git add .
    # `git status -s` can be non-empty while nothing ends up staged; in that
    # case `git commit` would fail and `set -e` would abort the deployment.
    if git diff --cached --quiet; then
        echo "No changes to commit"
    else
        git commit -m "Update base image build and deps"
        echo "✓ Changes committed"
    fi
else
    echo "No changes to commit"
fi

# Push to HuggingFace
echo ""
echo "Pushing to HuggingFace Space: ${HF_SPACE}"
# Push the commit that is checked out rather than a local branch literally
# named "main": a repository created by the `git init` above may be on a
# differently named default branch, which made `git push ... main` fail.
git push "https://huggingface.co/spaces/${HF_SPACE}" "HEAD:refs/heads/main" --force

echo ""
echo "✓ Pushed to HuggingFace"
echo ""
echo "HuggingFace will now:"
echo "  1. Pull base image from DockerHub (${IMAGE_NAME_L2}:${TAG})"
echo "  2. Start the Gradio app"
echo ""

# Follow build logs
echo "Following build logs..."
echo "Press Ctrl+C to stop watching"
echo ""

# Resolve the API token: HUGGINGFACE_TOKEN wins, otherwise keep any existing
# HF_TOKEN. Exported because the Python log follower below reads it from the
# environment.
HF_TOKEN="${HUGGINGFACE_TOKEN:-${HF_TOKEN:-}}"
export HF_TOKEN

#######################################
# Read a Space API JSON document on stdin and print one field of its
# "runtime" object.
# Arguments: $1 - field name inside "runtime"
#            $2 - fallback printed when the field is missing or null, when
#                 "runtime" is missing or null, or when stdin is not JSON
# Outputs:   the field value or the fallback, followed by a newline
# Returns:   0 in all of the cases above, so `set -e` never aborts the
#            script because of an unreadable status response
#######################################
runtime_field() {
    local field=$1 fallback=$2
    RUNTIME_FIELD="${field}" RUNTIME_DEFAULT="${fallback}" python3 -c '
import json
import os
import sys

runtime = json.load(sys.stdin).get("runtime") or {}
value = runtime.get(os.environ["RUNTIME_FIELD"])
print(os.environ["RUNTIME_DEFAULT"] if value is None else value)
' 2>/dev/null || printf '%s\n' "${fallback}"
}

if [ -n "${HF_TOKEN}" ]; then
    # Stream the build log endpoint and print the payload of each `"data":"..."`
    # field, turning escaped "\n" sequences into real line breaks.
    curl -N -H "Authorization: Bearer ${HF_TOKEN}" \
        "https://huggingface.co/api/spaces/${HF_SPACE}/logs/build" 2>/dev/null | \
        while IFS= read -r line; do
            printf '%s\n' "${line}" | grep -o '"data":"[^"]*"' | sed 's/"data":"//;s/"$//' | sed 's/\\n/\n/g'
        done

    echo ""
    echo "===================================="
    echo "Build Status Check"
    echo "===================================="
    echo ""

    # Wait a moment for status to update
    sleep 2

    # Check final build status. A failed request yields an empty document,
    # which runtime_field maps to the fallbacks instead of killing the script.
    STATUS_JSON=$(curl -s -H "Authorization: Bearer ${HF_TOKEN}" \
        "https://huggingface.co/api/spaces/${HF_SPACE}") || STATUS_JSON=""

    STAGE=$(printf '%s' "${STATUS_JSON}" | runtime_field stage UNKNOWN)
    ERROR_MSG=$(printf '%s' "${STATUS_JSON}" | runtime_field errorMessage "")

    echo "Final Status: ${STAGE}"

    if [ "${STAGE}" = "RUNNING" ]; then
        echo "✓ Deployment successful!"
        echo ""
        # The Space host name is HF_SPACE with its first "/" replaced by "-".
        echo "Space URL: https://${HF_SPACE/\//-}.hf.space"
        echo "API URL: https://${HF_SPACE/\//-}.hf.space/gradio_api/info"
        echo ""
        echo "Test with: cd ../training && make test-perception-api"
    elif [ "${STAGE}" = "BUILD_ERROR" ]; then
        echo "✗ Build failed!"
        if [ -n "${ERROR_MSG}" ]; then
            echo "Error: ${ERROR_MSG}"
        fi
        echo ""
        echo "If still getting OOM errors, consider:"
        echo "  - Moving weights to runtime download (not build time)"
        echo "  - Requesting larger build instance from HuggingFace"
        echo "  - Using only CUDA arch 7.5 (T4 only)"
        exit 1
    else
        echo "Status: ${STAGE}"
        if [ -n "${ERROR_MSG}" ]; then
            echo "Message: ${ERROR_MSG}"
        fi
    fi

    echo ""
    echo "Following application logs for 1 minute..."
    LOG_URL="https://huggingface.co/api/spaces/${HF_SPACE}/logs/run"
    export LOG_URL
    # Run curl on the run-log endpoint for 60 s, then terminate it
    # (kill if it has not exited within 5 s).
    python3 - <<'PY'
import os
import subprocess
import sys
import time

log_url = os.environ.get("LOG_URL")
token = os.environ.get("HF_TOKEN")
if not log_url or not token:
    print("Skipping app logs: missing LOG_URL or HF_TOKEN")
    raise SystemExit(0)

proc = subprocess.Popen(
    ["curl", "-N", "-H", f"Authorization: Bearer {token}", log_url],
    stdout=sys.stdout,
    stderr=subprocess.DEVNULL,
)
try:
    time.sleep(60)
finally:
    proc.terminate()
    try:
        proc.wait(timeout=5)
    except Exception:
        proc.kill()
PY

else
    echo "Warning: HF token not available; cannot follow logs"
    echo "To follow logs manually:"
    echo "  curl -N -H \"Authorization: Bearer \$HF_TOKEN\" \"https://huggingface.co/api/spaces/${HF_SPACE}/logs/build\""
fi