Spaces:
Sleeping
Sleeping
Georg commited on
Commit ·
c58f0bb
1
Parent(s): a0f9c96
Update base image build and deps
Browse files- Dockerfile +19 -8
- Dockerfile.base +127 -48
- deploy.sh +122 -96
- scripts/run_hf_image_job.py +185 -0
Dockerfile
CHANGED
|
@@ -1,24 +1,37 @@
|
|
| 1 |
# Final stage Dockerfile - optimized for HuggingFace
|
| 2 |
-
|
| 3 |
-
FROM gpue/foundationpose-base:latest
|
| 4 |
|
| 5 |
# FoundationPose configuration
|
| 6 |
ENV FOUNDATIONPOSE_MODEL_REPO=gpue/foundationpose-weights
|
| 7 |
ENV USE_REAL_MODEL=true
|
| 8 |
|
| 9 |
-
# Ensure NumPy 1.x for CUDA extension compatibility
|
| 10 |
RUN pip install --no-cache-dir "numpy<2" transformers==4.41.2 \
|
| 11 |
&& pip install --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py310_cu118_pyt210/download.html
|
| 12 |
|
| 13 |
# Set MAX_JOBS=1 BEFORE any CUDA compilation to limit memory usage
|
| 14 |
ENV MAX_JOBS=1
|
| 15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
# Install nvdiffrast (CUDA rasterizer) - needs GPU, build here
|
| 17 |
RUN git clone --depth 1 https://github.com/NVlabs/nvdiffrast.git /tmp/nvdiffrast \
|
| 18 |
&& cd /tmp/nvdiffrast \
|
| 19 |
&& python3 setup.py build_ext --inplace
|
| 20 |
RUN python3 -c "import shutil, sysconfig, glob; from pathlib import Path; site=Path(sysconfig.get_paths()['purelib']); src=Path('/tmp/nvdiffrast/nvdiffrast'); dst=site/'nvdiffrast'; shutil.rmtree(dst, ignore_errors=True); shutil.copytree(src, dst); so_files=(glob.glob('/tmp/nvdiffrast/_nvdiffrast_c*.so') + glob.glob('/tmp/nvdiffrast/nvdiffrast/_nvdiffrast_c*.so') + glob.glob('/tmp/nvdiffrast/build/lib.*/*_nvdiffrast_c*.so')); [shutil.copy2(p, site) for p in so_files]"
|
| 21 |
-
RUN python3 -c "import sysconfig; from pathlib import Path; site=Path(sysconfig.get_paths()['purelib']); dist=site/'nvdiffrast-0.0.0.dist-info'; dist.mkdir(exist_ok=True); (dist/'METADATA').write_text('Metadata-Version: 2.1
|
| 22 |
RUN python3 -c "import nvdiffrast.torch"
|
| 23 |
RUN rm -rf /tmp/nvdiffrast
|
| 24 |
|
|
@@ -26,10 +39,8 @@ RUN rm -rf /tmp/nvdiffrast
|
|
| 26 |
WORKDIR /app/FoundationPose
|
| 27 |
RUN cd bundlesdf/mycuda && pip install . --no-build-isolation
|
| 28 |
|
| 29 |
-
#
|
| 30 |
WORKDIR /app
|
| 31 |
-
|
| 32 |
-
# Copy application files (placed here so changes don't require base image rebuild)
|
| 33 |
-
COPY app.py client.py estimator.py masks.py ./
|
| 34 |
|
| 35 |
CMD ["python3", "app.py"]
|
|
|
|
| 1 |
# Final stage Dockerfile - optimized for HuggingFace
|
| 2 |
+
FROM gpue/foundationpose-base-l2:latest
|
|
|
|
| 3 |
|
| 4 |
# FoundationPose configuration
|
| 5 |
ENV FOUNDATIONPOSE_MODEL_REPO=gpue/foundationpose-weights
|
| 6 |
ENV USE_REAL_MODEL=true
|
| 7 |
|
| 8 |
+
# Ensure NumPy 1.x for CUDA extension compatibility and install SAM/pytorch3d
|
| 9 |
RUN pip install --no-cache-dir "numpy<2" transformers==4.41.2 \
|
| 10 |
&& pip install --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py310_cu118_pyt210/download.html
|
| 11 |
|
| 12 |
# Set MAX_JOBS=1 BEFORE any CUDA compilation to limit memory usage
|
| 13 |
ENV MAX_JOBS=1
|
| 14 |
|
| 15 |
+
# Clone FoundationPose source
|
| 16 |
+
RUN git clone --depth 1 https://github.com/NVlabs/FoundationPose.git /app/FoundationPose \
|
| 17 |
+
&& cd /app/FoundationPose/bundlesdf/mycuda \
|
| 18 |
+
&& sed -i 's/-std=c++14/-std=c++17/g' setup.py
|
| 19 |
+
|
| 20 |
+
# Build CPU-only C++ code
|
| 21 |
+
WORKDIR /app/FoundationPose
|
| 22 |
+
RUN cd mycpp && mkdir -p build && cd build && cmake .. && make
|
| 23 |
+
|
| 24 |
+
# Download model weights (246MB)
|
| 25 |
+
WORKDIR /app
|
| 26 |
+
COPY download_weights.py ./download_weights.py
|
| 27 |
+
RUN python3 download_weights.py
|
| 28 |
+
|
| 29 |
# Install nvdiffrast (CUDA rasterizer) - needs GPU, build here
|
| 30 |
RUN git clone --depth 1 https://github.com/NVlabs/nvdiffrast.git /tmp/nvdiffrast \
|
| 31 |
&& cd /tmp/nvdiffrast \
|
| 32 |
&& python3 setup.py build_ext --inplace
|
| 33 |
RUN python3 -c "import shutil, sysconfig, glob; from pathlib import Path; site=Path(sysconfig.get_paths()['purelib']); src=Path('/tmp/nvdiffrast/nvdiffrast'); dst=site/'nvdiffrast'; shutil.rmtree(dst, ignore_errors=True); shutil.copytree(src, dst); so_files=(glob.glob('/tmp/nvdiffrast/_nvdiffrast_c*.so') + glob.glob('/tmp/nvdiffrast/nvdiffrast/_nvdiffrast_c*.so') + glob.glob('/tmp/nvdiffrast/build/lib.*/*_nvdiffrast_c*.so')); [shutil.copy2(p, site) for p in so_files]"
|
| 34 |
+
RUN python3 -c "import sysconfig; from pathlib import Path; site=Path(sysconfig.get_paths()['purelib']); dist=site/'nvdiffrast-0.0.0.dist-info'; dist.mkdir(exist_ok=True); (dist/'METADATA').write_text('Metadata-Version: 2.1\nName: nvdiffrast\nVersion: 0.0.0\n'); (dist/'WHEEL').write_text('Wheel-Version: 1.0\nGenerator: manual\nRoot-Is-Purelib: false\nTag: py3-none-any\n'); (dist/'top_level.txt').write_text('nvdiffrast\n'); (dist/'RECORD').write_text('')"
|
| 35 |
RUN python3 -c "import nvdiffrast.torch"
|
| 36 |
RUN rm -rf /tmp/nvdiffrast
|
| 37 |
|
|
|
|
| 39 |
WORKDIR /app/FoundationPose
|
| 40 |
RUN cd bundlesdf/mycuda && pip install . --no-build-isolation
|
| 41 |
|
| 42 |
+
# Copy application files
|
| 43 |
WORKDIR /app
|
| 44 |
+
COPY app.py client.py estimator.py masks.py .
|
|
|
|
|
|
|
| 45 |
|
| 46 |
CMD ["python3", "app.py"]
|
Dockerfile.base
CHANGED
|
@@ -1,5 +1,108 @@
|
|
| 1 |
-
# Base image with
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
ENV DEBIAN_FRONTEND=noninteractive
|
| 5 |
ENV CUDA_HOME=/usr/local/cuda
|
|
@@ -9,8 +112,7 @@ ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
|
|
| 9 |
# Only build for T4 (7.5) - reduces compilation memory by 50%
|
| 10 |
ENV TORCH_CUDA_ARCH_LIST="7.5"
|
| 11 |
|
| 12 |
-
# Install
|
| 13 |
-
# Remove problematic CUDA repo and install packages
|
| 14 |
RUN rm -f /etc/apt/sources.list.d/cuda*.list /etc/apt/sources.list.d/*.list && \
|
| 15 |
apt-get update && apt-get install -y --no-install-recommends --allow-unauthenticated \
|
| 16 |
ca-certificates \
|
|
@@ -22,6 +124,16 @@ RUN rm -f /etc/apt/sources.list.d/cuda*.list /etc/apt/sources.list.d/*.list && \
|
|
| 22 |
libgl1 \
|
| 23 |
libglib2.0-0 \
|
| 24 |
libgomp1 \
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
&& rm -rf /var/lib/apt/lists/* \
|
| 26 |
&& apt-get clean
|
| 27 |
|
|
@@ -32,23 +144,22 @@ RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1
|
|
| 32 |
# Upgrade pip
|
| 33 |
RUN python3 -m pip install --no-cache-dir --upgrade pip
|
| 34 |
|
| 35 |
-
|
|
|
|
|
|
|
| 36 |
|
| 37 |
-
# Install PyTorch (
|
| 38 |
-
RUN pip install --no-cache-dir torch==2.1.0 torchvision==0.16.0 --index-url https://download.pytorch.org/whl/cu118
|
| 39 |
|
| 40 |
-
#
|
| 41 |
-
# Pin NumPy to 1.x for CUDA extension compatibility
|
| 42 |
RUN pip install --no-cache-dir \
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
huggingface-hub>=0.20.0 \
|
| 48 |
&& pip cache purge
|
| 49 |
|
| 50 |
-
#
|
| 51 |
-
# Install BEFORE nvdiffrast because it needs python3.10-dev
|
| 52 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 53 |
cmake \
|
| 54 |
build-essential \
|
|
@@ -60,38 +171,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
| 60 |
pybind11-dev \
|
| 61 |
&& rm -rf /var/lib/apt/lists/*
|
| 62 |
|
| 63 |
-
# Install FoundationPose dependencies
|
| 64 |
-
RUN pip install --no-cache-dir \
|
| 65 |
-
trimesh==4.2.2 \
|
| 66 |
-
scipy==1.12.0 \
|
| 67 |
-
scikit-image==0.22.0 \
|
| 68 |
-
kornia==0.7.2 \
|
| 69 |
-
einops==0.7.0 \
|
| 70 |
-
timm==0.9.16 \
|
| 71 |
-
transformations==2024.6.1 \
|
| 72 |
-
pyyaml==6.0.1 \
|
| 73 |
-
joblib==1.4.0 \
|
| 74 |
-
psutil==6.1.1 \
|
| 75 |
-
open3d==0.18.0 \
|
| 76 |
-
&& pip cache purge
|
| 77 |
-
|
| 78 |
-
# Note: nvdiffrast will be built in final Dockerfile on HuggingFace (needs GPU)
|
| 79 |
-
|
| 80 |
-
# Clone FoundationPose
|
| 81 |
-
RUN git clone --depth 1 https://github.com/NVlabs/FoundationPose.git /app/FoundationPose && \
|
| 82 |
-
cd /app/FoundationPose/bundlesdf/mycuda && \
|
| 83 |
-
sed -i 's/-std=c++14/-std=c++17/g' setup.py
|
| 84 |
-
|
| 85 |
-
# Build mycpp (non-GPU C++ code - can be built without GPU)
|
| 86 |
-
WORKDIR /app/FoundationPose
|
| 87 |
-
RUN cd mycpp && mkdir -p build && cd build && cmake .. && make
|
| 88 |
-
|
| 89 |
-
# Download model weights (246MB)
|
| 90 |
WORKDIR /app
|
| 91 |
-
RUN python3 -c "from huggingface_hub import snapshot_download; \
|
| 92 |
-
snapshot_download(repo_id='gpue/foundationpose-weights', local_dir='weights', repo_type='model')"
|
| 93 |
-
|
| 94 |
-
# Note: Application files (app.py, client.py, estimator.py) are copied in main Dockerfile
|
| 95 |
-
# This allows updates without rebuilding the entire base image
|
| 96 |
|
| 97 |
EXPOSE 7860
|
|
|
|
| 1 |
+
# Base image with FoundationPose dependencies split into CPU (L1) and GPU (L2)
|
| 2 |
+
|
| 3 |
+
# Stage 1: CPU-only base with Python deps
|
| 4 |
+
FROM ubuntu:22.04 AS foundationpose-base-l1
|
| 5 |
+
|
| 6 |
+
ENV DEBIAN_FRONTEND=noninteractive
|
| 7 |
+
|
| 8 |
+
# Install system deps needed to build/run python packages
|
| 9 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 10 |
+
ca-certificates \
|
| 11 |
+
git \
|
| 12 |
+
python3.10 \
|
| 13 |
+
python3-pip \
|
| 14 |
+
build-essential \
|
| 15 |
+
cmake \
|
| 16 |
+
ninja-build \
|
| 17 |
+
libeigen3-dev \
|
| 18 |
+
python3.10-dev \
|
| 19 |
+
libboost-system-dev \
|
| 20 |
+
libboost-program-options-dev \
|
| 21 |
+
pybind11-dev \
|
| 22 |
+
libgl1 \
|
| 23 |
+
libglib2.0-0 \
|
| 24 |
+
libgomp1 \
|
| 25 |
+
libsm6 \
|
| 26 |
+
libxext6 \
|
| 27 |
+
libxrender1 \
|
| 28 |
+
libxkbcommon0 \
|
| 29 |
+
libx11-6 \
|
| 30 |
+
libxrandr2 \
|
| 31 |
+
libxi6 \
|
| 32 |
+
libxinerama1 \
|
| 33 |
+
libxcursor1 \
|
| 34 |
+
libspatialindex-dev \
|
| 35 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 36 |
+
|
| 37 |
+
# Set python as default
|
| 38 |
+
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 && \
|
| 39 |
+
update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1
|
| 40 |
+
|
| 41 |
+
RUN python3 -m pip install --no-cache-dir --upgrade pip
|
| 42 |
+
|
| 43 |
+
# Core python deps (CPU-safe)
|
| 44 |
+
# Keep NumPy <2 for extension compatibility
|
| 45 |
+
RUN pip install --no-cache-dir \
|
| 46 |
+
"numpy<2" \
|
| 47 |
+
Pillow>=10.0.0 \
|
| 48 |
+
gradio>=4.0.0 \
|
| 49 |
+
huggingface-hub>=0.20.0 \
|
| 50 |
+
scipy==1.12.0 \
|
| 51 |
+
scikit-image==0.22.0 \
|
| 52 |
+
scikit-learn==1.4.1.post1 \
|
| 53 |
+
kornia==0.7.2 \
|
| 54 |
+
einops==0.7.0 \
|
| 55 |
+
timm==0.9.16 \
|
| 56 |
+
pyyaml==6.0.1 \
|
| 57 |
+
ruamel.yaml==0.18.6 \
|
| 58 |
+
omegaconf==2.3.0 \
|
| 59 |
+
h5py==3.10.0 \
|
| 60 |
+
numba==0.59.1 \
|
| 61 |
+
imageio==2.34.0 \
|
| 62 |
+
joblib==1.3.2 \
|
| 63 |
+
psutil==6.1.1 \
|
| 64 |
+
albumentations==1.4.2 \
|
| 65 |
+
imgaug==0.4.0 \
|
| 66 |
+
seaborn==0.13.2 \
|
| 67 |
+
plotly==5.20.0 \
|
| 68 |
+
bokeh==3.4.0 \
|
| 69 |
+
colorama==0.4.6 \
|
| 70 |
+
GPUtil==1.4.0 \
|
| 71 |
+
simplejson==3.19.2 \
|
| 72 |
+
openpyxl==3.1.2 \
|
| 73 |
+
xlsxwriter==3.2.0 \
|
| 74 |
+
nodejs==0.1.1 \
|
| 75 |
+
jupyterlab==4.1.5 \
|
| 76 |
+
ipywidgets==8.1.2 \
|
| 77 |
+
py-spy==0.3.14 \
|
| 78 |
+
videoio==0.2.8 \
|
| 79 |
+
pypng==0.20220715.0 \
|
| 80 |
+
roma==1.4.4 \
|
| 81 |
+
transformations==2024.6.1 \
|
| 82 |
+
meshcat==0.3.2 \
|
| 83 |
+
webdataset==0.2.86 \
|
| 84 |
+
wandb==0.16.5 \
|
| 85 |
+
g4f==0.2.7.1 \
|
| 86 |
+
objaverse==0.1.7 \
|
| 87 |
+
opencv-python==4.9.0.80 \
|
| 88 |
+
opencv-contrib-python==4.9.0.80 \
|
| 89 |
+
open3d==0.18.0 \
|
| 90 |
+
pyglet==1.5.28 \
|
| 91 |
+
pysdf==0.1.9 \
|
| 92 |
+
trimesh==4.2.2 \
|
| 93 |
+
xatlas==0.0.9 \
|
| 94 |
+
rtree==1.2.0 \
|
| 95 |
+
pyrender==0.1.45 \
|
| 96 |
+
pyOpenGL>=3.1.0 \
|
| 97 |
+
pyOpenGL_accelerate>=3.1.0 \
|
| 98 |
+
pybullet==3.2.6 \
|
| 99 |
+
pycocotools==2.0.7 \
|
| 100 |
+
Panda3D==1.10.14 \
|
| 101 |
+
pin==2.7.0 \
|
| 102 |
+
&& pip cache purge
|
| 103 |
+
|
| 104 |
+
# Stage 2: GPU-enabled base
|
| 105 |
+
FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 AS foundationpose-base-l2
|
| 106 |
|
| 107 |
ENV DEBIAN_FRONTEND=noninteractive
|
| 108 |
ENV CUDA_HOME=/usr/local/cuda
|
|
|
|
| 112 |
# Only build for T4 (7.5) - reduces compilation memory by 50%
|
| 113 |
ENV TORCH_CUDA_ARCH_LIST="7.5"
|
| 114 |
|
| 115 |
+
# Install system deps
|
|
|
|
| 116 |
RUN rm -f /etc/apt/sources.list.d/cuda*.list /etc/apt/sources.list.d/*.list && \
|
| 117 |
apt-get update && apt-get install -y --no-install-recommends --allow-unauthenticated \
|
| 118 |
ca-certificates \
|
|
|
|
| 124 |
libgl1 \
|
| 125 |
libglib2.0-0 \
|
| 126 |
libgomp1 \
|
| 127 |
+
libsm6 \
|
| 128 |
+
libxext6 \
|
| 129 |
+
libxrender1 \
|
| 130 |
+
libxkbcommon0 \
|
| 131 |
+
libx11-6 \
|
| 132 |
+
libxrandr2 \
|
| 133 |
+
libxi6 \
|
| 134 |
+
libxinerama1 \
|
| 135 |
+
libxcursor1 \
|
| 136 |
+
libspatialindex-dev \
|
| 137 |
&& rm -rf /var/lib/apt/lists/* \
|
| 138 |
&& apt-get clean
|
| 139 |
|
|
|
|
| 144 |
# Upgrade pip
|
| 145 |
RUN python3 -m pip install --no-cache-dir --upgrade pip
|
| 146 |
|
| 147 |
+
# Copy CPU-only python deps from L1
|
| 148 |
+
COPY --from=foundationpose-base-l1 /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages
|
| 149 |
+
COPY --from=foundationpose-base-l1 /usr/local/bin /usr/local/bin
|
| 150 |
|
| 151 |
+
# Install PyTorch (CUDA 11.8)
|
| 152 |
+
RUN pip install --no-cache-dir torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu118
|
| 153 |
|
| 154 |
+
# GPU/torch-dependent deps
|
|
|
|
| 155 |
RUN pip install --no-cache-dir \
|
| 156 |
+
fvcore==0.1.5.post20221221 \
|
| 157 |
+
torchnet==0.0.4 \
|
| 158 |
+
ultralytics==8.0.120 \
|
| 159 |
+
warp-lang==1.0.2 \
|
|
|
|
| 160 |
&& pip cache purge
|
| 161 |
|
| 162 |
+
# Build deps required for CUDA extensions
|
|
|
|
| 163 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 164 |
cmake \
|
| 165 |
build-essential \
|
|
|
|
| 171 |
pybind11-dev \
|
| 172 |
&& rm -rf /var/lib/apt/lists/*
|
| 173 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
WORKDIR /app
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
|
| 176 |
EXPOSE 7860
|
deploy.sh
CHANGED
|
@@ -3,58 +3,63 @@
|
|
| 3 |
|
| 4 |
set -e
|
| 5 |
|
| 6 |
-
IMAGE_NAME="gpue/foundationpose-base"
|
| 7 |
TAG="latest"
|
| 8 |
PLATFORM="linux/amd64"
|
| 9 |
HF_SPACE="gpue/foundationpose"
|
| 10 |
-
|
| 11 |
|
| 12 |
echo "==================================="
|
| 13 |
echo "FoundationPose Deployment"
|
| 14 |
echo "==================================="
|
| 15 |
echo ""
|
| 16 |
|
| 17 |
-
#
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
echo "Platform: ${PLATFORM}"
|
| 20 |
echo "Image: ${IMAGE_NAME}:${TAG}"
|
| 21 |
echo ""
|
| 22 |
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
else
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
fi
|
| 34 |
-
echo "✓ DockerHub authentication verified"
|
| 35 |
fi
|
| 36 |
-
echo ""
|
| 37 |
-
|
| 38 |
-
echo "Building base image..."
|
| 39 |
-
docker build --platform ${PLATFORM} -f Dockerfile.base -t ${IMAGE_NAME}:${TAG} .
|
| 40 |
-
|
| 41 |
-
echo ""
|
| 42 |
-
echo "✓ Base image built successfully"
|
| 43 |
-
echo ""
|
| 44 |
-
|
| 45 |
-
# Show image size
|
| 46 |
-
IMAGE_SIZE=$(docker images ${IMAGE_NAME}:${TAG} --format "{{.Size}}")
|
| 47 |
-
echo "Image size: ${IMAGE_SIZE}"
|
| 48 |
-
echo ""
|
| 49 |
-
|
| 50 |
-
echo "Pushing to DockerHub..."
|
| 51 |
-
docker push ${IMAGE_NAME}:${TAG}
|
| 52 |
|
| 53 |
echo ""
|
| 54 |
-
echo "✓ Base image pushed to DockerHub: ${IMAGE_NAME}:${TAG}"
|
| 55 |
-
echo ""
|
| 56 |
-
|
| 57 |
-
# Stage 2: Deploy to HuggingFace
|
| 58 |
echo "Stage 2: Deploying to HuggingFace Space"
|
| 59 |
echo ""
|
| 60 |
|
|
@@ -62,7 +67,7 @@ echo ""
|
|
| 62 |
if [ ! -d .git ]; then
|
| 63 |
echo "Initializing git repository..."
|
| 64 |
git init
|
| 65 |
-
git remote add origin https://huggingface.co/spaces/${HF_SPACE}
|
| 66 |
echo "✓ Git repository initialized"
|
| 67 |
echo ""
|
| 68 |
fi
|
|
@@ -70,8 +75,8 @@ fi
|
|
| 70 |
# Check if there are changes to commit
|
| 71 |
if [[ -n $(git status -s) ]]; then
|
| 72 |
echo "Committing changes..."
|
| 73 |
-
git add Dockerfile Dockerfile.base requirements.txt deploy.sh app.py client.py estimator.py masks.py
|
| 74 |
-
git commit -m "
|
| 75 |
echo "✓ Changes committed"
|
| 76 |
else
|
| 77 |
echo "No changes to commit"
|
|
@@ -80,18 +85,16 @@ fi
|
|
| 80 |
# Push to HuggingFace
|
| 81 |
echo ""
|
| 82 |
echo "Pushing to HuggingFace Space: ${HF_SPACE}"
|
| 83 |
-
git push https://huggingface.co/spaces/${HF_SPACE} main --force
|
| 84 |
|
| 85 |
echo ""
|
| 86 |
echo "✓ Pushed to HuggingFace"
|
| 87 |
echo ""
|
| 88 |
echo "HuggingFace will now:"
|
| 89 |
echo " 1. Pull base image from DockerHub (${IMAGE_NAME}:${TAG})"
|
| 90 |
-
echo " 2.
|
| 91 |
-
echo " 3.
|
| 92 |
-
echo " 4.
|
| 93 |
-
echo " 5. Download model weights (246MB)"
|
| 94 |
-
echo " 6. Start the Gradio app"
|
| 95 |
echo ""
|
| 96 |
|
| 97 |
# Follow build logs
|
|
@@ -99,67 +102,90 @@ echo "Following build logs..."
|
|
| 99 |
echo "Press Ctrl+C to stop watching"
|
| 100 |
echo ""
|
| 101 |
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
"https://huggingface.co/api/spaces/${HF_SPACE}/logs/build" 2>/dev/null | \
|
| 109 |
-
while IFS= read -r line; do
|
| 110 |
-
# Parse JSON and extract data field
|
| 111 |
-
echo "$line" | grep -o '"data":"[^"]*"' | sed 's/"data":"//;s/"$//' | sed 's/\\n/\n/g'
|
| 112 |
-
done
|
| 113 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
echo ""
|
| 115 |
-
echo "
|
| 116 |
-
echo "
|
| 117 |
-
echo "===================================="
|
| 118 |
echo ""
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
STATUS_JSON=$(curl -s -H "Authorization: Bearer ${HF_TOKEN}" \
|
| 125 |
-
"https://huggingface.co/api/spaces/${HF_SPACE}")
|
| 126 |
-
|
| 127 |
-
STAGE=$(echo "$STATUS_JSON" | python3 -c "import sys, json; data=json.load(sys.stdin); print(data.get('runtime', {}).get('stage', 'UNKNOWN'))" 2>/dev/null)
|
| 128 |
-
ERROR_MSG=$(echo "$STATUS_JSON" | python3 -c "import sys, json; data=json.load(sys.stdin); print(data.get('runtime', {}).get('errorMessage', ''))" 2>/dev/null)
|
| 129 |
-
|
| 130 |
-
echo "Final Status: ${STAGE}"
|
| 131 |
-
|
| 132 |
-
if [ "${STAGE}" = "RUNNING" ]; then
|
| 133 |
-
echo "✓ Deployment successful!"
|
| 134 |
-
echo ""
|
| 135 |
-
echo "Space URL: https://${HF_SPACE/\//-}.hf.space"
|
| 136 |
-
echo "API URL: https://${HF_SPACE/\//-}.hf.space/gradio_api/info"
|
| 137 |
-
echo ""
|
| 138 |
-
echo "Test with: cd ../training && make test-perception-api"
|
| 139 |
-
elif [ "${STAGE}" = "BUILD_ERROR" ]; then
|
| 140 |
-
echo "✗ Build failed!"
|
| 141 |
-
if [ -n "${ERROR_MSG}" ]; then
|
| 142 |
-
echo "Error: ${ERROR_MSG}"
|
| 143 |
-
fi
|
| 144 |
-
echo ""
|
| 145 |
-
echo "If still getting OOM errors, consider:"
|
| 146 |
-
echo " - Moving weights to runtime download (not build time)"
|
| 147 |
-
echo " - Requesting larger build instance from HuggingFace"
|
| 148 |
-
echo " - Using only CUDA arch 7.5 (T4 only)"
|
| 149 |
-
exit 1
|
| 150 |
-
else
|
| 151 |
-
echo "Status: ${STAGE}"
|
| 152 |
-
if [ -n "${ERROR_MSG}" ]; then
|
| 153 |
-
echo "Message: ${ERROR_MSG}"
|
| 154 |
-
fi
|
| 155 |
fi
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
else
|
| 157 |
-
echo "
|
| 158 |
-
|
| 159 |
-
|
|
|
|
| 160 |
fi
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
else
|
| 162 |
-
echo "Warning:
|
| 163 |
echo "To follow logs manually:"
|
| 164 |
echo " curl -N -H \"Authorization: Bearer \$HF_TOKEN\" \"https://huggingface.co/api/spaces/${HF_SPACE}/logs/build\""
|
| 165 |
fi
|
|
|
|
| 3 |
|
| 4 |
set -e
|
| 5 |
|
| 6 |
+
IMAGE_NAME="gpue/foundationpose-base-l2"
|
| 7 |
TAG="latest"
|
| 8 |
PLATFORM="linux/amd64"
|
| 9 |
HF_SPACE="gpue/foundationpose"
|
| 10 |
+
ENV_FILE=".env"
|
| 11 |
|
| 12 |
echo "==================================="
|
| 13 |
echo "FoundationPose Deployment"
|
| 14 |
echo "==================================="
|
| 15 |
echo ""
|
| 16 |
|
| 17 |
+
# Load tokens from .env
|
| 18 |
+
if [ -f "${ENV_FILE}" ]; then
|
| 19 |
+
set -a
|
| 20 |
+
# shellcheck disable=SC1090
|
| 21 |
+
source "${ENV_FILE}"
|
| 22 |
+
set +a
|
| 23 |
+
else
|
| 24 |
+
echo "Warning: ${ENV_FILE} not found"
|
| 25 |
+
fi
|
| 26 |
+
|
| 27 |
+
# Ensure hf CLI is available for job logs
|
| 28 |
+
if ! command -v hf >/dev/null 2>&1; then
|
| 29 |
+
echo "Installing huggingface_hub CLI (hf)..."
|
| 30 |
+
python3 -m pip install --user --quiet huggingface_hub
|
| 31 |
+
export PATH="$HOME/.local/bin:$PATH"
|
| 32 |
+
fi
|
| 33 |
+
|
| 34 |
+
echo "Stage 1: Building base image via HF Job"
|
| 35 |
echo "Platform: ${PLATFORM}"
|
| 36 |
echo "Image: ${IMAGE_NAME}:${TAG}"
|
| 37 |
echo ""
|
| 38 |
|
| 39 |
+
JOB_OUTPUT=$(python3 scripts/run_hf_image_job.py \
|
| 40 |
+
--image-name "${IMAGE_NAME}" \
|
| 41 |
+
--tag "${TAG}" \
|
| 42 |
+
--platform "${PLATFORM}" \
|
| 43 |
+
--dockerfile "Dockerfile.base" \
|
| 44 |
+
--target "foundationpose-base-l2" \
|
| 45 |
+
--git-repo "https://huggingface.co/spaces/${HF_SPACE}" 2>&1 | tee /tmp/hf_image_job.log)
|
| 46 |
+
|
| 47 |
+
JOB_ID=$(echo "${JOB_OUTPUT}" | awk '/Job ID:/ {print $3}')
|
| 48 |
+
if [ -z "${JOB_ID}" ]; then
|
| 49 |
+
echo "Warning: Could not parse HF job id. See /tmp/hf_image_job.log"
|
| 50 |
else
|
| 51 |
+
echo "Following job logs for 1 minute..."
|
| 52 |
+
if command -v hf >/dev/null 2>&1; then
|
| 53 |
+
(timeout 60 hf jobs logs "${JOB_ID}") || true
|
| 54 |
+
echo ""
|
| 55 |
+
echo "Job status:"
|
| 56 |
+
hf jobs status "${JOB_ID}" || true
|
| 57 |
+
else
|
| 58 |
+
echo "hf CLI not available; job logs skipped"
|
| 59 |
fi
|
|
|
|
| 60 |
fi
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
echo ""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
echo "Stage 2: Deploying to HuggingFace Space"
|
| 64 |
echo ""
|
| 65 |
|
|
|
|
| 67 |
if [ ! -d .git ]; then
|
| 68 |
echo "Initializing git repository..."
|
| 69 |
git init
|
| 70 |
+
git remote add origin "https://huggingface.co/spaces/${HF_SPACE}"
|
| 71 |
echo "✓ Git repository initialized"
|
| 72 |
echo ""
|
| 73 |
fi
|
|
|
|
| 75 |
# Check if there are changes to commit
|
| 76 |
if [[ -n $(git status -s) ]]; then
|
| 77 |
echo "Committing changes..."
|
| 78 |
+
git add Dockerfile Dockerfile.base requirements.txt deploy.sh app.py client.py estimator.py masks.py scripts/run_hf_image_job.py download_weights.py
|
| 79 |
+
git commit -m "Update base image build and deps"
|
| 80 |
echo "✓ Changes committed"
|
| 81 |
else
|
| 82 |
echo "No changes to commit"
|
|
|
|
| 85 |
# Push to HuggingFace
|
| 86 |
echo ""
|
| 87 |
echo "Pushing to HuggingFace Space: ${HF_SPACE}"
|
| 88 |
+
git push "https://huggingface.co/spaces/${HF_SPACE}" main --force
|
| 89 |
|
| 90 |
echo ""
|
| 91 |
echo "✓ Pushed to HuggingFace"
|
| 92 |
echo ""
|
| 93 |
echo "HuggingFace will now:"
|
| 94 |
echo " 1. Pull base image from DockerHub (${IMAGE_NAME}:${TAG})"
|
| 95 |
+
echo " 2. Build CUDA extensions"
|
| 96 |
+
echo " 3. Download model weights"
|
| 97 |
+
echo " 4. Start the Gradio app"
|
|
|
|
|
|
|
| 98 |
echo ""
|
| 99 |
|
| 100 |
# Follow build logs
|
|
|
|
| 102 |
echo "Press Ctrl+C to stop watching"
|
| 103 |
echo ""
|
| 104 |
|
| 105 |
+
HF_TOKEN="${HUGGINGFACE_TOKEN:-${HF_TOKEN:-}}"
|
| 106 |
+
|
| 107 |
+
if [ -n "${HF_TOKEN}" ]; then
|
| 108 |
+
curl -N -H "Authorization: Bearer ${HF_TOKEN}" \
|
| 109 |
+
"https://huggingface.co/api/spaces/${HF_SPACE}/logs/build" 2>/dev/null | \
|
| 110 |
+
while IFS= read -r line; do
|
| 111 |
+
echo "$line" | grep -o '"data":"[^"]*"' | sed 's/"data":"//;s/"$//' | sed 's/\\n/\n/g'
|
| 112 |
+
done
|
| 113 |
+
|
| 114 |
+
echo ""
|
| 115 |
+
echo "===================================="
|
| 116 |
+
echo "Build Status Check"
|
| 117 |
+
echo "===================================="
|
| 118 |
+
echo ""
|
| 119 |
+
|
| 120 |
+
# Wait a moment for status to update
|
| 121 |
+
sleep 2
|
| 122 |
+
|
| 123 |
+
# Check final build status
|
| 124 |
+
STATUS_JSON=$(curl -s -H "Authorization: Bearer ${HF_TOKEN}" \
|
| 125 |
+
"https://huggingface.co/api/spaces/${HF_SPACE}")
|
| 126 |
|
| 127 |
+
STAGE=$(echo "$STATUS_JSON" | python3 -c "import sys, json; data=json.load(sys.stdin); print(data.get('runtime', {}).get('stage', 'UNKNOWN'))" 2>/dev/null)
|
| 128 |
+
ERROR_MSG=$(echo "$STATUS_JSON" | python3 -c "import sys, json; data=json.load(sys.stdin); print(data.get('runtime', {}).get('errorMessage', ''))" 2>/dev/null)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
|
| 130 |
+
echo "Final Status: ${STAGE}"
|
| 131 |
+
|
| 132 |
+
if [ "${STAGE}" = "RUNNING" ]; then
|
| 133 |
+
echo "✓ Deployment successful!"
|
| 134 |
echo ""
|
| 135 |
+
echo "Space URL: https://${HF_SPACE/\//-}.hf.space"
|
| 136 |
+
echo "API URL: https://${HF_SPACE/\//-}.hf.space/gradio_api/info"
|
|
|
|
| 137 |
echo ""
|
| 138 |
+
echo "Test with: cd ../training && make test-perception-api"
|
| 139 |
+
elif [ "${STAGE}" = "BUILD_ERROR" ]; then
|
| 140 |
+
echo "✗ Build failed!"
|
| 141 |
+
if [ -n "${ERROR_MSG}" ]; then
|
| 142 |
+
echo "Error: ${ERROR_MSG}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
fi
|
| 144 |
+
echo ""
|
| 145 |
+
echo "If still getting OOM errors, consider:"
|
| 146 |
+
echo " - Moving weights to runtime download (not build time)"
|
| 147 |
+
echo " - Requesting larger build instance from HuggingFace"
|
| 148 |
+
echo " - Using only CUDA arch 7.5 (T4 only)"
|
| 149 |
+
exit 1
|
| 150 |
else
|
| 151 |
+
echo "Status: ${STAGE}"
|
| 152 |
+
if [ -n "${ERROR_MSG}" ]; then
|
| 153 |
+
echo "Message: ${ERROR_MSG}"
|
| 154 |
+
fi
|
| 155 |
fi
|
| 156 |
+
|
| 157 |
+
echo ""
|
| 158 |
+
echo "Following application logs for 1 minute..."
|
| 159 |
+
LOG_URL="https://huggingface.co/api/spaces/${HF_SPACE}/logs"
|
| 160 |
+
python3 - <<'PY'
|
| 161 |
+
import os
|
| 162 |
+
import subprocess
|
| 163 |
+
import sys
|
| 164 |
+
import time
|
| 165 |
+
|
| 166 |
+
log_url = os.environ.get("LOG_URL")
|
| 167 |
+
token = os.environ.get("HF_TOKEN")
|
| 168 |
+
if not log_url or not token:
|
| 169 |
+
print("Skipping app logs: missing LOG_URL or HF_TOKEN")
|
| 170 |
+
raise SystemExit(0)
|
| 171 |
+
|
| 172 |
+
proc = subprocess.Popen(
|
| 173 |
+
["curl", "-N", "-H", f"Authorization: Bearer {token}", log_url],
|
| 174 |
+
stdout=sys.stdout,
|
| 175 |
+
stderr=subprocess.DEVNULL,
|
| 176 |
+
)
|
| 177 |
+
try:
|
| 178 |
+
time.sleep(60)
|
| 179 |
+
finally:
|
| 180 |
+
proc.terminate()
|
| 181 |
+
try:
|
| 182 |
+
proc.wait(timeout=5)
|
| 183 |
+
except Exception:
|
| 184 |
+
proc.kill()
|
| 185 |
+
PY
|
| 186 |
+
|
| 187 |
else
|
| 188 |
+
echo "Warning: HF token not available; cannot follow logs"
|
| 189 |
echo "To follow logs manually:"
|
| 190 |
echo " curl -N -H \"Authorization: Bearer \$HF_TOKEN\" \"https://huggingface.co/api/spaces/${HF_SPACE}/logs/build\""
|
| 191 |
fi
|
scripts/run_hf_image_job.py
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Submit a HuggingFace Job that builds the FoundationPose base image and pushes it to Docker Hub.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import argparse
|
| 7 |
+
import os
|
| 8 |
+
import sys
|
| 9 |
+
|
| 10 |
+
from huggingface_hub import run_job
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def main() -> None:
    """Submit a HuggingFace Job that builds the FoundationPose base image.

    The job runs in a docker-in-docker container: it starts dockerd, clones
    the build-context repo (authenticating with the HF token when available),
    logs in to Docker Hub, builds the requested Dockerfile target, and pushes
    the resulting image.

    Exits:
        1 — when the HuggingFace token or Docker Hub token cannot be resolved
            from CLI flags or the environment.
    """
    parser = argparse.ArgumentParser(
        description="Build and push the FoundationPose base image via HuggingFace Jobs"
    )
    parser.add_argument(
        "--image-name",
        default="gpue/foundationpose-base-l2",
        help="Docker Hub image name (default: gpue/foundationpose-base-l2)",
    )
    parser.add_argument(
        "--tag",
        default="latest",
        help="Docker image tag (default: latest)",
    )
    parser.add_argument(
        "--platform",
        default="linux/amd64",
        help="Target platform for docker build (default: linux/amd64)",
    )
    parser.add_argument(
        "--dockerfile",
        default="Dockerfile.base",
        help="Dockerfile path inside repo (default: Dockerfile.base)",
    )
    parser.add_argument(
        "--context",
        default=".",
        help="Docker build context path inside repo (default: .)",
    )
    parser.add_argument(
        "--target",
        default="foundationpose-base-l2",
        help="Docker build target (default: foundationpose-base-l2)",
    )
    parser.add_argument(
        "--git-repo",
        default="https://huggingface.co/spaces/gpue/foundationpose",
        help="Git repo to clone for build context (default: HF space repo)",
    )
    parser.add_argument(
        "--flavor",
        default="l40s",
        help="HF Jobs hardware flavor (default: l40s)",
    )
    parser.add_argument(
        "--timeout",
        default="2h",
        help="Job timeout (default: 2h)",
    )
    parser.add_argument("--namespace", help="Organization namespace (optional)")
    parser.add_argument(
        "--hf-token",
        help="HuggingFace token (default: from HF_TOKEN or HUGGINGFACE_TOKEN env)",
    )
    parser.add_argument(
        "--docker-user",
        default=os.getenv("DOCKER_HF_USER", "gpue"),
        help="Docker Hub username (default: DOCKER_HF_USER or gpue)",
    )
    parser.add_argument(
        "--docker-token",
        help="Docker Hub token (default: from DOCKER_HF_PAT env)",
    )

    args = parser.parse_args()

    # CLI flags take precedence; fall back to conventional env variables.
    hf_token = args.hf_token or os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
    docker_token = args.docker_token or os.getenv("DOCKER_HF_PAT")

    # Fix: fatal diagnostics go to stderr, not stdout (exit code unchanged),
    # so scripted callers capturing stdout do not swallow the error text.
    if not hf_token:
        print("Error: missing HF token (set HF_TOKEN or HUGGINGFACE_TOKEN)", file=sys.stderr)
        sys.exit(1)
    if not docker_token:
        print("Error: missing Docker token (set DOCKER_HF_PAT or --docker-token)", file=sys.stderr)
        sys.exit(1)

    # Non-sensitive build parameters are passed as plain env vars...
    env = {
        "IMAGE_NAME": args.image_name,
        "IMAGE_TAG": args.tag,
        "PLATFORM": args.platform,
        "DOCKERFILE": args.dockerfile,
        "CONTEXT": args.context,
        "TARGET": args.target,
        "GIT_REPO": args.git_repo,
        "DOCKER_USER": args.docker_user,
    }
    # ...while tokens ride in the Jobs secret channel so they are not logged.
    secrets = {
        "HF_TOKEN": hf_token,
        "DOCKER_TOKEN": docker_token,
    }

    # Build script executed inside the docker:dind job container (Alpine sh).
    command = [
        "sh",
        "-c",
        r"""
set -euo pipefail

echo "Installing git and certificates..."
apk add --no-cache git ca-certificates curl >/dev/null

# Start Docker daemon (DinD image)
echo "Starting Docker daemon..."
dockerd-entrypoint.sh > /tmp/dockerd.log 2>&1 &

# Wait for Docker
for i in $(seq 1 30); do
  if docker info >/dev/null 2>&1; then
    break
  fi
  sleep 1
  if [ "$i" -eq 30 ]; then
    echo "Docker did not start in time. Logs:" >&2
    tail -n 200 /tmp/dockerd.log >&2 || true
    exit 1
  fi
done

echo "Cloning build context..."
if [ -n "${HF_TOKEN:-}" ]; then
  AUTH_REPO=$(echo "$GIT_REPO" | sed -e "s#https://#https://user:${HF_TOKEN}@#")
  git clone --depth 1 "$AUTH_REPO" /work/repo
else
  git clone --depth 1 "$GIT_REPO" /work/repo
fi

cd /work/repo

echo "Logging in to Docker Hub..."
echo "$DOCKER_TOKEN" | docker login -u "$DOCKER_USER" --password-stdin

IMAGE_REF="$IMAGE_NAME:$IMAGE_TAG"

echo "Building image $IMAGE_REF (target: $TARGET)..."
docker build --platform "$PLATFORM" -f "$DOCKERFILE" --target "$TARGET" -t "$IMAGE_REF" "$CONTEXT"

echo "Pushing image $IMAGE_REF..."
docker push "$IMAGE_REF"

echo "✓ Image pushed successfully"
""",
    ]

    print("Submitting HF job for image build...")
    print(f"  Image: {args.image_name}:{args.tag}")
    print(f"  Target: {args.target}")
    print(f"  Repo: {args.git_repo}")
    print(f"  Dockerfile: {args.dockerfile}")
    print(f"  Flavor: {args.flavor}")
    print(f"  Timeout: {args.timeout}")
    print()

    job_info = run_job(
        image="docker:24.0.7-dind",
        command=command,
        env=env,
        secrets=secrets,
        flavor=args.flavor,
        timeout=args.timeout,
        namespace=args.namespace,
    )

    print("✓ Job submitted")
    print(f"  Job ID: {job_info.id}")
    print(f"  Job URL: {job_info.url}")
    print()
    print("Monitor logs:")
    print(f"  hf jobs logs {job_info.id}")
    print("Check status:")
    print(f"  hf jobs status {job_info.id}")
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
# Script entry point: run the CLI when executed directly (not on import).
if __name__ == "__main__":
    main()
|