cadgenbench-eval-gpu / Dockerfile
Michael Rabinovich
eval_job: upload renders to the public bucket; report references them by URL
2cf3635
raw
history blame
4.21 kB
# syntax=docker/dockerfile:1.7
#
# HF Space at HuggingAI4Engineering/cadgenbench-eval-gpu.
# Provides the Docker image consumed by the leaderboard's HF Jobs
# eval pipeline (see space-setup/jobs-migration.md). The Space
# itself is not run as a Gradio app; the image exists only to be
# pulled by `hf jobs run --image hf.co/spaces/...`. Pause the
# Space after the first successful build so no idle hardware cost
# accrues; the built image stays available to Jobs while paused.
#
# Local smoke test (slow on Apple Silicon under Rosetta):
#
#   docker buildx build --platform linux/amd64 \
#     -t cadgenbench-eval-gpu-test .
FROM nvidia/cuda:12.4.1-runtime-ubuntu22.04

# Build-time only: keeps apt/debconf from prompting during the RUN steps
# of this build. Declared as ARG rather than ENV so the setting is not
# baked into the environment of containers started from this image.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment:
#   PYTHONUNBUFFERED / PYTHONDONTWRITEBYTECODE  unbuffered output, no .pyc files
#   PIP_DISABLE_PIP_VERSION_CHECK               skip pip's self-update check
#   VIRTUAL_ENV + PATH                          /opt/venv/bin is searched before
#                                               the system bin directories
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    VIRTUAL_ENV=/opt/venv \
    PATH="/opt/venv/bin:$PATH"
# Interpreter and native libraries, installed in a single layer:
#   * Python 3.12 comes from the deadsnakes PPA because Ubuntu 22.04
#     ships 3.10 by default.
#   * The remaining packages are the apt runtime deps shared with the
#     leaderboard Dockerfile (OCP / build123d / Pillow / VTK).
#   * libegl1 + libegl-mesa0 provide the EGL surface vtk-egl binds to;
#     on this CUDA-base image the NVIDIA driver supplies hardware
#     OpenGL, no Mesa fallback path.
# The apt lists are removed in the same layer, and `python` / `python3`
# under /usr/local/bin are symlinked to the 3.12 interpreter.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
        software-properties-common \
 && add-apt-repository -y ppa:deadsnakes/ppa \
 && apt-get update \
 && apt-get install -y --no-install-recommends \
        ca-certificates \
        git \
        libegl-mesa0 \
        libegl1 \
        libfontconfig1 \
        libgl1 \
        libglib2.0-0 \
        libgomp1 \
        libsm6 \
        libxext6 \
        libxrender1 \
        python3.12 \
        python3.12-dev \
        python3.12-venv \
 && rm -rf /var/lib/apt/lists/* \
 && ln -sf /usr/bin/python3.12 /usr/local/bin/python \
 && ln -sf /usr/bin/python3.12 /usr/local/bin/python3
# Build the runtime Python environment in a venv. The CUDA/deadsnakes
# apt setup leaves distro Python modules (for example blinker) on the
# system path; isolating pip installs avoids trying to uninstall
# apt-owned distutils packages during dependency resolution.
#
# The pip upgrade calls the venv interpreter by absolute path. When this
# shell first resolves `python`, "$VIRTUAL_ENV/bin" does not exist yet, so
# the lookup lands on the system interpreter; a shell that remembers that
# location would reuse it for a second bare `python` in the same command
# line and upgrade pip outside the venv.
RUN python -m venv "$VIRTUAL_ENV" \
 && "$VIRTUAL_ENV/bin/python" -m pip install --no-cache-dir --upgrade pip
# cadgenbench from the public GitHub repo, same convention and ARG name
# as the leaderboard Dockerfile. Installing it brings in the evaluator
# dependencies (including Open3D for alignment).
#
# The default below is a fixed short commit SHA, so a plain rebuild
# installs exactly that revision. To build against another ref (for
# example `main` to pick up the latest evaluator), pass
# `--build-arg CADGENBENCH_SHA=<branch|tag|sha>`. Keep the default locked
# to a commit SHA for the v1 release so scores stay reproducible
# (see space-setup/post-gt-swap.md Stage F).
ARG CADGENBENCH_SHA=3d49822
RUN python -m pip install --no-cache-dir \
    "cadgenbench @ git+https://github.com/huggingface/cadgenbench.git@${CADGENBENCH_SHA}"
# Shard mode pushes per-fixture artifacts to an HF Storage Bucket through
# the bucket API (HfApi.sync_bucket); that API needs a recent
# huggingface_hub, hence the lower bound.
RUN python -m pip install \
        --no-cache-dir \
        'huggingface_hub>=1.16.0'
# Replace the PyPI `vtk` wheel with `vtk-egl`.
#
# The cadgenbench wheel pulls vanilla `vtk` from PyPI, which is built
# with vtkXOpenGLRenderWindow and needs an X server. vtk-egl is the same
# VTK compiled against EGL, so it acquires an off-screen GL context
# against the NVIDIA driver on this CUDA-base image. PyVista picks up
# whichever `vtk` dist is installed, so no cadgenbench code change is
# required. Same shape as the leaderboard's vtk-osmesa swap, just the
# GPU counterpart.
RUN python -m pip uninstall --yes vtk \
 && python -m pip install \
        --no-cache-dir \
        --extra-index-url=https://wheels.vtk.org \
        vtk-egl
# Create the unprivileged account. HF Spaces conventionally run as uid
# 1000. The venv is handed to that user so HF Jobs can install
# short-lived extras (for example baseline LLM clients) through the
# orchestrator's --pip-install hook.
#
# This step runs before eval_job.py is copied in: the recursive chown
# walks the whole venv, and keeping it ahead of the COPY means an edit to
# eval_job.py reuses the cached layer instead of repeating it.
RUN useradd -m -u 1000 user \
 && chown -R user:user "$VIRTUAL_ENV"

# In-job entrypoint. Invoked by:
#
#   hf jobs run --image hf.co/spaces/HuggingAI4Engineering/cadgenbench-eval-gpu \
#     --flavor a10g-large --secrets HF_TOKEN \
#     python /opt/eval_job.py <submission_id> <zip_url>
#
# COPY without --chown writes the file as root regardless of the active
# USER, so its ownership is the same as when it was copied before the
# account existed.
COPY eval_job.py /opt/eval_job.py

# Drop privileges for everything that runs in the container.
USER user
WORKDIR /home/user
# Idle default command so the Space's runtime starts without
# restart-flapping: `sleep infinity` just keeps the container alive.
# Jobs supply their own command (python /opt/eval_job.py ...) in its
# place. Pause the Space via HF UI or HfApi().pause_space() after the
# first green build; the cached image stays available to Jobs.
CMD ["sleep", "infinity"]