Spaces:

HuggingAI4Engineering
/

cadgenbench-eval-gpu

Paused

Michael Rabinovich

eval_job: upload renders to the public bucket; report references them by URL

2cf3635 about 2 months ago

4.21 kB

	# syntax=docker/dockerfile:1.7
	#
	# HF Space at HuggingAI4Engineering/cadgenbench-eval-gpu.
	# Provides the Docker image consumed by the leaderboard's HF Jobs
	# eval pipeline (see space-setup/jobs-migration.md). The Space
	# itself is not run as a Gradio app; the image exists only to be
	# pulled by `hf jobs run --image hf.co/spaces/...`. Pause the
	# Space after the first successful build so no idle hardware cost
	# accrues; the built image stays available to Jobs while paused.
	#
	# Local smoke test (slow on Apple Silicon under Rosetta):
	#
	# docker buildx build --platform linux/amd64 \
	# -t cadgenbench-eval-gpu-test .

	FROM nvidia/cuda:12.4.1-runtime-ubuntu22.04

	# Build-time only: keeps apt / add-apt-repository from prompting during
	# the RUN steps below without leaking the setting into the runtime
	# environment of containers started from this image.
	ARG DEBIAN_FRONTEND=noninteractive

	# Runtime environment.
	#   PYTHONUNBUFFERED / PYTHONDONTWRITEBYTECODE: unbuffered stdout/stderr
	#     and no .pyc files written at runtime.
	#   VIRTUAL_ENV + PATH: make the venv under /opt/venv the default
	#     `python` / `pip` for every later RUN step and at runtime.
	#   NVIDIA_DRIVER_CAPABILITIES: the nvidia/cuda base image defaults to
	#     `compute,utility`; `graphics` is the capability the NVIDIA
	#     container runtime uses to expose the driver's OpenGL/EGL libraries.
	#     NOTE(review): assumes the HF Jobs runtime honours this variable —
	#     confirm against a render job.
	ENV PYTHONUNBUFFERED=1 \
	    PYTHONDONTWRITEBYTECODE=1 \
	    PIP_DISABLE_PIP_VERSION_CHECK=1 \
	    VIRTUAL_ENV=/opt/venv \
	    PATH="/opt/venv/bin:$PATH" \
	    NVIDIA_DRIVER_CAPABILITIES=compute,utility,graphics

	# System layer: interpreter plus native libraries.
	#
	# Ubuntu 22.04 ships Python 3.10, so Python 3.12 comes from the
	# deadsnakes PPA (software-properties-common supplies
	# add-apt-repository). The remaining packages are the apt runtime deps
	# shared with the leaderboard Dockerfile (OCP / build123d / Pillow /
	# VTK). libegl1 + libegl-mesa0 provide the EGL surface vtk-egl binds
	# to; the intent on this CUDA base image is that the NVIDIA driver
	# supplies hardware OpenGL rather than a Mesa fallback path.
	#
	# The apt lists are removed in the same layer, and `python` / `python3`
	# in /usr/local/bin are pointed at the 3.12 interpreter.
	RUN apt-get update \
	    && apt-get install -y --no-install-recommends software-properties-common \
	    && add-apt-repository -y ppa:deadsnakes/ppa \
	    && apt-get update \
	    && apt-get install -y --no-install-recommends \
	        ca-certificates \
	        git \
	        libegl-mesa0 \
	        libegl1 \
	        libfontconfig1 \
	        libgl1 \
	        libglib2.0-0 \
	        libgomp1 \
	        libsm6 \
	        libxext6 \
	        libxrender1 \
	        python3.12 \
	        python3.12-dev \
	        python3.12-venv \
	    && rm -rf /var/lib/apt/lists/* \
	    && ln -sf /usr/bin/python3.12 /usr/local/bin/python \
	    && ln -sf /usr/bin/python3.12 /usr/local/bin/python3

	# Build the runtime Python environment in a venv. The CUDA/deadsnakes
	# apt setup leaves distro Python modules (for example blinker) on the
	# system path; isolating pip installs avoids trying to uninstall
	# apt-owned distutils packages during dependency resolution.
	#
	# Both interpreters are named by absolute path on purpose. /opt/venv/bin
	# is already first on PATH but does not exist until the first command
	# finishes, so a bare `python` would mean the system interpreter for the
	# first command and would only mean the venv interpreter for the second
	# if the shell re-resolves the name (command hashing can keep the old
	# location). Explicit paths remove that ambiguity: the system 3.12
	# creates the venv, and the venv's own interpreter upgrades its pip.
	RUN /usr/bin/python3.12 -m venv "$VIRTUAL_ENV" \
	    && "$VIRTUAL_ENV/bin/python" -m pip install --no-cache-dir --upgrade pip

	# cadgenbench from the public GitHub repo, same convention and ARG name
	# as the leaderboard Dockerfile. CADGENBENCH_SHA is any git ref pip can
	# check out; the default below is a short commit SHA, so a plain rebuild
	# installs that exact revision. Override at build time to track a branch
	# and pick up newer evaluator dependencies (including Open3D for
	# alignment), e.g.:
	#
	# docker buildx build --build-arg CADGENBENCH_SHA=main ...
	#
	# NOTE(review): this comment previously said the default was `main`,
	# which does not match the pinned value in the ARG — confirm which is
	# intended. For the v1 release, lock to a full commit SHA for
	# reproducible scores (see space-setup/post-gt-swap.md Stage F).
	ARG CADGENBENCH_SHA=3d49822
	RUN python -m pip install --no-cache-dir \
	"cadgenbench @ git+https://github.com/huggingface/cadgenbench.git@${CADGENBENCH_SHA}"

	# huggingface_hub floor for shard mode: per-fixture artifacts are synced
	# to an HF Storage Bucket through the bucket API (HfApi.sync_bucket),
	# which needs a recent huggingface_hub release.
	RUN python -m pip install --no-cache-dir \
	    "huggingface_hub>=1.16.0"

	# Replace the PyPI `vtk` wheel with `vtk-egl`.
	#
	# cadgenbench depends on vanilla `vtk`, which is built with
	# vtkXOpenGLRenderWindow and therefore needs an X server. vtk-egl is
	# the same VTK compiled against EGL, so it can acquire an off-screen GL
	# context against the NVIDIA driver on this CUDA base image. PyVista
	# uses whichever `vtk` distribution is installed, so no cadgenbench
	# code changes are required. This mirrors the leaderboard image's
	# vtk-osmesa swap; this is the GPU counterpart.
	RUN python -m pip uninstall --yes vtk \
	    && python -m pip install --no-cache-dir --extra-index-url https://wheels.vtk.org vtk-egl

	# Unprivileged user. HF Spaces conventionally run as uid 1000. Keep the
	# venv writable so HF Jobs can install short-lived extras (for example
	# baseline LLM clients) through the orchestrator's --pip-install hook.
	#
	# This step sits above the COPY below on purpose: `chown -R` rewrites
	# every file of the venv into this layer, so it should only be rebuilt
	# when the dependency layers change, not on every edit to eval_job.py.
	RUN useradd -m -u 1000 user \
	    && chown -R user:user "$VIRTUAL_ENV"

	# In-job entrypoint. Invoked by:
	#
	# hf jobs run --image hf.co/spaces/HuggingAI4Engineering/cadgenbench-eval-gpu \
	# --flavor a10g-large --secrets HF_TOKEN \
	# python /opt/eval_job.py <submission_id> <zip_url>
	#
	# Copied last among the filesystem-changing steps because it is the
	# file that changes most often; everything above stays cached.
	COPY eval_job.py /opt/eval_job.py

	# Drop privileges for runtime and start in the user's home directory.
	USER user
	WORKDIR /home/user

	# Idle default command so the Space's runtime starts without
	# restart-flapping: `sleep infinity` never exits, and the exec (JSON
	# array) form runs it directly with no wrapping shell. Jobs do not use
	# this default; they pass their own command (see the `hf jobs run`
	# invocation documented next to the eval_job.py COPY).
	# Pause the Space via HF UI or HfApi().pause_space() after the
	# first green build; the cached image stays available to Jobs.
	CMD ["sleep", "infinity"]