cpu-estimate-base / Dockerfile
VladKha's picture
public CUDA inference base image (image-estimate)
6df5981 verified
Raw
History Blame Contribute Delete
2.87 kB
# syntax=docker/dockerfile:1.6
# PUBLIC base image for the cheap CPU ESTIMATE job (HF Jobs) — slim, no GPU stack.
#
# Same contract as demo/image/Dockerfile: ONLY open-source, generic dependencies — NO proprietary
# code, and no names that reveal which models/techniques the pipeline uses. The Job's bootstrap
# (/opt/run-job.sh) pulls the private wheel + demo modules at startup and runs run_job.py.
#
# This variant is CPU-ONLY and slim: it omits CUDA/torch and the GPU inference servers (the estimate
# runs zero signals and starts no model server — run_job.py returns at the ESTIMATE_ONLY branch
# before anything loads), and it omits an optional dependency group that `import dataset_reviewer`
# no longer pulls eagerly. Both shrink the image and its pull time, which is the bulk of the
# estimate's wall-clock. A lightweight tokenizer loads via the Rust `tokenizers` backend, so no
# torch is needed.
#
# Build: JOB_IMAGE_DIR=image-estimate JOB_IMAGE_BASE_PROVIDES=torch \
# JOB_IMAGE_SPACE=<owner>/cpu-estimate-base make demo-deploy-job
# (run-job.sh + the dataset_reviewer stub are shared from demo/image/; the omitted dep group is
# applied automatically for this variant — see demo/deploy_job.py.)
# Slim official CPython 3.12 base: python3 and pip are already present, no GPU stack.
FROM python:3.12-slim

# Environment that persists into the running container (build steps and the Job alike):
#   PIP_NO_CACHE_DIR=0          - "no cache" switched off, i.e. pip's cache stays enabled,
#                                 which is what makes the BuildKit cache mounts on the pip
#                                 steps useful.
#   PYTHONUNBUFFERED=1          - unbuffered stdout/stderr so logs show up immediately.
#   HF_HUB_ENABLE_HF_TRANSFER=1 - asks huggingface_hub to use the hf_transfer backend
#                                 (pulled in later via the [hf_transfer] extra).
#   PIP_BREAK_SYSTEM_PACKAGES=1 - lets pip install into the interpreter's own site-packages
#                                 even if the installation is marked externally managed.
ENV PIP_NO_CACHE_DIR=0 \
    PYTHONUNBUFFERED=1 \
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    PIP_BREAK_SYSTEM_PACKAGES=1

# Build-time only: keeps apt/debconf from prompting during `docker build`. Declared as ARG
# rather than ENV so it is visible to the RUN steps below but is NOT baked into the runtime
# environment of the published image.
# NOTE(review): if the bootstrap script ever runs apt-get at Job startup, it should set
# DEBIAN_FRONTEND itself — confirm against run-job.sh.
ARG DEBIAN_FRONTEND=noninteractive

# git/curl/ca-certificates for the HF pulls at bootstrap. update + install + list cleanup stay
# in ONE layer so the apt index is never stale and never shipped in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*
# All following relative paths resolve against /app (WORKDIR creates it if missing).
WORKDIR /app

# Dependency layer: install the generic, open-source DEPENDENCIES only.
#   * pyproject.toml is the generated manifest; it leaves torch out
#     (JOB_IMAGE_BASE_PROVIDES=torch) because the estimate needs no torch/CUDA.
#   * dataset_reviewer/ is a stub package (empty __init__.py) whose sole purpose is to let
#     pip resolve and install the dependency set WITHOUT any proprietary source ending up
#     in this public image.
# At runtime the Job swaps the stub for the real wheel; run-job.sh skips the
# technique-revealing runtime deps for ESTIMATE_ONLY, since that path never imports them.
COPY pyproject.toml ./
COPY dataset_reviewer ./dataset_reviewer/

# The cache mount keeps pip's cache on the build host between builds; it is not part of
# any image layer.
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install .
# Packages the bootstrap and the demo modules rely on, requested by name so their presence
# never hinges on how the dependency resolution of the previous step happened to turn out
# (per the original note they are already transitive dependencies of that step).
# Only telethon carries a version pin here; huggingface_hub and python-dotenv are listed
# without a version constraint. telethon is the Telegram client the Job imports — it is not
# a dataset_reviewer dependency.
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install \
        "huggingface_hub[hf_transfer]" \
        python-dotenv \
        "telethon==1.43.2"
# Job bootstrap script. The executable bit is set by the COPY itself (--chmod, BuildKit)
# instead of a follow-up `RUN chmod +x`, which would add an extra layer that stores the
# file a second time just to change its mode. 755 = rwx for owner, r-x for everyone else.
COPY --chmod=755 run-job.sh /opt/run-job.sh
# Port used by the placeholder server below (EXPOSE is documentation only; it publishes
# nothing by itself).
EXPOSE 7860/tcp

# Placeholder default: a stdlib HTTP server on 7860 whose only job is to keep the Space
# itself idle+RUNNING so the image publishes cleanly. It serves the working directory
# (/app). HF Jobs never run this — they override the command with `bash /opt/run-job.sh`.
CMD ["python3", "-m", "http.server", "7860"]