# syntax=docker/dockerfile:1.6
# PUBLIC base image for the cheap CPU ESTIMATE job (HF Jobs) — slim, no GPU stack.
#
# Same contract as demo/image/Dockerfile: ONLY open-source, generic dependencies — NO proprietary
# code, and no names that reveal which models/techniques the pipeline uses. The Job's bootstrap
# (/opt/run-job.sh) pulls the private wheel + demo modules at startup and runs run_job.py.
#
# This variant is CPU-ONLY and slim: it omits CUDA/torch and the GPU inference servers (the estimate
# runs zero signals and starts no model server — run_job.py returns at the ESTIMATE_ONLY branch
# before anything loads), and it omits an optional dependency group that `import dataset_reviewer`
# no longer pulls eagerly. Both shrink the image and its pull time, which is the bulk of the
# estimate's wall-clock. A lightweight tokenizer loads via the Rust `tokenizers` backend, so no
# torch is needed.
#
# Build:  JOB_IMAGE_DIR=image-estimate JOB_IMAGE_BASE_PROVIDES=torch \
#           JOB_IMAGE_SPACE=/cpu-estimate-base make demo-deploy-job
# (run-job.sh + the dataset_reviewer stub are shared from demo/image/; the omitted dep group is
# applied automatically for this variant — see demo/deploy_job.py.)

FROM python:3.12-slim

# Environment baked into the image (visible to the build steps below AND to the Job at runtime):
#   PIP_NO_CACHE_DIR=0          leaves pip's cache enabled so the cache mounts below are used
#   PYTHONUNBUFFERED=1          unbuffered stdout/stderr, so Job logs stream immediately
#   HF_HUB_ENABLE_HF_TRANSFER=1 selects the hf_transfer download backend in huggingface_hub
#   PIP_BREAK_SYSTEM_PACKAGES=1 permits pip installs into the system interpreter (no venv)
# NOTE(review): DEBIAN_FRONTEND is only needed by the apt step in this file; it is kept in ENV
# because run-job.sh is not visible here — if the bootstrap never calls apt, scope it to that RUN.
ENV DEBIAN_FRONTEND=noninteractive \
    PIP_NO_CACHE_DIR=0 \
    PYTHONUNBUFFERED=1 \
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    PIP_BREAK_SYSTEM_PACKAGES=1

# git/curl/ca-certificates for the HF pulls at bootstrap. python3.12 + pip are in the base.
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install the generic DEPENDENCIES only — the generated pyproject omits torch
# (JOB_IMAGE_BASE_PROVIDES=torch) since the estimate needs no torch/CUDA. A stub package
# (empty dataset_reviewer/__init__.py) lets `pip install .` resolve the deps WITHOUT shipping
# proprietary source into this public image. At runtime the Job replaces the stub with the
# real wheel; run-job.sh skips the technique-revealing runtime deps for ESTIMATE_ONLY (never
# imported on this path).
COPY pyproject.toml /app/pyproject.toml
COPY dataset_reviewer /app/dataset_reviewer
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install /app

# Explicitly install what the bootstrap + demo modules import (already transitive above, listed
# here so the bootstrap never depends on resolution order). Only telethon is version-pinned.
# telethon is the Telegram client the Job imports — not a dataset_reviewer dep.
RUN --mount=type=cache,target=/root/.cache/pip \
    pip install \
        "huggingface_hub[hf_transfer]" \
        python-dotenv \
        "telethon==1.43.2"

# Ship the bootstrap already executable; --chmod avoids a second layer that would only flip
# the mode bit on the same file.
COPY --chmod=755 run-job.sh /opt/run-job.sh

# Default command keeps the Space itself idle+RUNNING (so the image publishes cleanly);
# HF Jobs override this with `bash /opt/run-job.sh`.
EXPOSE 7860
CMD ["python3", "-m", "http.server", "7860"]