# ─────────────────────────────────────────────────────────────────────
# Dockerfile for HuggingFace Spaces (Docker SDK) — also works on
# Render, Railway, Fly.io, or any container platform.
#
# Key choices:
#   • python:3.11-slim base (small, modern)
#   • CPU-only torch wheel installed from PyTorch's CPU index
#       → saves ~1.5 GB vs. the default GPU wheel
#   • Model is pre-downloaded at build time so the first request is fast
#   • Non-root user (HF Spaces requires UID 1000)
#   • Gunicorn with a single worker — BERT eats memory, extra workers
#       would duplicate the ~500 MB model in RAM
# ─────────────────────────────────────────────────────────────────────
FROM python:3.11-slim

# Environment shared by the build steps and the running container.
#   PYTHONDONTWRITEBYTECODE / PYTHONUNBUFFERED — no .pyc files, unbuffered logs
#   PIP_*    — keep pip quiet and stop it from caching wheels in the image
#   HF_HOME  — single root for all Hugging Face caches. The legacy
#              TRANSFORMERS_CACHE alias is deliberately not set: it is
#              deprecated in favour of HF_HOME and only produces a
#              FutureWarning on import.
#   PORT     — default listen port (7860 is the HF Spaces default); other
#              platforms may override it at runtime.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    HF_HOME=/home/user/.cache/huggingface \
    PORT=7860

# System deps — only what we truly need (kept alphabetical for clean diffs).
#   curl is required by the HEALTHCHECK probe.
#   NOTE(review): build-essential is presumably here for requirements that
#   compile native extensions — confirm, and drop it if every dependency
#   ships a prebuilt wheel.
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
    && rm -rf /var/lib/apt/lists/*

# HF Spaces requires a non-root user with UID 1000
RUN useradd -m -u 1000 user
USER user

# `pip install --user` places console scripts (e.g. gunicorn) in ~/.local/bin
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /home/user/app

# Install CPU-only PyTorch first (big layer — cache-friendly).
# It lives in its own layer so edits to requirements.txt do not force the
# large torch wheel to be downloaded again.
RUN pip install --user --no-cache-dir \
        torch==2.4.1 \
        --index-url https://download.pytorch.org/whl/cpu

# Copy only the dependency manifest; the install step that follows is then
# cached until requirements.txt itself changes.
COPY --chown=user:user requirements.txt .
RUN pip install --user --no-cache-dir -r requirements.txt

# Pre-download the model into the image so cold starts are fast.
# The model id can be overridden with `--build-arg HF_MODEL_NAME=<repo-id>`;
# it is also exported as an ENV so it is visible in the running container.
ARG HF_MODEL_NAME=deepset/bert-base-cased-squad2
ENV HF_MODEL_NAME=${HF_MODEL_NAME}
RUN python -c "import os; \
    from transformers import AutoTokenizer, AutoModelForQuestionAnswering; \
    m = os.environ['HF_MODEL_NAME']; \
    AutoTokenizer.from_pretrained(m); \
    AutoModelForQuestionAnswering.from_pretrained(m); \
    print('Model pre-downloaded:', m)"

# Copy application code last — it changes most often, so every heavier
# layer above stays cached across ordinary code edits.
COPY --chown=user:user src/ ./src/
COPY --chown=user:user templates/ ./templates/
COPY --chown=user:user static/ ./static/

# Persistent disk on HF Spaces mounts at /data (when enabled in Space settings)
# If /data isn't writable, config.py falls back to ./history.db automatically.

# Documentation only: 7860 is the default port. The real listen port comes
# from $PORT at runtime (see CMD below).
EXPOSE 7860

# Probe the same port the server binds to. Shell form is intentional so that
# ${PORT} is expanded inside the container at probe time; the fallback keeps
# the probe valid even if PORT is unset.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -fsS "http://localhost:${PORT:-7860}/healthz" || exit 1

# Single worker, long timeout — BERT inference can take a few seconds on CPU.
# The bind address honours $PORT so that platforms which inject their own
# port (Render, Railway, ...) reach the server on the port they expect, and
# so the HEALTHCHECK above and the server always agree. Exec-form CMD does no
# variable expansion, hence the `sh -c` wrapper; `exec` replaces the shell so
# gunicorn runs as PID 1 and receives SIGTERM directly on `docker stop`.
CMD ["sh", "-c", "exec gunicorn src.app:app --bind \"0.0.0.0:${PORT:-7860}\" --workers 1 --threads 4 --timeout 180 --access-logfile - --error-logfile -"]