Spaces:
Running
Running
# ---------------------------------------------------------------------
# Dockerfile for HuggingFace Spaces (Docker SDK) - also works on
# Render, Railway, Fly.io, or any container platform.
#
# Key choices:
#   - python:3.11-slim base (small, modern)
#   - CPU-only torch wheel installed from PyTorch's CPU index
#       -> saves ~1.5 GB vs. the default GPU wheel
#   - Model is pre-downloaded at build time so the first request is fast
#   - Non-root user (HF Spaces requires UID 1000)
#   - Gunicorn with a single worker - BERT eats memory, extra workers
#     would duplicate the ~500 MB model in RAM
#   - Gunicorn binds to $PORT (default 7860, the port HF Spaces expects),
#     so platforms that inject their own PORT value work unchanged
# ---------------------------------------------------------------------
FROM python:3.11-slim

# PYTHONDONTWRITEBYTECODE / PYTHONUNBUFFERED: no .pyc files, unbuffered logs.
# PIP_*: keep image layers small and pip output quiet.
# HF_HOME / TRANSFORMERS_CACHE: put the model cache under the non-root
# user's home so the files downloaded at build time are found at run time.
# NOTE(review): newer transformers releases deprecate TRANSFORMERS_CACHE in
# favour of HF_HOME - confirm against the version pinned in requirements.txt
# before removing it.
# PORT: default listen port; may be overridden by the hosting platform.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    HF_HOME=/home/user/.cache/huggingface \
    TRANSFORMERS_CACHE=/home/user/.cache/huggingface \
    PORT=7860

# System deps - only what we truly need (curl is used by the HEALTHCHECK)
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        ca-certificates \
    && rm -rf /var/lib/apt/lists/*

# HF Spaces requires a non-root user with UID 1000.
# The app directory is created and chowned explicitly: depending on the
# builder, a directory created implicitly by WORKDIR may be owned by root,
# which would make the ./history.db fallback (see below) unwritable.
RUN useradd -m -u 1000 user \
    && mkdir -p /home/user/app \
    && chown user:user /home/user/app
USER user
ENV PATH="/home/user/.local/bin:$PATH"
WORKDIR /home/user/app

# Install CPU-only PyTorch first (big layer - cache-friendly)
RUN pip install --user --no-cache-dir \
        torch==2.4.1 \
        --index-url https://download.pytorch.org/whl/cpu

# Copy requirements and install the rest
COPY --chown=user:user requirements.txt .
RUN pip install --user --no-cache-dir -r requirements.txt

# Pre-download the model into the image so cold starts are fast.
# Override at build time with: --build-arg HF_MODEL_NAME=<repo id>
ARG HF_MODEL_NAME=deepset/bert-base-cased-squad2
ENV HF_MODEL_NAME=${HF_MODEL_NAME}
RUN python -c "import os; \
from transformers import AutoTokenizer, AutoModelForQuestionAnswering; \
m = os.environ['HF_MODEL_NAME']; \
AutoTokenizer.from_pretrained(m); \
AutoModelForQuestionAnswering.from_pretrained(m); \
print('Model pre-downloaded:', m)"

# Copy application code (after dependency layers so code edits don't
# invalidate the pip / model layers)
COPY --chown=user:user src/ ./src/
COPY --chown=user:user templates/ ./templates/
COPY --chown=user:user static/ ./static/

# Persistent disk on HF Spaces mounts at /data (when enabled in Space settings).
# If /data isn't writable, config.py falls back to ./history.db automatically.

# EXPOSE is documentation only; 7860 is the default value of PORT.
EXPOSE 7860

# Probe the same port gunicorn binds to (see CMD below).
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -fsS "http://localhost:${PORT:-7860}/healthz" || exit 1

# Single worker, long timeout - BERT inference can take a few seconds on CPU.
# Shell form is used so ${PORT} is expanded at container start (exec-form
# JSON arrays do no variable expansion); `exec` replaces the shell so
# gunicorn runs as PID 1 and receives SIGTERM directly.
CMD exec gunicorn src.app:app \
        --bind "0.0.0.0:${PORT:-7860}" \
        --workers 1 \
        --threads 4 \
        --timeout 180 \
        --access-logfile - \
        --error-logfile -