# ─────────────────────────────────────────────────────────────────────
# Dockerfile for HuggingFace Spaces (Docker SDK) — also works on
# Render, Railway, Fly.io, or any container platform.
#
# Key choices:
#   • python:3.11-slim base (small, modern)
#   • CPU-only torch wheel installed from PyTorch's CPU index
#       → saves ~1.5 GB vs. the default GPU wheel
#   • Model is pre-downloaded at build time so the first request is fast
#   • Non-root user (HF Spaces requires UID 1000)
#   • Gunicorn with a single worker — BERT eats memory, extra workers
#     would duplicate the ~500 MB model in RAM
# ─────────────────────────────────────────────────────────────────────
# Base image: slim Debian-based CPython 3.11.
FROM python:3.11-slim

# Environment shared by the build steps and the running container:
#   PYTHONDONTWRITEBYTECODE / PYTHONUNBUFFERED
#       no .pyc files in the image; stdout/stderr are not buffered, so logs
#       appear immediately.
#   PIP_NO_CACHE_DIR / PIP_DISABLE_PIP_VERSION_CHECK
#       no pip download cache baked into layers; no "new pip available" check.
#   HF_HOME
#       single root for every Hugging Face cache (the model hub cache lives
#       under $HF_HOME/hub). The older TRANSFORMERS_CACHE variable is
#       deliberately not set: it is deprecated in transformers in favour of
#       HF_HOME and only triggers a deprecation warning.
#   PORT
#       port the service is expected on (7860 is the Hugging Face Spaces
#       default app port).
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    HF_HOME=/home/user/.cache/huggingface \
    PORT=7860

# OS packages, kept to the minimum and listed alphabetically:
#   build-essential — compiler toolchain (presumably for pip packages that
#                     build native extensions; confirm it is still required)
#   ca-certificates — TLS root certificates for HTTPS downloads
#   curl            — used by the HEALTHCHECK probe
# The apt package lists are removed in the same layer so they never reach
# the image.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
 && rm -rf /var/lib/apt/lists/*

# HF Spaces requires a non-root user with UID 1000.
# The application directory is created and handed to that user while we are
# still root. Relying on WORKDIR to create it implicitly is fragile: not every
# builder gives an implicitly created WORKDIR to the current USER, and a
# root-owned app directory would make runtime writes into it fail (for
# example the ./history.db fallback).
RUN useradd --create-home --uid 1000 user \
 && mkdir -p /home/user/app \
 && chown user:user /home/user/app
USER user
# `pip install --user` places console scripts (e.g. gunicorn) in ~/.local/bin.
ENV PATH="/home/user/.local/bin:$PATH"
WORKDIR /home/user/app

# CPU-only PyTorch gets its own early layer: it is the largest dependency, so
# isolating it lets later changes to requirements.txt reuse the cached layer.
# The wheel comes from PyTorch's CPU index rather than PyPI.
RUN pip install --user --no-cache-dir \
        --index-url https://download.pytorch.org/whl/cpu \
        torch==2.4.1

# Remaining Python dependencies. Only requirements.txt is copied at this point
# so that edits to application code do not invalidate this layer.
COPY --chown=user:user requirements.txt ./
RUN pip install --user --no-cache-dir --requirement requirements.txt

# Bake the tokenizer and model weights into the image at build time so the
# first request does not have to download them. To use a different model:
#   docker build --build-arg HF_MODEL_NAME=<repo-id> .
# The ARG is re-exported as ENV so the same name is also visible at runtime.
ARG HF_MODEL_NAME=deepset/bert-base-cased-squad2
ENV HF_MODEL_NAME=${HF_MODEL_NAME}
# The trailing backslashes are Dockerfile line continuations: Python receives
# the whole script as one ';'-separated line.
RUN python -c "import os; \
    from transformers import AutoModelForQuestionAnswering, AutoTokenizer; \
    model_id = os.environ['HF_MODEL_NAME']; \
    AutoTokenizer.from_pretrained(model_id); \
    AutoModelForQuestionAnswering.from_pretrained(model_id); \
    print('Model pre-downloaded:', model_id)"

# Application source, HTML templates and static assets, all owned by the
# non-root user. Copied after the dependency and model layers so that code
# edits do not invalidate those layers.
COPY --chown=user:user src/ ./src/
COPY --chown=user:user templates/ ./templates/
COPY --chown=user:user static/ ./static/

# Persistent disk on HF Spaces mounts at /data (when enabled in Space settings)
# If /data isn't writable, config.py falls back to ./history.db automatically.

# Documentation only: the port the service listens on by default.
EXPOSE 7860

# Liveness probe: request /healthz on localhost every 30 s with a 10 s
# timeout. Failures during the first 60 s (start-up grace period) are not
# counted; after that, 3 consecutive failures mark the container unhealthy.
# ${PORT} is expanded by the shell when the probe runs inside the container.
HEALTHCHECK --start-period=60s --interval=30s --timeout=10s --retries=3 \
    CMD curl --fail --silent --show-error http://localhost:${PORT}/healthz || exit 1

# Start Gunicorn serving src.app:app.
#   --workers 1    single worker; additional workers would each load their own
#                  copy of the BERT model into RAM
#   --threads 4    threads inside that worker
#   --timeout 180  long timeout; BERT inference can take a few seconds on CPU
#   --*-logfile -  access and error logs go to the container's stdout/stderr
#
# The bind port follows $PORT (falling back to 7860) instead of being
# hard-coded, so the server listens on the same port the HEALTHCHECK probes
# and on whatever port a hosting platform injects via PORT. Exec-form CMD
# performs no variable expansion, hence the explicit `sh -c`; `exec` replaces
# the shell so Gunicorn runs as PID 1 and receives stop signals directly.
CMD ["sh", "-c", "exec gunicorn src.app:app --bind 0.0.0.0:${PORT:-7860} --workers 1 --threads 4 --timeout 180 --access-logfile - --error-logfile -"]