# Base image: slim variant of the official Python 3.10 image (tag only, not
# pinned by digest).
FROM python:3.10-slim

# Every relative path below (COPY destinations, the app.py in CMD) resolves
# against /app.
WORKDIR /app

# OS-level tooling: compiler toolchain (build-essential), curl (used by the
# HEALTHCHECK probe), and git.
# --no-install-recommends keeps optional "recommended" packages out of the
# image; ca-certificates is listed explicitly so HTTPS from curl/git does not
# depend on it arriving as a recommendation.
# update + install + list cleanup stay in ONE layer so the apt index is never
# stale and never persisted in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies in their own layer: only the manifest is copied
# first, so this layer is rebuilt only when requirements.txt changes.
COPY requirements.txt ./
# --no-cache-dir keeps pip's download cache out of the image.
RUN python -m pip install --no-cache-dir --requirement requirements.txt

# Pre-download model weights at build time so the container starts fast.
# Both models are hard-coded in app.py; change them here if you change them there.
#
# This step deliberately runs BEFORE app.py is copied: it only needs the
# packages installed by the pip step, so editing app.py does not invalidate
# this layer and force the weights to be downloaded again.
#
# NOTE(review): no cache_dir is passed, so the weights land in the default
# Hugging Face cache of the build-time user. If the container is run as a
# different user, the runtime may not find them there — confirm, and set
# HF_HOME explicitly if so.
RUN python -c "\
from huggingface_hub import snapshot_download; \
snapshot_download('sentence-transformers/all-MiniLM-L6-v2'); \
snapshot_download('TinyLlama/TinyLlama-1.1B-Chat-v1.0')"

# Application code is copied last because it changes most often; everything
# above stays cached across code-only rebuilds.
COPY app.py .

# Documents the service port (the same one the HEALTHCHECK probes). EXPOSE is
# metadata only; it does not publish the port by itself.
EXPOSE 7860

# Runtime environment.
# PYTHONUNBUFFERED is honored by the Python interpreter itself (unbuffered
# stdout/stderr, the same effect as the -u flag in CMD), so it is active even
# though app.py never reads it.
# DEVICE is not read by app.py today; it serves as documentation and is easy
# to wire in later.
ENV PYTHONUNBUFFERED=1 \
    DEVICE=cpu

# Startup is slow (~3-5 min): model load, API wake, GitHub fetch and indexing
# all happen before the service answers. The 300s start period gives the
# container that long before failed probes count against the retry budget.
HEALTHCHECK --start-period=300s --interval=30s --timeout=10s --retries=3 \
    CMD curl --fail http://localhost:7860/health || exit 1

# Exec form: python runs directly as PID 1 (no shell wrapper); -u keeps
# stdout/stderr unbuffered.
CMD ["python", "-u", "app.py"]