# Image for app.py: a Python service on port 7860 with its Hugging Face model
# weights downloaded at build time so the container does not fetch them on boot.
FROM python:3.10-slim

WORKDIR /app

# OS packages. curl is required by the HEALTHCHECK below.
# NOTE(review): build-essential and git are presumably needed to build wheels
# from requirements.txt and/or by app.py at runtime — confirm before trimming.
# ca-certificates is listed explicitly because --no-install-recommends would
# otherwise leave HTTPS trust for curl/git to whatever the base image ships.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

# Unprivileged account for the build-time model download and for runtime.
# /app is handed to it so the service can still write into its working dir.
# uid 1000 is a fixed, predictable id; NOTE(review): confirm it matches the uid
# your hosting platform runs containers as, if the platform enforces one.
RUN useradd --create-home --uid 1000 --shell /usr/sbin/nologin app \
    && chown app:app /app

# Pin the Hugging Face cache location so the weights downloaded during the
# build are found at the same path when the container runs.
ENV HF_HOME=/home/app/.cache/huggingface

# Python dependencies are installed system-wide (as root) before the source is
# copied, so editing app.py does not trigger a reinstall.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

USER app

# Pre-download model weights at build time so the container starts fast.
# Both models are hard-coded in app.py; change them here if you change them there.
# This step sits above `COPY app.py` on purpose: it only needs huggingface_hub
# (expected to come from requirements.txt), so editing app.py no longer
# invalidates this large layer.
RUN python -c "\
from huggingface_hub import snapshot_download; \
snapshot_download('sentence-transformers/all-MiniLM-L6-v2'); \
snapshot_download('TinyLlama/TinyLlama-1.1B-Chat-v1.0')"

# Application source goes last because it changes most often.
COPY --chown=app:app app.py .

# Documentation only: the port the health check below probes.
EXPOSE 7860

# Runtime environment.
# PYTHONUNBUFFERED is consumed by the Python interpreter itself (unbuffered
# stdout/stderr). DEVICE is not read by app.py today; it serves as
# documentation and is easy to wire in later.
ENV PYTHONUNBUFFERED=1 \
    DEVICE=cpu

# Boot takes ~3-5 min: model load + API wake + GitHub fetch + indexing.
# Probes still run during the start period, but their failures are not counted
# against --retries, so start-period must cover the whole boot sequence.
HEALTHCHECK --interval=30s --timeout=10s --start-period=300s --retries=3 \
    CMD curl -fsS http://localhost:7860/health || exit 1

# Exec form, so python receives signals from `docker stop` directly.
CMD ["python", "-u", "app.py"]