rag_server / Dockerfile
atkiya110's picture
Update Dockerfile
983618b verified
# Slim Debian-based Python image keeps the final image small.
FROM python:3.10-slim

# All later relative paths (COPY destinations, the CMD script) resolve under /app.
WORKDIR /app

# System packages:
#   build-essential  - compiler toolchain, presumably for Python packages that
#                      build native extensions during pip install
#   ca-certificates  - listed explicitly so HTTPS keeps working for curl/git
#                      now that recommended packages are no longer pulled in
#   curl             - used by the HEALTHCHECK probe
#   git              - kept from the original package list
# --no-install-recommends skips optional extras, and the apt lists are removed
# in the same layer so they never end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*
# Install Python dependencies before copying application code so this layer
# is rebuilt only when requirements.txt changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Drop root for everything that follows. UID 1000 is the user Hugging Face
# Docker Spaces run containers as; /app is handed over so the application can
# write next to its code.
RUN useradd --create-home --uid 1000 user \
    && chown user:user /app
USER user

# Pin the home directory and the Hugging Face cache location so the weights
# downloaded below are found (and readable) by the same user at runtime.
ENV HOME=/home/user \
    HF_HOME=/home/user/.cache/huggingface

COPY --chown=user:user app.py .

# Pre-download model weights at build time so the container starts fast.
# Both models are hard-coded in app.py; change them here if you change them there.
# The continuation lines are deliberately not indented: they are joined into a
# single `python -c` string, and leading whitespace would be an IndentationError.
RUN python -c "\
from huggingface_hub import snapshot_download; \
snapshot_download('sentence-transformers/all-MiniLM-L6-v2'); \
snapshot_download('TinyLlama/TinyLlama-1.1B-Chat-v1.0')"
# Documents the service port; the health probe targets the same port.
EXPOSE 7860

# Runtime environment.
#   PYTHONUNBUFFERED=1 - honoured by the Python interpreter itself: stdout and
#                        stderr are written without buffering.
#   DEVICE=cpu         - tuning knob that app.py does not read today; kept as
#                        documentation and easy to wire in later.
ENV PYTHONUNBUFFERED=1
ENV DEVICE=cpu
# Boot takes ~3-5 min: model load + API wake + GitHub fetch + indexing.
# Probes still run during start-period, but failures inside that window do not
# count toward --retries, so the container is not marked unhealthy while it is
# still booting. If boot regularly exceeds 5 min, raise --start-period.
# curl flags: -f fails on HTTP errors, -s hides the progress meter, -S still
# prints real errors, which keeps the recorded health log readable.
HEALTHCHECK --interval=30s --timeout=10s --start-period=300s --retries=3 \
    CMD curl -fsS http://localhost:7860/health || exit 1

# Exec form: python runs as PID 1 and receives stop signals directly.
# -u forces unbuffered output (same effect as PYTHONUNBUFFERED=1).
CMD ["python", "-u", "app.py"]