Spaces:

atkiya110
/

rag_server

Running

rag_server / Dockerfile

Update Dockerfile

983618b verified 6 days ago

1.08 kB

	# Base image: slim Python 3.10.
	FROM python:3.10-slim

	# Every later relative path (COPY destinations, the final CMD) resolves against /app.
	WORKDIR /app

	# OS-level packages, one per line and sorted so diffs stay small.
	#   build-essential  presumably for pip packages that compile native code - confirm
	#   ca-certificates  listed explicitly so HTTPS keeps working without recommended packages
	#   curl             used by the HEALTHCHECK probe
	#   git              presumably for VCS requirements or a runtime repository fetch - confirm
	# --no-install-recommends keeps optional extras out of the image; the apt lists
	# are removed in the same layer so they never reach the final image.
	RUN apt-get update && apt-get install -y --no-install-recommends \
	        build-essential \
	        ca-certificates \
	        curl \
	        git \
	    && rm -rf /var/lib/apt/lists/*

	# Dependency manifest goes in first: the pip layer below is rebuilt only when
	# requirements.txt changes, not on every edit to the application source.
	COPY requirements.txt ./
	RUN pip install --no-cache-dir --requirement requirements.txt

	# Application entry point, copied after the dependencies are installed.
	COPY app.py ./

	# Pre-download model weights at build time so the container starts fast.
	# Both models are hard-coded in app.py; change them here if you change them there.
	# The import deliberately sits on the same line as the opening quote: Docker
	# keeps the leading whitespace of continuation lines, and Python rejects a -c
	# program whose first statement is indented. Whitespace after a ';' is harmless.
	# NOTE(review): the weights land in the build user's default Hugging Face cache;
	# if the container runs as a different user, set HF_HOME so they are found - confirm.
	RUN python -c "from huggingface_hub import snapshot_download; \
	    snapshot_download('sentence-transformers/all-MiniLM-L6-v2'); \
	    snapshot_download('TinyLlama/TinyLlama-1.1B-Chat-v1.0')"

	# Documents the service port (the same one the health probe targets).
	# EXPOSE is metadata only; it does not publish the port.
	EXPOSE 7860

	# Runtime environment defaults.
	#   DEVICE            not read by app.py today; kept as documentation and easy to wire in later
	#   PYTHONUNBUFFERED  not read by app.py either, but honoured by the Python
	#                     interpreter itself: stdout/stderr are written unbuffered
	ENV DEVICE=cpu \
	    PYTHONUNBUFFERED=1

	# Boot takes ~3-5 min: model load + API wake + GitHub fetch + indexing.
	# start-period must cover all of that: probes that fail during the start period
	# do not count toward --retries, so the container is not marked unhealthy while
	# it is still booting.
	# curl flags: -f turns HTTP errors (>= 400) into a non-zero exit, -sS hides the
	# progress meter but still prints errors. "|| exit 1" maps every curl failure
	# to exit status 1, which Docker interprets as "unhealthy".
	HEALTHCHECK --interval=30s --timeout=10s --start-period=300s --retries=3 \
	    CMD curl -fsS http://localhost:7860/health || exit 1

	# Exec form: python runs as PID 1 and receives stop signals directly.
	# -u forces unbuffered stdout/stderr so logs show up immediately.
	CMD ["python", "-u", "app.py"]