# vgecbot / Dockerfile
# harsh-dev's picture
# fix: upgrade torch to 2.5.1 for NumPy 2.x and Accelerate support
# 7599817 unverified
# ─────────────────────────────────────────────────────────────────────────────
# VGEC RAG Chatbot — Dockerfile for Hugging Face Spaces
# ─────────────────────────────────────────────────────────────────────────────
# HF Spaces requirements:
# • Port MUST be 7860
# • GOOGLE_API_KEY must be set as a Space Secret in HF UI
# ─────────────────────────────────────────────────────────────────────────────
# Base image: Python 3.11, "slim" variant. The tag floats (no patch version or
# digest pin), so a rebuild may pick up a newer 3.11.x image.
FROM python:3.11-slim
# ── System dependencies ───────────────────────────────────────────────────────
# Packages, one per line in alphabetical order:
#   build-essential → compiler toolchain for chromadb (hnswlib C extension)
#   git             → lets pip install packages that are referenced by git URL
#   libgomp1        → OpenMP runtime for sentence-transformers / scikit-learn
# The apt package lists are removed in this same layer so they never reach the image.
RUN apt-get update \
    && apt-get install --yes --no-install-recommends \
        build-essential \
        git \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/*
# ── Working directory ─────────────────────────────────────────────────────────
# Every later relative path (COPY destinations, RUN commands, the CMD) resolves
# against /app.
WORKDIR /app
# ── Python dependencies ───────────────────────────────────────────────────────
# Only the requirements manifest is copied at this point, so the dependency
# layers below are reused from cache until requirements.txt itself changes;
# editing application source does not invalidate them.
COPY requirements.txt ./
# Pre-install the CPU-only PyTorch build before anything else asks for torch.
# With torch already satisfied, the later sentence-transformers install does not
# make pip pull the 2+ GB GPU wheels.
RUN pip install --no-cache-dir --index-url https://download.pytorch.org/whl/cpu \
    torch==2.5.1
# Everything else comes from requirements.txt.
# llama-cpp-python is deliberately left out — this deployment is Gemini-only.
RUN pip install --no-cache-dir --requirement requirements.txt
# Download the spaCy English model at build time so it is baked into the image.
# spaCy forwards trailing arguments to pip; --no-cache-dir matches the other pip
# installs in this file and keeps pip's download cache out of the layer.
RUN python -m spacy download en_core_web_sm --no-cache-dir
# ── Application source ────────────────────────────────────────────────────────
# Hugging Face Docker Spaces run the container as UID 1000, not root. Create a
# matching user and hand it /app so the app can write there at runtime; the
# SENTENCE_TRANSFORMERS_HOME and HF_HOME cache directories both live under /app.
RUN useradd --create-home --uid 1000 user \
    && chown user:user /app
# Copy the source already owned by that user (avoids a second chown layer).
COPY --chown=user:user . .
# Drop root for the running container; every step that needs root is above.
USER user
# ── Environment variables ─────────────────────────────────────────────────────
# Python runtime: unbuffered stdout/stderr so logs show up on HF in real time,
# and no .pyc bytecode files written.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1
# LLM mode — overrides the config.py default so HF Spaces uses the Gemini API.
# GOOGLE_API_KEY is deliberately absent — add it as a HF Space Secret instead.
ENV LLM_PROVIDER=gemini \
    ENABLE_FALLBACK=false
# Keep the sentence-transformers and Hugging Face caches at predictable paths
# under /app.
ENV SENTENCE_TRANSFORMERS_HOME=/app/ml_models/embeddings \
    HF_HOME=/app/.cache/huggingface
# ── Port ──────────────────────────────────────────────────────────────────────
# HF Spaces requires the app on exactly port 7860. The protocol is spelled out
# here; TCP is also what EXPOSE assumes when none is given.
EXPOSE 7860/tcp
# ── Startup ───────────────────────────────────────────────────────────────────
# Exec (JSON-array) form: uvicorn is started directly, without a /bin/sh wrapper.
# It serves the `app` object from the `main` module on 0.0.0.0:7860, matching
# the EXPOSE above.
# No --reload (dev-only flag).
# --workers 1 keeps RAM usage predictable on the free tier (2 vCPU, 16 GB RAM).
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]