# ─────────────────────────────────────────────────────────────────────────────
# VGEC RAG Chatbot — Dockerfile for Hugging Face Spaces
# ─────────────────────────────────────────────────────────────────────────────
# HF Spaces requirements:
#   • Port MUST be 7860
#   • GOOGLE_API_KEY must be set as a Space Secret in HF UI
#   • Docker Spaces run the container as UID 1000, so every path the app
#     writes to at runtime must be owned by that user
# ─────────────────────────────────────────────────────────────────────────────

FROM python:3.11-slim

# ── System dependencies ───────────────────────────────────────────────────────
# build-essential → needed by chromadb (hnswlib C extension)
# git             → needed by some pip packages that install from git
# libgomp1        → needed by sentence-transformers / scikit-learn OpenMP
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        git \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/*

# ── Non-root user + working directory ─────────────────────────────────────────
# Create the UID-1000 account and hand it /app up front. WORKDIR alone would
# create /app owned by root, and the model/cache directories configured below
# (/app/ml_models, /app/.cache) could then not be created at runtime.
RUN useradd --create-home --uid 1000 appuser \
    && mkdir -p /app \
    && chown appuser:appuser /app

WORKDIR /app

# ── Python dependencies ───────────────────────────────────────────────────────
# Copy requirements first so Docker caches this layer separately from source
# code. Any requirements change rebuilds from here; source code changes don't.
COPY requirements.txt .

# Install CPU-only PyTorch FIRST (prevents pip from pulling 2+ GB GPU wheels
# when sentence-transformers later requests torch as a dependency).
RUN pip install --no-cache-dir \
        torch==2.5.1 \
        --index-url https://download.pytorch.org/whl/cpu

# Install the rest of the requirements.
# llama-cpp-python is intentionally excluded — Gemini-only deployment.
RUN pip install --no-cache-dir -r requirements.txt

# Download the spaCy English model at build time so it's baked into the image.
RUN python -m spacy download en_core_web_sm

# ── Application source ────────────────────────────────────────────────────────
# --chown makes the copied tree writable by the runtime user without a
# follow-up `RUN chown -R`, which would duplicate the whole layer.
COPY --chown=appuser:appuser . .

# ── Environment variables ─────────────────────────────────────────────────────
# Tell Python not to buffer stdout/stderr (so logs appear in real time on HF)
# and not to write .pyc files.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

# LLM mode — overrides the config.py default; HF Spaces will use Gemini API.
# GOOGLE_API_KEY is NOT set here — it must be added as a HF Space Secret.
ENV LLM_PROVIDER=gemini \
    ENABLE_FALLBACK=false

# Point sentence-transformers / Hugging Face caches inside /app so their
# location is predictable (and, with the ownership set above, writable).
ENV SENTENCE_TRANSFORMERS_HOME=/app/ml_models/embeddings \
    HF_HOME=/app/.cache/huggingface

# Drop root now that every step needing it (apt, pip, spaCy download) is done.
USER appuser

# ── Port ──────────────────────────────────────────────────────────────────────
# HF Spaces requires exactly port 7860.
EXPOSE 7860

# ── Startup ───────────────────────────────────────────────────────────────────
# No --reload (dev-only flag).
# --workers 1 keeps RAM usage predictable on the free tier (2 vCPU, 16 GB RAM).
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]