# Source: Hugging Face Space harshvisualz/vgecbot (status when captured: Sleeping; file size: 4,179 bytes)

# ─────────────────────────────────────────────────────────────────────────────
# VGEC RAG Chatbot — Dockerfile for Hugging Face Spaces
# ─────────────────────────────────────────────────────────────────────────────
# HF Spaces requirements:
#   • Port MUST be 7860
#   • GOOGLE_API_KEY must be set as a Space Secret in HF UI
# ─────────────────────────────────────────────────────────────────────────────

FROM python:3.11-slim

# ── System dependencies ───────────────────────────────────────────────────────
# Native packages the Python stack relies on (listed alphabetically):
#   build-essential  → compiler toolchain for chromadb (hnswlib C extension)
#   git              → lets pip install packages that are fetched from git
#   libgomp1         → OpenMP runtime for sentence-transformers / scikit-learn
# The apt package lists are deleted in the same layer so they never reach the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        git \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/*

# ── Working directory ─────────────────────────────────────────────────────────
WORKDIR /app

# ── Python dependencies ───────────────────────────────────────────────────────
# requirements.txt is copied on its own so the dependency layers below stay
# cached until that file changes; edits to application source alone do not
# invalidate them.
COPY requirements.txt .

# CPU-only PyTorch is installed FIRST, from the PyTorch CPU wheel index, so the
# requirements install below finds torch already present instead of pulling the
# 2+ GB GPU wheels when sentence-transformers requests torch as a dependency.
RUN pip install --no-cache-dir \
    torch==2.5.1 \
    --index-url https://download.pytorch.org/whl/cpu

# Install the rest of the requirements.
# llama-cpp-python is intentionally excluded — Gemini-only deployment.
RUN pip install --no-cache-dir -r requirements.txt

# Download the spaCy English model at build time so it's baked into the image.
# `spacy download` forwards extra arguments to pip; --no-cache-dir keeps pip's
# download cache out of this layer, consistent with the two installs above.
RUN python -m spacy download en_core_web_sm --no-cache-dir

# ── Application source ────────────────────────────────────────────────────────
# Hugging Face documents that Docker Spaces run the container as UID 1000.
# Create that user and hand it /app (WORKDIR creates the directory root-owned)
# so the application can write its model/cache directories at runtime.
# Python packages installed earlier live system-wide and stay readable by it.
RUN useradd --create-home --uid 1000 user \
    && chown user:user /app

# Copy the source owned by the runtime user rather than root.
COPY --chown=user:user . .

# ── Environment variables ─────────────────────────────────────────────────────
# PYTHONUNBUFFERED            → don't buffer stdout/stderr, so logs appear in
#                               real time on HF.
# PYTHONDONTWRITEBYTECODE     → don't write .pyc files.
# LLM_PROVIDER / ENABLE_FALLBACK
#                             → LLM mode; overrides the config.py default so HF
#                               Spaces uses the Gemini API. GOOGLE_API_KEY is
#                               NOT set here — add it as a HF Space Secret.
# SENTENCE_TRANSFORMERS_HOME / HF_HOME
#                             → keep the sentence-transformers and Hugging Face
#                               caches at predictable paths inside /app.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    LLM_PROVIDER=gemini \
    ENABLE_FALLBACK=false \
    SENTENCE_TRANSFORMERS_HOME=/app/ml_models/embeddings \
    HF_HOME=/app/.cache/huggingface

# Drop root for everything that follows, including the container's start command.
USER user

# ── Port ──────────────────────────────────────────────────────────────────────
# 7860 is the exact port HF Spaces requires; uvicorn is bound to it below.
EXPOSE 7860

# ── Startup ───────────────────────────────────────────────────────────────────
# Exec-form start command. --reload is omitted because it is a dev-only flag.
# A single worker keeps RAM usage predictable on the free tier (2 vCPU, 16 GB RAM).
CMD ["uvicorn", "main:app", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--workers", "1"]