File size: 4,179 Bytes
4225666
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7599817
4225666
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# ─────────────────────────────────────────────────────────────────────────────
# VGEC RAG Chatbot — Dockerfile for Hugging Face Spaces
# ─────────────────────────────────────────────────────────────────────────────
# HF Spaces requirements:
#   • Port MUST be 7860
#   • GOOGLE_API_KEY must be set as a Space Secret in HF UI
# ─────────────────────────────────────────────────────────────────────────────

# Slim Python 3.11 base image; the apt-get step below assumes it is Debian-based.
FROM python:3.11-slim

# ── System dependencies ───────────────────────────────────────────────────────
# build-essential  → needed by chromadb (hnswlib C extension)
# libgomp1         → needed by sentence-transformers / scikit-learn OpenMP
# git              → needed by some pip packages that install from git
# Refresh the package index, install the build/runtime packages (listed
# alphabetically for clean diffs), and drop the index in the same layer so it
# never ends up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        git \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/*

# ── Working directory ─────────────────────────────────────────────────────────
# Later relative paths (COPY destinations, RUN and CMD working directory) resolve against /app.
WORKDIR /app

# ── Python dependencies ───────────────────────────────────────────────────────
# Copy requirements first so Docker caches this layer separately from source code.
# Any requirements change rebuilds from here; source code changes don't.
# Trailing slash makes it explicit that the destination is the working directory.
COPY requirements.txt ./

# Install CPU-only PyTorch FIRST (prevents pip from pulling 2+ GB GPU wheels
# when sentence-transformers later requests torch as a dependency).
# Options first, requirement last: resolve torch only against the CPU wheel index.
RUN pip install --no-cache-dir --index-url https://download.pytorch.org/whl/cpu \
    torch==2.5.1

# Install the rest of the requirements.
# llama-cpp-python is intentionally excluded — Gemini-only deployment.
# Install everything listed in the requirements file copied above, without keeping pip's cache.
RUN pip install --no-cache-dir --requirement requirements.txt

# Download the spaCy English model at build time so it's baked into the image.
# `spacy download` installs the model through pip and forwards extra arguments
# to it. Pass --no-cache-dir explicitly, like the pip installs above, so pip's
# download cache never lands in this layer regardless of spaCy's own default.
RUN python -m spacy download en_core_web_sm --no-cache-dir

# ── Application source ────────────────────────────────────────────────────────
# Hugging Face Spaces runs the container as UID 1000. Create that user and hand
# it /app (non-recursive chown: cheap, no layer duplication) so the cache
# directories configured below, which live under /app, can be created and
# written at runtime instead of failing on a root-owned tree.
RUN useradd --create-home --uid 1000 appuser \
    && chown appuser:appuser /app

# Copy the source already owned by the runtime user (avoids a second chown layer).
COPY --chown=appuser:appuser . .

# ── Environment variables ─────────────────────────────────────────────────────
# Tell Python not to buffer stdout/stderr (so logs appear in real time on HF)
# and not to write .pyc files.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

# LLM mode — overrides the config.py default; HF Spaces will use Gemini API.
# GOOGLE_API_KEY is NOT set here — it must be added as a HF Space Secret.
ENV LLM_PROVIDER=gemini \
    ENABLE_FALLBACK=false

# Point the sentence-transformers and Hugging Face caches inside /app so their
# location is predictable; /app is owned by appuser, so both are writable.
ENV SENTENCE_TRANSFORMERS_HOME=/app/ml_models/embeddings \
    HF_HOME=/app/.cache/huggingface

# Drop root: the container process started by CMD runs as the unprivileged user.
USER appuser

# ── Port ──────────────────────────────────────────────────────────────────────
# HF Spaces requires exactly port 7860.
# Documentation only (does not publish the port); TCP is the default protocol, spelled out here.
EXPOSE 7860/tcp

# ── Startup ───────────────────────────────────────────────────────────────────
# No --reload (dev-only flag).
# --workers 1 keeps RAM usage predictable on the free tier (2 vCPU, 16 GB RAM).
# Exec (JSON-array) form, one option pair per line; the argv is unchanged.
CMD ["uvicorn", "main:app", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--workers", "1"]