Spaces:

harshvisualz
/

vgecbot

Running

App Files Files Community

vgecbot / Dockerfile

harsh-dev

fix: upgrade torch to 2.5.1 for NumPy 2.x and Accelerate support

7599817 unverified about 1 month ago

raw

history blame contribute delete

4.18 kB

	# ─────────────────────────────────────────────────────────────────────────────
	# VGEC RAG Chatbot — Dockerfile for Hugging Face Spaces
	# ─────────────────────────────────────────────────────────────────────────────
	# HF Spaces requirements:
	# • Port MUST be 7860
	# • GOOGLE_API_KEY must be set as a Space Secret in HF UI
	# ─────────────────────────────────────────────────────────────────────────────

	# Slim Python 3.11 base image. The system packages the Python dependencies
	# need (compiler toolchain, OpenMP runtime, git) are installed explicitly with
	# apt-get in the next step.
	FROM python:3.11-slim

	# ── System dependencies ───────────────────────────────────────────────────────
	# Packages, kept in alphabetical order so diffs stay readable:
	#   build-essential  C/C++ toolchain, needed by chromadb (hnswlib C extension)
	#   git              needed by pip packages that install straight from git
	#   libgomp1         OpenMP runtime for sentence-transformers / scikit-learn
	# The apt package lists are removed in the same RUN so they never end up in
	# the resulting layer.
	RUN apt-get update \
	 && apt-get install -y --no-install-recommends \
	        build-essential \
	        git \
	        libgomp1 \
	 && rm -rf /var/lib/apt/lists/*

	# ── Working directory ─────────────────────────────────────────────────────────
	# Every later instruction (COPY destinations, RUN commands, the final CMD)
	# operates relative to /app. WORKDIR creates the directory if it is missing.
	WORKDIR /app

	# ── Python dependencies ───────────────────────────────────────────────────────
	# requirements.txt is copied on its own, ahead of the application source, so
	# Docker can cache the dependency layers below: they are rebuilt when the
	# requirements change, not when only source code changes.
	COPY requirements.txt ./

	# CPU-only PyTorch goes in FIRST, taken from the PyTorch CPU wheel index. With
	# torch already installed, sentence-transformers' torch dependency does not
	# make pip pull the 2+ GB GPU wheels later on.
	RUN pip install --no-cache-dir --index-url https://download.pytorch.org/whl/cpu torch==2.5.1

	# Everything else comes from requirements.txt.
	# llama-cpp-python is deliberately left out: this is a Gemini-only deployment.
	RUN pip install --no-cache-dir --requirement requirements.txt

	# Download the spaCy English model at build time so it is baked into the image
	# and nothing has to be fetched when the container starts.
	# `spacy download` forwards trailing arguments to the underlying pip install;
	# --no-cache-dir keeps pip's download cache out of this layer, consistent with
	# the other pip installs in this file.
	RUN python -m spacy download en_core_web_sm --no-cache-dir

	# ── Application source ────────────────────────────────────────────────────────
	# Run the application as an unprivileged user instead of root. UID 1000 is the
	# user ID the Hugging Face Docker Spaces documentation says containers run
	# with, so the account is created with exactly that ID.
	# /app (created root-owned by WORKDIR) is handed to that user so runtime writes
	# below /app — e.g. the caches this file points at via HF_HOME and
	# SENTENCE_TRANSFORMERS_HOME — do not fail with permission errors.
	# At this point /app only holds requirements.txt, so the recursive chown is
	# cheap and does not duplicate the application files in a second layer.
	RUN useradd --create-home --uid 1000 user \
	 && chown -R user:user /app

	# Copy the source already owned by the runtime user (no follow-up chown layer).
	COPY --chown=user:user . .

	# Everything from here on, including the final CMD, runs as the non-root user.
	# All root-only build steps (apt-get, pip installs) are above this line.
	USER user

	# ── Environment variables ─────────────────────────────────────────────────────
	# All runtime variables are grouped in a single ENV instruction:
	#   PYTHONUNBUFFERED=1           stdout/stderr are not buffered, so logs show
	#                                up in real time on HF
	#   PYTHONDONTWRITEBYTECODE=1    Python does not write .pyc files
	#   LLM_PROVIDER=gemini          LLM mode; overrides the config.py default so
	#                                HF Spaces uses the Gemini API
	#   ENABLE_FALLBACK=false        fallback switched off
	#   SENTENCE_TRANSFORMERS_HOME   sentence-transformers cache kept inside /app
	#                                so its location is predictable
	#   HF_HOME                      Hugging Face cache location, also under /app
	# GOOGLE_API_KEY is intentionally NOT set here — it must be added as a HF
	# Space Secret.
	ENV PYTHONUNBUFFERED=1 \
	    PYTHONDONTWRITEBYTECODE=1 \
	    LLM_PROVIDER=gemini \
	    ENABLE_FALLBACK=false \
	    SENTENCE_TRANSFORMERS_HOME=/app/ml_models/embeddings \
	    HF_HOME=/app/.cache/huggingface

	# ── Port ──────────────────────────────────────────────────────────────────────
	# HF Spaces requires exactly port 7860.
	# EXPOSE only documents the port; the actual bind happens through the
	# --port 7860 argument passed to uvicorn in CMD.
	EXPOSE 7860

	# ── Startup ───────────────────────────────────────────────────────────────────
	# Exec-form CMD: uvicorn is started directly (no shell wrapper) and serves
	# main:app on 0.0.0.0:7860, matching the EXPOSE above.
	# No --reload (dev-only flag).
	# --workers 1 keeps RAM usage predictable on the free tier (2 vCPU, 16 GB RAM).
	CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]