Spaces:
Running
Running
# -----------------------------------------------------------------------------
# VGEC RAG Chatbot - Dockerfile for Hugging Face Spaces
# -----------------------------------------------------------------------------
# HF Spaces requirements:
#   * Port MUST be 7860
#   * GOOGLE_API_KEY must be set as a Space Secret in HF UI
#   * The container is started as UID 1000, so every path the app writes to at
#     runtime must be owned by that UID
# -----------------------------------------------------------------------------
FROM python:3.11-slim

# ---- System dependencies ----------------------------------------------------
# build-essential - needed by chromadb (hnswlib C extension)
# libgomp1        - needed by sentence-transformers / scikit-learn OpenMP
# git             - needed by some pip packages that install from git
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        libgomp1 \
        git \
    && rm -rf /var/lib/apt/lists/*

# ---- Runtime user -----------------------------------------------------------
# Create the UID-1000 account that HF Spaces runs the container as. Packages
# are still installed as root below; only /app is handed over to this user.
RUN useradd --create-home --uid 1000 user

# ---- Working directory ------------------------------------------------------
# WORKDIR creates /app owned by root; give it to the runtime user so the cache
# directories configured further down (both under /app) can be created on
# first use.
WORKDIR /app
RUN chown user:user /app

# ---- Python dependencies ----------------------------------------------------
# Disable the pip cache for every pip invocation in the build, including the
# one `spacy download` runs internally, so no wheel cache ends up in a layer.
ENV PIP_NO_CACHE_DIR=1

# Copy requirements first so Docker caches this layer separately from source code.
# Any requirements change rebuilds from here; source code changes don't.
COPY --chown=user:user requirements.txt .

# Install CPU-only PyTorch FIRST (prevents pip from pulling 2+ GB GPU wheels
# when sentence-transformers later requests torch as a dependency).
RUN pip install --no-cache-dir \
        torch==2.5.1 \
        --index-url https://download.pytorch.org/whl/cpu

# Install the rest of the requirements.
# llama-cpp-python is intentionally excluded - Gemini-only deployment.
RUN pip install --no-cache-dir -r requirements.txt

# Download the spaCy English model at build time so it's baked into the image.
RUN python -m spacy download en_core_web_sm

# ---- Application source -----------------------------------------------------
# --chown keeps the copied tree writable by the runtime user without needing a
# separate (layer-duplicating) recursive chown.
COPY --chown=user:user . .

# ---- Environment variables --------------------------------------------------
# Tell Python not to buffer stdout/stderr (so logs appear in real time on HF).
ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1

# LLM mode - overrides the config.py default; HF Spaces will use Gemini API.
# GOOGLE_API_KEY is NOT set here - it must be added as a HF Space Secret.
ENV LLM_PROVIDER=gemini
ENV ENABLE_FALLBACK=false

# Point sentence-transformers cache inside /app so it's predictable.
ENV SENTENCE_TRANSFORMERS_HOME=/app/ml_models/embeddings
ENV HF_HOME=/app/.cache/huggingface

# ---- Port -------------------------------------------------------------------
# HF Spaces requires exactly port 7860.
EXPOSE 7860

# ---- Startup ----------------------------------------------------------------
# Drop root before starting the server; this matches the UID HF Spaces uses.
USER user
ENV HOME=/home/user

# No --reload (dev-only flag).
# --workers 1 keeps RAM usage predictable on the free tier (2 vCPU, 16 GB RAM).
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]