# ============================================================
# VoiceVault — Docker Image for Hugging Face Spaces
# ============================================================
# Base:    Python 3.11-slim (stable, widely supported on HF)
# Runtime: CPU-only (Groq cloud API for transcription + LLM)
# Port:    7860 (HF Spaces default)
# User:    non-root "user" (UID 1000) owns /app at runtime
# ============================================================

FROM python:3.11-slim

WORKDIR /app

# ── System dependencies ────────────────────────────────────────────────
# build-essential : compiles C extensions (chromadb, numpy, etc.)
# git             : some pip packages clone during install
# libsndfile1     : soundfile audio I/O library (WAV reading for VAD)
# tesseract-ocr   : OCR fallback for scanned PDFs (pytesseract)
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        git \
        libsndfile1 \
        tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*

# ── Non-root runtime user ──────────────────────────────────────────────
# Hugging Face Spaces executes Docker containers as UID 1000. Creating that
# user here and handing it /app means everything written later (model
# cache, uploads, runtime data) is writable by the process that actually
# runs the server. Only the (still nearly empty) /app directory itself is
# chown'ed, so this layer stays tiny.
RUN useradd --create-home --user-group --uid 1000 user \
    && chown user:user /app

# ── Python: CPU-only PyTorch FIRST ─────────────────────────────────────
# Install before requirements.txt so pip reuses this install (~650MB)
# instead of the CUDA wheel from PyPI (~2.5GB).
# Installed as root into the system site-packages (readable by every user).
RUN pip install --no-cache-dir \
        torch==2.5.1 \
        --index-url https://download.pytorch.org/whl/cpu

# ── Python: all other dependencies ─────────────────────────────────────
# Only the manifest is copied here so this layer is rebuilt when the
# dependency list changes, not on every source edit.
COPY --chown=user:user requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# ── spaCy language model ───────────────────────────────────────────────
# Required by SemanticChunker for sentence tokenization during ingestion.
RUN python -m spacy download en_core_web_sm

# ── Drop privileges ────────────────────────────────────────────────────
# Every step below runs as the unprivileged user, so the files they create
# are owned by it without a separate (layer-duplicating) `chown -R`.
USER user
ENV HOME=/home/user

# ── Pre-download ML models ─────────────────────────────────────────────
# Baking models into the image avoids slow cold-start downloads in prod.
#   Embedding model (~90 MB) — used for vector search
#   Cross-encoder   (~67 MB) — used for reranking retrieved chunks
# All three cache variables point at the same directory so the build-time
# download and the runtime lookup agree on where the models live.
# NOTE(review): newer transformers releases prefer HF_HOME over
# TRANSFORMERS_CACHE; the latter is kept in case requirements.txt pins an
# older release — confirm before removing.
ENV HF_HOME=/app/cache \
    TRANSFORMERS_CACHE=/app/cache \
    SENTENCE_TRANSFORMERS_HOME=/app/cache

# The first Python statement starts right after the opening quote: the
# continuation lines are joined into one logical line, and leading
# whitespace before the first statement would be an IndentationError.
RUN python -c "from sentence_transformers import SentenceTransformer, CrossEncoder; \
    SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2'); \
    CrossEncoder('cross-encoder/ms-marco-MiniLM-L12-v2'); \
    print('Models pre-downloaded successfully.')"

# ── Application code ───────────────────────────────────────────────────
# Copied last because it changes most often. Keep a .dockerignore next to
# this file so .git, local caches and .env files stay out of the image.
COPY --chown=user:user . .

# ── Runtime directories ────────────────────────────────────────────────
# Created as the runtime user so the server can write to them.
RUN mkdir -p data/uploads models

# ── Environment ────────────────────────────────────────────────────────
# CUDA_VISIBLE_DEVICES=-1            force CPU — avoids CUDA errors on HF
#                                    CPU-only hardware
# HF_HUB_DISABLE_SYMLINKS_WARNING=1  suppress Windows-only symlink warning
#                                    (harmless, reduces log noise)
# PYTHONUNBUFFERED=1                 unbuffered Python output (logs appear
#                                    immediately)
# HOST / PORT                        server binding — HF Spaces requires
#                                    0.0.0.0:7860 (presumably read by
#                                    server.py; verify there)
ENV CUDA_VISIBLE_DEVICES=-1 \
    HF_HUB_DISABLE_SYMLINKS_WARNING=1 \
    PYTHONUNBUFFERED=1 \
    HOST=0.0.0.0 \
    PORT=7860

# Documentation only; it does not publish the port.
EXPOSE 7860

# ── Entrypoint ─────────────────────────────────────────────────────────
# Exec form: python is PID 1 and receives SIGTERM directly on shutdown.
CMD ["python", "server.py"]