# syntax=docker/dockerfile:1
# infra/hf_spaces/embedder/Dockerfile
# Bakes BAAI/bge-small-en-v1.5 weights into the image at build time.
# Cold start: ~5s (weights load from disk, not downloaded). No startup surprises.
FROM python:3.11-slim

WORKDIR /app

# HF_HOME must be set before the RUN python cache step so the model
# is written to /app/model_cache, which becomes part of this layer.
ENV HF_HOME=/app/model_cache \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# Every requirement specifier MUST be quoted: an unquoted '>=' is parsed by
# the shell as an output redirection (`pip install fastapi > =0.115.0`), so
# the version constraints silently never reach pip. Quoting also protects
# the '[standard]' extra from glob interpretation.
RUN pip install --no-cache-dir \
        "fastapi>=0.115.0" \
        "numpy>=1.26.0" \
        "sentence-transformers>=3.0.0" \
        "uvicorn[standard]>=0.29.0"

# Create a non-root user (uid 1000) and hand it /app BEFORE downloading the
# model, so the cache files are owned by the runtime user. Doing it in this
# order avoids a `chown -R` after the download, which would duplicate the
# ~130MB of weights into an extra layer.
RUN useradd --create-home --uid 1000 appuser \
    && mkdir -p /app/model_cache \
    && chown appuser:appuser /app /app/model_cache
USER appuser

# Download and cache model weights during build — never at runtime.
# This is the key step that makes cold starts fast on HF Spaces.
RUN python -c "\
from sentence_transformers import SentenceTransformer; \
SentenceTransformer('BAAI/bge-small-en-v1.5', cache_folder='/app/model_cache')"

COPY --chown=appuser:appuser app.py .

EXPOSE 7860

# Liveness probe using only the stdlib (the slim image ships no curl/wget).
# Any HTTP response — even a 404 — counts as alive, so this does not assume
# a particular health route exists in app.py; connection refused fails it.
HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \
    CMD python -c "import http.client; c = http.client.HTTPConnection('localhost', 7860, timeout=3); c.request('GET', '/'); c.getresponse()" || exit 1

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]