Spaces:
Running
Running
# infra/hf_spaces/api/Dockerfile
# Stateless API Space. No model weights — they live in the embedder and reranker Spaces.
# All secrets are injected at runtime via HF Space environment variables.
FROM python:3.11-slim

WORKDIR /app

# PYTHONDONTWRITEBYTECODE=1: do not write .pyc files into the image/container.
# PYTHONUNBUFFERED=1: flush stdout/stderr immediately so logs show up in real time.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# git is required for pip to install the toon_format VCS dependency.
# Installed before the pip layer so it is cached alongside system deps.
# The apt package lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends git \
    && rm -rf /var/lib/apt/lists/*

# Create an unprivileged account so the server does not run as root.
# UID 1000 is the user ID Hugging Face documents for Docker Spaces containers.
# /app is handed to that account so the app can still create files next to its code.
RUN useradd --create-home --uid 1000 appuser \
    && chown appuser:appuser /app

# Install dependencies first for better Docker layer caching.
# --prefer-binary: always take a pre-built wheel over compiling from source.
# Without this, onnxruntime (pulled by fastembed) compiles from source and
# adds 20-40 min to the build, causing HF Spaces to appear stuck.
# Runs as root on purpose: packages land in the system site-packages, which the
# unprivileged user can read but not modify.
COPY requirements.txt .
RUN pip install --no-cache-dir --prefer-binary -r requirements.txt

# Copy app source after dependencies so code changes don't bust the pip cache layer.
# --chown avoids a separate `RUN chown` layer that would duplicate the copied files.
COPY --chown=appuser:appuser . .

# Drop privileges for everything that runs from here on (i.e. the server process).
# HOME is set explicitly so libraries that cache under ~ get a writable location.
ENV HOME=/home/appuser
USER appuser

# Documentation only: the port uvicorn binds to below.
EXPOSE 7860

# Single worker — HF Spaces are not auto-scaled, multiple workers waste RAM.
# Exec (JSON) form so uvicorn is PID 1 and receives stop signals directly.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]