Spaces:
Sleeping
Sleeping
| # G.U.I.D.E. β Hugging Face Spaces Dockerfile | |
| # | |
| # Build args: | |
| # HF_MODEL_REPO HF Model Hub repo containing trained weights | |
| # e.g. --build-arg HF_MODEL_REPO=myuser/guide-models | |
| # | |
| # Runtime secrets (set via HF Spaces Secrets UI β never bake into image): | |
| # ANTHROPIC_API_KEY | |
| FROM python:3.11-slim | |
| # System dependencies | |
| RUN apt-get update && apt-get install -y --no-install-recommends \ | |
| tesseract-ocr \ | |
| git \ | |
| && rm -rf /var/lib/apt/lists/* | |
| WORKDIR /app | |
| # Python dependencies β installed before copying full source to leverage cache | |
| COPY requirements.txt . | |
| RUN pip install --no-cache-dir -r requirements.txt | |
| # spaCy language model (~750 MB) β required by Presidio privacy layer | |
| RUN python -m spacy download en_core_web_lg | |
| # Copy application source before downloading models. | |
| # Models are gitignored so COPY does not include them β but it does create | |
| # the /app/models/ directory entry. Downloading AFTER COPY ensures the | |
| # overlay filesystem sees models/ files in the top-most layer and does not | |
| # hide them behind an opaque whiteout created by a later COPY. | |
| COPY . . | |
| # Download trained model weights from HF Model Hub at build time | |
| # (avoids cold-start delay for visitors; weights land at /app/models/) | |
| ARG HF_MODEL_REPO | |
| RUN test -n "${HF_MODEL_REPO}" || (echo "ERROR: HF_MODEL_REPO build arg is not set" && exit 1) | |
| RUN python -c "from huggingface_hub import snapshot_download; snapshot_download('${HF_MODEL_REPO}', repo_type='model', local_dir='models')" && echo "=== models dir ===" && find /app/models -not -path '*/.cache/*' | sort | |
| # HF Spaces exposes port 7860 (Gradio); FastAPI runs internally on 8000 | |
| EXPOSE 7860 | |
| # Start both servers; models are already present so training is skipped | |
| CMD ["python", "start.py", "--download-models", "--no-train"] | |