FiberGate / Dockerfile
AzizMiladi's picture
fix(deploy): don't bake models into image, download from HF Hub
7f6dc6d
Raw
History Blame
3.53 kB
# ────────────────────────────────────────────────────────────────────────────
# GuichetOI ML — production Docker image
# ────────────────────────────────────────────────────────────────────────────
# • Tesseract with the French language pack is a runtime requirement.
# • Model weights (under ~2 GB total) are not baked into the image; they
#   are downloaded from HF Hub at container start, which keeps the image
#   slim. To ship a single self-contained artifact instead, re-add the COPY
#   for models/ (or mount /app/models as a volume at runtime).
# • Uses one worker — the ML pipeline keeps ~1.2 GB of weights resident
#   in RAM; multiple workers would multiply the memory footprint. Scale
#   horizontally with container replicas, not workers.
# ────────────────────────────────────────────────────────────────────────────
# Single build stage, named `runtime`, based on the slim Python 3.12 image.
FROM python:3.12-slim AS runtime

# Interpreter / pip settings, active for the remaining build steps and in
# the running container:
#   PIP_DISABLE_PIP_VERSION_CHECK : skip pip's "newer version available" check
#   PIP_NO_CACHE_DIR              : pip keeps no download cache in the image
#   PYTHONDONTWRITEBYTECODE       : Python writes no .pyc files
#   PYTHONUNBUFFERED              : stdout/stderr are not buffered
ENV PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_NO_CACHE_DIR=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1
# OS-level runtime packages (listed alphabetically):
#   curl                              : used by the HEALTHCHECK probe
#   libgl1, libglib2.0-0              : shared libraries needed by
#                                       opencv-python (used by
#                                       ocr_rasterise.py)
#   tesseract-ocr, tesseract-ocr-fra  : Tesseract plus its French language
#                                       pack, for the inference pipeline
# The apt package lists are removed in the same layer so they never end up
# in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        libgl1 \
        libglib2.0-0 \
        tesseract-ocr \
        tesseract-ocr-fra \
    && rm -rf /var/lib/apt/lists/*
# All following relative paths resolve under /app.
WORKDIR /app

# Dependency manifests are copied before the source tree so this layer (and
# the pip install below) is rebuilt only when the manifests change, not on
# every source edit.
COPY requirements.txt pyproject.toml ./
RUN pip install --upgrade pip \
    && pip install -r requirements.txt
# Application source and static assets.
# Model weights are not copied into the image: they are downloaded from
# HF Hub at container start, driven by the GUICHETOI_CLASSIFIER_DIR and
# GUICHETOI_EXTRACTOR_DIR env vars (see resolve_model_path() in
# guichetoi.inference). For a Render-style image with the weights baked in,
# re-add:  COPY models/ ./models/
COPY src/ ./src/
COPY assets/cms_template.xlsx assets/label_mappings.json ./assets/

# Editable install of the project itself, without re-resolving dependencies
# (--no-deps), so that `uvicorn guichetoi.api.main:app` can import the
# package.
RUN pip install --no-deps -e .
# Runtime defaults; both can be overridden at deploy time.
#   GUICHETOI_CORS_ORIGINS : "*" leaves CORS open. Override it in
#                            compose/Helm/k8s when Angular calls this
#                            service directly; no override is needed when
#                            only Spring Boot calls it.
#   PORT                   : Render injects $PORT (typically 10000); locally
#                            it defaults to 8000. The start command and the
#                            health probe read it through a shell, so the
#                            value is expanded at container start.
ENV GUICHETOI_CORS_ORIGINS="*" \
    PORT=8000
EXPOSE 8000
# Container health probe: healthy only when GET /health on the local port
# succeeds and the response body reports "pipeline_loaded": true.
# start-period covers the ~30 s model load. Spring Boot's readiness probe
# can call the same endpoint with the same allowance. Render's own health
# checks use the path configured in render.yaml — the same URL works for
# both.
HEALTHCHECK --start-period=120s --interval=30s --timeout=10s --retries=3 \
    CMD curl --fail --silent --show-error "http://127.0.0.1:${PORT}/health" \
        | grep -q '"pipeline_loaded": *true' \
        || exit 1
# Start the API server with a single uvicorn worker.
# Exec (JSON-array) form with an explicit `sh -c` keeps runtime expansion of
# $PORT, while `exec` replaces the shell with uvicorn so the server runs as
# PID 1 and receives SIGTERM from `docker stop` / the orchestrator directly,
# instead of relying on a wrapper shell to pass the signal on.
# NOTE(review): no USER instruction is visible in this file, so the server
# runs as the base image's default user — consider a non-root user once the
# write permissions needed for the model download location are confirmed.
CMD ["sh", "-c", "exec uvicorn guichetoi.api.main:app --host 0.0.0.0 --port \"${PORT}\" --workers 1"]