Spaces:
Sleeping
Sleeping
| # syntax=docker/dockerfile:1 | |
| # ---------- builder ---------- | |
| # Throwaway stage: pre-build wheels for the lean runtime requirements here so the | |
| # final image carries no build toolchain. requirements.txt is the torch-free set | |
| # that offline mode runs on. | |
| FROM python:3.11-slim AS builder | |
| WORKDIR /build | |
| RUN python -m pip install --no-cache-dir --upgrade pip wheel | |
| COPY requirements.txt ./ | |
| RUN python -m pip wheel --no-cache-dir --wheel-dir=/wheels --requirement requirements.txt | |
| # ---------- runtime ---------- | |
| # Final image. Same base tag as the builder stage, so the wheels built there match | |
| # this interpreter. | |
| FROM python:3.11-slim AS runtime | |
| # WITH_MODEL=1 (default) installs the ML stack and serves the REAL fine-tuned model | |
| # — this is what the public Hugging Face Space runs. Pass --build-arg WITH_MODEL=0 for | |
| # the lean, torch-free image used by CI and local compose (which run the offline stub). | |
| ARG WITH_MODEL=1 | |
| # Don't write .pyc, don't buffer stdout (so logs stream in real time). | |
| # PIP_NO_CACHE_DIR=1 turns off pip's download cache for every pip call in this stage. | |
| # OFFLINE defaults to 0 (serve the real model); lean builds set OFFLINE=1 at run time | |
| # (see docker-compose.yml and CI). The classifier falls back to the stub if a real | |
| # load is requested but torch is absent, so a mismatched build still boots. | |
| # PORT is read by both the HEALTHCHECK probe and the uvicorn CMD further down. | |
| ENV PYTHONDONTWRITEBYTECODE=1 \ | |
| PYTHONUNBUFFERED=1 \ | |
| PIP_NO_CACHE_DIR=1 \ | |
| OFFLINE=0 \ | |
| PORT=8000 | |
| # Non-root user — never run a network service as root. | |
| # The UID is fixed (10001) so it stays the same across rebuilds. | |
| RUN useradd --create-home --uid 10001 appuser | |
| WORKDIR /app | |
| # Install the prebuilt lean wheels directly from the builder stage. The bind mount | |
| # exists only while this RUN executes, so the wheel files never become part of an | |
| # image layer (COPY + a later `rm -rf` would still ship them in the COPY layer). | |
| # Requires BuildKit, which the `# syntax=docker/dockerfile:1` directive already implies. | |
| COPY requirements.txt requirements-ml.txt ./ | |
| RUN --mount=type=bind,from=builder,source=/wheels,target=/wheels \ | |
| pip install --no-cache-dir --no-index --find-links=/wheels -r requirements.txt | |
| # Real-model stack (torch + transformers) comes from the package index and is only | |
| # installed when WITH_MODEL is exactly "1"; the lean CI/local image | |
| # (--build-arg WITH_MODEL=0) skips this step and the RUN still succeeds. | |
| RUN case "$WITH_MODEL" in \ | |
| 1) pip install --no-cache-dir -r requirements-ml.txt ;; \ | |
| esac | |
| # Application code (see .dockerignore for what is excluded). | |
| # Copied last so code changes do not invalidate the dependency layers above. No | |
| # --chown is given, so the files stay root-owned and appuser cannot modify them. | |
| COPY app/ ./app/ | |
| COPY demo/ ./demo/ | |
| COPY scripts/ ./scripts/ | |
| # Drop privileges for everything that follows, including the running container. | |
| USER appuser | |
| # Documentation only (does not publish the port); 8000 matches the default PORT. | |
| EXPOSE 8000 | |
| # Container-level liveness: an orchestrator that acts on health status can restart | |
| # the container when this fails. | |
| # The probe uses only the Python stdlib, so no curl/wget is needed in the image. It | |
| # requests /healthz on loopback at $PORT (default 8000) and exits 0 only on HTTP 200; | |
| # connection errors and HTTP error statuses make urlopen raise, which also ends the | |
| # probe with a non-zero exit. The 2 s socket timeout sits inside the 3 s --timeout. | |
| # NOTE(review): --start-period=10s may be short if the real model loads slowly — confirm. | |
| HEALTHCHECK --interval=30s --timeout=3s --start-period=10s --retries=3 \ | |
| CMD python -c "import os,urllib.request,sys; \ | |
| url='http://127.0.0.1:%s/healthz' % os.getenv('PORT','8000'); \ | |
| sys.exit(0 if urllib.request.urlopen(url, timeout=2).status==200 else 1)" | |
| # One uvicorn worker; the in-process micro-batcher handles concurrency. Scale | |
| # horizontally (more replicas) rather than with multiple workers so the batcher | |
| # stays effective per process. | |
| # `sh -c` is needed only to expand $PORT (exec-form CMD does no variable expansion). | |
| # `exec` then replaces the shell, so uvicorn runs as PID 1 and receives SIGTERM from | |
| # `docker stop` directly, allowing a graceful shutdown instead of a kill on timeout. | |
| CMD ["sh", "-c", "exec uvicorn app.main:app --host 0.0.0.0 --port ${PORT:-8000}"] | |