# ╔══════════════════════════════════════════════════════════════╗
# ║   Granite 4.0 ONNX Inference Server                          ║
# ║   Model: onnx-community/granite-4.0-h-350m-ONNX              ║
# ║   Runtime: ONNX Runtime CPU · FastAPI · Beautiful UI         ║
# ╚══════════════════════════════════════════════════════════════╝
FROM python:3.11-slim

# ── System dependencies ───────────────────────────────────────────────────────
# --no-install-recommends keeps the slim image small; apt lists are removed in
# the same layer so they never persist in the final image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    curl \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# ── Create non-root user (HuggingFace Spaces requirement) ─────────────────────
RUN useradd -m -u 1000 user
USER user

ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH \
    HF_HOME=/home/user/.cache/huggingface \
    # TRANSFORMERS_CACHE is deprecated in favor of HF_HOME but kept for
    # compatibility with older transformers releases.
    TRANSFORMERS_CACHE=/home/user/.cache/huggingface \
    # Cap OpenMP/MKL thread pools to avoid CPU oversubscription
    OMP_NUM_THREADS=4 \
    MKL_NUM_THREADS=4

WORKDIR /app

# ── Install Python dependencies ───────────────────────────────────────────────
# requirements.txt is copied alone first so dependency installation is cached
# independently of application-code changes.
COPY --chown=user requirements.txt .
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# ── Copy application files ────────────────────────────────────────────────────
COPY --chown=user server.py .
COPY --chown=user static/ ./static/

# ── Expose port (HF Spaces uses 7860) ─────────────────────────────────────────
EXPOSE 7860

# ── Health check ──────────────────────────────────────────────────────────────
# NOTE(review): the 120s start period presumably covers model download/load on
# first boot — confirm against server.py startup time.
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
    CMD curl -fsS http://localhost:7860/health || exit 1

# ── Launch server ─────────────────────────────────────────────────────────────
CMD ["uvicorn", "server:app", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--workers", "1", \
     "--log-level", "info"]