Spaces:
Paused
Paused
# ============================================================================
# Stage 1: Build llama-cpp-python with CPU optimizations
# ============================================================================
FROM python:3.11-slim AS builder

# Toolchain and headers required to compile llama-cpp-python from source.
# The apt lists are removed in the same layer to keep the layer small.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        git \
        libopenblas-dev \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*

# Build llama-cpp-python from source with OpenBLAS + AVX2/FMA so the
# instruction set is chosen here rather than inherited from a prebuilt wheel.
#
# GGML_NATIVE=OFF keeps ggml from adding -march=native, which would tune the
# binary to whatever machine happens to run this build. The image is executed
# on a different host, so a native build can crash with "Illegal instruction"
# when the build host has extensions (e.g. AVX-512) the runtime host lacks.
# With it off, the explicit AVX2/FMA switches below define the target.
# NOTE(review): confirm the option names against the ggml CMake options of the
# llama.cpp revision vendored by llama-cpp-python 0.3.4.
ENV CMAKE_ARGS="-DGGML_NATIVE=OFF -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DGGML_AVX2=ON -DGGML_FMA=ON"
# Force a CMake source build instead of accepting any cached/prebuilt artifact.
ENV FORCE_CMAKE=1

RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir llama-cpp-python==0.3.4
# ============================================================================
# Stage 2: Runtime image (lean)
# ============================================================================
FROM python:3.11-slim AS runtime

# Only the OpenBLAS shared library is needed at runtime (no -dev headers).
RUN apt-get update && apt-get install -y --no-install-recommends \
        libopenblas0 \
    && rm -rf /var/lib/apt/lists/*

# Bring over the compiled llama-cpp-python (and any console scripts) from the
# builder stage. Both stages use the same base image, so the paths line up.
COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Install the remaining Python deps (llama-cpp-python is already copied in).
# The extras specifier is quoted so the shell never treats [standard] as a
# glob pattern.
RUN pip install --no-cache-dir \
        fastapi==0.115.0 \
        "uvicorn[standard]==0.30.0" \
        huggingface_hub==0.24.0 \
        pydantic==2.8.0

# Create the non-root user (HF Spaces requirement) and, while still root,
# create /app owned by that user. Doing it explicitly means the ownership of
# the working directory does not depend on how the builder implements WORKDIR
# after a USER switch.
RUN useradd -m -u 1000 user && \
    mkdir -p /app && \
    chown user:user /app
USER user
WORKDIR /app

# Copy app code
COPY --chown=user:user app.py .

# HF Spaces persistent storage mounts at /data.
# This only makes sure a fallback directory exists under the user's home for
# the case where that storage is not mounted.
RUN mkdir -p /home/user/data/models

EXPOSE 7860
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--log-level", "info"]