Spaces:
Sleeping
Sleeping
# Dockerfile for Hugging Face Spaces.
# Builds llama-cpp-python from source with the OpenBLAS backend and runs
# app.py, which is expected to listen on port 7860.
FROM python:3.11-slim-bookworm

# Set working directory (created automatically if it does not exist).
WORKDIR /app

# Install system dependencies:
#   - build-essential, cmake: toolchain for compiling llama-cpp-python
#   - libopenblas-dev, pkg-config: required because the build below is
#     configured with -DGGML_BLAS_VENDOR=OpenBLAS; without them CMake cannot
#     locate a BLAS implementation
#   - curl: used by the HEALTHCHECK at the bottom of this file
#   - git: kept from the original dependency list
# --no-install-recommends avoids pulling in optional packages, and the apt
# lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        curl \
        git \
        libopenblas-dev \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements FIRST so the dependency layers stay cached until
# requirements.txt itself changes.
COPY requirements.txt .

# Install Python dependencies EXCEPT llama-cpp-python, which is compiled
# separately below. sed is used for the filter because, unlike `grep -v`,
# it still exits 0 when every line is filtered out, so the && chain cannot
# be aborted by an otherwise valid requirements file.
RUN pip install --no-cache-dir --upgrade pip && \
    sed '/llama-cpp-python/d' requirements.txt > requirements_temp.txt && \
    pip install --no-cache-dir -r requirements_temp.txt && \
    rm requirements_temp.txt

# Build llama-cpp-python from source with CMake (this is the key step).
# FORCE_CMAKE=1 forces a local build so the library is compiled against this
# image's glibc instead of using a prebuilt (musl) wheel; CMAKE_ARGS enables
# the OpenBLAS backend installed above.
RUN CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" \
    FORCE_CMAKE=1 \
    pip install --no-cache-dir --force-reinstall --upgrade --verbose \
        llama-cpp-python==0.2.90

# Create an unprivileged user (UID 1000, the ID Hugging Face Spaces uses for
# Docker containers) and hand it the working directory so the app can write
# there without running as root.
RUN useradd --create-home --uid 1000 user && \
    chown user:user /app

# Copy application code, owned by the runtime user.
COPY --chown=user:user app.py .

# Drop root for everything that follows; all steps needing root are above.
ENV HOME=/home/user
USER user

# Expose port (HF Spaces uses 7860). Documentation only; does not publish.
EXPOSE 7860

# Health check: the long start period leaves time for startup before
# failures start counting against the retry budget.
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# Run the FastAPI app (exec form, so the Python process is PID 1 and
# receives stop signals directly).
CMD ["python", "app.py"]