# syntax=docker/dockerfile:1
# Dockerfile for HuggingFace Spaces - build llama-cpp-python from source.
FROM python:3.11-slim-bookworm

WORKDIR /app

# Build toolchain plus OpenBLAS headers. CMAKE_ARGS below requests
# -DGGML_BLAS_VENDOR=OpenBLAS, so libopenblas-dev (and pkg-config, used by
# CMake's FindBLAS) must be installed or the source build fails to find BLAS.
# curl stays in the final image because HEALTHCHECK uses it.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        curl \
        git \
        libopenblas-dev \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements FIRST so the dependency layer is cached until it changes.
COPY requirements.txt .

# Install every Python dependency EXCEPT llama-cpp-python (built from source
# below). `|| true` keeps the step working when requirements.txt lists nothing
# but llama-cpp-python: grep -v exits 1 on zero matching lines, which would
# otherwise abort the whole RUN.
RUN pip install --no-cache-dir --upgrade pip && \
    { grep -v "llama-cpp-python" requirements.txt || true; } > requirements_temp.txt && \
    pip install --no-cache-dir -r requirements_temp.txt && \
    rm requirements_temp.txt

# Build llama-cpp-python from source with CMake (FORCE_CMAKE=1 skips any
# prebuilt wheel) so it compiles and links against this image's glibc
# toolchain and the OpenBLAS installed above.
RUN CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" \
    FORCE_CMAKE=1 \
    pip install --no-cache-dir --force-reinstall --upgrade --verbose \
        llama-cpp-python==0.2.90

# HF Spaces runs Docker Spaces as a non-root user with UID 1000; create it,
# hand over the app directory, and drop root for the runtime process.
RUN useradd --create-home --uid 1000 user && chown -R user:user /app
COPY --chown=user:user app.py .
USER user
ENV HOME=/home/user

# Documentation only (does not publish): HF Spaces expects port 7860.
EXPOSE 7860

# Generous start period: the first model load can take minutes.
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# Exec form: python is PID 1 and receives SIGTERM from `docker stop`.
CMD ["python", "app.py"]