File size: 1,289 Bytes
1e3c6e8
 
561e31f
 
 
 
1e3c6e8
561e31f
 
1e3c6e8
561e31f
1e3c6e8
561e31f
 
1e3c6e8
e1bf5f5
1e3c6e8
 
561e31f
1e3c6e8
 
 
 
 
 
 
 
 
 
561e31f
 
e1bf5f5
561e31f
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# syntax=docker/dockerfile:1
# Dockerfile for HuggingFace Spaces - Build llama-cpp-python from source
FROM python:3.11-slim-bookworm

# Set working directory (created automatically if missing)
WORKDIR /app

# Install the build toolchain for compiling llama-cpp-python, plus:
#  - libopenblas-dev / pkg-config: required because CMAKE_ARGS below enables
#    the OpenBLAS backend; without the dev package CMake cannot find BLAS
#  - curl: needed at runtime by the HEALTHCHECK below (single-stage image)
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    cmake \
    curl \
    git \
    libopenblas-dev \
    pkg-config \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements FIRST so the dependency layer stays cached until
# requirements.txt itself changes
COPY requirements.txt .

# Install Python dependencies EXCEPT llama-cpp-python, which must be
# compiled from source below instead of pulled as a prebuilt wheel
RUN pip install --no-cache-dir --upgrade pip && \
    grep -v "llama-cpp-python" requirements.txt > requirements_temp.txt && \
    pip install --no-cache-dir -r requirements_temp.txt && \
    rm requirements_temp.txt

# Build llama-cpp-python from source with CMake (this is the key!).
# FORCE_CMAKE=1 stops pip from using a prebuilt wheel, guaranteeing the
# library is compiled against this image's glibc toolchain with OpenBLAS.
RUN CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" \
    FORCE_CMAKE=1 \
    pip install --no-cache-dir --force-reinstall --upgrade --verbose \
    llama-cpp-python==0.2.90

# Copy application code (after deps, so code edits don't bust the dep layers)
COPY app.py .

# Run as a non-root user; HF Spaces convention is UID 1000
RUN useradd --create-home --uid 1000 appuser && chown -R appuser:appuser /app
USER appuser

# Document the service port (HF Spaces expects 7860; EXPOSE does not publish)
EXPOSE 7860

# Health check; long start-period allows for model loading on first boot
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# Run the FastAPI app (exec form: process is PID 1 and receives SIGTERM)
CMD ["python", "app.py"]