Fola-AI committed
Commit 1143731 · 1 Parent(s): 3d5012a
Use jllllll pre-built wheels for llama-cpp-python
Dockerfile +5 -11
Dockerfile CHANGED

@@ -9,13 +9,9 @@ ENV HOST=0.0.0.0
 ENV PORT=7860
 ENV DEBIAN_FRONTEND=noninteractive
 
-# llama-cpp-python CUDA environment variables
-ENV CMAKE_ARGS="-DGGML_CUDA=on"
-ENV FORCE_CMAKE=1
-
 WORKDIR /app
 
-# Install system dependencies
+# Install system dependencies
 RUN apt-get update && apt-get install -y --no-install-recommends \
     ffmpeg \
     libsm6 \
@@ -24,8 +20,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     libglib2.0-0 \
     git \
     curl \
-    build-essential \
-    cmake \
     && rm -rf /var/lib/apt/lists/*
 
 # Upgrade pip
@@ -37,10 +31,10 @@ RUN pip install "numpy<2.0"
 # Install ultralytics
 RUN pip install ultralytics
 
-# Install llama-cpp-python with CUDA support
-#
-RUN pip install llama-cpp-python \
-    --extra-index-url
+# Install llama-cpp-python with CUDA support from jllllll pre-built wheels
+# This avoids compilation and is much faster
+RUN pip install llama-cpp-python --prefer-binary \
+    --extra-index-url=https://jllllll.github.io/llama-cpp-python-cuBLAS-wheels/AVX2/cu118
 
 # Copy requirements and install (excluding llama-cpp-python since we installed it above)
 COPY requirements.txt .
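
A quick way to confirm that the pre-built cuBLAS wheel actually uses the GPU is to load a model with full layer offload and watch the startup log. The sketch below is not part of this commit; the model path is a placeholder and it assumes a GGUF model has been copied into the image.

# Hypothetical sanity check (not in the commit): verify the jllllll cuBLAS
# wheel offloads layers to the GPU inside the container.
from llama_cpp import Llama

llm = Llama(
    model_path="/app/models/model.gguf",  # placeholder path, adjust to your image
    n_gpu_layers=-1,                      # offload every layer to the GPU
    verbose=True,                         # startup log reports the CUDA device and offloaded layers
)

out = llm("Q: What is the capital of France? A:", max_tokens=8)
print(out["choices"][0]["text"])

Note that the AVX2/cu118 path in the wheel index URL implies the host CPU must support AVX2 and the image must provide a CUDA 11.8-compatible runtime; if either assumption does not hold, a different index path (or a source build) would be needed.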