Spaces:

tusarway
/

codegen

Running

tusarway committed on Apr 13

Commit

b72e18c

verified ·

1 Parent(s): 8dcb3c9

u

Files changed (1) hide show

Dockerfile CHANGED Viewed

@@ -2,24 +2,29 @@ FROM python:3.11-slim
 WORKDIR /app
-# Only runtime libs needed — no build toolchain since we use pre-built wheels
 RUN apt-get update && apt-get install -y \
-    libopenblas0 \
     curl \
     wget \
     && rm -rf /var/lib/apt/lists/*
-# Install all deps except llama-cpp-python first
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
-# ── KEY FIX: install pre-built CPU wheel (seconds, not hours) ─────────────────
-# abetlen's CPU wheel index has pre-compiled binaries — no C++ compilation needed
-RUN pip install --no-cache-dir \
-    "llama-cpp-python==0.3.8" \
-    --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
-# Download model at build time so cold starts are fast (~60s instead of 10min)
 RUN mkdir -p /app/models && \
     wget --progress=dot:giga \
     "https://huggingface.co/unsloth/gemma-4-26B-A4B-it-GGUF/resolve/main/gemma-4-26B-A4B-it-UD-IQ3_XXS.gguf" \
@@ -27,7 +32,6 @@ RUN mkdir -p /app/models && \
 COPY app.py .
-# HuggingFace Spaces requires port 7860
 EXPOSE 7860
 ENV SPACE_URL=""

 WORKDIR /app
+# Build tools required — no pre-built CPU wheels exist for llama-cpp-python >= 0.3.x
+# (abetlen's /whl/cpu index only has older versions)
 RUN apt-get update && apt-get install -y \
+    build-essential \
+    cmake \
+    libopenblas-dev \
     curl \
     wget \
     && rm -rf /var/lib/apt/lists/*
+# Install all non-compiled deps first (fast, cached separately)
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
+# ── Compile llama-cpp-python from source ──────────────────────────────────────
+# CMAKE_BUILD_PARALLEL_LEVEL=4  → run up to 4 parallel build jobs (up to ~4x faster when the builder has ≥4 cores)
+# GGML_BLAS=ON                  → link OpenBLAS for faster matrix ops on CPU
+# This layer is Docker-cached: it only re-runs if this command or an earlier layer (apt packages, requirements.txt) changes
+RUN CMAKE_BUILD_PARALLEL_LEVEL=4 \
+    CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DCMAKE_BUILD_TYPE=Release" \
+    pip install --no-cache-dir "llama-cpp-python==0.3.8"
+# Download model at build time (cached separately from compilation)
 RUN mkdir -p /app/models && \
     wget --progress=dot:giga \
     "https://huggingface.co/unsloth/gemma-4-26B-A4B-it-GGUF/resolve/main/gemma-4-26B-A4B-it-UD-IQ3_XXS.gguf" \
 COPY app.py .
 EXPOSE 7860
 ENV SPACE_URL=""