tusarway committed on
Commit
16365e0
·
verified ·
1 Parent(s): 195817e

fix llama

Browse files
Files changed (1) hide show
  1. Dockerfile +15 -18
Dockerfile CHANGED
@@ -2,35 +2,32 @@ FROM python:3.11-slim
2
 
3
  WORKDIR /app
4
 
5
- # Install system dependencies (including ccache and wget)
6
  RUN apt-get update && apt-get install -y \
7
- build-essential \
8
- cmake \
9
- libopenblas-dev \
10
  curl \
11
- pkg-config \
12
- git \
13
  wget \
14
- ccache \
15
  && rm -rf /var/lib/apt/lists/*
16
 
17
- # Set up ccache
18
- ENV CCACHE_DIR=/tmp/ccache
19
- ENV PATH="/usr/lib/ccache:$PATH"
20
-
21
- # Install Python dependencies
22
  COPY requirements.txt .
23
- RUN CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" \
24
- FORCE_CMAKE=1 \
25
- pip install --no-cache-dir -r requirements.txt
 
 
 
 
26
 
27
- # Download the model during build (automatic)
28
  RUN mkdir -p /app/models && \
29
- wget -q https://huggingface.co/unsloth/gemma-4-26B-A4B-it-GGUF/resolve/main/gemma-4-26B-A4B-it-UD-IQ3_XXS.gguf -O /app/models/gemma-4-26B-A4B-it-UD-IQ3_XXS.gguf
 
 
30
 
31
  COPY app.py .
32
 
33
- # HuggingFace Spaces expects port 7860
34
  EXPOSE 7860
35
 
36
  ENV SPACE_URL=""
 
2
 
3
  WORKDIR /app
4
 
5
+ # Only runtime libs needed — no build toolchain since we use pre-built wheels
6
  RUN apt-get update && apt-get install -y \
7
+ libopenblas0 \
 
 
8
  curl \
 
 
9
  wget \
 
10
  && rm -rf /var/lib/apt/lists/*
11
 
12
+ # Install all deps except llama-cpp-python first
 
 
 
 
13
  COPY requirements.txt .
14
+ RUN pip install --no-cache-dir -r requirements.txt
15
+
16
+ # ── KEY FIX: install pre-built CPU wheel (seconds, not hours) ─────────────────
17
+ # abetlen's CPU wheel index has pre-compiled binaries — no C++ compilation needed
18
+ RUN pip install --no-cache-dir \
19
+ "llama-cpp-python==0.3.8" \
20
+ --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
21
 
22
+ # Download model at build time so cold starts are fast (~60s instead of 10min)
23
  RUN mkdir -p /app/models && \
24
+ wget --progress=dot:giga \
25
+ "https://huggingface.co/unsloth/gemma-4-26B-A4B-it-GGUF/resolve/main/gemma-4-26B-A4B-it-UD-IQ3_XXS.gguf" \
26
+ -O /app/models/gemma-4-26B-A4B-it-UD-IQ3_XXS.gguf
27
 
28
  COPY app.py .
29
 
30
+ # HuggingFace Spaces requires port 7860
31
  EXPOSE 7860
32
 
33
  ENV SPACE_URL=""