tusarway committed on
Commit
16365e0
·
verified ·
1 Parent(s): 195817e

fix llama

Browse files
Files changed (1) hide show
  1. Dockerfile +15 -18
Dockerfile CHANGED
@@ -2,35 +2,32 @@ FROM python:3.11-slim
2
 
3
  WORKDIR /app
4
 
5
- # Install system dependencies (including ccache and wget)
6
  RUN apt-get update && apt-get install -y \
7
- build-essential \
8
- cmake \
9
- libopenblas-dev \
10
  curl \
11
- pkg-config \
12
- git \
13
  wget \
14
- ccache \
15
  && rm -rf /var/lib/apt/lists/*
16
 
17
- # Set up ccache
18
- ENV CCACHE_DIR=/tmp/ccache
19
- ENV PATH="/usr/lib/ccache:$PATH"
20
-
21
- # Install Python dependencies
22
  COPY requirements.txt .
23
- RUN CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" \
24
- FORCE_CMAKE=1 \
25
- pip install --no-cache-dir -r requirements.txt
 
 
 
 
26
 
27
- # Download the model during build (automatic)
28
  RUN mkdir -p /app/models && \
29
- wget -q https://huggingface.co/unsloth/gemma-4-26B-A4B-it-GGUF/resolve/main/gemma-4-26B-A4B-it-UD-IQ3_XXS.gguf -O /app/models/gemma-4-26B-A4B-it-UD-IQ3_XXS.gguf
 
 
30
 
31
  COPY app.py .
32
 
33
- # HuggingFace Spaces expects port 7860
34
  EXPOSE 7860
35
 
36
  ENV SPACE_URL=""
 
2
 
3
  WORKDIR /app
4
 
5
+ # Only runtime libs needed — no build toolchain since we use pre-built wheels
6
  RUN apt-get update && apt-get install -y \
7
+ libopenblas0 \
 
 
8
  curl \
 
 
9
  wget \
 
10
  && rm -rf /var/lib/apt/lists/*
11
 
12
+ # Install all deps except llama-cpp-python first
 
 
 
 
13
  COPY requirements.txt .
14
+ RUN pip install --no-cache-dir -r requirements.txt
15
+
16
+ # ── KEY FIX: install pre-built CPU wheel (seconds, not hours) ─────────────────
17
+ # abetlen's CPU wheel index has pre-compiled binaries — no C++ compilation needed
18
+ RUN pip install --no-cache-dir \
19
+ "llama-cpp-python==0.3.8" \
20
+ --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
21
 
22
+ # Download model at build time so cold starts are fast (~60s instead of 10min)
23
  RUN mkdir -p /app/models && \
24
+ wget --progress=dot:giga \
25
+ "https://huggingface.co/unsloth/gemma-4-26B-A4B-it-GGUF/resolve/main/gemma-4-26B-A4B-it-UD-IQ3_XXS.gguf" \
26
+ -O /app/models/gemma-4-26B-A4B-it-UD-IQ3_XXS.gguf
27
 
28
  COPY app.py .
29
 
30
+ # HuggingFace Spaces requires port 7860
31
  EXPOSE 7860
32
 
33
  ENV SPACE_URL=""