up
Browse files- Dockerfile +2 -3
Dockerfile
CHANGED
|
@@ -15,12 +15,11 @@ RUN apt-get update && apt-get install -y \
|
|
| 15 |
COPY requirements.txt .
|
| 16 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 17 |
|
|
|
|
| 18 |
RUN CMAKE_BUILD_PARALLEL_LEVEL=4 \
|
| 19 |
CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" \
|
| 20 |
-
pip install --no-cache-dir "llama-cpp-python==0.3.8"
|
| 21 |
|
| 22 |
-
# Model is downloaded at runtime by app.py via hf_hub_download (handles retries/resume)
|
| 23 |
-
# Do NOT wget here — large files fail silently during Docker build on HF Spaces
|
| 24 |
RUN mkdir -p /app/models
|
| 25 |
|
| 26 |
COPY app.py .
|
|
|
|
| 15 |
COPY requirements.txt .
|
| 16 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 17 |
|
| 18 |
+
# Use latest llama-cpp-python — 0.3.8 does NOT support Gemma 4 MoE (A4B) architecture
|
| 19 |
RUN CMAKE_BUILD_PARALLEL_LEVEL=4 \
|
| 20 |
CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" \
|
| 21 |
+
pip install --no-cache-dir "llama-cpp-python"
|
| 22 |
|
|
|
|
|
|
|
| 23 |
RUN mkdir -p /app/models
|
| 24 |
|
| 25 |
COPY app.py .
|