Update Dockerfile
Dockerfile  +31 -11

@@ -1,17 +1,37 @@
-# Use
-FROM

-#
-WORKDIR /

-#
-#
-

-#
 ENV HOST=0.0.0.0
 ENV PORT=7860
-ENV MODEL=/

-
-CMD ["python3", "-m", "llama_cpp.server", "--model", "/workspace/model.gguf", "--host", "0.0.0.0", "--port", "7860", "--n_ctx", "2048"]
+# Use python 3.10-slim as base
+FROM python:3.10-slim

+# Set working directory
+WORKDIR /app

+# 1. Install BUILD DEPENDENCIES (Critical for compiling from source)
+# We need build-essential (gcc) and cmake to compile the library for Gemma 3 support.
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    cmake \
+    libgomp1 \
+    git \
+    && rm -rf /var/lib/apt/lists/*

+# 2. Upgrade pip to ensure it handles modern build processes
+RUN pip install --upgrade pip
+
+# 3. Install llama-cpp-python from SOURCE
+# We do NOT use the --extra-index-url flag here.
+# This forces pip to download the source code and compile it locally,
+# ensuring you get the latest architecture support.
+RUN CMAKE_ARGS="-DGGML_NATIVE=OFF" pip install llama-cpp-python --no-cache-dir --verbose
+
+# 4. Install server dependencies
+RUN pip install fastapi uvicorn sse-starlette pydantic-settings starlette-context
+
+# 5. Setup Model
+RUN mkdir -p model
+COPY model/gemma-3-finetuned.Q4_K_M.gguf model/model.gguf
+
+# 6. Configure & Start Server
 ENV HOST=0.0.0.0
 ENV PORT=7860
+ENV MODEL=/app/model/model.gguf

+CMD ["python3", "-m", "llama_cpp.server", "--model", "/app/model/model.gguf", "--host", "0.0.0.0", "--port", "7860", "--n_ctx", "2048"]
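Because step 3 compiles llama-cpp-python from source instead of pulling a prebuilt wheel, the architecture support you end up with depends on whichever release pip resolves at image build time. A quick way to confirm what was actually compiled is to print the installed package version from inside the built image; this is only a sketch, and the image tag in the comment is a placeholder, not part of this commit.

# Run inside the built image, e.g. (image tag is a placeholder):
#   docker run --rm <your-image-tag> python3 -c "import llama_cpp; print(llama_cpp.__version__)"
import llama_cpp

# Version of the llama-cpp-python package compiled during the image build;
# a recent release is what carries the newer architecture support the
# Dockerfile comments above are after.
print("llama-cpp-python version:", llama_cpp.__version__)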
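Once a container built from this Dockerfile is running with port 7860 published (for example via a local docker run -p 7860:7860), llama_cpp.server exposes an OpenAI-compatible HTTP API. Below is a minimal smoke test, assuming the server is reachable at localhost:7860; the host, prompt, and model label are illustrative and not part of the commit.

import requests

BASE = "http://localhost:7860"  # assumes the container's port 7860 is published to the host

# Confirm the GGUF model was registered by the server (OpenAI-compatible /v1/models).
models = requests.get(f"{BASE}/v1/models", timeout=30).json()
print("served models:", [m["id"] for m in models.get("data", [])])

# One chat completion through the OpenAI-compatible endpoint; the "model"
# field should act only as a label here, since the server loads a single GGUF file.
resp = requests.post(
    f"{BASE}/v1/chat/completions",
    json={
        "model": "model.gguf",
        "messages": [{"role": "user", "content": "Say hello in one short sentence."}],
        "max_tokens": 64,
    },
    timeout=300,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])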