Update Dockerfile
Dockerfile  (+11 -30)
CHANGED
@@ -1,37 +1,18 @@
-# Use
-
+# Use the official lightweight C++ image from the main llama.cpp repo
+# This image is pre-compiled and supports the newest architectures (Gemma 3)
+FROM ghcr.io/ggml-org/llama.cpp:server
 
-# Set working directory
+# Set the working directory
 WORKDIR /app
 
-#
-#
-
-    build-essential \
-    cmake \
-    libgomp1 \
-    git \
-    && rm -rf /var/lib/apt/lists/*
+# Copy your model file
+# Ensure the file 'model/gemma-3-finetuned.Q4_K_M.gguf' exists in your HF Space "Files" tab
+COPY model/gemma-3-finetuned.Q4_K_M.gguf /app/model.gguf
 
-#
-RUN pip install --upgrade pip
-
-# 3. Install llama-cpp-python from SOURCE
-# We do NOT use the --extra-index-url flag here.
-# This forces pip to download the source code and compile it locally,
-# ensuring you get the latest architecture support.
-RUN CMAKE_ARGS="-DGGML_NATIVE=OFF" pip install llama-cpp-python --no-cache-dir --verbose
-
-# 4. Install server dependencies
-RUN pip install fastapi uvicorn sse-starlette pydantic-settings starlette-context
-
-# 5. Setup Model
-RUN mkdir -p model
-COPY model/gemma-3-finetuned.Q4_K_M.gguf model/model.gguf
-
-# 6. Configure & Start Server
+# Expose the required port
 ENV HOST=0.0.0.0
 ENV PORT=7860
-ENV MODEL=/app/model/model.gguf
 
-
+# Run the server binary directly (No Python)
+# This uses the C++ 'llama-server' which is faster and supports Gemma 3
+CMD ["-m", "/app/model.gguf", "--host", "0.0.0.0", "--port", "7860", "--n-gpu-layers", "0", "-c", "2048"]
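For a local smoke test before pushing to the Space, a minimal sketch (not part of the commit; it assumes Docker is installed, the GGUF file sits under model/ next to the Dockerfile, and the image tag gemma-server plus the example prompt are placeholders):

# Build the image from the updated Dockerfile
docker build -t gemma-server .

# Run it, mapping the Space port to localhost
docker run --rm -p 7860:7860 gemma-server

# In a second terminal, query llama-server's OpenAI-compatible chat endpoint
curl http://localhost:7860/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"messages": [{"role": "user", "content": "Hello"}], "max_tokens": 64}'

As the comments in the new Dockerfile note, the CMD list contains only arguments: they are appended to the base image's entrypoint (the llama-server binary). Raising -c increases the context window at the cost of memory, and --n-gpu-layers 0 keeps inference on the CPU, which matches a free HF Space.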