NS-Genai committed on
Commit
b9ca278
·
verified ·
1 Parent(s): 0ad551a

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +13 -36
Dockerfile CHANGED
@@ -1,42 +1,19 @@
1
- # Use python 3.10
2
- FROM python:3.10-slim
 
3
 
4
- # Set working directory
5
  WORKDIR /app
6
 
7
- # Install system dependencies
8
- # libgomp1 is required for the pre-compiled binary to run
9
- RUN apt-get update && apt-get install -y \
10
- libgomp1 \
11
- && rm -rf /var/lib/apt/lists/*
12
 
13
- # --- CRITICAL FIX START ---
14
- # 1. Upgrade pip to the latest version.
15
- # Old versions (like 23.0) often fail to recognize the specific wheel tags used by llama-cpp-python.
16
- RUN pip install --upgrade pip
17
-
18
- # 2. Install llama-cpp-python using PRE-BUILT WHEELS.
19
- # We prefer binary to prevent falling back to the source build (which causes the gcc error).
20
- RUN pip install llama-cpp-python \
21
- --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu \
22
- --prefer-binary
23
-
24
- # 3. Install server dependencies
25
- RUN pip install fastapi uvicorn sse-starlette pydantic-settings starlette-context
26
- # --- CRITICAL FIX END ---
27
-
28
- # Create model directory and ensure permissions
29
- RUN mkdir -p model && chmod 777 model
30
-
31
- # Copy the model file
32
- COPY model/gemma-3-finetuned.Q4_K_M.gguf model/model.gguf
33
-
34
- # Expose port 7860
35
  ENV PORT=7860
 
36
 
37
- # Run the server
38
- CMD python3 -m llama_cpp.server \
39
- --model model/model.gguf \
40
- --host 0.0.0.0 \
41
- --port 7860 \
42
- --n_ctx 2048
 
1
+ # Use the official image from the library author.
2
+ # This includes the correct pre-compiled binaries and system libraries.
3
+ FROM ghcr.io/abetlen/llama-cpp-python:latest
4
 
5
+ # Set the working directory
6
  WORKDIR /app
7
 
8
+ # Copy your model file into the container
9
+ # Ensure 'model/gemma-3-finetuned.Q4_K_M.gguf' exists in your Space's file list!
10
+ COPY model/gemma-3-finetuned.Q4_K_M.gguf /app/model/model.gguf
 
 
11
 
12
+ # Set environment variables for the server
13
+ # Hugging Face Spaces requires port 7860
14
+ ENV HOST=0.0.0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  ENV PORT=7860
16
+ ENV MODEL=/app/model/model.gguf
17
 
18
+ # Start the OpenAI-compatible server
19
+ CMD ["python3", "-m", "llama_cpp.server", "--model", "/app/model/model.gguf", "--host", "0.0.0.0", "--port", "7860", "--n_ctx", "2048"]