# Use the official lightweight C++ image from the main llama.cpp repo
# This image is pre-compiled and supports the newest architectures (Gemma 3)
# NOTE(review): ':server' is a moving tag — for reproducible builds, pin by
# digest (ghcr.io/ggml-org/llama.cpp:server@sha256:...) — TODO confirm desired digest
FROM ghcr.io/ggml-org/llama.cpp:server

# Set the working directory
WORKDIR /app

# Copy your model file
# Ensure the file 'model/gemma-3-finetuned.Q4_K_M.gguf' exists in your HF Space "Files" tab
COPY model/gemma-3-finetuned.Q4_K_M.gguf /app/model.gguf

# Runtime bind address/port. Hugging Face Spaces serves the app on port 7860.
ENV HOST=0.0.0.0 \
    PORT=7860

# Document the service port. EXPOSE does not publish the port; it is metadata
# for operators and tooling (the Space runtime routes to 7860 regardless).
EXPOSE 7860

# Run the server binary directly (No Python)
# This relies on the base image's ENTRYPOINT being the C++ 'llama-server'
# (args-only CMD) — NOTE(review): confirm against the upstream image if the
# tag is ever changed. Exec-form CMD performs no $VAR expansion, so host and
# port are spelled out literally here (kept in sync with HOST/PORT above).
# '--n-gpu-layers 0' forces CPU-only inference; '-c 2048' sets context size.
CMD ["-m", "/app/model.gguf", "--host", "0.0.0.0", "--port", "7860", "--n-gpu-layers", "0", "-c", "2048"]