# Container image that serves a single GGUF model over HTTP with llama.cpp's
# llama-server, listening on port 7860.

# Start from the official, pre-compiled llama.cpp server image.
FROM ghcr.io/ggml-org/llama.cpp:server

# Package installation requires root.
# NOTE(review): the user is not switched back afterwards, so the server runs
# as root unless the platform overrides it -- confirm this is intended.
USER root

# Install wget for the model download. ca-certificates is listed explicitly
# because wget only *recommends* it and --no-install-recommends would otherwise
# skip it, breaking the HTTPS download. The apt lists are removed in the same
# layer to keep the image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends ca-certificates wget \
    && rm -rf /var/lib/apt/lists/*

# Bake the GGUF model into the image at /model.gguf.
# NOTE(review): the URL tracks the `main` revision, so rebuilds may pick up a
# different file -- consider pinning a commit hash for reproducible builds.
RUN wget -O /model.gguf "https://huggingface.co/waddie/mini-2.0-GGUF/resolve/main/mini-2.0-Q4_K_M.gguf"

# Port the server listens on (must match --port below).
EXPOSE 7860

# Arguments passed to the image's ENTRYPOINT (expected to be the llama-server
# binary -- verify against the base image): load the baked-in model, bind to
# all interfaces, and use a 4096-token context window.
CMD ["--model", "/model.gguf", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--ctx-size", "4096"]