# Base image: the pre-compiled llama.cpp server image published on GHCR.
# NOTE(review): `:server` is a tag, not a digest, so a rebuild may pick up a
# different upstream build. Consider pinning with `@sha256:<digest>` once a
# known-good digest has been chosen.
FROM ghcr.io/ggml-org/llama.cpp:server

# Package installation requires root privileges.
USER root

# Install wget for fetching the model at build time.
# --no-install-recommends keeps the layer small; ca-certificates is listed
# explicitly because it is only a recommended dependency of wget and the
# model is fetched over HTTPS. The apt lists are removed in the same layer
# so they never reach the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        wget \
    && rm -rf /var/lib/apt/lists/*

# GGUF model baked into the image at /model.gguf.
# Override at build time without editing this file:
#   docker build --build-arg MODEL_URL=<url> [--build-arg MODEL_SHA256=<hex>] .
ARG MODEL_URL="https://huggingface.co/waddie/mini-2.0-GGUF/resolve/main/mini-2.0-Q4_K_M.gguf"
# Optional SHA-256 of the model file. When empty (the default) the download
# is not verified, which matches the previous behaviour.
ARG MODEL_SHA256=""

# --progress=dot:giga keeps the build log compact for a multi-hundred-MB file.
# If a checksum was supplied, a mismatch fails the build instead of shipping
# a corrupted or substituted model.
RUN wget --progress=dot:giga -O /model.gguf "${MODEL_URL}" \
    && if [ -n "${MODEL_SHA256}" ]; then \
         echo "${MODEL_SHA256}  /model.gguf" | sha256sum -c -; \
       fi

# Port the server listens on. EXPOSE is documentation only and must be kept
# in sync with the --port argument and the HEALTHCHECK URL below.
EXPOSE 7860

# Probe the server's /health endpoint on the port configured in CMD.
# NOTE(review): any health check inherited from the base image presumably
# targets the server's default port rather than 7860 — confirm, then keep
# this override. wget is used because this Dockerfile installs it.
# The start period leaves time for the model to load before failures count.
HEALTHCHECK --interval=30s --timeout=5s --start-period=60s --retries=3 \
    CMD ["wget", "-q", "-O", "/dev/null", "http://127.0.0.1:7860/health"]

# Root was only needed for the build steps; run the server unprivileged.
# A numeric UID:GID is used so this works whether or not a matching account
# exists in the base image. /model.gguf is world-readable (default umask).
USER 1000:1000

# Default arguments for the server. Only arguments are given here, so this
# relies on the base image's ENTRYPOINT being the llama-server binary.
CMD ["--model", "/model.gguf", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--ctx-size", "4096"]