# Spaces: Sleeping  (page-status text captured along with this Dockerfile; not a build instruction)
# Pull the official, pre-compiled llama.cpp C++ server image.
# NOTE(review): `server` is a rolling tag; pin a dated tag or an @sha256 digest
# once a known-good build is identified so rebuilds are reproducible.
FROM ghcr.io/ggml-org/llama.cpp:server

# Root is needed only for the package install and for writing /model.gguf;
# privileges are dropped again before the server starts (see USER below).
USER root

# wget downloads the model at build time and doubles as the runtime health
# probe. ca-certificates is listed explicitly because --no-install-recommends
# would otherwise leave it out and the HTTPS download would fail.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        wget \
    && rm -rf /var/lib/apt/lists/*

# GGUF model baked into the image. Override at build time with
#   docker build --build-arg MODEL_URL=<url> .
# NOTE(review): `resolve/main` follows the branch head, so the downloaded file
# can change between builds; pin a commit hash and verify a sha256 if the
# model must be reproducible.
ARG MODEL_URL="https://huggingface.co/waddie/mini-2.0-GGUF/resolve/main/mini-2.0-Q4_K_M.gguf"

# --progress=dot:giga keeps the build log short for a multi-hundred-MB file.
# wget exits non-zero on a failed transfer, which fails the build instead of
# shipping a truncated model.
RUN wget --progress=dot:giga -O /model.gguf "${MODEL_URL}"

# Drop root for the long-running server. A numeric UID works even if the base
# image defines no named user; 1000 is also the UID Hugging Face Docker Spaces
# run containers as. The model file is world-readable (default umask), so the
# unprivileged process can still load it.
USER 1000

# Documentation of the listening port (matches --port in CMD below).
EXPOSE 7860

# Probe the server on the port it actually listens on. llama-server's default
# port is 8080, so any probe inherited from the base image (presumably aimed at
# the default — verify) would not match. /health answers non-2xx while the
# model is still loading, hence the generous start period.
HEALTHCHECK --interval=30s --timeout=5s --start-period=60s --retries=3 \
    CMD wget -q -O /dev/null http://127.0.0.1:7860/health || exit 1

# Default arguments for the native server.
# (The image's ENTRYPOINT is already the llama-server binary, so CMD supplies
# only its flags; exec/JSON form keeps llama-server as PID 1 for clean signals.)
CMD ["--model", "/model.gguf", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--ctx-size", "4096"]