cloudmini-api / Dockerfile
waddie's picture
Update Dockerfile
601ed38 verified
raw
history blame contribute delete
618 Bytes
# Start from the upstream llama.cpp server image (ships the pre-compiled C++ server).
# NOTE(review): `server` looks like a moving tag — consider pinning a dated tag
# or an @sha256 digest so rebuilds are reproducible.
FROM ghcr.io/ggml-org/llama.cpp:server

# Root is needed only for the package install and the write to /model.gguf below;
# privileges are dropped again at the end of this section.
USER root

# wget fetches the model. ca-certificates is listed explicitly because, with
# --no-install-recommends, it may not be pulled in automatically and the
# download below is over HTTPS. The apt lists are removed in the same layer so
# they never end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        wget \
    && rm -rf /var/lib/apt/lists/*

# GGUF model baked into the image. Override at build time with
#   docker build --build-arg MODEL_URL=https://... .
ARG MODEL_URL="https://huggingface.co/waddie/mini-2.0-GGUF/resolve/main/mini-2.0-Q4_K_M.gguf"

# Download the model to /model.gguf. --no-verbose keeps the progress bar of a
# large download out of the build log while still reporting errors.
RUN wget --no-verbose -O /model.gguf "${MODEL_URL}"

# Drop root again so the server does not run privileged. A numeric UID works
# even when no matching passwd entry exists in the image.
# NOTE(review): UID 1000 is chosen to match the Hugging Face Spaces runtime
# user — confirm for other deployment targets.
USER 1000
# Port the server listens on; must stay in sync with --port in CMD below.
EXPOSE 7860

# Health probe against the port configured in CMD.
# NOTE(review): the upstream image is believed to ship a HEALTHCHECK that
# targets llama-server's default port (8080); since this image serves on 7860,
# that inherited probe would mark the container unhealthy — confirm against the
# base image. The generous start period leaves time for the model to load.
HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \
    CMD wget -q -O /dev/null http://127.0.0.1:7860/health || exit 1

# Default arguments for the server. Only arguments are given here because the
# base image's ENTRYPOINT is expected to be the llama-server binary, so these
# are appended to it. Exec (JSON) form: no shell expansion takes place.
CMD ["--model", "/model.gguf", \
"--host", "0.0.0.0", \
"--port", "7860", \
"--ctx-size", "4096"]