Spaces:

waddie
/

cloudmini-api

Sleeping

cloudmini-api / Dockerfile

Update Dockerfile

601ed38 verified 3 days ago

618 Bytes

	# Base image: the official, pre-compiled llama.cpp C++ server image from GHCR.
	# NOTE(review): `:server` is a tag, not a digest, so a rebuild may pick up a
	# different upstream build — consider appending `@sha256:<digest>` once a
	# known-good digest has been identified.
	FROM ghcr.io/ggml-org/llama.cpp:server

	# Installing packages and writing /model.gguf both require root.
	USER root

	# wget is used for the model download below and stays in the image.
	# ca-certificates is listed explicitly because --no-install-recommends skips
	# recommended packages and the download is served over HTTPS.
	RUN apt-get update \
	    && apt-get install -y --no-install-recommends \
	        ca-certificates \
	        wget \
	    && rm -rf /var/lib/apt/lists/*

	# Download the GGUF model into the image.
	# --no-verbose keeps wget's progress output for a large file out of the build
	# log while still reporting errors. The chmod (same layer, so no extra copy of
	# the file) guarantees the model is readable by the unprivileged runtime user.
	# NOTE(review): `resolve/main` follows the branch head, so the file can change
	# between builds — put a commit hash in the URL for reproducible images.
	RUN wget --no-verbose -O /model.gguf "https://huggingface.co/waddie/mini-2.0-GGUF/resolve/main/mini-2.0-Q4_K_M.gguf" \
	    && chmod 0644 /model.gguf

	# Root was only needed for the steps above; drop privileges for runtime.
	# A numeric UID:GID is used so no passwd entry has to exist in the base image.
	# 1000 is the UID Hugging Face Docker Spaces run containers as.
	USER 1000:1000

	# Documents the port the server listens on (must match --port below).
	# EXPOSE does not publish the port by itself.
	EXPOSE 7860

	# Probe the server's /health endpoint on the port configured below.
	# NOTE(review): the base image may ship its own HEALTHCHECK aimed at the
	# upstream default port; declaring one here replaces it — confirm against the
	# base image. The start period leaves time for the model to load.
	# Uses the wget binary installed earlier in this Dockerfile.
	HEALTHCHECK --interval=30s --timeout=5s --start-period=60s --retries=3 \
	    CMD wget -q -O /dev/null http://127.0.0.1:7860/health || exit 1

	# Default arguments for the server process.
	# (The image's ENTRYPOINT is the llama-server binary, so CMD only supplies
	# its arguments and can be overridden at `docker run`.)
	#   --model     path of the GGUF file downloaded at build time
	#   --host      listen on all interfaces so the port is reachable from outside
	#   --port      must match EXPOSE above
	#   --ctx-size  context window, in tokens
	CMD ["--model", "/model.gguf", \
	     "--host", "0.0.0.0", \
	     "--port", "7860", \
	     "--ctx-size", "4096"]