FROM ubuntu:22.04

# Install required packages (no ninja)
RUN apt-get update && apt-get install -y \
    git cmake build-essential curl wget \
    libcurl4-openssl-dev libssl-dev && \
    rm -rf /var/lib/apt/lists/*

# Clone llama.cpp
RUN git clone https://github.com/ggml-org/llama.cpp /opt/llama.cpp
WORKDIR /opt/llama.cpp

# Build llama-server with CURL and without ninja
RUN mkdir build && cd build && \
    cmake .. -DCMAKE_BUILD_TYPE=Release -DLLAMA_BUILD_SERVER=ON -DLLAMA_CURL=ON && \
    make -j$(nproc)
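
# Model fetched at build time; the URL can be overridden with a different
# .gguf file if desired, e.g.
#   docker build --build-arg MODEL_URL=<url-to-another-gguf> .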
ARG MODEL_URL=https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q8_0.gguf

# Download the Llama 3.2 1B Instruct model (Q8_0 GGUF)
RUN mkdir /models && \
    wget -qO /models/model.gguf ${MODEL_URL}

# Expose server port
EXPOSE 7860
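# (7860 is also the default application port for Hugging Face Docker Spaces,
# so the image can be deployed there with the default port setting.)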

# Run llama-server with parallel slots and continuous batching
ENTRYPOINT ["/opt/llama.cpp/build/bin/llama-server", \
    "-m", "/models/model.gguf", \
    "--threads", "4", "--threads-batch", "4", \
    "--host", "0.0.0.0", "--port", "7860", \
    "-np", "4", "--cont-batching"]