# syntax=docker/dockerfile:1.6
# llama-cpp-server / Dockerfile — by Apsiknb ("Update Dockerfile", commit 88aedab, verified)
# NOTE: the `# syntax=` parser directive must be the very first line of the file;
# any comment or text above it silently disables BuildKit heredoc/COPY features.
# syntax=docker/dockerfile:1.6
ARG UBUNTU_VERSION=20.04
FROM ubuntu:${UBUNTU_VERSION} AS builder
# Build-time only; keeps apt from prompting. Deliberately ARG (not ENV) so it
# does not leak into the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

# The model URL is used at BUILD time only to rewrite "Hello World!" in your patch.
# (Set this as a *Buildtime* variable in Spaces if you want it to override.)
ARG MODEL_DOWNLOAD_LINK="https://huggingface.co/QuantFactory/MN-Violet-Lotus-12B-GGUF/resolve/main/MN-Violet-Lotus-12B.Q4_K_M.gguf?download=true"

# Pin llama.cpp to the commit your patch was designed for.
# Your logs show this resolves to b34443923 after checking out "1d36b367^".
ARG LLAMA_CPP_REPO="https://github.com/ggml-org/llama.cpp.git"
ARG LLAMA_CPP_REF="b34443923"

RUN useradd -m -u 1000 user
WORKDIR /home/user/app
COPY --chown=user . /home/user/app

# Build deps, alphabetized for diffability.
# (NOTE: libcurl4-openssl-dev fixes "Could NOT find CURL")
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        g++ \
        git \
        libcurl4-openssl-dev \
        patch \
        pkg-config \
        python3 \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Node 20 for building the WebUI.
# Run under bash with pipefail so a failed download of the NodeSource setup
# script aborts the build instead of being masked by the pipe (hadolint DL4006;
# default /bin/sh on Ubuntu is dash, which has no pipefail).
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
    && apt-get update \
    && apt-get install -y --no-install-recommends nodejs \
    && rm -rf /var/lib/apt/lists/*

# Make /data (Spaces mounts this at runtime; harmless during build)
RUN mkdir -p /data && chmod 777 /data
# Rewrite your patch content: "Hello World!" -> extracted model filename.
# The ARG value is already exported into the RUN environment within this stage,
# but an explicit ENV makes the dependency of replace_hw.py on it visible.
ENV MODEL_DOWNLOAD_LINK=${MODEL_DOWNLOAD_LINK}
RUN python3 replace_hw.py

# Fetch llama.cpp, pin it to the patched-against commit, and apply the patch in
# ONE layer: splitting clone/checkout/apply across layers leaves a full
# unpatched source-tree snapshot baked into the build cache for no benefit.
# (Full clone is required: ${LLAMA_CPP_REF} is an abbreviated hash, so a
# shallow clone could not resolve it.)
RUN git clone ${LLAMA_CPP_REPO} /home/user/llama.cpp \
    && git -C /home/user/llama.cpp checkout ${LLAMA_CPP_REF} \
    && git -C /home/user/llama.cpp apply /home/user/app/helloworld.patch

# Build WebUI.
# NOTE(review): `npm install` kept instead of `npm ci` because a
# package-lock.json is not guaranteed to exist at this llama.cpp ref —
# switch to `npm ci` if the lockfile is present.
WORKDIR /home/user/llama.cpp/examples/server/webui
RUN npm install \
    && npm run build

# Build llama-server. Static libs (-DBUILD_SHARED_LIBS=OFF) so the runtime
# stage needs only the single binary, no llama shared objects.
WORKDIR /home/user/llama.cpp
RUN cmake -B build -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=Release \
    && cmake --build build --config Release -j $(nproc) -t llama-server
FROM ubuntu:${UBUNTU_VERSION} AS runtime
# Build-time only; never baked into the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

RUN useradd -m -u 1000 user

# Runtime deps only: curl + CA certs for the model download in start.sh,
# libstdc++/libgomp for the llama-server binary.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        libgomp1 \
        libstdc++6 \
    && rm -rf /var/lib/apt/lists/*

# Create /data for local runs (Spaces will mount it at runtime)
RUN mkdir -p /data && chmod 777 /data

# Copy server binary + web assets out of the builder stage
RUN mkdir -p /home/user/llama.cpp
COPY --from=builder /home/user/llama.cpp/build/bin/llama-server /usr/local/bin/llama-server
COPY --from=builder /home/user/llama.cpp/examples/server /home/user/llama.cpp/examples/server

# Defaults, grouped in one instruction (override in Spaces Variables at runtime)
ENV LLAMA_HOST="0.0.0.0" \
    LLAMA_PORT="7860" \
    MODEL_DOWNLOAD_LINK="https://huggingface.co/QuantFactory/MN-Violet-Lotus-12B-GGUF/resolve/main/MN-Violet-Lotus-12B.Q4_K_M.gguf?download=true" \
    MODEL_DIR="/data" \
    LLAMA_EXTRA_ARGS=""
# Startup script: download model into /data if missing, then run server.
#
# BUG FIX: the previous `RUN cat > … << 'EOF'\n\` construction was broken —
# Dockerfile `\` continuations join every line into a single shell line, so
# the heredoc body never followed a newline and the shell could not parse it
# (and the literal `\n` / `\"` sequences would have corrupted the script).
# A BuildKit COPY heredoc (docker/dockerfile:1.4+; we pin 1.6) writes the file
# verbatim. The quoted 'EOF' delimiter prevents build-time variable expansion,
# so ${MODEL_DIR} etc. are resolved at RUNTIME from the ENV defaults above.
COPY --chmod=755 <<'EOF' /usr/local/bin/start.sh
#!/usr/bin/env bash
set -euo pipefail

mkdir -p "${MODEL_DIR}"

# Derive filename from URL (strip query string)
URL_NO_QUERY="${MODEL_DOWNLOAD_LINK%%\?*}"
FNAME="$(basename "${URL_NO_QUERY}")"
MODEL_PATH="${MODEL_DIR}/${FNAME}"

if [[ ! -f "${MODEL_PATH}" ]]; then
  echo "Model not found at ${MODEL_PATH}"
  echo "Downloading: ${MODEL_DOWNLOAD_LINK}"
  # Download to a .tmp and rename so a killed/partial download is never
  # mistaken for a cached model on the next start.
  tmp="${MODEL_PATH}.tmp"
  rm -f "${tmp}"
  curl -L --fail --retry 5 --retry-all-errors --connect-timeout 30 \
    -o "${tmp}" "${MODEL_DOWNLOAD_LINK}"
  mv "${tmp}" "${MODEL_PATH}"
  echo "Downloaded model to ${MODEL_PATH}"
else
  echo "Using cached model: ${MODEL_PATH}"
fi

cd /home/user/llama.cpp

# Serve WebUI assets from examples/server; exec so llama-server becomes PID 1
# and receives SIGTERM directly from `docker stop`.
exec /usr/local/bin/llama-server \
  --host "${LLAMA_HOST}" \
  --port "${LLAMA_PORT}" \
  --path "/home/user/llama.cpp/examples/server" \
  -m "${MODEL_PATH}" \
  ${LLAMA_EXTRA_ARGS}
EOF

WORKDIR /home/user/llama.cpp
USER user
EXPOSE 7860
ENTRYPOINT ["/usr/local/bin/start.sh"]