# syntax=docker/dockerfile:1.6
# llama-cpp-server / Dockerfile — by Apsiknb ("Update Dockerfile", commit 88aedab, verified)
# NOTE: the `# syntax=` parser directive must be the very first line of the file;
# any comment or text above it silently disables BuildKit heredoc/COPY features.
# syntax=docker/dockerfile:1.6
ARG UBUNTU_VERSION=20.04
FROM ubuntu:${UBUNTU_VERSION} AS builder
# Build-time only; keeps apt from prompting. Deliberately ARG (not ENV) so it
# does not leak into the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

# The model URL is used at BUILD time only to rewrite "Hello World!" in your patch.
# (Set this as a *Buildtime* variable in Spaces if you want it to override.)
ARG MODEL_DOWNLOAD_LINK="https://huggingface.co/QuantFactory/MN-Violet-Lotus-12B-GGUF/resolve/main/MN-Violet-Lotus-12B.Q4_K_M.gguf?download=true"

# Pin llama.cpp to the commit your patch was designed for.
# Your logs show this resolves to b34443923 after checking out "1d36b367^".
ARG LLAMA_CPP_REPO="https://github.com/ggml-org/llama.cpp.git"
ARG LLAMA_CPP_REF="b34443923"

RUN useradd -m -u 1000 user
WORKDIR /home/user/app
COPY --chown=user . /home/user/app

# Build deps, alphabetized for diffability.
# (NOTE: libcurl4-openssl-dev fixes "Could NOT find CURL")
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        g++ \
        git \
        libcurl4-openssl-dev \
        patch \
        pkg-config \
        python3 \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Node 20 for building the WebUI.
# Run under bash with pipefail so a failed download of the NodeSource setup
# script aborts the build instead of being masked by the pipe (hadolint DL4006;
# default /bin/sh on Ubuntu is dash, which has no pipefail).
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
    && apt-get update \
    && apt-get install -y --no-install-recommends nodejs \
    && rm -rf /var/lib/apt/lists/*

# Make /data (Spaces mounts this at runtime; harmless during build)
RUN mkdir -p /data && chmod 777 /data
# Rewrite your patch content: "Hello World!" -> extracted model filename.
# The ARG value is already exported into the RUN environment within this stage,
# but an explicit ENV makes the dependency of replace_hw.py on it visible.
ENV MODEL_DOWNLOAD_LINK=${MODEL_DOWNLOAD_LINK}
RUN python3 replace_hw.py

# Fetch llama.cpp, pin it to the patched-against commit, and apply the patch in
# ONE layer: splitting clone/checkout/apply across layers leaves a full
# unpatched source-tree snapshot baked into the build cache for no benefit.
# (Full clone is required: ${LLAMA_CPP_REF} is an abbreviated hash, so a
# shallow clone could not resolve it.)
RUN git clone ${LLAMA_CPP_REPO} /home/user/llama.cpp \
    && git -C /home/user/llama.cpp checkout ${LLAMA_CPP_REF} \
    && git -C /home/user/llama.cpp apply /home/user/app/helloworld.patch

# Build WebUI.
# NOTE(review): `npm install` kept instead of `npm ci` because a
# package-lock.json is not guaranteed to exist at this llama.cpp ref —
# switch to `npm ci` if the lockfile is present.
WORKDIR /home/user/llama.cpp/examples/server/webui
RUN npm install \
    && npm run build

# Build llama-server. Static libs (-DBUILD_SHARED_LIBS=OFF) so the runtime
# stage needs only the single binary, no llama shared objects.
WORKDIR /home/user/llama.cpp
RUN cmake -B build -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=Release \
    && cmake --build build --config Release -j $(nproc) -t llama-server
FROM ubuntu:${UBUNTU_VERSION} AS runtime
# Build-time only; never baked into the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

RUN useradd -m -u 1000 user

# Runtime deps only: curl + CA certs for the model download in start.sh,
# libstdc++/libgomp for the llama-server binary.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        libgomp1 \
        libstdc++6 \
    && rm -rf /var/lib/apt/lists/*

# Create /data for local runs (Spaces will mount it at runtime)
RUN mkdir -p /data && chmod 777 /data

# Copy server binary + web assets out of the builder stage
RUN mkdir -p /home/user/llama.cpp
COPY --from=builder /home/user/llama.cpp/build/bin/llama-server /usr/local/bin/llama-server
COPY --from=builder /home/user/llama.cpp/examples/server /home/user/llama.cpp/examples/server

# Defaults, grouped in one instruction (override in Spaces Variables at runtime)
ENV LLAMA_HOST="0.0.0.0" \
    LLAMA_PORT="7860" \
    MODEL_DOWNLOAD_LINK="https://huggingface.co/QuantFactory/MN-Violet-Lotus-12B-GGUF/resolve/main/MN-Violet-Lotus-12B.Q4_K_M.gguf?download=true" \
    MODEL_DIR="/data" \
    LLAMA_EXTRA_ARGS=""
# Startup script: download model into /data if missing, then run server.
#
# BUG FIX: the previous `RUN cat > … << 'EOF'\n\` construction was broken —
# Dockerfile `\` continuations join every line into a single shell line, so
# the heredoc body never followed a newline and the shell could not parse it
# (and the literal `\n` / `\"` sequences would have corrupted the script).
# A BuildKit COPY heredoc (docker/dockerfile:1.4+; we pin 1.6) writes the file
# verbatim. The quoted 'EOF' delimiter prevents build-time variable expansion,
# so ${MODEL_DIR} etc. are resolved at RUNTIME from the ENV defaults above.
COPY --chmod=755 <<'EOF' /usr/local/bin/start.sh
#!/usr/bin/env bash
set -euo pipefail

mkdir -p "${MODEL_DIR}"

# Derive filename from URL (strip query string)
URL_NO_QUERY="${MODEL_DOWNLOAD_LINK%%\?*}"
FNAME="$(basename "${URL_NO_QUERY}")"
MODEL_PATH="${MODEL_DIR}/${FNAME}"

if [[ ! -f "${MODEL_PATH}" ]]; then
  echo "Model not found at ${MODEL_PATH}"
  echo "Downloading: ${MODEL_DOWNLOAD_LINK}"
  # Download to a .tmp and rename so a killed/partial download is never
  # mistaken for a cached model on the next start.
  tmp="${MODEL_PATH}.tmp"
  rm -f "${tmp}"
  curl -L --fail --retry 5 --retry-all-errors --connect-timeout 30 \
    -o "${tmp}" "${MODEL_DOWNLOAD_LINK}"
  mv "${tmp}" "${MODEL_PATH}"
  echo "Downloaded model to ${MODEL_PATH}"
else
  echo "Using cached model: ${MODEL_PATH}"
fi

cd /home/user/llama.cpp

# Serve WebUI assets from examples/server; exec so llama-server becomes PID 1
# and receives SIGTERM directly from `docker stop`.
exec /usr/local/bin/llama-server \
  --host "${LLAMA_HOST}" \
  --port "${LLAMA_PORT}" \
  --path "/home/user/llama.cpp/examples/server" \
  -m "${MODEL_PATH}" \
  ${LLAMA_EXTRA_ARGS}
EOF

WORKDIR /home/user/llama.cpp
USER user
EXPOSE 7860
ENTRYPOINT ["/usr/local/bin/start.sh"]