Apsiknb commited on
Commit
88aedab
·
verified ·
1 Parent(s): 6813fad

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +104 -55
Dockerfile CHANGED
@@ -1,81 +1,130 @@
1
- FROM ubuntu:20.04
2
 
3
- # --- Model URL (Spaces can override this via Build Args) ---
4
- ARG MODEL_DOWNLOAD_LINK
5
- ENV MODEL_DOWNLOAD_LINK=${MODEL_DOWNLOAD_LINK:-https://huggingface.co/QuantFactory/MN-Violet-Lotus-12B-GGUF/resolve/main/MN-Violet-Lotus-12B.Q4_K_M.gguf?download=true}
6
 
7
- # llama.cpp ref:
8
- # PR #13249 ("move end-user examples to tools directory") was merged as 1d36b367;
9
- # we pin to its parent so examples/server/* still exists and your patch applies.
10
- ARG LLAMA_CPP_REF="1d36b367^"
11
 
12
- ENV DEBIAN_FRONTEND=noninteractive
 
 
13
 
14
- # Spaces runs as UID 1000; create it early and set WORKDIR before COPY
15
- RUN useradd -m -u 1000 user
16
- ENV HOME=/home/user
17
- ENV PATH="$HOME/.local/bin:$PATH"
18
- WORKDIR $HOME/app
19
 
20
- # Copy your repo (must include helloworld.patch and replace_hw.py at repo root)
21
- COPY --chown=user . $HOME/app
22
 
23
- USER root
 
24
 
25
- # System deps
26
  RUN apt-get update && apt-get install -y --no-install-recommends \
27
  git cmake build-essential g++ \
28
  wget curl ca-certificates \
29
  python3 \
30
- && rm -rf /var/lib/apt/lists/*
 
 
 
31
 
32
- # Node (needed for the older examples/server/webui build)
33
  RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
34
- && apt-get update && apt-get install -y --no-install-recommends nodejs \
35
- && rm -rf /var/lib/apt/lists/*
 
36
 
37
- # Prepare /data (mounted at runtime if you enable persistent storage on Spaces)
38
  RUN mkdir -p /data && chmod 777 /data
39
 
40
- USER user
41
-
42
- # Update your patch/UI text based on MODEL_DOWNLOAD_LINK
43
  RUN python3 replace_hw.py
44
 
45
- # Clone llama.cpp and pin to a revision compatible with your patch
46
- RUN git clone https://github.com/ggml-org/llama.cpp.git $HOME/llama.cpp
47
- WORKDIR $HOME/llama.cpp
48
- RUN git checkout "${LLAMA_CPP_REF}"
49
-
50
- # Apply your UI patch (now updated by replace_hw.py)
51
- RUN git apply $HOME/app/helloworld.patch
52
 
53
- # Build the legacy web UI (exists in this pinned revision)
54
- WORKDIR $HOME/llama.cpp/examples/server/webui
55
  RUN npm install
56
  RUN npm run build
57
 
58
- # Build llama-server (CMake)
59
- WORKDIR $HOME/llama.cpp
60
  RUN cmake -B build -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=Release
61
  RUN cmake --build build --config Release -j $(nproc) -t llama-server
62
 
63
- # Spaces default exposed port is 7860 (set app_port in README if you change it)
64
- EXPOSE 7860
65
 
66
- # Download model at runtime into /data and start server
67
- CMD ["bash", "-lc", "set -euo pipefail; \
68
- MODEL_FILE=$(python3 -c 'import os,urllib.parse; u=os.environ[\"MODEL_DOWNLOAD_LINK\"]; print(os.path.basename(urllib.parse.urlsplit(u).path))'); \
69
- MODEL_DIR=/data/models; MODEL_PATH=\"$MODEL_DIR/$MODEL_FILE\"; \
70
- mkdir -p \"$MODEL_DIR\"; \
71
- if [ ! -f \"$MODEL_PATH\" ]; then \
72
- echo \"Downloading model -> $MODEL_PATH\"; \
73
- wget -nv -O \"$MODEL_PATH\" \"$MODEL_DOWNLOAD_LINK\"; \
74
- fi; \
75
- exec \"$HOME/llama.cpp/build/bin/llama-server\" \
76
- --host 0.0.0.0 --port 7860 \
77
- -c 2048 \
78
- -m \"$MODEL_PATH\" \
79
- --cache-type-k q8_0 \
80
- --alias \"MN-Violet-Lotus-12B\" \
81
- "]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# syntax=docker/dockerfile:1.6

ARG UBUNTU_VERSION=20.04

############################
# Build stage
############################
FROM ubuntu:${UBUNTU_VERSION} AS builder

# Build-time only (ARG, not ENV) so it never leaks into a runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

# The model URL is used at BUILD time only, to rewrite "Hello World!" inside
# the UI patch (replace_hw.py). Set it as a *Buildtime* variable in Spaces to
# override.
ARG MODEL_DOWNLOAD_LINK="https://huggingface.co/QuantFactory/MN-Violet-Lotus-12B-GGUF/resolve/main/MN-Violet-Lotus-12B.Q4_K_M.gguf?download=true"

# Pin llama.cpp to the commit the patch was written against.
# "1d36b367^" resolves to b34443923, where examples/server/* still exists.
ARG LLAMA_CPP_REPO="https://github.com/ggml-org/llama.cpp.git"
ARG LLAMA_CPP_REF="b34443923"

# Spaces runs containers as UID 1000.
RUN useradd -m -u 1000 user

WORKDIR /home/user/app
# Repo must include helloworld.patch and replace_hw.py at its root.
COPY --chown=user . /home/user/app

# Build deps. libcurl4-openssl-dev fixes CMake "Could NOT find CURL".
RUN apt-get update && apt-get install -y --no-install-recommends \
      build-essential \
      ca-certificates \
      cmake \
      curl \
      g++ \
      git \
      libcurl4-openssl-dev \
      patch \
      pkg-config \
      python3 \
      wget \
    && rm -rf /var/lib/apt/lists/*

# Node 20 for building the legacy WebUI.
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
    && apt-get update \
    && apt-get install -y --no-install-recommends nodejs \
    && rm -rf /var/lib/apt/lists/*

# Rewrite the patch content: "Hello World!" -> extracted model filename.
# The ENV mirror guarantees replace_hw.py sees MODEL_DOWNLOAD_LINK in its
# environment (builder stage is discarded, so this does not pollute runtime).
ENV MODEL_DOWNLOAD_LINK=${MODEL_DOWNLOAD_LINK}
RUN python3 replace_hw.py

# Clone llama.cpp, pin to the compatible revision, apply the UI patch.
# (Checkout of an arbitrary commit needs a full clone — no --depth here.)
RUN git clone ${LLAMA_CPP_REPO} /home/user/llama.cpp
WORKDIR /home/user/llama.cpp
RUN git checkout ${LLAMA_CPP_REF} \
    && git apply /home/user/app/helloworld.patch

# Build the WebUI (exists at this pinned revision).
# NOTE(review): prefer `npm ci` if a package-lock.json exists at this ref —
# confirm before switching.
WORKDIR /home/user/llama.cpp/examples/server/webui
RUN npm install && npm run build

# Build llama-server only (static libs, Release).
WORKDIR /home/user/llama.cpp
RUN cmake -B build -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=Release \
    && cmake --build build --config Release -j "$(nproc)" -t llama-server

############################
# Runtime stage
############################
FROM ubuntu:${UBUNTU_VERSION} AS runtime

ARG DEBIAN_FRONTEND=noninteractive

RUN useradd -m -u 1000 user

# Runtime deps only. libcurl4 is required explicitly: llama-server was built
# against libcurl (libcurl4-openssl-dev in the builder), so the binary links
# libcurl.so.4 — do not rely on the `curl` package pulling it transitively.
RUN apt-get update && apt-get install -y --no-install-recommends \
      ca-certificates \
      curl \
      libcurl4 \
      libgomp1 \
      libstdc++6 \
    && rm -rf /var/lib/apt/lists/*

# /data for local runs (Spaces mounts persistent storage here at runtime).
# World-writable so the non-root user can write when no volume is mounted.
RUN mkdir -p /data && chmod 777 /data

# Server binary + patched web assets from the builder stage.
COPY --from=builder /home/user/llama.cpp/build/bin/llama-server /usr/local/bin/llama-server
COPY --from=builder /home/user/llama.cpp/examples/server /home/user/llama.cpp/examples/server

# Defaults — override via Spaces Variables at runtime.
ENV LLAMA_HOST="0.0.0.0" \
    LLAMA_PORT="7860" \
    MODEL_DOWNLOAD_LINK="https://huggingface.co/QuantFactory/MN-Violet-Lotus-12B-GGUF/resolve/main/MN-Violet-Lotus-12B.Q4_K_M.gguf?download=true" \
    MODEL_DIR="/data" \
    LLAMA_EXTRA_ARGS=""

# Startup script: download the model into MODEL_DIR if missing, then exec
# llama-server as PID 1. Written with a BuildKit COPY heredoc (supported by
# the pinned dockerfile:1.6 frontend); the quoted delimiter 'EOF' prevents
# any build-time variable expansion, so all ${...} below are runtime values.
COPY <<'EOF' /usr/local/bin/start.sh
#!/usr/bin/env bash
set -euo pipefail

mkdir -p "${MODEL_DIR}"

# Derive the filename from the URL (strip the query string, e.g. ?download=true)
URL_NO_QUERY="${MODEL_DOWNLOAD_LINK%%\?*}"
FNAME="$(basename "${URL_NO_QUERY}")"
MODEL_PATH="${MODEL_DIR}/${FNAME}"

if [[ ! -f "${MODEL_PATH}" ]]; then
    echo "Model not found at ${MODEL_PATH}"
    echo "Downloading: ${MODEL_DOWNLOAD_LINK}"
    # Download to a temp name and mv atomically so an interrupted download
    # never leaves a truncated file that would be treated as cached.
    tmp="${MODEL_PATH}.tmp"
    rm -f "${tmp}"
    # NOTE: --retry-all-errors needs curl >= 7.71; Ubuntu 20.04 ships 7.68,
    # so we rely on --fail + --retry instead.
    curl -L --fail --retry 5 --connect-timeout 30 \
        -o "${tmp}" "${MODEL_DOWNLOAD_LINK}"
    mv "${tmp}" "${MODEL_PATH}"
    echo "Downloaded model to ${MODEL_PATH}"
else
    echo "Using cached model: ${MODEL_PATH}"
fi

cd /home/user/llama.cpp

# Serve the patched WebUI assets from examples/server.
# LLAMA_EXTRA_ARGS is intentionally unquoted so it word-splits into flags.
exec /usr/local/bin/llama-server \
    --host "${LLAMA_HOST}" \
    --port "${LLAMA_PORT}" \
    --path "/home/user/llama.cpp/examples/server" \
    -m "${MODEL_PATH}" \
    ${LLAMA_EXTRA_ARGS}
EOF
RUN chmod +x /usr/local/bin/start.sh

WORKDIR /home/user/llama.cpp
USER user

# Spaces default app port (set app_port in the Space README if changed).
EXPOSE 7860
ENTRYPOINT ["/usr/local/bin/start.sh"]