Spaces: Build error
```dockerfile
# syntax=docker/dockerfile:1.6
ARG UBUNTU_VERSION=20.04

FROM ubuntu:${UBUNTU_VERSION} AS builder
ARG DEBIAN_FRONTEND=noninteractive

# The model URL is used at BUILD time only to rewrite "Hello World!" in your patch.
# (Set this as a *Buildtime* variable in Spaces if you want it to override.)
ARG MODEL_DOWNLOAD_LINK="https://huggingface.co/QuantFactory/MN-Violet-Lotus-12B-GGUF/resolve/main/MN-Violet-Lotus-12B.Q4_K_M.gguf?download=true"

# Pin llama.cpp to the commit your patch was designed for.
# Your logs show this resolves to b34443923 after checking out "1d36b367^".
ARG LLAMA_CPP_REPO="https://github.com/ggml-org/llama.cpp.git"
ARG LLAMA_CPP_REF="b34443923"

RUN useradd -m -u 1000 user
WORKDIR /home/user/app
COPY --chown=user . /home/user/app

# Build deps (NOTE: libcurl4-openssl-dev fixes "Could NOT find CURL")
RUN apt-get update && apt-get install -y --no-install-recommends \
    git cmake build-essential g++ \
    wget curl ca-certificates \
    python3 \
    patch \
    pkg-config \
    libcurl4-openssl-dev \
    && rm -rf /var/lib/apt/lists/*

# Node 20 for building the WebUI
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
    && apt-get update \
    && apt-get install -y --no-install-recommends nodejs \
    && rm -rf /var/lib/apt/lists/*

# Make /data (Spaces mounts this at runtime; harmless during build)
RUN mkdir -p /data && chmod 777 /data

# Rewrite your patch content: "Hello World!" -> extracted model filename
ENV MODEL_DOWNLOAD_LINK=${MODEL_DOWNLOAD_LINK}
RUN python3 replace_hw.py
```
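The `replace_hw.py` script itself isn't shown in this thread, so this is just an assumption about what it does: derive the model filename from `MODEL_DOWNLOAD_LINK` and substitute it for the `"Hello World!"` placeholder inside `helloworld.patch`. If that's right, the shell equivalent would be roughly:

```bash
# Hypothetical equivalent of replace_hw.py (the real script is not shown here):
# strip the query string, take the basename, and swap it into the patch.
FNAME="$(basename "${MODEL_DOWNLOAD_LINK%%\?*}")"   # -> MN-Violet-Lotus-12B.Q4_K_M.gguf
sed -i "s/Hello World!/${FNAME}/g" helloworld.patch
```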
```dockerfile
# Build llama.cpp + apply patch
RUN git clone ${LLAMA_CPP_REPO} /home/user/llama.cpp
WORKDIR /home/user/llama.cpp
RUN git checkout ${LLAMA_CPP_REF}
RUN git apply /home/user/app/helloworld.patch

# Build WebUI
WORKDIR /home/user/llama.cpp/examples/server/webui
RUN npm install
RUN npm run build

# Build llama-server
WORKDIR /home/user/llama.cpp
RUN cmake -B build -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=Release
RUN cmake --build build --config Release -j $(nproc) -t llama-server
```
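Before pushing, you can sanity-check the pin and the patch locally with plain git (adjust the patch path to wherever `helloworld.patch` sits in your repo):

```bash
git clone https://github.com/ggml-org/llama.cpp.git
cd llama.cpp
git checkout b34443923                   # or: git checkout "1d36b367^"
git rev-parse --short HEAD               # should print b34443923
git apply --check ../helloworld.patch    # non-zero exit if the patch no longer applies
```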
```dockerfile
FROM ubuntu:${UBUNTU_VERSION} AS runtime
ARG DEBIAN_FRONTEND=noninteractive
RUN useradd -m -u 1000 user

# Runtime deps only
RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates curl \
    libstdc++6 libgomp1 \
    && rm -rf /var/lib/apt/lists/*

# Create /data for local runs (Spaces will mount it at runtime)
RUN mkdir -p /data && chmod 777 /data

# Copy server binary + web assets
RUN mkdir -p /home/user/llama.cpp
COPY --from=builder /home/user/llama.cpp/build/bin/llama-server /usr/local/bin/llama-server
COPY --from=builder /home/user/llama.cpp/examples/server /home/user/llama.cpp/examples/server

# Defaults (override in Spaces Variables at runtime)
ENV LLAMA_HOST="0.0.0.0"
ENV LLAMA_PORT="7860"
ENV MODEL_DOWNLOAD_LINK="https://huggingface.co/QuantFactory/MN-Violet-Lotus-12B-GGUF/resolve/main/MN-Violet-Lotus-12B.Q4_K_M.gguf?download=true"
ENV MODEL_DIR="/data"
ENV LLAMA_EXTRA_ARGS=""
```
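The startup script below derives the model filename from the download URL with two plain bash expansions; with the default link it resolves like this:

```bash
MODEL_DOWNLOAD_LINK="https://huggingface.co/QuantFactory/MN-Violet-Lotus-12B-GGUF/resolve/main/MN-Violet-Lotus-12B.Q4_K_M.gguf?download=true"
URL_NO_QUERY="${MODEL_DOWNLOAD_LINK%%\?*}"  # drop everything from the first "?"
basename "${URL_NO_QUERY}"                  # -> MN-Violet-Lotus-12B.Q4_K_M.gguf
```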
```dockerfile
# Startup script: download model into /data if missing, then run server.
# Written as a BuildKit heredoc (enabled by the syntax line at the top);
# the quoted 'EOF' keeps ${...} unexpanded until the script actually runs.
COPY <<'EOF' /usr/local/bin/start.sh
#!/usr/bin/env bash
set -euo pipefail

mkdir -p "${MODEL_DIR}"

# Derive filename from URL (strip query string)
URL_NO_QUERY="${MODEL_DOWNLOAD_LINK%%\?*}"
FNAME="$(basename "${URL_NO_QUERY}")"
MODEL_PATH="${MODEL_DIR}/${FNAME}"

if [[ ! -f "${MODEL_PATH}" ]]; then
  echo "Model not found at ${MODEL_PATH}"
  echo "Downloading: ${MODEL_DOWNLOAD_LINK}"
  tmp="${MODEL_PATH}.tmp"
  rm -f "${tmp}"
  curl -L --fail --retry 5 --retry-all-errors --connect-timeout 30 \
    -o "${tmp}" "${MODEL_DOWNLOAD_LINK}"
  mv "${tmp}" "${MODEL_PATH}"
  echo "Downloaded model to ${MODEL_PATH}"
else
  echo "Using cached model: ${MODEL_PATH}"
fi

cd /home/user/llama.cpp

# Serve WebUI assets from examples/server
exec /usr/local/bin/llama-server \
  --host "${LLAMA_HOST}" \
  --port "${LLAMA_PORT}" \
  --path "/home/user/llama.cpp/examples/server" \
  -m "${MODEL_PATH}" \
  ${LLAMA_EXTRA_ARGS}
EOF
RUN chmod +x /usr/local/bin/start.sh

WORKDIR /home/user/llama.cpp
USER user
EXPOSE 7860
ENTRYPOINT ["/usr/local/bin/start.sh"]
```
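To test outside Spaces, a quick local run looks like this (the image tag and volume name are placeholders; the named volume plays the role of the persistent `/data` mount, so the model survives restarts):

```bash
docker build -t hello-llama .
docker run --rm -p 7860:7860 \
  -v llama-data:/data \
  -e MODEL_DOWNLOAD_LINK="https://huggingface.co/QuantFactory/MN-Violet-Lotus-12B-GGUF/resolve/main/MN-Violet-Lotus-12B.Q4_K_M.gguf?download=true" \
  hello-llama

# llama-server answers on /health once the model has loaded:
curl http://localhost:7860/health
```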