Apsiknb commited on
Commit
88aedab
·
verified ·
1 Parent(s): 6813fad

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +104 -55
Dockerfile CHANGED
@@ -1,81 +1,130 @@
1
- FROM ubuntu:20.04
2
 
3
- # --- Model URL (Spaces can override this via Build Args) ---
4
- ARG MODEL_DOWNLOAD_LINK
5
- ENV MODEL_DOWNLOAD_LINK=${MODEL_DOWNLOAD_LINK:-https://huggingface.co/QuantFactory/MN-Violet-Lotus-12B-GGUF/resolve/main/MN-Violet-Lotus-12B.Q4_K_M.gguf?download=true}
6
 
7
- # llama.cpp ref:
8
- # PR #13249 ("move end-user examples to tools directory") was merged as 1d36b367;
9
- # we pin to its parent so examples/server/* still exists and your patch applies.
10
- ARG LLAMA_CPP_REF="1d36b367^"
11
 
12
- ENV DEBIAN_FRONTEND=noninteractive
 
 
13
 
14
- # Spaces runs as UID 1000; create it early and set WORKDIR before COPY
15
- RUN useradd -m -u 1000 user
16
- ENV HOME=/home/user
17
- ENV PATH="$HOME/.local/bin:$PATH"
18
- WORKDIR $HOME/app
19
 
20
- # Copy your repo (must include helloworld.patch and replace_hw.py at repo root)
21
- COPY --chown=user . $HOME/app
22
 
23
- USER root
 
24
 
25
- # System deps
26
  RUN apt-get update && apt-get install -y --no-install-recommends \
27
  git cmake build-essential g++ \
28
  wget curl ca-certificates \
29
  python3 \
30
- && rm -rf /var/lib/apt/lists/*
 
 
 
31
 
32
- # Node (needed for the older examples/server/webui build)
33
  RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
34
- && apt-get update && apt-get install -y --no-install-recommends nodejs \
35
- && rm -rf /var/lib/apt/lists/*
 
36
 
37
- # Prepare /data (mounted at runtime if you enable persistent storage on Spaces)
38
  RUN mkdir -p /data && chmod 777 /data
39
 
40
- USER user
41
-
42
- # Update your patch/UI text based on MODEL_DOWNLOAD_LINK
43
  RUN python3 replace_hw.py
44
 
45
- # Clone llama.cpp and pin to a revision compatible with your patch
46
- RUN git clone https://github.com/ggml-org/llama.cpp.git $HOME/llama.cpp
47
- WORKDIR $HOME/llama.cpp
48
- RUN git checkout "${LLAMA_CPP_REF}"
49
-
50
- # Apply your UI patch (now updated by replace_hw.py)
51
- RUN git apply $HOME/app/helloworld.patch
52
 
53
- # Build the legacy web UI (exists in this pinned revision)
54
- WORKDIR $HOME/llama.cpp/examples/server/webui
55
  RUN npm install
56
  RUN npm run build
57
 
58
- # Build llama-server (CMake)
59
- WORKDIR $HOME/llama.cpp
60
  RUN cmake -B build -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=Release
61
  RUN cmake --build build --config Release -j $(nproc) -t llama-server
62
 
63
- # Spaces default exposed port is 7860 (set app_port in README if you change it)
64
- EXPOSE 7860
65
 
66
- # Download model at runtime into /data and start server
67
- CMD ["bash", "-lc", "set -euo pipefail; \
68
- MODEL_FILE=$(python3 -c 'import os,urllib.parse; u=os.environ[\"MODEL_DOWNLOAD_LINK\"]; print(os.path.basename(urllib.parse.urlsplit(u).path))'); \
69
- MODEL_DIR=/data/models; MODEL_PATH=\"$MODEL_DIR/$MODEL_FILE\"; \
70
- mkdir -p \"$MODEL_DIR\"; \
71
- if [ ! -f \"$MODEL_PATH\" ]; then \
72
- echo \"Downloading model -> $MODEL_PATH\"; \
73
- wget -nv -O \"$MODEL_PATH\" \"$MODEL_DOWNLOAD_LINK\"; \
74
- fi; \
75
- exec \"$HOME/llama.cpp/build/bin/llama-server\" \
76
- --host 0.0.0.0 --port 7860 \
77
- -c 2048 \
78
- -m \"$MODEL_PATH\" \
79
- --cache-type-k q8_0 \
80
- --alias \"MN-Violet-Lotus-12B\" \
81
- "]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# syntax=docker/dockerfile:1.6

ARG UBUNTU_VERSION=20.04

############################
# Build stage
############################
FROM ubuntu:${UBUNTU_VERSION} AS builder

# Build-time only (ARG, not ENV) so it never leaks into a runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

# The model URL is used at BUILD time only, to rewrite "Hello World!" inside
# the UI patch (replace_hw.py). Set it as a *Buildtime* variable in Spaces to
# override.
ARG MODEL_DOWNLOAD_LINK="https://huggingface.co/QuantFactory/MN-Violet-Lotus-12B-GGUF/resolve/main/MN-Violet-Lotus-12B.Q4_K_M.gguf?download=true"

# Pin llama.cpp to the commit the patch was written against.
# "1d36b367^" resolves to b34443923, where examples/server/* still exists.
ARG LLAMA_CPP_REPO="https://github.com/ggml-org/llama.cpp.git"
ARG LLAMA_CPP_REF="b34443923"

# Spaces runs containers as UID 1000.
RUN useradd -m -u 1000 user

WORKDIR /home/user/app
# Repo must include helloworld.patch and replace_hw.py at its root.
COPY --chown=user . /home/user/app

# Build deps. libcurl4-openssl-dev fixes CMake "Could NOT find CURL".
RUN apt-get update && apt-get install -y --no-install-recommends \
      build-essential \
      ca-certificates \
      cmake \
      curl \
      g++ \
      git \
      libcurl4-openssl-dev \
      patch \
      pkg-config \
      python3 \
      wget \
    && rm -rf /var/lib/apt/lists/*

# Node 20 for building the legacy WebUI.
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
    && apt-get update \
    && apt-get install -y --no-install-recommends nodejs \
    && rm -rf /var/lib/apt/lists/*

# Rewrite the patch content: "Hello World!" -> extracted model filename.
# The ENV mirror guarantees replace_hw.py sees MODEL_DOWNLOAD_LINK in its
# environment (builder stage is discarded, so this does not pollute runtime).
ENV MODEL_DOWNLOAD_LINK=${MODEL_DOWNLOAD_LINK}
RUN python3 replace_hw.py

# Clone llama.cpp, pin to the compatible revision, apply the UI patch.
# (Checkout of an arbitrary commit needs a full clone — no --depth here.)
RUN git clone ${LLAMA_CPP_REPO} /home/user/llama.cpp
WORKDIR /home/user/llama.cpp
RUN git checkout ${LLAMA_CPP_REF} \
    && git apply /home/user/app/helloworld.patch

# Build the WebUI (exists at this pinned revision).
# NOTE(review): prefer `npm ci` if a package-lock.json exists at this ref —
# confirm before switching.
WORKDIR /home/user/llama.cpp/examples/server/webui
RUN npm install && npm run build

# Build llama-server only (static libs, Release).
WORKDIR /home/user/llama.cpp
RUN cmake -B build -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=Release \
    && cmake --build build --config Release -j "$(nproc)" -t llama-server

############################
# Runtime stage
############################
FROM ubuntu:${UBUNTU_VERSION} AS runtime

ARG DEBIAN_FRONTEND=noninteractive

RUN useradd -m -u 1000 user

# Runtime deps only. libcurl4 is required explicitly: llama-server was built
# against libcurl (libcurl4-openssl-dev in the builder), so the binary links
# libcurl.so.4 — do not rely on the `curl` package pulling it transitively.
RUN apt-get update && apt-get install -y --no-install-recommends \
      ca-certificates \
      curl \
      libcurl4 \
      libgomp1 \
      libstdc++6 \
    && rm -rf /var/lib/apt/lists/*

# /data for local runs (Spaces mounts persistent storage here at runtime).
# World-writable so the non-root user can write when no volume is mounted.
RUN mkdir -p /data && chmod 777 /data

# Server binary + patched web assets from the builder stage.
COPY --from=builder /home/user/llama.cpp/build/bin/llama-server /usr/local/bin/llama-server
COPY --from=builder /home/user/llama.cpp/examples/server /home/user/llama.cpp/examples/server

# Defaults — override via Spaces Variables at runtime.
ENV LLAMA_HOST="0.0.0.0" \
    LLAMA_PORT="7860" \
    MODEL_DOWNLOAD_LINK="https://huggingface.co/QuantFactory/MN-Violet-Lotus-12B-GGUF/resolve/main/MN-Violet-Lotus-12B.Q4_K_M.gguf?download=true" \
    MODEL_DIR="/data" \
    LLAMA_EXTRA_ARGS=""

# Startup script: download the model into MODEL_DIR if missing, then exec
# llama-server as PID 1. Written with a BuildKit COPY heredoc (supported by
# the pinned dockerfile:1.6 frontend); the quoted delimiter 'EOF' prevents
# any build-time variable expansion, so all ${...} below are runtime values.
COPY <<'EOF' /usr/local/bin/start.sh
#!/usr/bin/env bash
set -euo pipefail

mkdir -p "${MODEL_DIR}"

# Derive the filename from the URL (strip the query string, e.g. ?download=true)
URL_NO_QUERY="${MODEL_DOWNLOAD_LINK%%\?*}"
FNAME="$(basename "${URL_NO_QUERY}")"
MODEL_PATH="${MODEL_DIR}/${FNAME}"

if [[ ! -f "${MODEL_PATH}" ]]; then
    echo "Model not found at ${MODEL_PATH}"
    echo "Downloading: ${MODEL_DOWNLOAD_LINK}"
    # Download to a temp name and mv atomically so an interrupted download
    # never leaves a truncated file that would be treated as cached.
    tmp="${MODEL_PATH}.tmp"
    rm -f "${tmp}"
    # NOTE: --retry-all-errors needs curl >= 7.71; Ubuntu 20.04 ships 7.68,
    # so we rely on --fail + --retry instead.
    curl -L --fail --retry 5 --connect-timeout 30 \
        -o "${tmp}" "${MODEL_DOWNLOAD_LINK}"
    mv "${tmp}" "${MODEL_PATH}"
    echo "Downloaded model to ${MODEL_PATH}"
else
    echo "Using cached model: ${MODEL_PATH}"
fi

cd /home/user/llama.cpp

# Serve the patched WebUI assets from examples/server.
# LLAMA_EXTRA_ARGS is intentionally unquoted so it word-splits into flags.
exec /usr/local/bin/llama-server \
    --host "${LLAMA_HOST}" \
    --port "${LLAMA_PORT}" \
    --path "/home/user/llama.cpp/examples/server" \
    -m "${MODEL_PATH}" \
    ${LLAMA_EXTRA_ARGS}
EOF
RUN chmod +x /usr/local/bin/start.sh

WORKDIR /home/user/llama.cpp
USER user

# Spaces default app port (set app_port in the Space README if changed).
EXPOSE 7860
ENTRYPOINT ["/usr/local/bin/start.sh"]