OpenCode Deployer committed

Commit 1e941ef · 1 Parent(s): 5fd9c8d

update

Files changed:
- Dockerfile +22 -11
- README.md +16 -0
Dockerfile
CHANGED
```diff
@@ -1,3 +1,21 @@
+# Build stage
+FROM ubuntu:22.04 AS builder
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    git \
+    cmake \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN git clone https://github.com/ggerganov/llama.cpp.git /tmp/llamacpp && \
+    cd /tmp/llamacpp && \
+    cmake -B build -DLLAMA_BUILD_SERVER=ON && \
+    cmake --build build --config Release
+
+# Runtime stage
 FROM ubuntu:22.04
 
 ENV DEBIAN_FRONTEND=noninteractive
@@ -9,24 +27,17 @@ ENV THREADS="-1"
 ENV TEMPERATURE="0.7"
 ENV PREDICT_TOKENS="2048"
 
+# Install only runtime dependencies
 RUN apt-get update && apt-get install -y \
     curl \
-    wget \
-    build-essential \
-    git \
-    python3 \
-    python3-pip \
     && rm -rf /var/lib/apt/lists/*
 
 WORKDIR /app
 
-RUN git clone https://github.com/ggerganov/llama.cpp.git /tmp/llamacpp && \
-    cd /tmp/llamacpp && \
-    make LLAMA_SERVER=1 && \
-    cp /tmp/llamacpp/llama-server /usr/local/bin/ && \
-    rm -rf /tmp/llamacpp
+# Copy llama-server from the build stage
+COPY --from=builder /tmp/llamacpp/build/bin/llama-server /usr/local/bin/
+
+COPY start-lfm25-server.sh /app/start-lfm25-server.sh
 
 RUN echo "📥 Downloading LFM2.5-1.2B-Thinking-Q4_K_M.gguf (731MB)..." && \
     curl -L -o "$MODEL_FILE" \
```
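To sanity-check the new multi-stage build locally, the image can be built and exercised directly with the Docker CLI. A minimal sketch, assuming a hypothetical `lfm25-server` tag and that the container serves llama-server's OpenAI-compatible routes on port 7860, as the README's curl example suggests:

```bash
# Build the final (runtime) image from the repo root
docker build -t lfm25-server .

# Optionally stop at the first stage to debug compile problems
docker build --target builder -t lfm25-builder .

# Run the server; TEMPERATURE and PREDICT_TOKENS override the ENV defaults
docker run --rm -p 7860:7860 -e TEMPERATURE=0.7 -e PREDICT_TOKENS=2048 lfm25-server

# From another shell, smoke-test the OpenAI-compatible endpoint
curl http://localhost:7860/v1/models
```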
README.md
CHANGED
````diff
@@ -44,6 +44,22 @@ curl -X POST "http://localhost:7860/v1/chat/completions" \
 
 ## 🐳 Docker Deployment
 
+### Build Notes
+
+The Dockerfile uses a multi-stage build and the CMake build system to compile llama.cpp:
+
+**Build stage**:
+- Installs the build dependencies (build-essential, git, cmake)
+- Builds llama-server with the `-DLLAMA_BUILD_SERVER=ON` flag
+- Compiles in Release mode for best performance
+
+**Runtime stage**:
+- Installs only the runtime dependency (curl)
+- Copies the compiled llama-server from the build stage
+- Downloads the model file and configures the environment
+
+This multi-stage build significantly reduces the final image size.
+
 ### Local Deployment
 
 ```bash
````
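The size reduction claimed in the new Build Notes section can be checked with standard Docker tooling. A hedged example, reusing the hypothetical `lfm25-builder` and `lfm25-server` tags from the sketch above:

```bash
# The builder image carries the full toolchain and llama.cpp sources...
docker image ls lfm25-builder

# ...while the runtime image should be noticeably smaller
docker image ls lfm25-server

# Per-layer sizes; the ~731MB model download from the Dockerfile
# should dominate the runtime image
docker history lfm25-server
```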