OpenCode Deployer committed
Commit · 5fd9c8d
Parent(s): e366a65
update

Files changed:
- Dockerfile +67 -0
- README.md +93 -1
- start-lfm25-server.sh +1 -1
Dockerfile
ADDED
@@ -0,0 +1,67 @@
```dockerfile
# syntax=docker/dockerfile:1
FROM ubuntu:22.04

ENV DEBIAN_FRONTEND=noninteractive
ENV MODEL_FILE="LFM2.5-1.2B-Thinking-Q4_K_M.gguf"
ENV HOST="0.0.0.0"
ENV PORT="7860"
ENV CTX_SIZE="4096"
ENV THREADS="-1"
ENV TEMPERATURE="0.7"
ENV PREDICT_TOKENS="2048"

RUN apt-get update && apt-get install -y \
    curl \
    wget \
    build-essential \
    cmake \
    git \
    libcurl4-openssl-dev \
    python3 \
    python3-pip \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

COPY start-lfm25-server.sh /app/start-lfm25-server.sh

# Build the llama-server binary with CMake and install it on the PATH
RUN git clone https://github.com/ggerganov/llama.cpp.git /tmp/llamacpp && \
    cmake -S /tmp/llamacpp -B /tmp/llamacpp/build -DCMAKE_BUILD_TYPE=Release && \
    cmake --build /tmp/llamacpp/build --target llama-server -j"$(nproc)" && \
    cp /tmp/llamacpp/build/bin/llama-server /usr/local/bin/ && \
    rm -rf /tmp/llamacpp

# Bake the quantized model into the image at build time
RUN echo "📥 Downloading LFM2.5-1.2B-Thinking-Q4_K_M.gguf (731MB)..." && \
    curl -L -o "$MODEL_FILE" \
        "https://huggingface.co/LiquidAI/LFM2.5-1.2B-Thinking-GGUF/resolve/main/LFM2.5-1.2B-Thinking-Q4_K_M.gguf" \
        --connect-timeout 60 \
        --max-time 300 && \
    echo "✅ Model download complete"

RUN chmod +x /app/start-lfm25-server.sh

# Write the runtime entrypoint; the quoted EOF delays variable expansion to
# container start, so the ENV defaults above can be overridden at run time
COPY <<'EOF' /app/entrypoint.sh
#!/bin/bash
set -e

echo "🚀 Starting the LFM2.5-1.2B-Thinking-Q4_K_M.gguf HTTP server..."
echo "📁 Model file: $MODEL_FILE"
echo "🌐 Server address: http://$HOST:$PORT"
echo "💬 API endpoint: http://$HOST:$PORT/v1/chat/completions"
echo ""

exec llama-server \
    --model "$MODEL_FILE" \
    --host "$HOST" \
    --port "$PORT" \
    --ctx-size "$CTX_SIZE" \
    --threads "$THREADS" \
    --temp "$TEMPERATURE" \
    --n-predict "$PREDICT_TOKENS" \
    --log-disable \
    --verbose-prompt \
    --api-key "lfm25-api-key"
EOF

RUN chmod +x /app/entrypoint.sh

EXPOSE 7860

CMD ["/app/entrypoint.sh"]
```
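Building llama-server from source plus the 731 MB model download makes this image slow to build, so it is worth a local smoke test before pushing to the Space. A minimal sketch (the container name and polling interval are arbitrary choices):

```bash
# Build the image and start a throwaway container
docker build -t liquidai-lfm25 .
docker run -d --name lfm25-test -p 7860:7860 liquidai-lfm25

# Poll the health endpoint until the model finishes loading
until curl -sf http://localhost:7860/health > /dev/null; do
  echo "waiting for llama-server..."
  sleep 5
done
echo "server is up"

# Clean up
docker rm -f lfm25-test
```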
README.md
CHANGED
@@ -7,4 +7,96 @@ sdk: docker
pinned: false
---
# LiquidAI LFM2.5-1.2B-Thinking HuggingFace Space

An HTTP API server deployment of the LFM2.5-1.2B-Thinking model, based on llama.cpp.

## 🚀 Model Information

- **Model name**: LFM2.5-1.2B-Thinking
- **Quantization**: Q4_K_M
- **File size**: 731 MB
- **Architecture**: Transformer-based language model

## 📡 API Service

### Endpoints

- **Base URL**: `http://localhost:7860`
- **Chat completions**: `POST /v1/chat/completions`
- **Health check**: `GET /health` (see the readiness probe below)
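The health endpoint offers a quick readiness probe before sending chat traffic; llama-server typically serves it without requiring the API key and returns HTTP 200 once the model is loaded:

```bash
# Expect HTTP 200 and a small JSON status once llama-server is ready
curl -i http://localhost:7860/health
```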
### Example Request

```bash
curl -X POST "http://localhost:7860/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer lfm25-api-key" \
  -d '{
    "model": "LFM2.5-1.2B-Thinking-Q4_K_M.gguf",
    "messages": [
      {"role": "user", "content": "Hello, please introduce yourself"}
    ],
    "temperature": 0.7,
    "max_tokens": 2048
  }'
```
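The response follows the OpenAI chat-completions schema, so the reply text lives under `choices[0].message.content`. With `jq` installed (an extra tool, not part of this image), it can be extracted directly:

```bash
# Print only the assistant's reply from the JSON response
curl -s -X POST "http://localhost:7860/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer lfm25-api-key" \
  -d '{
    "model": "LFM2.5-1.2B-Thinking-Q4_K_M.gguf",
    "messages": [{"role": "user", "content": "Say hello in one sentence."}]
  }' | jq -r '.choices[0].message.content'
```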
## 🐳 Docker Deployment

### Local Deployment

```bash
# Build the image
docker build -t liquidai-lfm25 .

# Run the container
docker run -p 7860:7860 liquidai-lfm25
```
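Because the entrypoint reads its settings from environment variables at startup, the defaults baked into the image can be overridden per container without rebuilding, for example:

```bash
# Larger context window and cooler sampling for this run only
docker run -p 7860:7860 \
  -e CTX_SIZE=8192 \
  -e TEMPERATURE=0.2 \
  -e PREDICT_TOKENS=4096 \
  liquidai-lfm25
```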
### Automatic Deployment on HuggingFace Spaces

1. Push the code to the HuggingFace Space repository (see the sketch below)
2. The Space automatically builds and runs the Docker container
3. The service becomes available on port 7860
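Step 1 is an ordinary git push to the Space's git remote; the URL below follows the standard Spaces pattern, with `<user>/<space>` as placeholders for your own account and Space name:

```bash
# Add the Space as a remote and push; the build starts automatically
git remote add space https://huggingface.co/spaces/<user>/<space>
git push space main
```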
## ⚙️ Configuration

- **Listen address**: 0.0.0.0
- **Listen port**: 7860
- **Context size**: 4096 tokens
- **CPU threads**: auto-detected
- **Temperature**: 0.7
- **Max predicted tokens**: 2048
- **API key**: lfm25-api-key (requests must carry it, as checked below)
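Since llama-server is started with `--api-key`, requests to the chat endpoint must carry the matching Bearer token; a request without it should be rejected with an authentication error, which is easy to verify:

```bash
# Expect an HTTP 401 (unauthorized) when the Authorization header is missing
curl -s -o /dev/null -w "%{http_code}\n" \
  -X POST "http://localhost:7860/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -d '{"messages":[{"role":"user","content":"hi"}]}'
```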
## 📊 Monitoring and Logs

On startup, the server prints:
- the model file path
- the server address
- the API endpoint

## 🛠️ Development and Debugging

### Local Development

```bash
# Set up and start the server locally
./start-lfm25-server.sh
```

### Viewing Logs

```bash
# View container logs
docker logs <container_id>
```

---

## 📚 Further Reading

- [llama.cpp documentation](https://github.com/ggerganov/llama.cpp)
- [LiquidAI model repository](https://huggingface.co/LiquidAI)
- [HuggingFace Space configuration reference](https://huggingface.co/docs/hub/spaces-config-reference)
start-lfm25-server.sh
CHANGED
```diff
@@ -8,7 +8,7 @@ set -e
 # Configuration variables
 MODEL_FILE="LFM2.5-1.2B-Thinking-Q4_K_M.gguf"
 HOST="0.0.0.0"
-PORT="
+PORT="7860"
 CTX_SIZE="4096"
 THREADS="-1"  # auto-detect CPU core count
 TEMPERATURE="0.7"
```