File size: 2,240 Bytes
1e941ef
 
 
 
 
 
 
 
 
 
e6d126a
1e941ef
 
 
 
e6d126a
1e941ef
 
 
5fd9c8d
 
 
 
 
 
 
 
 
 
 
e6d126a
5fd9c8d
 
e6d126a
5fd9c8d
 
 
 
1e941ef
 
5fd9c8d
1e941ef
5fd9c8d
de44a82
5fd9c8d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eb50311
 
 
 
5fd9c8d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# Build stage: compile llama.cpp's llama-server from source.
# This entire stage is discarded; only the binary is copied into the runtime stage.
FROM ubuntu:22.04 AS builder

# Build-time only; safe here because builder-stage ENV never reaches the final image.
ENV DEBIAN_FRONTEND=noninteractive

# Toolchain for the CMake build. --no-install-recommends keeps the layer lean,
# packages are sorted for diffability, and the apt lists are removed in the
# same layer so they never persist in a layer snapshot.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    cmake \
    curl \
    git \
    libomp-dev \
    && rm -rf /var/lib/apt/lists/*

# NOTE(review): the clone is unpinned (master HEAD), so this build is not
# reproducible — pin a release tag (git clone --branch <tag>) once a known-good
# version is chosen. --depth 1 avoids fetching full history we don't need.
# libgcc/libstdc++ are statically linked so the runtime stage only needs libgomp1.
# cmake -S/-B avoids `cd` inside RUN; -j parallelizes the compile.
RUN git clone --depth 1 https://github.com/ggerganov/llama.cpp.git /tmp/llamacpp && \
    cmake -S /tmp/llamacpp -B /tmp/llamacpp/build -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_EXE_LINKER_FLAGS="-static-libgcc -static-libstdc++" && \
    cmake --build /tmp/llamacpp/build --config Release -j"$(nproc)"

# Runtime stage: minimal image holding only the server binary, its runtime
# libraries, and the model file.
FROM ubuntu:22.04

# Runtime configuration, all overridable with `docker run -e`.
# THREADS=-1 is llama-server's "auto-detect CPU count" sentinel.
# Grouped into one ENV instruction: one layer, one place to read the contract.
ENV MODEL_FILE="LFM2.5-1.2B-Thinking-Q4_K_M.gguf" \
    HOST="0.0.0.0" \
    PORT="7860" \
    CTX_SIZE="4096" \
    THREADS="-1" \
    TEMPERATURE="0.7" \
    PREDICT_TOKENS="2048"

# 仅安装运行时依赖(包括 OpenMP 运行时库)
# curl: used by the model-download layer and the HEALTHCHECK probe.
# libgomp1: OpenMP runtime the statically-linked-elsewhere binary still needs.
# DEBIAN_FRONTEND is scoped to this single RUN instead of a persistent ENV so
# it does not leak into every container started from this image.
RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get install -y --no-install-recommends \
    curl \
    libgomp1 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# 从构建阶段复制 llama-server — only the compiled binary crosses stages.
COPY --from=builder /tmp/llamacpp/build/bin/llama-server /usr/local/bin/

# --chmod sets the execute bit at copy time (BuildKit), replacing the separate
# `RUN chmod +x` layer the original needed.
COPY --chmod=755 start-lfm25-server.sh /app/start-lfm25-server.sh

# Bake the model into the image at build time so containers start instantly.
# -f makes curl fail on HTTP errors instead of saving an error page as the
# "model"; without it a 404 would produce a corrupt .gguf that only fails at
# server startup. --max-time raised: 300s required >2.4 MB/s sustained for 731MB.
RUN echo "📥 下载 LFM2.5-1.2B-Thinking-Q4_K_M.gguf (731MB)......" && \
    curl -fL -o "$MODEL_FILE" \
        "https://huggingface.co/LiquidAI/LFM2.5-1.2B-Thinking-GGUF/resolve/main/LFM2.5-1.2B-Thinking-Q4_K_M.gguf" \
        --connect-timeout 60 \
        --max-time 900 && \
    echo "✅ 模型下载完成"

# Generate the entrypoint at build time. The quoted 'EOF' delimiter suppresses
# build-time expansion, so $MODEL_FILE/$HOST/$PORT/... resolve when the
# container starts — which is what lets `docker run -e` overrides take effect.
RUN cat > /app/entrypoint.sh << 'EOF'
#!/bin/bash
set -e

echo "🚀 启动 LFM2.5-1.2B-Thinking-Q4_K_M.gguf HTTP 服务器..."
echo "📁 模型文件: $MODEL_FILE"
echo "🌐 服务地址: http://${HOST}:${PORT}"
echo "💬 API 端点: http://${HOST}:${PORT}/v1/chat/completions"
echo ""

# exec replaces the shell so llama-server becomes PID 1 and receives SIGTERM
# from `docker stop`. Host/port now honor the HOST/PORT env vars declared in
# the image (the original hardcoded 0.0.0.0:7860, making those vars dead).
# SECURITY(review): a fixed API key baked into a published image protects
# nothing — override it with `docker run -e API_KEY=...`; the hardcoded value
# remains only as a backward-compatible default.
exec llama-server \
    --model "$MODEL_FILE" \
    --host "$HOST" \
    --port "$PORT" \
    --ctx-size "$CTX_SIZE" \
    --threads "$THREADS" \
    --temp "$TEMPERATURE" \
    --n-predict "$PREDICT_TOKENS" \
    --log-disable \
    --verbose-prompt \
    --api-key "${API_KEY:-lfm25-api-key}"
EOF

# chmod stays in its own layer here: heredoc-generated files cannot take
# COPY --chmod, and the script is tiny so the layer cost is negligible.
RUN chmod +x /app/entrypoint.sh

# Documentation only (does not publish the port); reflects the default PORT.
EXPOSE 7860

# 健康检查
# ${PORT} is expanded at container runtime by the healthcheck's shell, so the
# probe stays consistent with a PORT override instead of hardcoding 7860.
# -sS keeps the probe quiet on success but still prints errors to the health log.
# NOTE(review): assumes llama-server's /health endpoint is reachable without the
# --api-key — confirm against the deployed llama.cpp version.
# NOTE(review): the container runs as root (no USER directive); consider adding
# a non-root user that owns /app.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -fsS "http://localhost:${PORT}/health" || exit 1

# Exec-form CMD: entrypoint script runs directly and `exec`s the server as PID 1.
CMD ["/app/entrypoint.sh"]