# Build stage
FROM ubuntu:22.04 AS builder

ENV DEBIAN_FRONTEND=noninteractive

RUN apt-get update && apt-get install -y \
    build-essential \
    git \
    cmake \
    curl \
    libomp-dev \
    && rm -rf /var/lib/apt/lists/*

RUN git clone https://github.com/ggerganov/llama.cpp.git /tmp/llamacpp && \
    cd /tmp/llamacpp && \
    cmake -B build -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_EXE_LINKER_FLAGS="-static-libgcc -static-libstdc++" && \
    cmake --build build --config Release

# Runtime stage
FROM ubuntu:22.04

ENV DEBIAN_FRONTEND=noninteractive
ENV MODEL_FILE="LFM2.5-1.2B-Thinking-Q4_K_M.gguf"
ENV HOST="0.0.0.0"
ENV PORT="7860"
ENV CTX_SIZE="4096"
ENV THREADS="-1"
ENV TEMPERATURE="0.7"
ENV PREDICT_TOKENS="2048"

# Install runtime dependencies only (including the OpenMP runtime library)
RUN apt-get update && apt-get install -y \
    curl \
    libgomp1 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy llama-server from the build stage
COPY --from=builder /tmp/llamacpp/build/bin/llama-server /usr/local/bin/

COPY start-lfm25-server.sh /app/start-lfm25-server.sh

RUN echo "📥 Downloading LFM2.5-1.2B-Thinking-Q4_K_M.gguf (731 MB)..." && \
    curl -L -o "$MODEL_FILE" \
        "https://huggingface.co/LiquidAI/LFM2.5-1.2B-Thinking-GGUF/resolve/main/LFM2.5-1.2B-Thinking-Q4_K_M.gguf" \
        --connect-timeout 60 \
        --max-time 300 && \
    echo "✅ Model download complete"

RUN chmod +x /app/start-lfm25-server.sh

# Note: the heredoc below requires BuildKit (Dockerfile heredoc syntax).
# The entrypoint uses the HOST/PORT/CTX_SIZE/... ENV values defined above
# instead of hardcoding the host and port, so they can be overridden at
# `docker run` time.
RUN cat > /app/entrypoint.sh << 'EOF'
#!/bin/bash
set -e

echo "🚀 Starting LFM2.5-1.2B-Thinking-Q4_K_M.gguf HTTP server..."
echo "📁 Model file: $MODEL_FILE"
echo "🌐 Server address: http://$HOST:$PORT"
echo "💬 API endpoint: http://$HOST:$PORT/v1/chat/completions"
echo ""

exec llama-server \
    --model "$MODEL_FILE" \
    --host "$HOST" \
    --port "$PORT" \
    --ctx-size "$CTX_SIZE" \
    --threads "$THREADS" \
    --temp "$TEMPERATURE" \
    --n-predict "$PREDICT_TOKENS" \
    --log-disable \
    --verbose-prompt \
    --api-key "lfm25-api-key"
EOF

RUN chmod +x /app/entrypoint.sh

EXPOSE 7860

# Health check ($PORT expands at runtime via the shell-form CMD)
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f "http://localhost:${PORT}/health" || exit 1

CMD ["/app/entrypoint.sh"]
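
# Usage sketch (the image tag "lfm25-server" is an illustrative name, not
# mandated by anything above; BuildKit is assumed for the heredoc):
#
#   DOCKER_BUILDKIT=1 docker build -t lfm25-server .
#   docker run --rm -p 7860:7860 lfm25-server
#
# Smoke test against the OpenAI-compatible endpoint, using the API key
# hardcoded in the entrypoint above:
#
#   curl http://localhost:7860/v1/chat/completions \
#     -H "Authorization: Bearer lfm25-api-key" \
#     -H "Content-Type: application/json" \
#     -d '{"messages": [{"role": "user", "content": "Hello"}]}'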