# Spaces: Sleeping  (page-status residue captured with this Dockerfile; not a build instruction)
# Build stage: compiles llama-server from the llama.cpp sources.
# Only the resulting binary (build/bin/llama-server) is consumed by the
# runtime stage, so everything else installed here stays out of the final image.
FROM ubuntu:22.04 AS builder
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y \
    build-essential \
    git \
    cmake \
    curl \
    libomp-dev \
    && rm -rf /var/lib/apt/lists/*
# --depth 1: only the current tree is needed to build, not the full history.
# NOTE(review): the clone is unpinned, so each build picks up whatever the
# default branch contains at that moment; consider pinning a tag or commit
# for reproducible builds.
# -DCMAKE_BUILD_TYPE=Release: "--config Release" is only honoured by
# multi-config generators, so the build type is also set at configure time.
# --target llama-server: the server binary is the only artifact copied out.
# Static libgcc/libstdc++ keep the binary independent of the builder's C++
# runtime; the OpenMP runtime is still installed separately at runtime.
RUN git clone --depth 1 https://github.com/ggerganov/llama.cpp.git /tmp/llamacpp && \
    cd /tmp/llamacpp && \
    cmake -B build \
        -DCMAKE_BUILD_TYPE=Release \
        -DLLAMA_BUILD_SERVER=ON \
        -DBUILD_SHARED_LIBS=OFF \
        -DCMAKE_EXE_LINKER_FLAGS="-static-libgcc -static-libstdc++" && \
    cmake --build build --config Release --target llama-server -j "$(nproc)"
# Runtime stage: a fresh Ubuntu image that receives only the compiled server
# binary and the files needed to run it.
FROM ubuntu:22.04
# Server settings, declared together in a single ENV instruction.
ENV DEBIAN_FRONTEND=noninteractive \
    MODEL_FILE="LFM2.5-1.2B-Thinking-Q4_K_M.gguf" \
    HOST="0.0.0.0" \
    PORT="7860" \
    CTX_SIZE="4096" \
    THREADS="-1" \
    TEMPERATURE="0.7" \
    PREDICT_TOKENS="2048"
# Install runtime dependencies only: curl (used for the model download and the
# health check) and libgomp1 (the OpenMP runtime library).
RUN apt-get update \
    && apt-get install -y curl libgomp1 \
    && rm -rf /var/lib/apt/lists/*
WORKDIR /app
# Copy llama-server from the build stage.
COPY --from=builder /tmp/llamacpp/build/bin/llama-server /usr/local/bin/
# Helper start script shipped alongside this Dockerfile in the build context.
COPY start-lfm25-server.sh /app/start-lfm25-server.sh
# Download the GGUF model into the working directory at build time.
# --fail: abort the build on an HTTP error instead of silently saving the
#         error page under the model's filename.
# --retry: tolerate transient network failures during the large transfer.
# --max-time 1800: the previous 300 s cap required roughly 2.4 MB/s sustained
#         for the ~731 MB file and failed on slower links.
# The URL is derived from $MODEL_FILE so the fetched file and the saved
# filename cannot drift apart.
RUN echo "📥 下载 LFM2.5-1.2B-Thinking-Q4_K_M.gguf (731MB)......" && \
    curl --fail --location \
        --retry 3 --retry-delay 5 \
        --connect-timeout 60 \
        --max-time 1800 \
        -o "$MODEL_FILE" \
        "https://huggingface.co/LiquidAI/LFM2.5-1.2B-Thinking-GGUF/resolve/main/${MODEL_FILE}" && \
    echo "✅ 模型下载完成"
# Make the helper start script executable.
RUN chmod +x /app/start-lfm25-server.sh
# Generate the container entrypoint. The heredoc delimiter is quoted ('EOF'),
# so every $VAR below is expanded when the container starts, not at build time.
# The script honours the HOST and PORT environment variables (previously the
# bind address and port were hard-coded and those variables were ignored).
# SECURITY: the default API key is a literal stored in the image and therefore
# not secret; set API_KEY at run time to use a private key.
RUN cat > /app/entrypoint.sh << 'EOF'
#!/bin/bash
set -e

# Fall back to the historical values when the variables are unset or empty.
HOST="${HOST:-0.0.0.0}"
PORT="${PORT:-7860}"
API_KEY="${API_KEY:-lfm25-api-key}"

echo "🚀 启动 LFM2.5-1.2B-Thinking-Q4_K_M.gguf HTTP 服务器..."
echo "📁 模型文件: $MODEL_FILE"
echo "🌐 服务地址: http://${HOST}:${PORT}"
echo "💬 API 端点: http://${HOST}:${PORT}/v1/chat/completions"
echo ""

# exec replaces the shell so llama-server receives container signals directly.
exec llama-server \
    --model "$MODEL_FILE" \
    --host "$HOST" \
    --port "$PORT" \
    --ctx-size "$CTX_SIZE" \
    --threads "$THREADS" \
    --temp "$TEMPERATURE" \
    --n-predict "$PREDICT_TOKENS" \
    --log-disable \
    --verbose-prompt \
    --api-key "$API_KEY"
EOF
RUN chmod +x /app/entrypoint.sh
# Documents the default listening port; the actual port follows $PORT.
EXPOSE 7860
# Health check: probes the server's /health endpoint on the configured port.
# Shell-form CMD, so ${PORT} is expanded inside the running container.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f "http://localhost:${PORT:-7860}/health" || exit 1
CMD ["/app/entrypoint.sh"]