# Image that serves a Qwen 2.5 1.5B Instruct GGUF model with llama.cpp's
# HTTP server on port 7860 (the port Hugging Face Spaces expects).
#
# Base: official llama.cpp server image (Linux amd64 environment).
# NOTE(review): ":server" is a moving tag; pin a dated tag or an image digest
# if reproducible builds are required.
FROM ghcr.io/ggml-org/llama.cpp:server

WORKDIR /app

# Root is only needed for the package installation and model download below.
USER root

# Install the tools required to fetch the model over HTTPS.
# The apt lists are removed in the same layer so they do not bloat the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Location of the GGUF model file. Override with --build-arg MODEL_URL=...
# NOTE(review): "resolve/main" follows a mutable branch; pin a commit revision
# and verify a checksum if the model must be reproducible.
ARG MODEL_URL=https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/qwen2.5-1.5b-instruct-q8_0.gguf

# Download the Qwen 2.5 1.5B lightweight model.
# --fail makes the build stop on an HTTP error instead of saving an error page
# as the model file.
RUN curl -L --fail "${MODEL_URL}" -o /app/model.gguf

# Drop root privileges for runtime. A numeric UID/GID is used so no passwd
# entry is required; the downloaded model is world-readable.
# NOTE(review): assumes the server binary and /app in the base image are
# readable/executable by non-root users - confirm against the base image.
USER 1000:1000

# Expose port 7860, as required by Hugging Face.
EXPOSE 7860

# Probe the server on the port configured in CMD below. Defined explicitly so
# that any health check inherited from the base image (which would target the
# base image's own default port) does not report this container as unhealthy.
# The start period leaves time for the model to load.
HEALTHCHECK --interval=30s --timeout=5s --start-period=60s --retries=3 \
    CMD ["curl", "-fsS", "http://127.0.0.1:7860/health"]

# Key point: do NOT start this list with "/llama-server". These values are
# passed as arguments to the ENTRYPOINT defined by the base image.
CMD ["-m", "/app/model.gguf", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "-ngl", "0", \
     "-t", "2", \
     "-c", "4096"]