Spaces:

javaeeduke
/

llm

Sleeping

javaeeduke committed 8 days ago

Commit

59f7b7d

verified ·

1 Parent(s): a75f40b

Update Dockerfile

Files changed (1) hide show

Dockerfile CHANGED Viewed

@@ -1,32 +1,30 @@
-FROM ubuntu:22.04
-ENV DEBIAN_FRONTEND=noninteractive
 WORKDIR /app
-# 安装基础工具（增加 zsh，因为某些版本的 Cosmopolitan 壳需要它或者 bash 强制解析）
 RUN apt-get update && apt-get install -y --no-install-recommends \
-    ca-certificates \
     curl \
-    wget \
-    bash \
-    tini \
     && rm -rf /var/lib/apt/lists/*
-# 💡 核心修复点 1：下载官方特制、没有被 Cosmopolitan 混淆的纯 Linux-x86_64 专用二进制版
-RUN curl -L --fail \
-    https://github.com/Mozilla-Ocho/llamafile/releases/download/0.9.2/llamafile-0.9.2.linux-x86_64 \
-    -o /app/llamafile \
-    && chmod +x /app/llamafile
-# 下载 GGUF 模型（保持你选的 1.5B 优质轻量模型）
 RUN curl -L --fail \
     https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/qwen2.5-1.5b-instruct-q8_0.gguf \
     -o /app/model.gguf
 EXPOSE 7860
-ENTRYPOINT ["/usr/bin/tini", "--"]
-# 💡 核心修复点 2：在 Linux Docker 中，最稳妥的启动方式是让 /bin/sh 或者是 /bin/bash 去拉起它，避免内核直接解析 header 失败
-CMD ["/bin/sh", "-c", "/app/llamafile -m /app/model.gguf --server --host 0.0.0.0 --port 7860 -ngl 0 -t 2 -c 4096"]

+# Start from the prebuilt llama.cpp server image (plain Linux build) instead of
+# shipping a separately downloaded llamafile binary.
+# NOTE(review): the :server tag floats; pin a specific build tag or digest so
+# rebuilds are reproducible.
+FROM ghcr.io/ggerganov/llama.cpp:server
 WORKDIR /app
+# Root is required for the apt-get install below.
+USER root
+# curl downloads the model; ca-certificates lets it verify the HTTPS connection.
 RUN apt-get update && apt-get install -y --no-install-recommends \
     curl \
+    ca-certificates \
     && rm -rf /var/lib/apt/lists/*
+# Bake the Qwen2.5 1.5B Instruct (Q8_0) GGUF model into the image at build time.
 RUN curl -L --fail \
     https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/qwen2.5-1.5b-instruct-q8_0.gguf \
     -o /app/model.gguf
+# Expose 7860, the port Hugging Face Spaces expects the app to listen on.
 EXPOSE 7860
+# The health check inherited from the upstream image targets llama-server's
+# default port (8080); re-point it at the port this container actually serves.
+# The start period leaves time for the model to load before failures count.
+HEALTHCHECK --interval=30s --timeout=5s --start-period=60s --retries=3 \
+    CMD ["curl", "-fsS", "http://localhost:7860/health"]
+# Launch llama.cpp's built-in server and chat web UI.
+# The base image already declares llama-server as its ENTRYPOINT, so CMD must
+# carry only the arguments; repeating the binary path here would be passed to
+# the server as a stray positional argument and abort startup.
+#   --host 0.0.0.0 / --port 7860  make the server reachable from outside the container
+#   -ngl 0                        offload no layers to a GPU (CPU-only inference)
+#   -t 2                          use 2 threads, matching the free tier's 2 CPU cores
+#   -c 4096                       context window of 4096 tokens
+CMD ["-m", "/app/model.gguf", \
+     "--host", "0.0.0.0", \
+     "--port", "7860", \
+     "-ngl", "0", \
+     "-t", "2", \
+     "-c", "4096"]