Spaces:

waddie
/

cloudmini-api

Sleeping

waddie committed 3 days ago

Commit

601ed38

verified ·

1 Parent(s): 95c1b8c

Update Dockerfile

Files changed (1) hide show

Dockerfile CHANGED Viewed

@@ -1,31 +1,18 @@
-FROM python:3.10-slim
-# Install ONLY wget to fetch your model file (No compiler tools needed!)
 RUN apt-get update && apt-get install -y wget && rm -rf /var/lib/apt/lists/*
-# Pull down the ultra-fast uv binary
-COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
-WORKDIR /app
-# Pin execution explicitly to pure CPU backends
-ENV LLAMA_GGML_BACKEND=cpu
-# Install the official, pre-compiled x86_64 CPU wheel instantly via UV
-RUN uv pip install --system --no-cache \
-    --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu \
-    "llama-cpp-python[server]"
-# Stream down your 4.68 GB model file
-RUN wget -O model.gguf "https://huggingface.co/waddie/mini-2.0-GGUF/resolve/main/mini-2.0-Q4_K_M.gguf"
 EXPOSE 7860
-# Revert to executing via python's native server module
-CMD ["python3", "-m", "llama_cpp.server", \
-     "--model", "model.gguf", \
      "--host", "0.0.0.0", \
      "--port", "7860", \
-     "--n_threads", "2", \
-     "--n_ctx", "4096", \
-     "--chat_format", "chatml"]

+# Pull the official, pre-compiled C++ server image
+FROM ghcr.io/ggml-org/llama.cpp:server
+# Temporarily switch to root to install wget
+USER root
 RUN apt-get update && apt-get install -y wget && rm -rf /var/lib/apt/lists/*
+# Download your GGUF model
+RUN wget -O /model.gguf "https://huggingface.co/waddie/mini-2.0-GGUF/resolve/main/mini-2.0-Q4_K_M.gguf"
 EXPOSE 7860
+# Run the native server using the arguments from the docs you linked
+# (The image's ENTRYPOINT is automatically the llama-server binary)
+CMD ["--model", "/model.gguf", \
      "--host", "0.0.0.0", \
      "--port", "7860", \
+     "--ctx-size", "4096"]