Spaces:

waddie
/

cloudmini-api

Sleeping

waddie committed 4 days ago

Commit

2e82582

verified ·

1 Parent(s): 9744925

Update Dockerfile

Files changed (1) hide show

Dockerfile CHANGED Viewed

@@ -1,30 +1,25 @@
-FROM python:3.10-slim
-# Install system compilation utilities
-RUN apt-get update && apt-get install -y \
-    build-essential \
-    python3-dev \
-    wget \
-    && rm -rf /var/lib/apt/lists/*
-# Install uv directly from the official binary release
-COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
 WORKDIR /app
-# Configure environment variables for a CPU-only build
-ENV LLAMA_GGML_BACKEND=cpu
-# Use uv to install llama-cpp-python with its server extras into the system Python environment
-RUN uv pip install --system --no-cache "llama-cpp-python[server]"
-# Pull down your target 4.68 GB model file
 RUN wget -O model.gguf "https://huggingface.co/waddie/mini-2.0-GGUF/resolve/main/mini-2.0-Q4_K_M.gguf"
 EXPOSE 7860
-CMD ["python3", "-m", "llama_cpp.server", \
-     "--model", "model.gguf", \
      "--host", "0.0.0.0", \
      "--port", "7860", \
-     "--n_threads", "2"]

+FROM debian:stable-slim
+# Install wget to fetch binaries and models
+RUN apt-get update && apt-get install -y wget && rm -rf /var/lib/apt/lists/*
 WORKDIR /app
+# 1. Download the pre-compiled llamafile server binary (pinned to release 0.8.13)
+RUN wget -O llamafile https://github.com/Mozilla-Ocho/llamafile/releases/download/0.8.13/llamafile-0.8.13 && \
+    chmod +x llamafile
+# 2. Download your GGUF model file
 RUN wget -O model.gguf "https://huggingface.co/waddie/mini-2.0-GGUF/resolve/main/mini-2.0-Q4_K_M.gguf"
 EXPOSE 7860
+# Run Llamafile server in headless mode pointing to your model file
+# The llamafile server exposes OpenAI-compatible API endpoints
+CMD ["./llamafile", \
+     "--server", \
      "--host", "0.0.0.0", \
      "--port", "7860", \
+     "-m", "model.gguf", \
+     "--embedding", \
+     "-t", "2"]