chipling committed on
Commit
fc2c5e5
·
verified ·
1 Parent(s): 25fd3ae

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +15 -25
Dockerfile CHANGED
@@ -1,31 +1,21 @@
1
- # Use a standard Python image
2
- FROM python:3.10-slim
 
3
 
4
- WORKDIR /app
 
 
 
 
5
 
6
- # Install basic system tools required for the server
7
- RUN apt-get update && apt-get install -y \
8
- libopenblas-dev \
9
- gcc \
10
- && rm -rf /var/lib/apt/lists/*
11
-
12
- # Install the PRE-COMPILED wheel for CPU (Skips the 'stuck' build process)
13
- RUN pip install --no-cache-dir \
14
- "llama-cpp-python[server] @ https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.7/llama_cpp_python-0.3.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"
15
-
16
- # Install huggingface_hub to fetch the model
17
- RUN pip install huggingface_hub
18
-
19
- # Pre-download the model so it's ready on boot
20
- RUN python3 -c "from huggingface_hub import hf_hub_download; hf_hub_download(repo_id='HauhauCS/Qwen3.5-4B-Uncensored-HauhauCS-Aggressive', filename='Qwen3.5-4B-Uncensored-HauhauCS-Aggressive-Q4_K_M.gguf', local_dir='.')"
21
-
22
- # Expose the HF Space port
23
  EXPOSE 7860
24
 
25
- # Launch the server optimized for 2 CPUs
26
- CMD ["python3", "-m", "llama_cpp.server", \
27
- "--model", "Qwen3.5-4B-Uncensored-HauhauCS-Aggressive-Q4_K_M.gguf", \
 
28
  "--host", "0.0.0.0", \
29
  "--port", "7860", \
30
- "--n_threads", "2", \
31
- "--n_ctx", "2048"]
 
# syntax=docker/dockerfile:1

# Use the official Hugging Face GGUF server image —
# it comes pre-compiled and optimized for CPU.
# NOTE(review): pin to a specific tag/digest instead of :latest so builds are
# reproducible (hadolint DL3007) — confirm an available tag on ghcr.io first.
FROM ghcr.io/huggingface/llama-cpp-gguf-server:latest

# Server configuration — single source of truth. The CMD below expands these,
# so any of them can be overridden at `docker run -e ...` without a rebuild.
# (The previous version declared these ENVs but then hardcoded the same values
# in an exec-form CMD, which performs no variable expansion — the ENVs were dead.)
ENV MODEL_ID="HauhauCS/Qwen3.5-4B-Uncensored-HauhauCS-Aggressive" \
    MODEL_FILE="Qwen3.5-4B-Uncensored-HauhauCS-Aggressive-Q4_K_M.gguf" \
    HOST="0.0.0.0" \
    PORT="7860"

# Documentation only (does not publish the port) — HF Spaces route traffic here.
EXPOSE 7860

# Launch the server, CPU-optimized: 2 threads, 2048-token context.
# A shell wrapper is required so $MODEL_ID etc. are expanded; `exec` replaces
# the shell so llama-server is PID 1 and receives SIGTERM from `docker stop`.
# The server downloads the model from the Hub on first start (--hf-repo/--hf-file).
CMD ["/bin/sh", "-c", \
     "exec llama-server --hf-repo \"$MODEL_ID\" --hf-file \"$MODEL_FILE\" --host \"$HOST\" --port \"$PORT\" --threads 2 --ctx-size 2048"]