chipling committed on
Commit
4c87739
·
verified ·
1 Parent(s): dfada46

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +10 -9
Dockerfile CHANGED
@@ -1,28 +1,29 @@
1
- # Must use 3.10 to match the wheel
2
  FROM python:3.10-slim
3
 
4
  ENV PYTHONUNBUFFERED=1
5
 
6
  WORKDIR /app
7
 
8
- # Install system dependencies
9
  RUN apt-get update && apt-get install -y \
10
  gcc g++ make cmake git libopenblas-dev wget \
11
  && rm -rf /var/lib/apt/lists/*
12
 
13
- # Install the NEWER pre-built wheel that supports Qwen 3.5 (v0.3.24)
 
14
  RUN pip install --no-cache-dir \
15
- https://huggingface.co/Luigi/llama-cpp-python-wheels-hf-spaces-free-cpu/resolve/main/llama_cpp_python-0.3.24-cp310-cp310-linux_x86_64.whl
 
16
 
17
- # Ensure we have the server components
18
- RUN pip install --no-cache-dir "llama-cpp-python[server]" huggingface_hub
19
-
20
- # Model download
21
  RUN python3 -c "from huggingface_hub import hf_hub_download; hf_hub_download(repo_id='HauhauCS/Qwen3.5-4B-Uncensored-HauhauCS-Aggressive', filename='Qwen3.5-4B-Uncensored-HauhauCS-Aggressive-Q4_K_M.gguf', local_dir='.')"
22
 
23
  EXPOSE 7860
24
 
25
- # Run the server
 
26
  CMD ["python3", "-m", "llama_cpp.server", \
27
  "--model", "Qwen3.5-4B-Uncensored-HauhauCS-Aggressive-Q4_K_M.gguf", \
28
  "--host", "0.0.0.0", \
 
1
+ # Python 3.10 is required for these specific wheels
2
  FROM python:3.10-slim
3
 
4
  ENV PYTHONUNBUFFERED=1
5
 
6
  WORKDIR /app
7
 
8
+ # Install basic system tools
9
  RUN apt-get update && apt-get install -y \
10
  gcc g++ make cmake git libopenblas-dev wget \
11
  && rm -rf /var/lib/apt/lists/*
12
 
13
+ # 1. Install llama-cpp-python from the official CPU wheel index
14
+ # This skips the 'stuck' compilation and provides the latest architecture support
15
  RUN pip install --no-cache-dir \
16
+ --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu \
17
+ "llama-cpp-python[server]"
18
 
19
+ # 2. Download the model (Qwen 3.5 4B)
20
+ RUN pip install huggingface_hub
 
 
21
  RUN python3 -c "from huggingface_hub import hf_hub_download; hf_hub_download(repo_id='HauhauCS/Qwen3.5-4B-Uncensored-HauhauCS-Aggressive', filename='Qwen3.5-4B-Uncensored-HauhauCS-Aggressive-Q4_K_M.gguf', local_dir='.')"
22
 
23
  EXPOSE 7860
24
 
25
+ # 3. Launch the server
26
+ # Note: n_threads is set to 2 to match the Free Tier CPU limit
27
  CMD ["python3", "-m", "llama_cpp.server", \
28
  "--model", "Qwen3.5-4B-Uncensored-HauhauCS-Aggressive-Q4_K_M.gguf", \
29
  "--host", "0.0.0.0", \