chipling committed on
Commit
25fd3ae
·
verified ·
1 Parent(s): de54083

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +16 -10
Dockerfile CHANGED
@@ -1,22 +1,28 @@
1
- # Use the official pre-built server image
2
- FROM ghcr.io/abetlen/llama-cpp-python:latest
3
 
4
- # Set the working directory
5
  WORKDIR /app
6
 
7
- # Install huggingface_hub to download the model
 
 
 
 
 
 
 
 
 
 
8
  RUN pip install huggingface_hub
9
 
10
- # Download the model during the build process
11
- # This ensures the model is ready as soon as the container starts
12
  RUN python3 -c "from huggingface_hub import hf_hub_download; hf_hub_download(repo_id='HauhauCS/Qwen3.5-4B-Uncensored-HauhauCS-Aggressive', filename='Qwen3.5-4B-Uncensored-HauhauCS-Aggressive-Q4_K_M.gguf', local_dir='.')"
13
 
14
- # Hugging Face Spaces require the app to listen on port 7860
15
  EXPOSE 7860
16
 
17
- # Run the server with OpenAI-compatible settings
18
- # - n_threads 2: Matches the free tier's 2 vCPUs
19
- # - host 0.0.0.0: Required for external access
20
  CMD ["python3", "-m", "llama_cpp.server", \
21
  "--model", "Qwen3.5-4B-Uncensored-HauhauCS-Aggressive-Q4_K_M.gguf", \
22
  "--host", "0.0.0.0", \
 
1
+ # Use a standard Python image
2
+ FROM python:3.10-slim
3
 
 
4
  WORKDIR /app
5
 
6
+ # Install basic system tools required for the server
7
+ RUN apt-get update && apt-get install -y \
8
+ libopenblas-dev \
9
+ gcc \
10
+ && rm -rf /var/lib/apt/lists/*
11
+
12
+ # Install the PRE-COMPILED wheel for CPU (Skips the 'stuck' build process)
13
+ RUN pip install --no-cache-dir \
14
+ "llama-cpp-python[server] @ https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.7/llama_cpp_python-0.3.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"
15
+
16
+ # Install huggingface_hub to fetch the model
17
  RUN pip install huggingface_hub
18
 
19
+ # Pre-download the model so it's ready on boot
 
20
  RUN python3 -c "from huggingface_hub import hf_hub_download; hf_hub_download(repo_id='HauhauCS/Qwen3.5-4B-Uncensored-HauhauCS-Aggressive', filename='Qwen3.5-4B-Uncensored-HauhauCS-Aggressive-Q4_K_M.gguf', local_dir='.')"
21
 
22
+ # Expose the HF Space port
23
  EXPOSE 7860
24
 
25
+ # Launch the server optimized for 2 CPUs
 
 
26
  CMD ["python3", "-m", "llama_cpp.server", \
27
  "--model", "Qwen3.5-4B-Uncensored-HauhauCS-Aggressive-Q4_K_M.gguf", \
28
  "--host", "0.0.0.0", \