4n0s committed on
Commit
ad8a4dc
·
verified ·
1 Parent(s): c2ecda6

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +10 -6
Dockerfile CHANGED
@@ -1,13 +1,17 @@
1
- # Use the stable pre-built llama-cpp image
2
- FROM ghcr.io/abetlen/llama-cpp-python:latest
3
 
4
- # Install downloader
5
- RUN pip install --no-cache-dir huggingface_hub
6
 
7
- # CORRECT REPO AND FILENAME - Verified
 
 
 
8
  ENV REPO_ID="tensorblock/WhiteRabbitNeo-2.5-Qwen-2.5-Coder-7B-GGUF"
9
  ENV FILENAME="WhiteRabbitNeo-2.5-Qwen-2.5-Coder-7B-Q2_K.gguf"
10
 
11
- # Runtime download and server start
 
12
  CMD python3 -c "from huggingface_hub import hf_hub_download; hf_hub_download(repo_id='$REPO_ID', filename='$FILENAME', local_dir='.')" && \
13
  python3 -m llama_cpp.server --model ./$FILENAME --host 0.0.0.0 --port 7860 --n_ctx 1024 --n_threads 2
 
1
+ # Use the official Python image
2
+ FROM python:3.10-slim
3
 
4
+ # Install system essentials
5
+ RUN apt-get update && apt-get install -y build-essential libopenblas-dev wget && rm -rf /var/lib/apt/lists/*
6
 
7
+ # Install the server with a pre-compiled backend for CPU
8
+ RUN pip install --no-cache-dir "llama-cpp-python[server]" huggingface_hub
9
+
10
+ # Model details - Using Q2_K to ensure it fits in free RAM
11
  ENV REPO_ID="tensorblock/WhiteRabbitNeo-2.5-Qwen-2.5-Coder-7B-GGUF"
12
  ENV FILENAME="WhiteRabbitNeo-2.5-Qwen-2.5-Coder-7B-Q2_K.gguf"
13
 
14
+ # RUNTIME: Download and Start
15
+ # We use --n_ctx 1024 to stay within free memory limits
16
  CMD python3 -c "from huggingface_hub import hf_hub_download; hf_hub_download(repo_id='$REPO_ID', filename='$FILENAME', local_dir='.')" && \
17
  python3 -m llama_cpp.server --model ./$FILENAME --host 0.0.0.0 --port 7860 --n_ctx 1024 --n_threads 2