4n0s committed on
Commit
ad8a4dc
·
verified ·
1 Parent(s): c2ecda6

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +10 -6
Dockerfile CHANGED
@@ -1,13 +1,17 @@
1
- # Use the stable pre-built llama-cpp image
2
- FROM ghcr.io/abetlen/llama-cpp-python:latest
3
 
4
- # Install downloader
5
- RUN pip install --no-cache-dir huggingface_hub
6
 
7
- # CORRECT REPO AND FILENAME - Verified
 
 
 
8
  ENV REPO_ID="tensorblock/WhiteRabbitNeo-2.5-Qwen-2.5-Coder-7B-GGUF"
9
  ENV FILENAME="WhiteRabbitNeo-2.5-Qwen-2.5-Coder-7B-Q2_K.gguf"
10
 
11
- # Runtime download and server start
 
12
  CMD python3 -c "from huggingface_hub import hf_hub_download; hf_hub_download(repo_id='$REPO_ID', filename='$FILENAME', local_dir='.')" && \
13
  python3 -m llama_cpp.server --model ./$FILENAME --host 0.0.0.0 --port 7860 --n_ctx 1024 --n_threads 2
 
1
+ # Use the official Python image
2
+ FROM python:3.10-slim
3
 
4
+ # Install system essentials
5
+ RUN apt-get update && apt-get install -y build-essential libopenblas-dev wget && rm -rf /var/lib/apt/lists/*
6
 
7
+ # Install the server with a pre-compiled backend for CPU
8
+ RUN pip install --no-cache-dir "llama-cpp-python[server]" huggingface_hub
9
+
10
+ # Model details - Using Q2_K to ensure it fits in free RAM
11
  ENV REPO_ID="tensorblock/WhiteRabbitNeo-2.5-Qwen-2.5-Coder-7B-GGUF"
12
  ENV FILENAME="WhiteRabbitNeo-2.5-Qwen-2.5-Coder-7B-Q2_K.gguf"
13
 
14
+ # RUNTIME: Download and Start
15
+ # We use --n_ctx 1024 to stay within free memory limits
16
  CMD python3 -c "from huggingface_hub import hf_hub_download; hf_hub_download(repo_id='$REPO_ID', filename='$FILENAME', local_dir='.')" && \
17
  python3 -m llama_cpp.server --model ./$FILENAME --host 0.0.0.0 --port 7860 --n_ctx 1024 --n_threads 2