waddie committed on
Commit
ca6c646
·
verified ·
1 Parent(s): bbe7501

Create Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +27 -0
Dockerfile ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-slim
2
+
3
+ # Install the toolchain needed to compile llama.cpp, plus wget for the model download
4
+ RUN apt-get update && apt-get install -y \
5
+ build-essential \
6
+ python3-dev \
7
+ wget \
8
+ && rm -rf /var/lib/apt/lists/*
9
+
10
+ WORKDIR /app
11
+
12
+ # Request a CPU-only build for the pip install below (NOTE(review): confirm LLAMA_GGML_BACKEND is honored; upstream documents CMAKE_ARGS for build options)
13
+ ENV LLAMA_GGML_BACKEND=cpu
14
+ RUN pip install --no-cache-dir "llama-cpp-python[server]"
15
+
16
+ # Download the Q4_K_M quantization of mini-2.0 from the waddie/mini-2.0-GGUF repo and save it as model.gguf
17
+ RUN wget -O model.gguf "https://huggingface.co/waddie/mini-2.0-GGUF/resolve/main/mini-2.0-Q4_K_M.gguf"
18
+
19
+ # Expose the default port for Hugging Face Spaces
20
+ EXPOSE 7860
21
+
22
+ # Run the API server with 2 threads (--n_threads) to stay within the shared CPU limits
23
+ CMD ["python3", "-m", "llama_cpp.server", \
24
+ "--model", "model.gguf", \
25
+ "--host", "0.0.0.0", \
26
+ "--port", "7860", \
27
+ "--n_threads", "2"]