waddie committed on
Commit
2e82582
·
verified ·
1 Parent(s): 9744925

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +14 -19
Dockerfile CHANGED
@@ -1,30 +1,25 @@
1
- FROM python:3.10-slim
2
 
3
- # Install system compilation utilities
4
- RUN apt-get update && apt-get install -y \
5
- build-essential \
6
- python3-dev \
7
- wget \
8
- && rm -rf /var/lib/apt/lists/*
9
-
10
- # Install uv directly from the official binary release
11
- COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
12
 
13
  WORKDIR /app
14
 
15
- # Configure environment variables for a CPU-only build
16
- ENV LLAMA_GGML_BACKEND=cpu
17
-
18
- # Use uv to install llama-cpp-python with its server extras into the system Python environment
19
- RUN uv pip install --system --no-cache "llama-cpp-python[server]"
20
 
21
- # Pull down your target 4.68 GB model file
22
  RUN wget -O model.gguf "https://huggingface.co/waddie/mini-2.0-GGUF/resolve/main/mini-2.0-Q4_K_M.gguf"
23
 
24
  EXPOSE 7860
25
 
26
- CMD ["python3", "-m", "llama_cpp.server", \
27
- "--model", "model.gguf", \
 
 
28
  "--host", "0.0.0.0", \
29
  "--port", "7860", \
30
- "--n_threads", "2"]
 
 
 
1
+ FROM debian:stable-slim
2
 
3
+ # Install wget to fetch binaries and models
4
+ RUN apt-get update && apt-get install -y wget && rm -rf /var/lib/apt/lists/*
 
 
 
 
 
 
 
5
 
6
  WORKDIR /app
7
 
8
+ # 1. Download the pre-compiled llamafile server binary (pinned to release 0.8.13)
9
+ RUN wget -O llamafile https://github.com/Mozilla-Ocho/llamafile/releases/download/0.8.13/llamafile-0.8.13 && \
10
+ chmod +x llamafile
 
 
11
 
12
+ # 2. Download your GGUF model file
13
  RUN wget -O model.gguf "https://huggingface.co/waddie/mini-2.0-GGUF/resolve/main/mini-2.0-Q4_K_M.gguf"
14
 
15
  EXPOSE 7860
16
 
17
+ # Run Llamafile server in headless mode pointing to your model file
18
+ # The llamafile server exposes OpenAI-compatible API endpoints
19
+ CMD ["./llamafile", \
20
+ "--server", \
21
  "--host", "0.0.0.0", \
22
  "--port", "7860", \
23
+ "-m", "model.gguf", \
24
+ "--embedding", \
25
+ "-t", "2"]