CooLLaMACEO committed on
Commit
5b14eba
·
verified ·
1 Parent(s): 2142f68

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +8 -8
Dockerfile CHANGED
@@ -2,21 +2,21 @@ FROM python:3.10-slim
2
 
3
  WORKDIR /app
4
 
5
- # 1. Install wget
6
  RUN apt-get update && apt-get install -y wget && rm -rf /var/lib/apt/lists/*
7
 
8
- # 2. Install pre-built llama-cpp (NO BUILDING HANGS)
9
- RUN pip install --no-cache-dir \
10
- fastapi \
11
- uvicorn \
12
  --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu \
13
- llama-cpp-python
14
 
15
- # 3. Download MPT-7B Q2 as 'model.gguf' (Matches your script)
16
  RUN wget -q -O model.gguf \
17
  "https://huggingface.co/maddes8cht/mosaicml-mpt-7b-chat-gguf/resolve/main/mosaicml-mpt-7b-chat-Q2_K.gguf?download=true"
18
 
19
- # 4. Copy the script you found
20
  COPY app.py .
21
 
22
  EXPOSE 7860
 
2
 
3
  WORKDIR /app
4
 
5
+ # 1. Install wget (essential for the model)
6
  RUN apt-get update && apt-get install -y wget && rm -rf /var/lib/apt/lists/*
7
 
8
+ # 2. Install llama-cpp-python FORCING the binary (No compilation allowed)
9
+ # We use version 0.2.76 as it is highly stable for MPT models
10
+ RUN pip install --no-cache-dir fastapi uvicorn
11
+ RUN pip install llama-cpp-python==0.2.76 \
12
  --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu \
13
+ --prefer-binary
14
 
15
+ # 3. Download the model silently
16
  RUN wget -q -O model.gguf \
17
  "https://huggingface.co/maddes8cht/mosaicml-mpt-7b-chat-gguf/resolve/main/mosaicml-mpt-7b-chat-Q2_K.gguf?download=true"
18
 
19
+ # 4. Copy your script
20
  COPY app.py .
21
 
22
  EXPOSE 7860