CooLLaMACEO committed on
Commit
ad08817
·
verified ·
1 Parent(s): 0122415

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +13 -10
Dockerfile CHANGED
@@ -2,26 +2,29 @@ FROM python:3.10-slim
2
 
3
  WORKDIR /app
4
 
5
- # Install wget and build tools
6
  RUN apt-get update && apt-get install -y \
7
  wget \
8
- git \
9
  && rm -rf /var/lib/apt/lists/*
10
 
11
- # Install Python dependencies
12
  RUN pip install --no-cache-dir \
13
  fastapi \
14
- uvicorn[standard] \
15
- torch \
16
- transformers
17
 
18
- # Download model (example GGUF from Hugging Face)
19
- RUN wget -q -O mpt-7b-q2.gguf \
20
- "https://huggingface.co/maddes8cht/mosaicml-mpt-7b-chat-gguf/resolve/main/mosaicml-mpt-7b-chat-Q2_K.gguf?download=true"
 
21
 
22
- # Copy app
23
  COPY app.py .
24
 
 
25
  EXPOSE 7860
26
 
 
27
  CMD ["python", "app.py"]
 
# FastAPI + llama-cpp-python inference service (CPU-only).
# Base image `FROM python:3.10-slim` is declared above this hunk (diff context).

WORKDIR /app

# 1. System dependencies.
#    wget fetches the model at build time; build-essential is kept only as a
#    fallback in case pip has to compile llama-cpp-python from source.
#    --no-install-recommends keeps the layer small (hadolint DL3015);
#    the apt list cleanup happens in the same layer so it actually shrinks the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        wget \
    && rm -rf /var/lib/apt/lists/*

# 2. Python dependencies.
#    The extra index serves pre-built CPU wheels of llama-cpp-python, avoiding
#    a long source build. The flag is placed before the package list rather
#    than between packages, where it read as if it were a package name.
#    NOTE(review): versions are unpinned — pin (e.g. fastapi==0.115.*) for
#    reproducible builds (hadolint DL3013).
RUN pip install --no-cache-dir \
        --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu \
        fastapi \
        uvicorn \
        llama-cpp-python

# 3. Download the GGUF model at build time so container startup is fast.
#    GGUF files embed tokenizer metadata, so no separate tokenizer download is needed.
#    NOTE(review): the `mosaicml` org does not appear to publish GGUF repos on
#    Hugging Face — this URL likely 404s (wget -q will fail silently into an
#    empty file only if -O is combined with a dead link; the build step itself
#    will error on a 404). The previously working URL in this repo's history was
#    https://huggingface.co/maddes8cht/mosaicml-mpt-7b-chat-gguf/resolve/main/mosaicml-mpt-7b-chat-Q2_K.gguf
#    Verify this link resolves before relying on the build.
RUN wget -q -O mpt-7b-chat.gguf \
    "https://huggingface.co/mosaicml/mpt-7b-chat-gguf/resolve/main/mpt-7b-chat.Q4_K_M.gguf"

# 4. Application code — copied last so code edits don't invalidate the
#    (multi-GB) model-download layer in the build cache.
COPY app.py .

# 5. Drop root: create the conventional Hugging Face Spaces user (uid 1000)
#    and hand it the app directory so the server can write runtime files.
RUN useradd -m -u 1000 user && chown -R user:user /app
USER user

# 6. Port 7860 is the Hugging Face Spaces default. EXPOSE is documentation
#    only — it does not publish the port.
EXPOSE 7860

# Exec-form CMD so the Python process is PID 1 and receives SIGTERM directly.
CMD ["python", "app.py"]