File size: 741 Bytes
6ab2320
d74b10b
8e30e39
 
5b14eba
5aee2f2
d74b10b
5b14eba
 
 
 
ad08817
5b14eba
6ab2320
5b14eba
5aee2f2
 
3294758
5b14eba
d74b10b
8e30e39
0122415
d74b10b
6ab2320
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# syntax=docker/dockerfile:1
FROM python:3.10-slim

WORKDIR /app

# 1. Install wget (essential for downloading the model below).
#    --no-install-recommends keeps the layer minimal (hadolint DL3015);
#    the apt list cache is removed in the same layer so it never persists.
RUN apt-get update && apt-get install -y --no-install-recommends \
        wget \
    && rm -rf /var/lib/apt/lists/*

# 2. Install the API server deps.
#    NOTE(review): fastapi/uvicorn are unpinned (DL3013) — pin exact
#    versions here once the known-good versions are confirmed.
RUN pip install --no-cache-dir fastapi uvicorn

# 3. Install llama-cpp-python FORCING the CPU binary wheel (no compilation).
#    Version 0.2.76 is pinned as it is highly stable for MPT models.
#    --no-cache-dir avoids baking the pip cache into the layer (DL3042).
RUN pip install --no-cache-dir llama-cpp-python==0.2.76 \
    --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu \
    --prefer-binary

# 4. Download the model silently (Q2_K quantization of MPT-7B-chat).
RUN wget -q -O model.gguf \
    "https://huggingface.co/maddes8cht/mosaicml-mpt-7b-chat-gguf/resolve/main/mosaicml-mpt-7b-chat-Q2_K.gguf?download=true"

# 5. Copy the application script.
COPY app.py .

# 6. Run as a non-root user; chown so the app can read the model and
#    write into /app if it needs to. Port 7860 is >1024, so an
#    unprivileged user can bind it.
RUN useradd --system --uid 10001 --home /app appuser \
    && chown -R appuser /app
USER appuser

# Documentation only — the port still must be published at `docker run`.
EXPOSE 7860

CMD ["python", "app.py"]