FROM ubuntu:22.04

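# System toolchain and libraries: compilers, OpenBLAS and OpenMP for the
# llama.cpp build, plus CMake, Python, git, and download tools.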
RUN apt-get update && \
    apt-get install -y \
        build-essential \
        libssl-dev \
        zlib1g-dev \
        libopenblas-dev \
        libomp-dev \
        cmake \
        pkg-config \
        git \
        python3-pip \
        curl \
        libcurl4-openssl-dev \
        wget && \
    rm -rf /var/lib/apt/lists/*

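# Python dependencies for the FastAPI app in app.py; 'httpx[http2]' is quoted
# so the shell does not glob-expand the brackets.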
RUN pip3 install --upgrade pip && \
    pip3 install openai fastapi uvicorn pydantic orjson httptools 'httpx[http2]'

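# Build llama.cpp's HTTP server with OpenBLAS-accelerated CPU inference.
# Note: the clone is unpinned, so each build tracks llama.cpp's default branch.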
RUN git clone https://github.com/ggerganov/llama.cpp && \
    cd llama.cpp && \
    cmake -B build -S . \
        -DLLAMA_BUILD_SERVER=ON \
        -DGGML_BLAS=ON \
        -DGGML_BLAS_VENDOR=OpenBLAS \
        -DCMAKE_BUILD_TYPE=Release && \
    cmake --build build --config Release --target llama-server -j $(nproc)

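# Fetch the quantized GGUF model so it is baked into the image at build time.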
RUN mkdir -p /models && \
    wget -O /models/model.gguf \
        https://huggingface.co/unsloth/Qwen3.5-0.8B-GGUF/resolve/main/Qwen3.5-0.8B-Q8_0.gguf

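# app.py (the FastAPI app) and start.sh (the container entrypoint) come from
# the build context. A minimal start.sh might look like this (hypothetical
# sketch; the real script is not shown here):
#   #!/bin/bash
#   /llama.cpp/build/bin/llama-server -m /models/model.gguf --port 8080 &
#   uvicorn app:app --host 0.0.0.0 --port 7860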
COPY app.py /app.py
COPY start.sh /start.sh
RUN chmod +x /start.sh

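# 7860 is the port a Hugging Face Spaces Docker runtime expects by default
# (and Gradio's default), which this image appears to target.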
EXPOSE 7860

CMD ["/start.sh"]
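
# Example local build and run (the "llama-proxy" tag is arbitrary):
#   docker build -t llama-proxy .
#   docker run -p 7860:7860 llama-proxy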