# syntax=docker/dockerfile:1
# Based on llama-gpt-api:
# Pin the image to llama-cpp-python 0.1.78 to avoid ggml => gguf breaking changes.
# (The previous ':latest' tag contradicted this comment and made builds
# non-reproducible; 'v0.1.78' is the published tag for that release.)
FROM ghcr.io/abetlen/llama-cpp-python:v0.1.78

EXPOSE 8000

# Use key=value form; legacy space-separated ENV is deprecated.
ENV PYTHONUNBUFFERED=1
# Absolute path so it matches the VOLUME declaration below regardless of the
# base image's WORKDIR (the old relative './models' only lined up with
# VOLUME /models when WORKDIR happened to be '/').
ENV MODEL=/models/llama-2-13b-chat.bin
# ENV MODEL_DOWNLOAD_URL 'https://huggingface.co/TheBloke/Nous-Hermes-Llama-2-7B-GGML/resolve/main/nous-hermes-llama-2-7b.ggmlv3.q4_0.bin'

# apt-get update must run in the same layer as install (stale/absent index
# otherwise); clean the lists in the same layer so they never reach the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
       curl \
    && rm -rf /var/lib/apt/lists/*

# -f makes curl fail on HTTP errors instead of saving an error page as the
# "model"; --create-dirs replaces the separate 'RUN mkdir' layer.
RUN curl -fL --create-dirs \
      https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/resolve/main/llama-2-13b-chat.ggmlv3.q4_K_M.bin \
      -o /models/llama-2-13b-chat.bin

# Build the project
# NOTE(review): assumes the base image ships a Makefile in its WORKDIR — confirm.
RUN make build

# Declare the volume AFTER /models is populated: build-time writes to an
# already-declared VOLUME path are discarded.
VOLUME ["/models"]

# Run the server
CMD ["python3", "-m", "llama_cpp.server"]