Create Dockerfile
Browse files- Dockerfile +21 -0
Dockerfile
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# syntax=docker/dockerfile:1
# Based on llama-gpt-api:
# Pin the image to llama-cpp-python 0.1.78 to avoid ggml => gguf breaking changes.
# The sha256 digest below (not the "latest" tag) is what actually pins the image.
FROM ghcr.io/abetlen/llama-cpp-python:latest@sha256:b6d21ff8c4d9baad65e1fa741a0f8c898d68735fff3f3cd777e3f0c6a1839dd4

# Mount point for externally supplied models.
# NOTE(review): the model below is downloaded to ./models (relative to the base
# image's WORKDIR), which may not be the same directory as /models — confirm;
# anything written under a VOLUME path after this line is discarded at runtime.
VOLUME ["/models"]

# llama_cpp.server listens on 8000 (documentation only; publish with `docker run -p`)
EXPOSE 8000

# Unbuffered stdout/stderr so container logs appear immediately
ENV PYTHONUNBUFFERED=1
# Model path the server loads at startup (relative to the image's WORKDIR)
ENV MODEL='./models/llama-2-13b-chat.bin'
# ENV MODEL_DOWNLOAD_URL 'https://huggingface.co/TheBloke/Nous-Hermes-Llama-2-7B-GGML/resolve/main/nous-hermes-llama-2-7b.ggmlv3.q4_0.bin'

# Fetch the model in a single layer:
# - `apt-get update` must share a layer with `install` (otherwise the package
#   lists are stale or missing and the install fails on slim bases)
# - `--no-install-recommends` and removing /var/lib/apt/lists/* keep the layer small
# - `curl -f` fails the build on an HTTP error instead of saving the error page
#   as the model file
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
    && rm -rf /var/lib/apt/lists/* \
    && mkdir -p models \
    && curl -fL https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/resolve/main/llama-2-13b-chat.ggmlv3.q4_K_M.bin -o ./models/llama-2-13b-chat.bin

# Build the project
RUN make build

# Run the server (exec form: the server is PID 1 and receives SIGTERM from `docker stop`)
CMD ["python3", "-m", "llama_cpp.server"]
|