Erik Schultheis committed on
Commit
ef8b98a
·
1 Parent(s): 564b79e

update to support GPU and llama 3

Browse files
Files changed (2) hide show
  1. Dockerfile +12 -22
  2. llama-annotate +1 -1
Dockerfile CHANGED
@@ -1,36 +1,26 @@
1
- FROM nvidia/cuda:12.6.1-devel-ubuntu22.04
2
 
3
  WORKDIR /code
4
 
5
- # RUN git clone --branch gradio --single-branch https://version.aalto.fi/gitlab/schulte1/llama-annotate.git
6
-
7
- # WORKDIR /code/llama-annotate
8
-
9
  RUN apt-get update && apt-get install -y cmake ccache wget python3-pip git
10
  RUN apt-get clean && rm -rf /var/lib/apt/lists/*
11
 
 
 
 
 
 
12
 
13
  COPY llama-annotate/ .
 
14
 
 
 
 
15
 
16
-
17
- RUN cmake -S . -B build -DLLAMA_CUBLAS=1
18
- RUN cmake --build build
19
-
20
- # COPY ./requirements.txt /code/requirements.txt
21
- # RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
22
-
23
- RUN pip3 install --no-cache-dir gradio numpy
24
-
25
-
26
- RUN wget --no-verbose https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q2_K.gguf
27
- #RUN wget --no-verbose https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q4_K_M.gguf
28
- #RUN wget --no-verbose https://huggingface.co/TheBloke/Llama-2-13B-GGUF/resolve/main/llama-2-13b.Q6_K.gguf
29
-
30
- # RUN mkdir -p models
31
- RUN mv *.gguf models
32
 
33
  EXPOSE 7860
34
  ENV GRADIO_SERVER_NAME="0.0.0.0"
35
 
36
- CMD ["python3", "gui/gui.py"]
 
1
+ FROM nvidia/cuda:12.1.1-devel-ubuntu22.04
2
 
3
  WORKDIR /code
4
 
 
 
 
 
5
  RUN apt-get update && apt-get install -y cmake ccache wget python3-pip git
6
  RUN apt-get clean && rm -rf /var/lib/apt/lists/*
7
 
8
+ RUN wget --no-verbose https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q2_K.gguf &&\
9
+ wget --no-verbose https://huggingface.co/TheBloke/Llama-2-13B-GGUF/resolve/main/llama-2-13b.Q6_K.gguf &&\
10
+ wget --no-verbose https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF-v2/resolve/main/Meta-Llama-3-8B-Instruct-v2.Q4_K_M.gguf &&\
11
+ mkdir models &&\
12
+ mv *.gguf models
13
 
14
  COPY llama-annotate/ .
15
+ ENV CUDA_DOCKER_ARCH=all
16
 
17
+ RUN cmake -S . -B build -DGGML_CUDA=ON
18
+ RUN cmake --build build --target ggml --parallel
19
+ RUN cmake --build build --parallel
20
 
21
+ RUN pip3 install --no-cache-dir gradio numpy gguf-parser
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  EXPOSE 7860
24
  ENV GRADIO_SERVER_NAME="0.0.0.0"
25
 
26
+ CMD ["python3", "-u", "gui/gui.py"]
llama-annotate CHANGED
@@ -1 +1 @@
1
- Subproject commit 58a764509007360bbb95250e3174946a7219de5f
 
1
+ Subproject commit 267f94217e5763544c3db80c72c2f32e30cdf744