muryshev committed on
Commit
608d0ea
·
verified ·
1 Parent(s): be8e555

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +19 -19
Dockerfile CHANGED
@@ -1,15 +1,15 @@
1
  ARG UBUNTU_VERSION=22.04
2
- ARG CUDA_VERSION=12.2.0
3
  ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
4
  ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
5
 
6
- # Unless otherwise specified, we make a fat build.
7
- ARG CUDA_DOCKER_ARCH=all
8
-
9
  FROM ${BASE_CUDA_DEV_CONTAINER} as build
10
 
11
- RUN apt-get update && \
12
- apt-get install -y build-essential git cmake wget
 
 
 
13
 
14
  WORKDIR /build
15
 
@@ -17,30 +17,30 @@ RUN git clone https://github.com/ggerganov/llama.cpp.git
17
 
18
  WORKDIR /build/llama.cpp
19
 
20
- # Set nvcc architecture
21
  ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
22
- # Enable cuBLAS
23
  ENV LLAMA_CUBLAS=1
24
 
25
- RUN make LLAMA_CUBLAS=1
26
-
27
- WORKDIR /data
28
- RUN wget https://huggingface.co/IlyaGusev/saiga2_70b_gguf/resolve/main/ggml-model-q2_K.gguf -nv -O model.gguf
29
 
30
  FROM ${BASE_CUDA_RUN_CONTAINER} as runtime
31
-
 
 
32
 
33
  WORKDIR /app
34
 
35
  # Copy the executable from the build stage
36
- COPY --from=build /build/llama.cpp/server /app
37
- COPY --from=build /data/model.gguf /data/model.gguf
38
  COPY ./run.sh /app/run.sh
39
-
40
- EXPOSE 7860
41
 
42
  # Make the script executable
43
  RUN chmod +x run.sh
44
- ENV LC_ALL=C.utf8
45
  # CMD to run your script
46
- CMD /app/run.sh
 
# syntax=docker/dockerfile:1
# Multi-stage build: compile llama.cpp's server with CUDA in a -devel image,
# ship only the binary + web assets in the smaller -runtime image.
ARG UBUNTU_VERSION=22.04
ARG CUDA_VERSION=12.3.1
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# nvcc target architectures; "all" produces a fat binary usable on any GPU.
ARG CUDA_DOCKER_ARCH=all

# update + install in one layer (avoids stale apt cache); list sorted for diffs
RUN apt-get update --fix-missing && \
    apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        gcc \
        git && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /build

# NOTE(review): unpinned clone of upstream master — builds are not reproducible;
# consider `git clone --depth 1 --branch <tag>` to pin a release.
RUN git clone https://github.com/ggerganov/llama.cpp.git

WORKDIR /build/llama.cpp

# Build-stage knobs read by llama.cpp's build system.
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
ENV LLAMA_CUBLAS=1

# cmake -B replaces the mkdir/cd dance (hadolint DL3003); same build/ out-dir.
RUN cmake -B build -DLLAMA_CUBLAS=ON && \
    cmake --build build --config Release

FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime

# curl is used at runtime (e.g. model download / health probing by run.sh)
RUN apt-get update --fix-missing && \
    apt-get install -y --no-install-recommends curl && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy the server binary and its static web UI from the build stage
COPY --from=build /build/llama.cpp/build/bin/server /app
COPY --from=build /build/llama.cpp/examples/server/public /app/public
COPY ./run.sh /app/run.sh

# Documentation only; the port must still be published at `docker run`.
EXPOSE 7867

RUN chmod +x run.sh

# NOTE(review): image runs as root — add a non-root USER once run.sh's
# filesystem needs are confirmed.
# Exec form so run.sh is PID 1 and receives SIGTERM from `docker stop`.
CMD ["/app/run.sh"]