muryshev committed on
Commit
890d69d
·
verified ·
1 Parent(s): 5b6cfb6

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +18 -12
Dockerfile CHANGED
@@ -1,14 +1,14 @@
# syntax=docker/dockerfile:1
# Previous revision: builds llama.cpp's `server` binary (pre-b3465 layout,
# LLAMA_CUBLAS era) in a CUDA devel stage, runs it from a CUDA runtime stage.
ARG UBUNTU_VERSION=22.04
ARG CUDA_VERSION=12.3.1
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

# ---- Build stage ----
FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# CUDA architectures to compile kernels for; "all" builds every supported arch.
ARG CUDA_DOCKER_ARCH=all

RUN apt-get update --fix-missing && \
    apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        gcc \
        git && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /build

RUN git clone https://github.com/ggerganov/llama.cpp.git

WORKDIR /build/llama.cpp
# Pin a specific upstream commit for reproducible builds.
RUN git checkout 821f0a271e7c9ee737945245dd7abfa22cc9b5b0

ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# LLAMA_CUBLAS enables the CUDA (cuBLAS) backend in this llama.cpp vintage.
ENV LLAMA_CUBLAS=1

# `cmake -B` creates the build directory itself — no `mkdir`/`cd` chain needed (DL3003).
RUN cmake -B build -DLLAMA_CUBLAS=ON . && \
    cmake --build build --config Release

# ---- Runtime stage ----
FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime

RUN apt-get update --fix-missing && \
    apt-get install -y --no-install-recommends wget && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy the server executable and its static web assets from the build stage.
COPY --from=build /build/llama.cpp/build/bin/server /app
COPY --from=build /build/llama.cpp/examples/server/public /app/public

COPY ./run.sh /app/run.sh
EXPOSE 7867
 
# syntax=docker/dockerfile:1
# Builds llama.cpp's `llama-server` (tag b3465, GGML_CUDA backend) in a CUDA
# devel stage and ships only the binary plus its shared libs in a CUDA runtime stage.
ARG UBUNTU_VERSION=22.04
ARG CUDA_VERSION=12.6.0
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

# ---- Build stage ----
FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# CUDA architectures to compile kernels for; "default" lets CMake decide.
ARG CUDA_DOCKER_ARCH="default"

RUN apt-get update --fix-missing && \
    apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        curl \
        gcc \
        git \
        libcurl4-openssl-dev && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /build

RUN git clone https://github.com/ggerganov/llama.cpp.git

WORKDIR /build/llama.cpp
# Minimal version required for request-queue support: 821f0a271e7c9ee737945245dd7abfa22cc9b5b0
# Pin release tag b3465 for reproducible builds.
RUN git checkout b3465

ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# GGML_CUDA replaces the deprecated LLAMA_CUBLAS flag in current llama.cpp.
ENV GGML_CUDA=1

# Use the default CUDA archs unless CUDA_DOCKER_ARCH was overridden.
# `cmake -B build` creates the build directory itself — no `mkdir` needed.
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
        export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
    fi && \
    cmake -B build -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} \
        -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
    cmake --build build --config Release --target llama-server -j$(nproc)

# ---- Runtime stage ----
FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime

# Runtime needs only the shared libraries, not the -dev headers:
# libcurl4 (llama-server is built with -DLLAMA_CURL=ON) and libgomp1 (OpenMP).
RUN apt-get update --fix-missing && \
    apt-get install -y --no-install-recommends \
        libcurl4 \
        libgomp1 \
        wget && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app

# llama-server links against these shared libs; place them next to the binary.
# NOTE(review): loading them from /app presumably relies on run.sh setting
# LD_LIBRARY_PATH (run.sh is outside this file) — confirm.
COPY --from=build /build/llama.cpp/build/ggml/src/libggml.so /app/libggml.so
COPY --from=build /build/llama.cpp/build/src/libllama.so /app/libllama.so
COPY --from=build /build/llama.cpp/build/bin/llama-server /app/server

COPY ./run.sh /app/run.sh
EXPOSE 7867