Spaces:
Sleeping
Sleeping
Update Dockerfile
Browse files — Dockerfile: +18 −12
Dockerfile
CHANGED
|
@@ -1,14 +1,14 @@
|
|
| 1 |
ARG UBUNTU_VERSION=22.04
|
| 2 |
-
ARG CUDA_VERSION=12.
|
| 3 |
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
|
| 4 |
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
|
| 5 |
|
| 6 |
FROM ${BASE_CUDA_DEV_CONTAINER} as build
|
| 7 |
|
| 8 |
-
ARG CUDA_DOCKER_ARCH=
|
| 9 |
|
| 10 |
RUN apt-get update --fix-missing && \
|
| 11 |
-
apt-get install -y --no-install-recommends git build-essential gcc cmake && \
|
| 12 |
rm -rf /var/lib/apt/lists/*
|
| 13 |
|
| 14 |
WORKDIR /build
|
|
@@ -16,26 +16,32 @@ WORKDIR /build
|
|
| 16 |
RUN git clone https://github.com/ggerganov/llama.cpp.git
|
| 17 |
|
| 18 |
WORKDIR /build/llama.cpp
|
| 19 |
-
|
|
|
|
| 20 |
|
| 21 |
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
|
| 22 |
-
ENV LLAMA_CUBLAS=1
|
|
|
|
| 23 |
|
|
|
|
| 24 |
RUN mkdir build && \
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
|
|
|
|
|
|
| 28 |
|
| 29 |
FROM ${BASE_CUDA_RUN_CONTAINER} as runtime
|
| 30 |
RUN apt-get update --fix-missing && \
|
| 31 |
-
apt-get install -y --no-install-recommends wget && \
|
| 32 |
rm -rf /var/lib/apt/lists/*
|
| 33 |
|
| 34 |
WORKDIR /app
|
| 35 |
|
| 36 |
-
|
| 37 |
-
COPY --from=build /build/llama.cpp/build/
|
| 38 |
-
COPY --from=build /build/llama.cpp/
|
|
|
|
| 39 |
COPY ./run.sh /app/run.sh
|
| 40 |
WORKDIR /app
|
| 41 |
EXPOSE 7867
|
|
|
|
# syntax=docker/dockerfile:1

ARG UBUNTU_VERSION=22.04
ARG CUDA_VERSION=12.6.0
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

# ---- Build stage: compile llama-server against CUDA ----
FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# CUDA architectures to compile for; "default" lets CMake choose its own list.
ARG CUDA_DOCKER_ARCH="default"

RUN apt-get update --fix-missing && \
    apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        curl \
        gcc \
        git \
        libcurl4-openssl-dev && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /build

# Shallow clone pinned at tag b3465: reproducible and much faster than a
# full clone followed by a separate checkout.
# Minimal version for the request queue to work: 821f0a271e7c9ee737945245dd7abfa22cc9b5b0
RUN git clone --branch b3465 --depth 1 https://github.com/ggerganov/llama.cpp.git

WORKDIR /build/llama.cpp

ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# LLAMA_CUBLAS was replaced by GGML_CUDA in newer llama.cpp releases.
# ENV LLAMA_CUBLAS=1
ENV GGML_CUDA=1

# Use the default CUDA archs if not specified.
# (cmake -B creates the build directory itself; no mkdir needed.)
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
        export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
    fi && \
    cmake -B build -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} \
        -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
    cmake --build build --config Release --target llama-server -j$(nproc)

# ---- Runtime stage: only the server binary and its shared libraries ----
FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime

# libcurl4 (runtime lib) is sufficient here; the -dev package with headers
# is only needed at link time in the build stage.
RUN apt-get update --fix-missing && \
    apt-get install -y --no-install-recommends \
        libcurl4 \
        libgomp1 \
        wget && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy only the build artifacts llama-server needs at runtime.
COPY --from=build /build/llama.cpp/build/ggml/src/libggml.so /app/libggml.so
COPY --from=build /build/llama.cpp/build/src/libllama.so /app/libllama.so
COPY --from=build /build/llama.cpp/build/bin/llama-server /app/server

COPY ./run.sh /app/run.sh

# NOTE(review): image currently runs as root; consider adding a non-root
# USER once run.sh's permission requirements are confirmed.
EXPOSE 7867