muryshev committed on
Commit
98aa1e2
·
1 Parent(s): 1b406b7

Reconfigured to use sh startup file.

Browse files
Files changed (2) hide show
  1. Dockerfile +8 -3
  2. run.sh +2 -0
Dockerfile CHANGED
@@ -2,6 +2,7 @@ ARG UBUNTU_VERSION=22.04
2
  ARG CUDA_VERSION=12.3.1
3
  ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
4
  ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
 
5
 
6
  FROM ${BASE_CUDA_DEV_CONTAINER} as build
7
 
@@ -22,11 +23,10 @@ ENV LLAMA_CUBLAS=1
22
  RUN mkdir build && \
23
  cd build && \
24
  cmake .. -DLLAMA_CUBLAS=ON && \
25
- # cmake .. && \
26
  cmake --build . --config Release
27
 
28
  WORKDIR /data
29
- RUN wget https://huggingface.co/IlyaGusev/saiga2_70b_gguf/resolve/main/ggml-model-q4_1.gguf -nv -O model.gguf
30
 
31
  FROM ${BASE_CUDA_RUN_CONTAINER} as runtime
32
 
@@ -36,7 +36,12 @@ WORKDIR /app
36
  # Copy the executable from the build stage
37
  COPY --from=build /build/llama.cpp/build/bin/server /app
38
  COPY --from=build /data/model.gguf /data/model.gguf
 
39
  WORKDIR /app
40
  EXPOSE 7860
41
 
42
- CMD ./server -m /data/model.gguf -c 4096 -ngl 70 --port 7860 --host 0.0.0.0
 
 
 
 
 
2
  ARG CUDA_VERSION=12.3.1
3
  ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
4
  ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
5
+ ARG MODEL_URL=https://huggingface.co/IlyaGusev/saiga2_70b_gguf/resolve/main/ggml-model-q4_1.gguf
6
 
7
  FROM ${BASE_CUDA_DEV_CONTAINER} as build
8
 
 
23
  RUN mkdir build && \
24
  cd build && \
25
  cmake .. -DLLAMA_CUBLAS=ON && \
 
26
  cmake --build . --config Release
27
 
28
  WORKDIR /data
29
+ # An ARG declared before the first FROM is only in scope for FROM lines;
+ # redeclare it (without a value) so this stage inherits the default URL.
+ ARG MODEL_URL
+ RUN wget "${MODEL_URL}" -nv -O model.gguf
30
 
31
  FROM ${BASE_CUDA_RUN_CONTAINER} as runtime
32
 
 
36
  # Copy the executable from the build stage
37
  COPY --from=build /build/llama.cpp/build/bin/server /app
38
  COPY --from=build /data/model.gguf /data/model.gguf
39
+ COPY ./run.sh /app/run.sh
40
  WORKDIR /app
41
  EXPOSE 7860
42
 
43
+ # Make the script executable
44
+ RUN chmod +x run.sh
45
+
46
+ # Start the startup script in exec form so it is launched directly instead of through `/bin/sh -c`
47
+ CMD ["/app/run.sh"]
run.sh ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ #!/bin/bash
2
+ # Replace the shell with the server process so signals sent to this script reach the server itself
+ exec /app/server -m /data/model.gguf -c 4096 -ngl 70 --port 7860 --host 0.0.0.0