CharlieBonito committed on
Commit
a3dfa89
verified
1 Parent(s): 4048f9f

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +15 -46
Dockerfile CHANGED
@@ -1,35 +1,3 @@
1
- # --- ETAPA DE COMPILACIÓN ---
2
- FROM nvidia/cuda:12.6.3-cudnn-devel-ubuntu22.04 AS llama-builder
3
-
4
- ENV DEBIAN_FRONTEND=noninteractive
5
-
6
- RUN apt-get update && apt-get install -y \
7
- git cmake build-essential \
8
- && rm -rf /var/lib/apt/lists/*
9
-
10
- RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so \
11
- /usr/local/cuda/lib64/stubs/libcuda.so.1 && \
12
- echo "/usr/local/cuda/lib64/stubs" > /etc/ld.so.conf.d/cuda-stubs.conf && \
13
- ldconfig
14
-
15
- # Compilación DESACTIVANDO AVX512 para evitar SIGILL en el EPYC virtualizado
16
- RUN git clone --depth 1 https://github.com/ggerganov/llama.cpp /tmp/llama-cpp-src && \
17
- cd /tmp/llama-cpp-src && \
18
- cmake -B build \
19
- -DGGML_CUDA=ON \
20
- -DCMAKE_CUDA_ARCHITECTURES="89" \
21
- -DGGML_OPENMP=OFF \
22
- -DCMAKE_BUILD_TYPE=Release \
23
- -DGGML_AVX512=OFF \
24
- -DGGML_AVX512_VBMI=OFF \
25
- -DGGML_AVX512_VNNI=OFF && \
26
- cmake --build build --target llama-server -j4 && \
27
- mkdir -p /opt/llama-cpp/libs && \
28
- find /tmp/llama-cpp-src/build -name "*.so*" -exec cp {} /opt/llama-cpp/libs/ \; && \
29
- cp /tmp/llama-cpp-src/build/bin/llama-server /opt/llama-cpp/llama-server && \
30
- chmod +x /opt/llama-cpp/llama-server
31
-
32
- # --- ETAPA DE EJECUCIÓN ---
33
  FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu22.04
34
 
35
  ENV DEBIAN_FRONTEND=noninteractive
@@ -38,29 +6,30 @@ ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
38
  ENV CUDA_VISIBLE_DEVICES=0
39
 
40
  RUN apt-get update && apt-get install -y \
41
- python3 python3-pip \
42
- git git-lfs curl \
43
- libgomp1 \
44
  && rm -rf /var/lib/apt/lists/*
45
 
46
- COPY --from=llama-builder /opt/llama-cpp/llama-server /opt/llama-cpp/llama-server
47
- COPY --from=llama-builder /opt/llama-cpp/libs/* /usr/local/lib/
48
-
49
- # Limpiar compat para que NVIDIA inyecte los drivers reales del host
50
- RUN rm -rf /usr/local/cuda/compat && \
51
- rm -rf /usr/local/cuda-12.6/compat && \
52
- ldconfig
53
-
54
  WORKDIR /app
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  COPY requirements.txt .
57
  RUN pip install --no-cache-dir -r requirements.txt
58
 
59
  COPY . .
60
 
61
  EXPOSE 7860
62
-
63
- COPY start.sh .
64
  RUN chmod +x start.sh
65
-
66
  CMD ["./start.sh"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu22.04
2
 
3
  ENV DEBIAN_FRONTEND=noninteractive
 
6
  ENV CUDA_VISIBLE_DEVICES=0
7
 
8
  RUN apt-get update && apt-get install -y \
9
+ python3 python3-pip git git-lfs curl libgomp1 \
 
 
10
  && rm -rf /var/lib/apt/lists/*
11
 
 
 
 
 
 
 
 
 
12
  WORKDIR /app
13
 
14
+ # Descargar binario precompilado b4685 (CUDA 12, sin AVX512 problemático)
15
+ RUN set -e && \
16
+ VER="b4685" && \
17
+ URL="https://github.com/ggml-org/llama.cpp/releases/download/${VER}/llama-${VER}-bin-ubuntu-x64-cuda12.tar.gz" && \
18
+ echo "Descargando ${URL}..." && \
19
+ curl -fL "${URL}" -o /tmp/llama.tar.gz && \
20
+ test $(stat -c%s /tmp/llama.tar.gz) -gt 5000000 && \
21
+ mkdir -p /opt/llama-cpp && \
22
+ tar xzf /tmp/llama.tar.gz -C /opt/llama-cpp --strip-components=1 && \
23
+ cp /opt/llama-cpp/lib/*.so* /usr/local/lib/ && \
24
+ chmod +x /opt/llama-cpp/llama-server && \
25
+ ldconfig && \
26
+ rm /tmp/llama.tar.gz
27
+
28
  COPY requirements.txt .
29
  RUN pip install --no-cache-dir -r requirements.txt
30
 
31
  COPY . .
32
 
33
  EXPOSE 7860
 
 
34
  RUN chmod +x start.sh
 
35
  CMD ["./start.sh"]