Spaces:

CharlieBonito
/

ClarityGuardAgent

Sleeping

App Files Community

CharlieBonito committed on Apr 24

Commit

a3dfa89

verified ·

1 Parent(s): 4048f9f

Update Dockerfile

Browse files

Files changed (1) hide show

Dockerfile +15 -46

Dockerfile CHANGED Viewed

@@ -1,35 +1,3 @@
-# --- ETAPA DE COMPILACIÓN ---
-FROM nvidia/cuda:12.6.3-cudnn-devel-ubuntu22.04 AS llama-builder
-ENV DEBIAN_FRONTEND=noninteractive
-RUN apt-get update && apt-get install -y \
-    git cmake build-essential \
-    && rm -rf /var/lib/apt/lists/*
-RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so \
-         /usr/local/cuda/lib64/stubs/libcuda.so.1 && \
-    echo "/usr/local/cuda/lib64/stubs" > /etc/ld.so.conf.d/cuda-stubs.conf && \
-    ldconfig
-# Compilación DESACTIVANDO AVX512 para evitar SIGILL en el EPYC virtualizado
-RUN git clone --depth 1 https://github.com/ggerganov/llama.cpp /tmp/llama-cpp-src && \
-    cd /tmp/llama-cpp-src && \
-    cmake -B build \
-      -DGGML_CUDA=ON \
-      -DCMAKE_CUDA_ARCHITECTURES="89" \
-      -DGGML_OPENMP=OFF \
-      -DCMAKE_BUILD_TYPE=Release \
-      -DGGML_AVX512=OFF \
-      -DGGML_AVX512_VBMI=OFF \
-      -DGGML_AVX512_VNNI=OFF && \
-    cmake --build build --target llama-server -j4 && \
-    mkdir -p /opt/llama-cpp/libs && \
-    find /tmp/llama-cpp-src/build -name "*.so*" -exec cp {} /opt/llama-cpp/libs/ \; && \
-    cp /tmp/llama-cpp-src/build/bin/llama-server /opt/llama-cpp/llama-server && \
-    chmod +x /opt/llama-cpp/llama-server
-# --- ETAPA DE EJECUCIÓN ---
 FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu22.04
 ENV DEBIAN_FRONTEND=noninteractive
@@ -38,29 +6,30 @@ ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
 ENV CUDA_VISIBLE_DEVICES=0
 RUN apt-get update && apt-get install -y \
-    python3 python3-pip \
-    git git-lfs curl \
-    libgomp1 \
     && rm -rf /var/lib/apt/lists/*
-COPY --from=llama-builder /opt/llama-cpp/llama-server /opt/llama-cpp/llama-server
-COPY --from=llama-builder /opt/llama-cpp/libs/* /usr/local/lib/
-# Limpiar compat para que NVIDIA inyecte los drivers reales del host
-RUN rm -rf /usr/local/cuda/compat && \
-    rm -rf /usr/local/cuda-12.6/compat && \
-    ldconfig
 WORKDIR /app
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 COPY . .
 EXPOSE 7860
-COPY start.sh .
 RUN chmod +x start.sh
 CMD ["./start.sh"]

 FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu22.04
 ENV DEBIAN_FRONTEND=noninteractive
 ENV CUDA_VISIBLE_DEVICES=0
 RUN apt-get update && apt-get install -y \
+    python3 python3-pip git git-lfs curl libgomp1 \
     && rm -rf /var/lib/apt/lists/*
 WORKDIR /app
+# Descargar binario precompilado b4685 (CUDA 12, sin AVX512 problemático)
+RUN set -e && \
+    VER="b4685" && \
+    URL="https://github.com/ggml-org/llama.cpp/releases/download/${VER}/llama-${VER}-bin-ubuntu-x64-cuda12.tar.gz" && \
+    echo "Descargando ${URL}..." && \
+    curl -fL "${URL}" -o /tmp/llama.tar.gz && \
+    test $(stat -c%s /tmp/llama.tar.gz) -gt 5000000 && \
+    mkdir -p /opt/llama-cpp && \
+    tar xzf /tmp/llama.tar.gz -C /opt/llama-cpp --strip-components=1 && \
+    cp /opt/llama-cpp/lib/*.so* /usr/local/lib/ && \
+    chmod +x /opt/llama-cpp/llama-server && \
+    ldconfig && \
+    rm /tmp/llama.tar.gz
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 COPY . .
 EXPOSE 7860
 RUN chmod +x start.sh
 CMD ["./start.sh"]