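# Dockerfile: build llama.cpp with CUDA support and serve a GGUF model
# through llama-server. Override CUDA_IMAGE at build time to target a
# different CUDA/Ubuntu combination.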
ARG CUDA_IMAGE="12.1.0-devel-ubuntu22.04" |
FROM nvidia/cuda:${CUDA_IMAGE} |
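
# Suppress interactive prompts from apt during the build.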
ENV DEBIAN_FRONTEND=noninteractive |
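
# Build toolchain and libraries for llama.cpp (git, cmake, compilers, libcurl),
# plus ffmpeg and common Python build dependencies; the apt cache is cleared
# afterwards to keep the image small. Note: the GPU kernel driver itself comes
# from the host via the NVIDIA Container Toolkit at runtime.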
RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get install -y --no-install-recommends --fix-missing \
    git \
    git-lfs \
    wget \
    curl \
    libcurl4-openssl-dev \
    cmake \
    build-essential \
    libssl-dev \
    zlib1g-dev \
    libbz2-dev \
    libreadline-dev \
    libsqlite3-dev \
    libncursesw5-dev \
    xz-utils \
    tk-dev \
    libxml2-dev \
    libxmlsec1-dev \
    libffi-dev \
    liblzma-dev \
    ffmpeg \
    nvidia-driver-570 && \
    rm -rf /var/lib/apt/lists/*
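
# Run as a non-root user (UID 1000), creating it only if the base image does
# not already provide one. POSIX redirection is used here because RUN executes
# under /bin/sh, which does not understand bash's &> shorthand.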
RUN id -u 1000 >/dev/null 2>&1 || useradd -m -u 1000 user
USER 1000 |
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:${PATH}
WORKDIR ${HOME}/app |
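
# Fetch the llama.cpp sources.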
RUN git clone https://github.com/ggerganov/llama.cpp.git |
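
# Configure with CUDA kernels enabled, then compile; the resulting binaries,
# including llama-server, land in build/bin.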
RUN cmake llama.cpp -B build -DGGML_CUDA=ON |
RUN cmake --build build --config Release -j$(nproc)
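
# Serve the model on the first GPU. The -hf flag downloads the GGUF from the
# Hugging Face Hub on first start; --n-gpu-layers controls how many layers are
# offloaded to VRAM.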
CMD CUDA_VISIBLE_DEVICES=0 ./build/bin/llama-server \
    -hf unsloth/Qwen3-30B-A3B-GGUF \
    --cache-type-k q4_0 \
    --threads 12 \
    --prio 2 \
    --n-gpu-layers 49 \
    --seed 3407
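
# Example usage (image tag and port mapping are illustrative; --gpus requires
# the NVIDIA Container Toolkit on the host):
#   docker build -t llamacpp-cuda .
#   docker run --gpus all -p 8080:8080 llamacpp-cuda
# llama-server listens on 127.0.0.1:8080 by default, so add --host 0.0.0.0 to
# the CMD above if the server must be reachable through the published port.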