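# Base image is overridable at build time, e.g.:
#   docker build --build-arg CUDA_IMAGE="12.4.1-devel-ubuntu22.04" .
# A -devel image is required: it ships nvcc and the CUDA headers needed to
# compile llama.cpp's CUDA backend.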
ARG CUDA_IMAGE="12.1.0-devel-ubuntu22.04"
FROM nvidia/cuda:${CUDA_IMAGE}
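# Suppress interactive debconf prompts (e.g. from tzdata) during apt installs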
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && \
apt-get upgrade -y && \
apt-get install -y --no-install-recommends --fix-missing \
git \
git-lfs \
wget \
curl \
libcurl4-openssl-dev \
cmake \
# python build dependencies \
build-essential \
libssl-dev \
zlib1g-dev \
libbz2-dev \
libreadline-dev \
libsqlite3-dev \
libncursesw5-dev \
xz-utils \
tk-dev \
libxml2-dev \
libxmlsec1-dev \
libffi-dev \
liblzma-dev \
ffmpeg \
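# NOTE: the GPU kernel driver normally comes from the host via the NVIDIA \
# container toolkit; installing nvidia-driver-570 here mainly pulls in the \
# matching user-space libraries inside the image \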
nvidia-driver-570 \
# clean apt caches to keep the image smaller \
&& rm -rf /var/lib/apt/lists/*
# Create a user with UID 1000 unless one already exists
# (avoid the bashism "&>": RUN uses /bin/sh, which mis-parses it)
RUN id -u 1000 >/dev/null 2>&1 || useradd -m -u 1000 user
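# Drop privileges: the rest of the build and the server run as UID 1000,
# with a writable HOME and user-local binaries on PATH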
USER 1000
ENV HOME=/home/user \
PATH=/home/user/.local/bin:${PATH}
WORKDIR ${HOME}/app
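# Fetch llama.cpp sources; a shallow clone (--depth 1) would also work and is faster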
RUN git clone https://github.com/ggerganov/llama.cpp.git
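# Configure the CUDA backend. The -hf download in the CMD below needs libcurl
# support (LLAMA_CURL, enabled by default in recent llama.cpp), hence the
# libcurl4-openssl-dev package installed above.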
RUN cmake llama.cpp -B build -DGGML_CUDA=ON
# Parallel build across all available cores
RUN cmake --build build --config Release -j$(nproc)
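# Serve a GGUF model pulled from the Hugging Face Hub (-hf) on GPU 0, with
# 49 layers offloaded to the GPU and the K cache quantized to q4_0.
# NOTE: llama-server binds to 127.0.0.1:8080 by default; add --host 0.0.0.0
# (and --port as needed) to reach it from outside the container.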
CMD CUDA_VISIBLE_DEVICES=0 ./build/bin/llama-server \
-hf unsloth/Qwen3-30B-A3B-GGUF \
--cache-type-k q4_0 \
--threads 12 \
--prio 2 \
--n-gpu-layers 49 \
--seed 3407
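# Example build/run (image tag is illustrative; assumes --host 0.0.0.0 is
# added to the CMD above so the port is reachable):
#   docker build -t llamacpp-server .
#   docker run --gpus all -p 8080:8080 llamacpp-server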
# For reference, the equivalent manual build/run steps:
# git clone https://github.com/ggerganov/llama.cpp.git
# cd llama.cpp
# cmake -B build -DGGML_CUDA=ON -DLLAMA_CURL=ON
# cmake --build build --config Release
# CUDA_VISIBLE_DEVICES=0 ./build/bin/llama-server \
# -hf unsloth/Qwen3-30B-A3B-GGUF \
# --cache-type-k q4_0 \
# --threads 12 \
# --prio 2 \
# --n-gpu-layers 49 \
# --seed 3407