ARG CUDA_IMAGE="12.1.0-devel-ubuntu22.04"
FROM nvidia/cuda:${CUDA_IMAGE}

ENV DEBIAN_FRONTEND=noninteractive

RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get install -y --no-install-recommends --fix-missing \
    git \
    git-lfs \
    wget \
    curl \
    libcurl4-openssl-dev \
    cmake \
    # Python build dependencies
    build-essential \
    libssl-dev \
    zlib1g-dev \
    libbz2-dev \
    libreadline-dev \
    libsqlite3-dev \
    libncursesw5-dev \
    xz-utils \
    tk-dev \
    libxml2-dev \
    libxmlsec1-dev \
    libffi-dev \
    liblzma-dev \
    ffmpeg \
    nvidia-driver-570

# Create a user with UID 1000 if one does not already exist.
# (&> is a bashism; use POSIX redirection so this works under /bin/sh.)
RUN id -u 1000 >/dev/null 2>&1 || useradd -m -u 1000 user
USER 1000
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:${PATH}

WORKDIR ${HOME}/app

# Build llama.cpp with CUDA support. LLAMA_CURL is enabled explicitly so that
# llama-server can download models from Hugging Face via the -hf flag.
RUN git clone https://github.com/ggerganov/llama.cpp.git
RUN cmake llama.cpp -B build -DGGML_CUDA=ON -DLLAMA_CURL=ON
RUN cmake --build build --config Release

CMD CUDA_VISIBLE_DEVICES=0 ./build/bin/llama-server \
    -hf unsloth/Qwen3-30B-A3B-GGUF \
    --cache-type-k q4_0 \
    --threads 12 \
    --prio 2 \
    --n-gpu-layers 49 \
    --seed 3407

# Equivalent manual build and run steps outside Docker:
# git clone https://github.com/ggerganov/llama.cpp.git
# cd llama.cpp
# cmake -B build -DGGML_CUDA=ON -DLLAMA_CURL=ON
# cmake --build build --config Release
# CUDA_VISIBLE_DEVICES=0 ./build/bin/llama-server \
#   -hf unsloth/Qwen3-30B-A3B-GGUF \
#   --cache-type-k q4_0 \
#   --threads 12 \
#   --prio 2 \
#   --n-gpu-layers 49 \
#   --seed 3407
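
# A typical way to build and run this image (the "llama-cuda" tag is an
# arbitrary choice, and port 8080 is llama-server's default; neither is set
# elsewhere in this file). --gpus all requires the NVIDIA Container Toolkit:
#
#   docker build -t llama-cuda .
#   docker run --gpus all -p 8080:8080 llama-cuda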
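
# Once running, llama-server exposes an OpenAI-compatible HTTP API; a minimal
# sketch of a request, assuming the port mapping above:
#
#   curl http://localhost:8080/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"messages": [{"role": "user", "content": "Hello"}]}'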