ARG CUDA_IMAGE="12.1.0-devel-ubuntu22.04"
FROM nvidia/cuda:${CUDA_IMAGE}

ENV DEBIAN_FRONTEND=noninteractive

RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get install -y --no-install-recommends --fix-missing \
    git \
    git-lfs \
    wget \
    curl \
    libcurl4-openssl-dev \
    cmake \
    # Python build dependencies
    build-essential \
    libssl-dev \
    zlib1g-dev \
    libbz2-dev \
    libreadline-dev \
    libsqlite3-dev \
    libncursesw5-dev \
    xz-utils \
    tk-dev \
    libxml2-dev \
    libxmlsec1-dev \
    libffi-dev \
    liblzma-dev \
    ffmpeg \
    nvidia-driver-570

# Create a user with UID 1000 if one does not already exist.
# (&> is a bashism; use POSIX redirection so this works under /bin/sh.)
RUN id -u 1000 >/dev/null 2>&1 || useradd -m -u 1000 user
USER 1000
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:${PATH}

WORKDIR ${HOME}/app

# Build llama.cpp with CUDA support. LLAMA_CURL is enabled explicitly so that
# llama-server can download models from Hugging Face via the -hf flag.
RUN git clone https://github.com/ggerganov/llama.cpp.git
RUN cmake llama.cpp -B build -DGGML_CUDA=ON -DLLAMA_CURL=ON
RUN cmake --build build --config Release

CMD CUDA_VISIBLE_DEVICES=0 ./build/bin/llama-server \
    -hf unsloth/Qwen3-30B-A3B-GGUF \
    --cache-type-k q4_0 \
    --threads 12 \
    --prio 2 \
    --n-gpu-layers 49 \
    --seed 3407

# Equivalent manual build and run steps outside Docker:
# git clone https://github.com/ggerganov/llama.cpp.git
# cd llama.cpp
# cmake -B build -DGGML_CUDA=ON -DLLAMA_CURL=ON
# cmake --build build --config Release
# CUDA_VISIBLE_DEVICES=0 ./build/bin/llama-server \
#   -hf unsloth/Qwen3-30B-A3B-GGUF \
#   --cache-type-k q4_0 \
#   --threads 12 \
#   --prio 2 \
#   --n-gpu-layers 49 \
#   --seed 3407
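
# A typical way to build and run this image (the "llama-cuda" tag is an
# arbitrary choice, and port 8080 is llama-server's default; neither is set
# elsewhere in this file). --gpus all requires the NVIDIA Container Toolkit:
#
#   docker build -t llama-cuda .
#   docker run --gpus all -p 8080:8080 llama-cuda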
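
# Once running, llama-server exposes an OpenAI-compatible HTTP API; a minimal
# sketch of a request, assuming the port mapping above:
#
#   curl http://localhost:8080/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"messages": [{"role": "user", "content": "Hello"}]}'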