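# Base image is overridable at build time, e.g.:
#   docker build --build-arg CUDA_IMAGE="12.4.1-devel-ubuntu22.04" .
# A -devel image is required: it ships nvcc and the CUDA headers needed to
# compile llama.cpp's CUDA backend.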
ARG CUDA_IMAGE="12.1.0-devel-ubuntu22.04"
FROM nvidia/cuda:${CUDA_IMAGE}
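# Suppress interactive debconf prompts (e.g. from tzdata) during apt installs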
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && \
apt-get upgrade -y && \
apt-get install -y --no-install-recommends --fix-missing \
git \
git-lfs \
wget \
curl \
libcurl4-openssl-dev \
cmake \
# python build dependencies \
build-essential \
libssl-dev \
zlib1g-dev \
libbz2-dev \
libreadline-dev \
libsqlite3-dev \
libncursesw5-dev \
xz-utils \
tk-dev \
libxml2-dev \
libxmlsec1-dev \
libffi-dev \
liblzma-dev \
ffmpeg \
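# NOTE: the GPU kernel driver normally comes from the host via the NVIDIA \
# container toolkit; installing nvidia-driver-570 here mainly pulls in the \
# matching user-space libraries inside the image \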
nvidia-driver-570 \
# clean apt caches to keep the image smaller \
&& rm -rf /var/lib/apt/lists/*
# Create a user with UID 1000 unless one already exists
# (avoid the bashism "&>": RUN uses /bin/sh, which mis-parses it)
RUN id -u 1000 >/dev/null 2>&1 || useradd -m -u 1000 user
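# Drop privileges: the rest of the build and the server run as UID 1000,
# with a writable HOME and user-local binaries on PATH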
USER 1000
ENV HOME=/home/user \
PATH=/home/user/.local/bin:${PATH}
WORKDIR ${HOME}/app
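# Fetch llama.cpp sources; a shallow clone (--depth 1) would also work and is faster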
RUN git clone https://github.com/ggerganov/llama.cpp.git
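# Configure the CUDA backend. The -hf download in the CMD below needs libcurl
# support (LLAMA_CURL, enabled by default in recent llama.cpp), hence the
# libcurl4-openssl-dev package installed above.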
RUN cmake llama.cpp -B build -DGGML_CUDA=ON
# Parallel build across all available cores
RUN cmake --build build --config Release -j$(nproc)
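# Serve a GGUF model pulled from the Hugging Face Hub (-hf) on GPU 0, with
# 49 layers offloaded to the GPU and the K cache quantized to q4_0.
# NOTE: llama-server binds to 127.0.0.1:8080 by default; add --host 0.0.0.0
# (and --port as needed) to reach it from outside the container.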
CMD CUDA_VISIBLE_DEVICES=0 ./build/bin/llama-server \
-hf unsloth/Qwen3-30B-A3B-GGUF \
--cache-type-k q4_0 \
--threads 12 \
--prio 2 \
--n-gpu-layers 49 \
--seed 3407
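# Example build/run (image tag is illustrative; assumes --host 0.0.0.0 is
# added to the CMD above so the port is reachable):
#   docker build -t llamacpp-server .
#   docker run --gpus all -p 8080:8080 llamacpp-server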
# For reference, the equivalent manual build/run steps:
# git clone https://github.com/ggerganov/llama.cpp.git
# cd llama.cpp
# cmake -B build -DGGML_CUDA=ON -DLLAMA_CURL=ON
# cmake --build build --config Release
# CUDA_VISIBLE_DEVICES=0 ./build/bin/llama-server \
# -hf unsloth/Qwen3-30B-A3B-GGUF \
# --cache-type-k q4_0 \
# --threads 12 \
# --prio 2 \
# --n-gpu-layers 49 \
# --seed 3407