Spaces:

aetherbase
/

bitnet-ai

Sleeping

bitnet-ai / Dockerfile

0c45022 10 days ago

2.29 kB

	# Build stage: Ubuntu 22.04 with the compiler toolchain needed to build
	# llama-server.
	FROM ubuntu:22.04 AS builder

	# Keep apt from prompting during the build. Declared as ARG (not ENV) so the
	# value is available to RUN steps without being recorded as image environment.
	ARG DEBIAN_FRONTEND=noninteractive

	# Install the base build tools, register the apt.llvm.org repository and
	# install clang-18, exposing it as plain `clang` / `clang++`.
	# The repository key is written directly with `wget -O <file>`: the earlier
	# form used an escaped pipe (`\|`), which hands "|", "tee" and the file path
	# to wget as extra URLs, so the key file was never created. Writing the file
	# directly also means a failed download fails this step.
	# ca-certificates is listed explicitly because the key is fetched over HTTPS
	# and recommended packages are not installed.
	RUN apt-get update && apt-get install -y --no-install-recommends \
	build-essential \
	ca-certificates \
	cmake \
	git \
	gnupg \
	libomp-dev \
	python3 \
	python3-pip \
	software-properties-common \
	wget \
	&& wget -qO /etc/apt/trusted.gpg.d/llvm.asc https://apt.llvm.org/llvm-snapshot.gpg.key \
	&& add-apt-repository -y "deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-18 main" \
	&& apt-get update && apt-get install -y --no-install-recommends clang-18 \
	&& ln -s /usr/bin/clang-18 /usr/bin/clang \
	&& ln -s /usr/bin/clang++-18 /usr/bin/clang++ \
	&& rm -rf /var/lib/apt/lists/*

	# All remaining build-stage steps run from /build.
	WORKDIR /build

	# Clone the BitNet repository, including submodules, into the working directory.
	# NOTE(review): no tag or commit is pinned, so each uncached build uses whatever
	# the default branch points at; the sed patch below depends on the exact source text.
	RUN git clone --recursive https://github.com/microsoft/BitNet.git .

	# Install the gguf-py Python package from the llama.cpp submodule checkout.
	RUN pip3 install --no-cache-dir 3rdparty/llama.cpp/gguf-py

	# Patch src/ggml-bitnet-mad.cpp in place so that y_col is declared as
	# `const int8_t *` instead of `int8_t *`.
	# NOTE(review): presumably works around a const-correctness compile error
	# with clang — confirm. sed exits 0 even when the pattern no longer matches.
	RUN sed -i 's/int8_t \* y_col = y + col \* by;/const int8_t * y_col = y + col * by;/' src/ggml-bitnet-mad.cpp

	# Run the repository's codegen_tl2.py script for the bitnet_b1_58-3B model
	# with its three-entry BM / BK / bm parameter lists.
	RUN python3 utils/codegen_tl2.py --model bitnet_b1_58-3B \
	    --BM 160,320,320 --BK 96,96,96 --bm 32,32,32

	# Configure a Release build in ./build using clang/clang++ with
	# BITNET_X86_TL2 turned off, then compile only the llama-server target
	# using one job per available CPU.
	RUN cmake -S . -B build \
	    -DCMAKE_BUILD_TYPE=Release \
	    -DCMAKE_C_COMPILER=clang \
	    -DCMAKE_CXX_COMPILER=clang++ \
	    -DBITNET_X86_TL2=OFF \
	 && cmake --build build --config Release --target llama-server --parallel "$(nproc)"

	# Runtime stage: fresh Ubuntu 22.04 image without the build toolchain.
	FROM ubuntu:22.04

	# Only needed while apt runs during the build. ARG keeps it out of the
	# environment of the running container, where ENV would have persisted it.
	ARG DEBIAN_FRONTEND=noninteractive

	# Runtime packages: the GNU OpenMP runtime plus Python/pip, which a later
	# step uses to download the model.
	# NOTE(review): the builder stage compiles with clang and installs libomp-dev;
	# confirm whether llama-server needs LLVM's libomp at runtime rather than libgomp1.
	RUN apt-get update && apt-get install -y --no-install-recommends \
	libgomp1 \
	python3 \
	python3-pip \
	&& rm -rf /var/lib/apt/lists/*

	# Create the unprivileged runtime account (UID 1000) before any files are
	# added, so ownership can be assigned in the same layer that creates them.
	RUN useradd -m -u 1000 user

	# Create the model and application directories, download the GGUF model
	# from the Hugging Face Hub, and hand both trees to the runtime user in ONE
	# layer. Running `chown -R` in a later layer would store a second full copy
	# of the model file in the image.
	RUN mkdir -p /models /app/build/bin /app/build/lib && \
	pip3 install --no-cache-dir huggingface-hub && \
	python3 -c "from huggingface_hub import hf_hub_download; hf_hub_download(repo_id='microsoft/BitNet-b1.58-2B-4T-gguf', filename='ggml-model-i2_s.gguf', local_dir='/models')" && \
	chown -R user:user /app /models

	WORKDIR /app

	# Copy the server binary and its two shared libraries out of the builder
	# stage, already owned by the runtime user (no follow-up chown layer).
	COPY --from=builder --chown=user:user /build/build/bin/llama-server ./build/bin/llama-server
	COPY --from=builder --chown=user:user /build/build/3rdparty/llama.cpp/src/libllama.so ./build/lib/
	COPY --from=builder --chown=user:user /build/build/3rdparty/llama.cpp/ggml/src/libggml.so ./build/lib/

	# Let the dynamic loader find the copied shared libraries.
	ENV LD_LIBRARY_PATH=/app/build/lib

	# Drop root: everything from here on, including the server process, runs as `user`.
	USER user

	# Documents the port the server is told to listen on below; EXPOSE does not publish it.
	EXPOSE 7860

	# ENTRYPOINT fixes the binary and its listen address/port (path is relative to
	# WORKDIR /app). CMD supplies the default model path and tuning flags, which
	# can be replaced at `docker run` without touching the entrypoint.
	# NOTE(review): --mlock presumably asks the server to lock model memory, which
	# may be limited for a non-root user — confirm against the container's memlock limit.
	ENTRYPOINT ["./build/bin/llama-server", "--host", "0.0.0.0", "--port", "7860"]
	CMD ["-m", "/models/ggml-model-i2_s.gguf", "--mlock", "--parallel", "5"]