# bitnet-ai / Dockerfiles / Dockerfile.allcore
# first commit (3a74e13)
# -----------------------------------------------------------------------------
# Stage "builder": clones microsoft/BitNet and compiles the llama-server target
# with clang-18 from apt.llvm.org. Nothing from this stage reaches the final
# image except the artifacts copied out of /build/build.
# -----------------------------------------------------------------------------
FROM ubuntu:22.04 AS builder

# Build-time only: stops apt/debconf from prompting. Declared as ARG so the
# value is visible to RUN steps without being recorded as image environment.
ARG DEBIAN_FRONTEND=noninteractive

# Toolchain and build prerequisites (sorted alphabetically for easy diffing).
# ca-certificates is listed explicitly because the HTTPS key download and the
# git clone below need it and --no-install-recommends would not pull it in via
# wget/git on its own.
# The LLVM signing key is written straight to its destination with `wget -O`
# instead of piping through `tee`: the default /bin/sh has no pipefail, so a
# failed download in a pipeline would go unnoticed.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        git \
        gnupg \
        libomp-dev \
        python3 \
        python3-pip \
        software-properties-common \
        wget \
    && wget -qO /etc/apt/trusted.gpg.d/llvm.asc https://apt.llvm.org/llvm-snapshot.gpg.key \
    && add-apt-repository -y "deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-18 main" \
    && apt-get update && apt-get install -y --no-install-recommends clang-18 \
    && ln -sf /usr/bin/clang-18 /usr/bin/clang \
    && ln -sf /usr/bin/clang++-18 /usr/bin/clang++ \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /build

# Git ref (branch, tag or commit SHA) of microsoft/BitNet to build. The default
# "HEAD" keeps whatever the default branch currently points at; pass
# --build-arg BITNET_REF=<sha> for a reproducible build. Declared here, after
# the toolchain layer, so changing it does not invalidate the apt cache.
ARG BITNET_REF=HEAD
RUN git clone https://github.com/microsoft/BitNet.git . \
    && git checkout "${BITNET_REF}" \
    && git submodule update --init --recursive

# Python package from the llama.cpp submodule, installed before the code
# generator below runs.
RUN pip3 install --no-cache-dir 3rdparty/llama.cpp/gguf-py

# Source patch: add a `const` qualifier to the y_col pointer declaration in
# ggml-bitnet-mad.cpp. NOTE(review): presumably needed for the file to compile
# with clang-18; sed exits 0 even when the pattern no longer matches, so
# re-check this line whenever BITNET_REF is moved.
RUN sed -i 's/int8_t \* y_col = y + col \* by;/const int8_t * y_col = y + col * by;/' src/ggml-bitnet-mad.cpp

# Run the TL2 code generator with the tile-size presets for bitnet_b1_58-3B.
# NOTE(review): this runs even though BITNET_X86_TL2 is OFF below — presumably
# the generated sources are still required by the build; confirm upstream.
RUN python3 utils/codegen_tl2.py \
        --model bitnet_b1_58-3B \
        --BM 160,320,320 \
        --BK 96,96,96 \
        --bm 32,32,32

# Configure a Release build with clang and compile only the llama-server target
# (and whatever it depends on), using all available cores.
RUN cmake -B build \
        -DBITNET_X86_TL2=OFF \
        -DCMAKE_C_COMPILER=clang \
        -DCMAKE_CXX_COMPILER=clang++ \
        -DCMAKE_BUILD_TYPE=Release \
    && cmake --build build --config Release -j"$(nproc)" --target llama-server
# -----------------------------------------------------------------------------
# Final stage: minimal runtime image containing only llama-server and the two
# shared libraries copied from the "builder" stage.
# -----------------------------------------------------------------------------
FROM ubuntu:22.04

# Build-time only: ARG (not ENV) so the setting does not leak into the
# environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime shared-library dependency.
# NOTE(review): the builder compiles with clang and installs libomp-dev; if the
# resulting binaries link against LLVM's libomp.so.5 rather than libgomp, this
# stage would also need the libomp5 package — verify with `ldd` on the binary.
RUN apt-get update && apt-get install -y --no-install-recommends \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/*

# Dedicated unprivileged account with a fixed numeric UID/GID so orchestrators
# can verify the container is non-root.
RUN groupadd --system --gid 10001 app \
    && useradd --system --uid 10001 --gid app --no-create-home \
        --home-dir /app --shell /usr/sbin/nologin app

WORKDIR /app

# Server binary plus the shared libraries it is shipped with; the libraries go
# into ./build/lib, which LD_LIBRARY_PATH below points at.
COPY --from=builder /build/build/bin/llama-server ./build/bin/llama-server
COPY --from=builder /build/build/3rdparty/llama.cpp/src/libllama.so ./build/lib/
COPY --from=builder /build/build/3rdparty/llama.cpp/ggml/src/libggml.so ./build/lib/
ENV LD_LIBRARY_PATH=/app/build/lib

# Drop root for the server process. Port 8080 is unprivileged, so binding works
# as a normal user. The default --mlock flag needs a sufficient memlock limit:
# run with `--ulimit memlock=-1:-1` (or override with `--user 0`) if locking
# the model in RAM is required.
USER 10001:10001

# Documentation of the listening port; publish it with `-p` at run time.
EXPOSE 8080

# ENTRYPOINT fixes the binary and bind address/port; CMD supplies default
# arguments that can be replaced on the `docker run` command line.
ENTRYPOINT ["./build/bin/llama-server", "--host", "0.0.0.0", "--port", "8080"]
CMD ["--mlock", "--parallel", "5"]