Spaces:
Sleeping
Sleeping
| # ---- Stage 1 (builder): toolchain used to compile llama-server ---- | |
| FROM ubuntu:22.04 AS builder | |
| # Keep apt from prompting during the image build. | |
| ENV DEBIAN_FRONTEND=noninteractive | |
| # Install the build tools, register the apt.llvm.org repository for jammy and | |
| # install clang-18, exposing it as plain `clang` / `clang++`. | |
| # - ca-certificates is listed explicitly because the key is fetched over HTTPS | |
| #   and --no-install-recommends must not be relied on to pull it in indirectly. | |
| # - The signing key is written directly with `wget -qO <file>`: unlike the | |
| #   `wget | tee` pipeline, a failed download now stops the && chain, and the | |
| #   key is no longer echoed into the build log. | |
| RUN apt-get update && apt-get install -y --no-install-recommends \ | |
| ca-certificates \ | |
| cmake \ | |
| build-essential \ | |
| python3 \ | |
| python3-pip \ | |
| git \ | |
| wget \ | |
| software-properties-common \ | |
| gnupg \ | |
| libomp-dev \ | |
| && wget -qO /etc/apt/trusted.gpg.d/llvm.asc https://apt.llvm.org/llvm-snapshot.gpg.key \ | |
| && add-apt-repository -y "deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-18 main" \ | |
| && apt-get update && apt-get install -y --no-install-recommends clang-18 \ | |
| && ln -s /usr/bin/clang-18 /usr/bin/clang \ | |
| && ln -s /usr/bin/clang++-18 /usr/bin/clang++ \ | |
| && rm -rf /var/lib/apt/lists/* | |
| # All following builder steps run from /build, which receives the clone. | |
| WORKDIR /build | |
| # Clone BitNet with its submodules; this provides the 3rdparty/llama.cpp tree. | |
| RUN git clone --recursive https://github.com/microsoft/BitNet.git . | |
| # Install the gguf Python package from the llama.cpp submodule checkout. | |
| RUN pip3 install --no-cache-dir 3rdparty/llama.cpp/gguf-py | |
| # Const-qualify the y_col pointer declaration in src/ggml-bitnet-mad.cpp. | |
| # The clone above is not pinned, so the negated address skips any line that is | |
| # already const-qualified; without it the substitution would match the tail of | |
| # such a line and produce "const const int8_t". | |
| RUN sed -i '/const int8_t \* y_col = y + col \* by;/!s/int8_t \* y_col = y + col \* by;/const int8_t * y_col = y + col * by;/' src/ggml-bitnet-mad.cpp | |
| # Run the TL2 code generator script with the bitnet_b1_58-3B parameter set. | |
| RUN python3 utils/codegen_tl2.py --model bitnet_b1_58-3B \ | |
| --BM 160,320,320 --BK 96,96,96 --bm 32,32,32 | |
| # Configure a Release build with clang (BITNET_X86_TL2 switched off), then | |
| # build only the llama-server target using all available cores. | |
| RUN cmake -B build \ | |
| -DCMAKE_BUILD_TYPE=Release \ | |
| -DCMAKE_C_COMPILER=clang \ | |
| -DCMAKE_CXX_COMPILER=clang++ \ | |
| -DBITNET_X86_TL2=OFF \ | |
| && cmake --build build --target llama-server --config Release -j$(nproc) | |
| # ---- Stage 2 (runtime): image that serves the model ---- | |
| FROM ubuntu:22.04 | |
| # Keep apt from prompting during the image build. | |
| ENV DEBIAN_FRONTEND=noninteractive | |
| # Runtime packages: the GNU OpenMP runtime plus Python/pip, which the model | |
| # download step needs. The apt lists are removed in the same layer. | |
| # NOTE(review): the builder compiles with clang and installs libomp-dev; | |
| # confirm with `ldd` which OpenMP runtime llama-server actually links against. | |
| RUN apt-get update \ | |
| && apt-get install -y --no-install-recommends libgomp1 python3 python3-pip \ | |
| && rm -rf /var/lib/apt/lists/* | |
| # Create the unprivileged runtime user (uid 1000) before any layer that needs | |
| # to hand files over to it. | |
| RUN useradd -m -u 1000 user | |
| # Download the GGUF model into /models and give the directory to `user` inside | |
| # the SAME layer. Running chown -R on /models in a later RUN would copy the | |
| # whole model file up into a second layer and store it in the image twice. | |
| RUN mkdir -p /models && \ | |
| pip3 install --no-cache-dir huggingface-hub && \ | |
| python3 -c "from huggingface_hub import hf_hub_download; hf_hub_download(repo_id='microsoft/BitNet-b1.58-2B-4T-gguf', filename='ggml-model-i2_s.gguf', local_dir='/models')" && \ | |
| chown -R user:user /models | |
| WORKDIR /app | |
| # Bring over the server binary and the two shared libraries built in stage 1. | |
| COPY --from=builder /build/build/bin/llama-server ./build/bin/llama-server | |
| COPY --from=builder /build/build/3rdparty/llama.cpp/src/libllama.so ./build/lib/ | |
| COPY --from=builder /build/build/3rdparty/llama.cpp/ggml/src/libggml.so ./build/lib/ | |
| # Let the dynamic loader find libllama.so / libggml.so copied above. | |
| ENV LD_LIBRARY_PATH=/app/build/lib | |
| # /app contains only the binary and the two libraries copied above. | |
| RUN chown -R user:user /app | |
| # Drop root for everything that runs in the container. | |
| USER user | |
| EXPOSE 7860 | |
| # ENTRYPOINT fixes the binary, bind address and port; CMD holds the default | |
| # model/runtime arguments, which can be replaced on `docker run`. | |
| ENTRYPOINT ["./build/bin/llama-server", "--host", "0.0.0.0", "--port", "7860"] | |
| CMD ["-m", "/models/ggml-model-i2_s.gguf", "--mlock", "--parallel", "5"] | |