# NuWave HuggingFace Space — Docker image.
#
# Stack:
#   - BitNet b1.58 2B4T (GGUF, i2_s quant) for user-facing chat
#   - Falcon3-10B-Instruct 1.58bit (GGUF) for concept extraction
#   - bitnet.cpp (microsoft/BitNet) as the inference runtime
#   - ng_tract (Rust BTF) for substrate tracts
#   - gradio for the UI
#
# Why bitnet.cpp instead of transformers bf16: to actually deliver the
# "CPU-native ternary-weight inference" claim in NuWave's architecture
# docs. Running BitNet through transformers gives BitNet's training
# quality but none of its inference-efficiency benefits. The bitnet.cpp
# runtime uses specialized ternary kernels — ~16× memory reduction and
# major throughput gains on CPU, which is what the whole NuWave thesis
# rests on.
#
# Why Falcon3-10B for extraction: native BitNet 2B on greedy decoding
# collapses into repetition loops ("cycle, cycle, cycle") on enumeration
# tasks. Falcon3-10B-Instruct was properly instruct-tuned before being
# quantized to 1.58-bit — inherits Falcon's enumeration capability,
# delivers through bitnet.cpp's fast kernels.

FROM python:3.12-slim

USER root

# System deps — build toolchain for bitnet.cpp, runtime libs for everything.
#   libgomp1                — onnxruntime
#   git                     — cloning bitnet.cpp repo + huggingface_hub's git backend
#   g++ / clang             — compiling bitnet.cpp's C++ kernels
#   cmake                   — bitnet.cpp build system
#   build-essential         — make, etc.
#   ca-certificates / curl  — HTTPS downloads
#   pkg-config / libcurl4-openssl-dev / libssl-dev
#                           — NOTE(review): presumably headers/discovery for
#                             the llama.cpp build; confirm they are required.
# Packages are listed alphabetically to keep diffs small.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        clang \
        cmake \
        curl \
        g++ \
        git \
        libcurl4-openssl-dev \
        libgomp1 \
        libssl-dev \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*

# HF Spaces convention: non-root user with UID 1000
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# ── bitnet.cpp clone + build ────────────────────────────────────────
# Clone microsoft/BitNet with submodules (bitnet.cpp's custom llama.cpp fork).
#
# By default this builds whatever the upstream default branch currently
# points at, which is NOT reproducible. For a reproducible image, pin a
# commit/tag at build time:
#     docker build --build-arg BITNET_REF=<commit-sha> .
# When BITNET_REF is set, the checkout is moved to that ref and the
# submodules are re-synced to match it. Bump the ref when a new release
# is needed.
ARG BITNET_REF=""
WORKDIR /home/user/bitnet
RUN git clone --recursive https://github.com/microsoft/BitNet.git /home/user/bitnet \
    && if [ -n "${BITNET_REF}" ]; then \
           git -C /home/user/bitnet checkout "${BITNET_REF}" \
           && git -C /home/user/bitnet submodule update --init --recursive; \
       fi

# Install BitNet's Python build/utility deps (gguf, huggingface_hub,
# numpy, torch-cpu, etc. — used by their conversion + download scripts,
# which we're about to bypass for the compile, but keep for ggml tools).
RUN pip install --no-cache-dir --user -r /home/user/bitnet/requirements.txt

# Patch upstream const-correctness bug in ggml-bitnet-mad.cpp.
# Modern gcc-14 / clang-19 rejects `int8_t * y_col = y + col * by;`
# because y is declared `const int8_t *` a few lines above. These
# vec-dot functions only READ y, so const-qualifying y_col is the
# safe + minimal fix. Upstream likely builds with an older compiler
# that downgraded this to a warning. Remove this patch if/when
# microsoft/BitNet fixes this in their source.
#
# The trailing grep prints how many lines carry the const-qualified form.
# NOTE(review): because of shell precedence, `|| true` also swallows a sed
# failure (e.g. the file moved upstream); a broken patch then only surfaces
# at compile time in the next step.
RUN sed -i \
        's|int8_t \* y_col = y + col \* by;|const int8_t * y_col = y + col * by;|g' \
        /home/user/bitnet/src/ggml-bitnet-mad.cpp \
    && grep -c "const int8_t \* y_col" /home/user/bitnet/src/ggml-bitnet-mad.cpp || true

# Run setup_env.py — REQUIRED. It codegens bitnet-lut-kernels.h before
# cmake, which cmake can't build without. The quant flag (-q i2_s /
# tl1 / tl2) drives which ternary lookup table gets generated. On
# failure, dump every cmake + compile log we can find so the next
# build iteration has signal instead of just "check details in
# logs/compile.log" (useless in a headless container).
#
# CMAKE_BUILD_PARALLEL_LEVEL=2 caps compile parallelism so LLVM-heavy
# kernel compilation doesn't OOM the HF build runner. setup_env.py
# forwards this env var through to cmake.
ENV CMAKE_BUILD_PARALLEL_LEVEL=2

# setup_env.py does three things: (1) codegens bitnet-lut-kernels.h
# from the quant flag, (2) configures+builds llama-cli via cmake,
# (3) downloads safetensors + converts to GGUF + quantizes. Only
# steps 1-2 are load-bearing for us — step 3 is currently broken
# upstream (convert-hf-to-gguf-bitnet.py doesn't recognize
# BitNetForCausalLM architecture), and we don't need it anyway
# because the pre-converted GGUF is published at a separate HF repo.
#
# Allow setup_env.py to fail at the convert step with `|| true`.
# Then verify the binary actually exists — if codegen or compile
# failed, that's still a hard failure we need to see.
#
# Runs from WORKDIR (/home/user/bitnet), so no explicit `cd` is needed.
# The log dump matches every *.log under the checkout, which covers
# CMakeError.log, CMakeOutput.log and compile.log. `IFS= read -r` keeps
# paths containing backslashes or leading whitespace intact.
RUN (python setup_env.py --hf-repo microsoft/BitNet-b1.58-2B-4T -q i2_s || true) \
    && (test -f /home/user/bitnet/build/bin/llama-cli || \
        (echo "==================== BINARY NOT BUILT ====================" && \
         find /home/user/bitnet -name "*.log" 2>/dev/null | \
             while IFS= read -r f; do echo "=== $f ===" && cat "$f" 2>/dev/null || true; done ; \
         echo "==================== END LOGS ====================" && \
         exit 1)) \
    && echo "llama-cli built successfully: $(ls -la /home/user/bitnet/build/bin/llama-cli)"

# Download BOTH GGUF files directly from their pre-built HF repos.
# Avoids the broken local-conversion path that setup_env.py attempts.
# Fetch only the *.gguf artifacts of each pre-converted model repo into its
# own directory under /home/user/models. The pairs are (HF repo id, target
# directory) and are processed in order: chat model first, extractor second.
RUN python -c "\
from huggingface_hub import snapshot_download as fetch; \
[fetch(repo, local_dir=dest, allow_patterns=['*.gguf']) \
    for repo, dest in ( \
        ('microsoft/bitnet-b1.58-2B-4T-gguf', '/home/user/models/bitnet-2b-gguf'), \
        ('tiiuae/Falcon3-10B-Instruct-1.58bit-GGUF', '/home/user/models/falcon3-10b-gguf'), \
    )]"

# Locations handed to app.py through the environment: the llama-cli binary
# and the two model directories populated by the download step. Per the
# original note, BitnetCppClient.resolve_gguf locates the actual .gguf file
# inside each directory with a recursive glob at runtime.
ENV BITNET_CPP_BINARY=/home/user/bitnet/build/bin/llama-cli \
    BITNET_CHAT_GGUF_DIR=/home/user/models/bitnet-2b-gguf \
    FALCON_EXTRACTOR_GGUF_DIR=/home/user/models/falcon3-10b-gguf

# ── Python app deps + repo ──────────────────────────────────────────
WORKDIR /app

# Dependency manifest goes in before the source tree so the pip layer is
# reused whenever only application code changes.
COPY --chown=user:user requirements.txt /app/requirements.txt
RUN pip install --user --no-cache-dir --requirement /app/requirements.txt

# Application source (includes the vendored ng_tract wheel).
COPY --chown=user:user . /app

# Install the vendored ng_tract wheel on top of whatever is already present,
# without touching its dependencies.
RUN pip install --user --no-cache-dir --no-deps --force-reinstall \
        /app/ng_tract-0.1.0-cp312-abi3-manylinux_2_34_x86_64.whl

# Gradio listens on all interfaces, port 7860.
ENV GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_SERVER_PORT=7860

EXPOSE 7860

CMD ["python", "app.py"]