#!/bin/bash
# Build and deploy ternary inference engine
# (c) 2026 OpenTransformers Ltd / Scott Bisset
#
# Steps:
#   1. Compile ternary_kernel.c into a shared library (AVX-512).
#   2. Download the HuggingFace checkpoint into $MODEL_HF_DIR.
#   3. Convert the checkpoint to the ternary format via convert.py.
#   4. List the converted output and report its size.
#
# ternary_kernel.c, convert.py and inference.py are expected to already be
# present in $WORKDIR; this script does not fetch them.

# -e: stop on the first failing step; -u: treat unset variables as errors.
set -eu

WORKDIR=/root/ternary_engine
MODEL_HF=deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
MODEL_HF_DIR=$WORKDIR/deepseek-r1-1.5b-hf
TERNARY_DIR=$WORKDIR/deepseek-r1-1.5b-ternary

echo "=== Ternary Inference Engine Build ==="
echo "Target: AVX-512 Skylake"
echo ""

mkdir -p "$WORKDIR"
cd "$WORKDIR"

# Step 1: Compile C kernel with AVX-512
echo "[1/4] Compiling AVX-512 kernel..."
# -lm goes AFTER the source file: the linker resolves libraries in command-line
# order, so a library listed before the objects that need it may be dropped.
gcc -O3 -march=skylake-avx512 -mavx512f -mavx512bw -mavx512dq -mavx512vl \
  -shared -fPIC \
  -o ternary_kernel.so ternary_kernel.c \
  -lm
echo "  -> ternary_kernel.so built"
ls -lh ternary_kernel.so

# Step 2: Download model from HuggingFace
echo ""
echo "[2/4] Downloading model weights..."
# Install through the same interpreter that runs the snippets below, and keep
# stderr visible so a failed install explains itself before set -e aborts.
# huggingface_hub is listed explicitly because the download imports it.
python3 -m pip install --break-system-packages -q \
  safetensors tokenizers huggingface_hub

# Repo id and target directory are passed as argv instead of being spliced
# into the Python source, so quotes or spaces in them cannot break the code.
python3 - "$MODEL_HF" "$MODEL_HF_DIR" <<'PY'
import sys

from huggingface_hub import snapshot_download

repo_id, local_dir = sys.argv[1:3]
snapshot_download(repo_id, local_dir=local_dir,
                  ignore_patterns=['*.md', '*.txt', 'figures/*'])
print('Download complete')
PY

# Step 3: Convert to ternary
echo ""
echo "[3/4] Converting to ternary format..."
# Third argument (0.7) is forwarded to convert.py unchanged; its meaning is
# defined there (NOTE(review): presumably a ternarisation threshold - confirm).
python3 convert.py "$MODEL_HF_DIR" "$TERNARY_DIR" 0.7

# Step 4: Verify
echo ""
echo "[4/4] Verifying..."
# Informational listing only: show at most the first 20 entries.
ls -lh "$TERNARY_DIR/" | head -20
echo ""
du -sh "$TERNARY_DIR/"
echo ""

# Quick test
echo "Running speed test..."
# Single-token forward-pass benchmark using the freshly built kernel and the
# converted model. Relies on WORKDIR / TERNARY_DIR / MODEL_HF_DIR being set
# earlier in this script and on the current directory containing inference.py.
# Paths are handed over as argv rather than interpolated into the Python
# source, so unusual characters in them cannot corrupt the snippet.
python3 - "$WORKDIR/ternary_kernel.so" "$TERNARY_DIR" <<'PY'
import sys
import time

from inference import KVCache, TernaryQwen, load_kernel

kernel_path, model_dir = sys.argv[1:3]

kernel = load_kernel(kernel_path)
model = TernaryQwen(model_dir, kernel)

# Token id fed to every forward pass ('Hello' according to the original note).
TOKEN_ID = 9707


def fresh_cache():
    """Build an empty KV cache sized for the loaded model."""
    return KVCache(model.n_layers, model.n_kv, model.head_dim)


# Warm up: one untimed pass so one-off setup cost is not measured.
model.forward_token(TOKEN_ID, fresh_cache(), 0)

# Benchmark single token: five timed passes, each starting from an empty cache
# at position 0. perf_counter is the monotonic high-resolution clock intended
# for measuring short durations.
times = []
for _ in range(5):
    cache = fresh_cache()
    t0 = time.perf_counter()
    model.forward_token(TOKEN_ID, cache, 0)
    times.append(time.perf_counter() - t0)

avg = sum(times) / len(times)
print(f'Single token forward: {avg*1000:.1f}ms ({1/avg:.1f} tok/s)')
print(f'Times: {[f"{t*1000:.1f}ms" for t in times]}')
PY

echo ""
echo "=== Build complete ==="
echo "To start server: cd $WORKDIR && TERNARY_MODEL_DIR=$TERNARY_DIR TOKENIZER_DIR=$MODEL_HF_DIR python3 server.py"