#!/usr/bin/env bash
# bench_tg.sh — stable TG measurement: N runs × N_PREDICT tokens, followed by
# a min / median / mean / max summary of the per-run decode throughput.
#
# Usage: ./scripts/bench_tg.sh [N_RUNS]       (default 5)
#        LCA_WARMUP=3 ./scripts/bench_tg.sh   (with warmup enabled)
#
# Environment:
#   MODEL_DIR   model directory handed to the binary via --model-dir
#   N_PREDICT   tokens generated per run (default 200)
#   LCA_WARMUP  only echoed by this script; presumably read by the launched
#               binary to warm up before timing — TODO confirm
#
# NOTE: this script does not discard any successful run itself. Runs whose
# output contains no usable "decode :" figure are reported as FAILED and left
# out of the statistics instead of being counted as 0 t/s.
#
# Exit status: 0 if at least one run produced a figure, 1 if none did (or a
# precondition failed), 2 on a bad N_RUNS argument.

# No `set -e`: a run without a "decode :" line is an expected condition that
# is handled explicitly below rather than aborting the whole benchmark.
set -u

MODEL="${MODEL_DIR:-/path/to/Qwen3-235B-A22B-Instruct-2507-BF16}"
BIN="./build/qwen3-moe-aclnn"
LAUNCHER="./scripts/tp_launch.sh"
N_PREDICT="${N_PREDICT:-200}"
PROMPT="The history of artificial intelligence spans several decades and"
VOCAB="tokenizer_data/vocab.bin"

#######################################
# Print a diagnostic to stderr.
# Arguments: message words
#######################################
err() {
  printf 'bench_tg: %s\n' "$*" >&2
}

#######################################
# Extract the throughput figure from launcher output.
# Reads stdin, finds the first line containing "decode :" and prints its
# third-from-last whitespace-separated field. Prints nothing if there is no
# such line. The whole input is consumed (no early exit) so the producer
# never sees a closed pipe.
#######################################
extract_tg() {
  LC_ALL=C awk '
    /decode :/ && NF >= 3 && !seen { val = $(NF - 2); seen = 1 }
    END { if (seen) print val }
  '
}

#######################################
# Compute summary statistics for a list of numbers.
# Arguments: one or more numeric values
# Outputs:   "MIN MEDIAN MEAN MAX" on one line. MIN/MAX (and MEDIAN for an
#            odd count) are echoed exactly as given; MEAN, and MEDIAN for an
#            even count (average of the two middle values), use 2 decimals.
# Returns:   1 if called without values.
#######################################
compute_stats() {
  (($# > 0)) || return 1
  # LC_ALL=C keeps "." as the decimal separator for both sort and awk.
  printf '%s\n' "$@" | LC_ALL=C sort -n | LC_ALL=C awk '
    { v[NR] = $1; sum += $1 }
    END {
      if (NR % 2) {
        median = v[(NR + 1) / 2]
      } else {
        median = sprintf("%.2f", (v[NR / 2] + v[NR / 2 + 1]) / 2)
      }
      printf "%s %s %.2f %s\n", v[1], median, sum / NR, v[NR]
    }
  '
}

#######################################
# Run the benchmark N_RUNS times and print the summary.
# Globals:   MODEL, BIN, LAUNCHER, N_PREDICT, PROMPT, VOCAB (read)
# Arguments: $1 - number of runs (optional, default 5)
# Returns:   0 on success, 1 if no run yielded a figure, 2 on bad argument
#######################################
main() {
  local n_runs="${1:-5}"
  if ! [[ "$n_runs" =~ ^[1-9][0-9]*$ ]]; then
    err "N_RUNS must be a positive integer, got '$n_runs'"
    return 2
  fi

  # All paths above are relative to the repository root (parent of scripts/).
  cd "$(dirname "$0")/.." || {
    err "cannot cd to repository root"
    return 1
  }
  if [[ ! -x "$LAUNCHER" ]]; then
    err "launcher not found or not executable: $LAUNCHER"
    return 1
  fi

  echo "bench_tg: $n_runs runs × $N_PREDICT tokens (LCA_WARMUP=${LCA_WARMUP:-0})"

  local -a tgs=()
  local run tg failed=0
  for ((run = 1; run <= n_runs; run++)); do
    # First launcher argument (16) is kept from the original invocation;
    # presumably the number of parallel ranks — TODO confirm.
    tg=$("$LAUNCHER" 16 "$BIN" --model-dir "$MODEL" \
      --prompt "$PROMPT" --n-predict "$N_PREDICT" \
      --vocab "$VOCAB" --seed 0 2>&1 | extract_tg)
    if [[ "$tg" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
      printf " run %d: %s t/s\n" "$run" "$tg"
      tgs+=("$tg")
    else
      printf " run %d: FAILED (no decode throughput in output)\n" "$run"
      failed=$((failed + 1))
    fi
  done

  # Check the count before expanding the array: "${tgs[@]}" on an empty
  # array trips `set -u` on older bash releases.
  if ((${#tgs[@]} == 0)); then
    err "no run produced a decode throughput figure"
    return 1
  fi

  local stats min median mean max
  stats=$(compute_stats "${tgs[@]}")
  read -r min median mean max <<<"$stats"

  echo ""
  echo "====== Summary ======"
  echo " all : ${tgs[*]}"
  echo " min : $min t/s"
  echo " median : $median t/s"
  echo " mean : $mean t/s"
  echo " max : $max t/s"
  if ((failed > 0)); then
    echo " failed : $failed of $n_runs runs"
  fi
  return 0
}

main "$@"