#!/usr/bin/env bash
# bench_tg.sh — stable TG measurement: N runs × N_PREDICT tokens,
# report min / median / mean / max of the per-run decode throughput.
#
# Usage: ./scripts/bench_tg.sh [N_RUNS] (default 5)
#        LCA_WARMUP=3 ./scripts/bench_tg.sh (with warmup enabled)
#
# Environment:
#   MODEL_DIR   model directory passed as --model-dir (default: placeholder below)
#   N_PREDICT   tokens generated per run (default 200)
#   LCA_WARMUP  not interpreted by this script: it is only echoed in the banner
#               and inherited by the launched processes.
#               NOTE(review): presumably the binary uses it to drop cold-start
#               iterations — confirm; this script itself discards no runs.
#
# A run whose output has no parsable "decode :" line is reported as FAILED on
# stderr and excluded from the statistics (instead of being counted as 0 t/s).
# Exit status: 0 if at least one run succeeded, 1 if none did, 2 on bad usage.
set -u

MODEL="${MODEL_DIR:-/path/to/Qwen3-235B-A22B-Instruct-2507-BF16}"
BIN="./build/qwen3-moe-aclnn"
N_RUNS="${1:-5}"
N_PREDICT="${N_PREDICT:-200}"
PROMPT="The history of artificial intelligence spans several decades and"
VOCAB="tokenizer_data/vocab.bin"

#######################################
# Extract the throughput figure from launcher output.
# Reads stdin; prints the third-from-last field of the FIRST line containing
# "decode :". Later matches are ignored so one run yields at most one sample.
# NOTE(review): assumes the first "decode :" line is the one of interest when
# several processes print one — confirm against the binary's output.
# The whole stream is consumed (no early exit) so the producer never gets SIGPIPE.
#######################################
extract_tg() {
  awk '/decode :/ && NF >= 3 && !found { print $(NF-2); found = 1 }'
}

#######################################
# Print the summary block for a list of throughput samples.
# Arguments: one numeric sample per argument (at least one required).
# Outputs:   "all / min / median / mean / max" lines on stdout.
# Returns:   0 on success, 1 if called without samples.
#######################################
summarize_tg() {
  (( $# > 0 )) || return 1
  echo " all : $*"
  # LC_ALL=C: force '.' as the decimal separator for both sort -n and awk.
  printf '%s\n' "$@" | LC_ALL=C sort -n | LC_ALL=C awk '
    { v[NR] = $1; sum += $1 }
    END {
      if (NR % 2) {
        med = v[(NR + 1) / 2]          # odd count: the middle sample, verbatim
      } else {
        # even count: average of the two middle samples
        med = sprintf("%.2f", (v[NR / 2] + v[NR / 2 + 1]) / 2)
      }
      printf " min : %s t/s\n", v[1]
      printf " median : %s t/s\n", med
      printf " mean : %.2f t/s\n", sum / NR
      printf " max : %s t/s\n", v[NR]
    }'
}

#######################################
# Run the benchmark N_RUNS times and print the summary.
# Globals:  MODEL, BIN, N_RUNS, N_PREDICT, PROMPT, VOCAB (read)
# Returns:  0 if at least one run produced a sample, 1 otherwise, 2 on bad N_RUNS.
#######################################
main() {
  local r tg failed=0
  local -a tgs=()

  # Reject 0, negatives, non-numbers and leading zeros (which (( )) reads as octal).
  if [[ ! "$N_RUNS" =~ ^[1-9][0-9]*$ ]]; then
    printf 'bench_tg: N_RUNS must be a positive integer, got "%s"\n' "$N_RUNS" >&2
    return 2
  fi

  # All relative paths below are resolved from the repository root.
  if ! cd "$(dirname "$0")/.."; then
    echo "bench_tg: cannot cd to repository root" >&2
    return 1
  fi

  # Fail fast instead of recording N_RUNS failed runs.
  if [[ ! -x ./scripts/tp_launch.sh ]]; then
    echo "bench_tg: ./scripts/tp_launch.sh not found or not executable" >&2
    return 1
  fi

  echo "bench_tg: $N_RUNS runs × $N_PREDICT tokens (LCA_WARMUP=${LCA_WARMUP:-0})"

  for (( r = 1; r <= N_RUNS; r++ )); do
    tg=$(./scripts/tp_launch.sh 16 "$BIN" --model-dir "$MODEL" \
      --prompt "$PROMPT" --n-predict "$N_PREDICT" \
      --vocab "$VOCAB" --seed 0 2>&1 | extract_tg)
    if [[ "$tg" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
      printf " run %d: %s t/s\n" "$r" "$tg"
      tgs+=("$tg")
    else
      # Keep failed runs out of the statistics; a 0 would skew every figure.
      printf ' run %d: FAILED (no parsable "decode :" line)\n' "$r" >&2
      failed=$(( failed + 1 ))
    fi
  done

  echo ""
  echo "====== Summary ======"
  if (( failed > 0 )); then
    printf ' failed : %d of %d runs (excluded from statistics)\n' \
      "$failed" "$N_RUNS" >&2
  fi
  # Check the count before expanding: "${tgs[@]}" on an empty array trips
  # `set -u` on older bash versions.
  if (( ${#tgs[@]} == 0 )); then
    echo "bench_tg: no successful runs, nothing to summarize" >&2
    return 1
  fi
  summarize_tg "${tgs[@]}"
}

# Last command of the script: its return value becomes the exit status.
main "$@"