#!/usr/bin/env bash
#
# Sweeps HCCL_ALGO x HCCL_BUFFSIZE settings through the launcher script and
# records, for every combination, the value parsed from each run's "decode :"
# output line in a CSV file.
#
# Environment overrides (all optional):
#   MODEL_DIR  directory passed as --model-dir
#   TP_SIZE    first argument handed to the launcher (default 16)
#   N_PREDICT  value passed as --n-predict (default 150)
#   N_RUNS     repetitions per (algo, buffsize) pair (default 2)
#   PROMPT     text passed as --prompt
#   OUT_CSV    result file (default /tmp/bench_hccl_results.csv)

set -u

# BIN, LAUNCH and VOCAB below are relative paths resolved from the parent of
# this script's directory, so abort instead of sweeping from the wrong place.
cd "$(dirname "$0")/.." || {
  printf 'error: cannot cd to %s/..\n' "$(dirname "$0")" >&2
  exit 1
}

MODEL="${MODEL_DIR:-/path/to/Qwen3-235B-A22B-Instruct-2507-BF16}"
BIN="./build/qwen3-moe-aclnn"
LAUNCH="./scripts/tp_launch.sh"
TP="${TP_SIZE:-16}"
N_PREDICT="${N_PREDICT:-150}"
N_RUNS="${N_RUNS:-2}"
PROMPT="${PROMPT:-The history of artificial intelligence spans several decades and}"
VOCAB="tokenizer_data/vocab.bin"

# The default location is a fixed name in /tmp and is truncated on every
# invocation; set OUT_CSV to write somewhere else (e.g. on a shared host).
OUT="${OUT_CSV:-/tmp/bench_hccl_results.csv}"
echo "algo,buffsize,runs,best_tgs" > "$OUT" || {
  printf 'error: cannot write %s\n' "$OUT" >&2
  exit 1
}

#######################################
# Runs the benchmark N_RUNS times for one (algo, buffsize) pair, appends one
# CSV row to OUT and prints a one-line summary.
# Globals:
#   read:    LAUNCH TP BIN MODEL PROMPT N_PREDICT VOCAB N_RUNS OUT
#   written: HCCL_ALGO, HCCL_BUFFSIZE (exported; they stay set after return)
# Arguments:
#   $1 - value for HCCL_ALGO; an empty value is displayed as "(auto)"
#   $2 - value for HCCL_BUFFSIZE
# Outputs:
#   stdout - summary line with every reading and the best one
#   stderr - a warning for each run whose reading is missing or non-numeric
#   OUT    - row "<algo>,<buf>,<run1>|<run2>|...,<best>"
#######################################
run_one() {
  local algo="$1" buf="$2"
  local -a tgs=()
  local r out
  local best=0 runs_field=""
  # Plain decimals plus optional sign/exponent; anything else is not a reading.
  local num_re='^[+-]?([0-9]+[.]?[0-9]*|[.][0-9]+)([eE][+-]?[0-9]+)?$'

  # Identical for every repetition, so set once instead of per iteration.
  # NOTE(review): an empty algo is exported as an empty string rather than
  # unset -- confirm the HCCL runtime treats that as automatic selection.
  export HCCL_ALGO="$algo" HCCL_BUFFSIZE="$buf"

  for ((r = 1; r <= N_RUNS; r++)); do
    # Third-from-last field of the "decode :" line. If several lines match,
    # only the last is kept so the value can never contain a newline (which
    # would split the CSV row).
    out=$("$LAUNCH" "$TP" "$BIN" --model-dir "$MODEL" \
        --prompt "$PROMPT" --n-predict "$N_PREDICT" \
        --vocab "$VOCAB" --seed 0 2>&1 \
      | awk '/decode :/ { v = $(NF-2) } END { print v }')

    if [[ ! "$out" =~ $num_re ]]; then
      printf 'warning: algo=%s buf=%s run %s: no numeric "decode :" reading (got "%s"); recording 0\n' \
        "${algo:-(auto)}" "$buf" "$r" "$out" >&2
      out=0
    fi
    tgs+=("$out")
  done

  if ((${#tgs[@]} > 0)); then
    # LC_ALL=C pins '.' as the decimal separator regardless of user locale.
    best=$(printf '%s\n' "${tgs[@]}" | LC_ALL=C sort -g | tail -n 1)
    # Join only the readings with '|'; algo and buf are written untouched.
    runs_field=$(IFS='|'; printf '%s' "${tgs[*]}")
  fi

  printf '%s,%s,%s,%s\n' "$algo" "$buf" "$runs_field" "$best" >> "$OUT" \
    || printf 'warning: could not append to %s\n' "$OUT" >&2

  printf " %-22s buf=%-4s %s best=%s\n" \
    "${algo:-(auto)}" "$buf" "${tgs[*]-}" "$best"
}

# Sweep matrix. The empty HCCL_ALGO entry is the one run_one prints as
# "(auto)".
ALGOS=("" "level0:ring" "level0:fullmesh")
BUFSIZES=("100" "200" "400")

echo "HCCL matrix: ${#ALGOS[@]} algos × ${#BUFSIZES[@]} buffsizes × ${N_RUNS} runs each"
echo "Results → $OUT"
echo ""

# One CSV row and one progress line per (algo, buffsize) combination.
for algo in "${ALGOS[@]}"; do
  for buf in "${BUFSIZES[@]}"; do
    run_one "$algo" "$buf"
  done
done

echo ""
echo "====== Summary (sorted by best TG) ======"
# Header first, then data rows ordered by the 4th field, highest first.
# - LC_ALL=C keeps '.' as the decimal separator for the numeric sort.
# - The sed step only affects this display: some column(1) implementations
#   drop or merge empty fields, which would shift rows whose algo is empty one
#   column to the left, so the empty leading field is shown as "(auto)".
{
  head -n 1 "$OUT"
  tail -n +2 "$OUT" | LC_ALL=C sort -t, -k4,4gr
} | sed 's/^,/(auto),/' | column -t -s,