#!/usr/bin/env bash
#
# HCCL environment-knob benchmark for the qwen3-moe-aclnn binary.
#
# Runs the binary through the tensor-parallel launcher several times per
# configuration, appends one CSV row per configuration to $OUT and prints a
# summary table on stdout.
#
# Environment:
#   MODEL_DIR  optional; directory passed to the binary as --model-dir.
#              Falls back to the placeholder path assigned to MODEL below.

# Only -u is enabled. The sweep records a run without a usable result as 0
# and carries on, so -e is deliberately not added here.
set -u

# BIN, LAUNCH and VOCAB are relative to the parent of this script's directory;
# refuse to continue from anywhere else.
cd "$(dirname "$0")/.." || {
  printf 'error: cannot cd to %s/..\n' "$(dirname "$0")" >&2
  exit 1
}

# --- Configuration -----------------------------------------------------------
MODEL="${MODEL_DIR:-/path/to/Qwen3-235B-A22B-Instruct-2507-BF16}"
BIN="./build/qwen3-moe-aclnn"
LAUNCH="./scripts/tp_launch.sh"
TP=16          # first argument handed to the launcher
N_PREDICT=200  # value passed as --n-predict
N_RUNS=3       # repetitions per configuration
LONG_PROMPT="Write a very long detailed essay about artificial intelligence, machine learning, deep learning and their applications in modern society. Include historical context, current state of the art, and future predictions."
VOCAB="tokenizer_data/vocab.bin"

# --- Result file -------------------------------------------------------------
# Truncated on every invocation; rows are appended after this header.
OUT=/tmp/bench_hccl_adv2.csv
echo "config,runs,best,median" > "$OUT" || {
  printf 'error: cannot write %s\n' "$OUT" >&2
  exit 1
}

#######################################
# Benchmark one configuration N_RUNS times and record the results.
#
# Each run starts BIN through LAUNCH with a fixed baseline environment plus
# any extra NAME=VALUE overrides, then takes the third field from the end of
# the last output line containing "decode :" as the run's figure (presumably
# decode tokens/s -- confirm against the binary's output format). A run that
# prints no such line is recorded as 0.
#
# Globals:
#   N_RUNS, LAUNCH, TP, BIN, MODEL, LONG_PROMPT, N_PREDICT, VOCAB (read)
#   OUT (CSV file, appended to)
# Arguments:
#   $1    configuration label, written verbatim as the first CSV column
#         (must not contain a comma)
#   $2..  optional NAME=VALUE environment overrides; they follow the baseline
#         assignments on the env command line, so an override of a baseline
#         name takes effect
# Outputs:
#   Appends "label,<run values separated by spaces>,best,median" to $OUT and
#   prints a one-line summary to stdout.
# Returns:
#   1 if no run was performed (N_RUNS < 1), 0 otherwise.
#######################################
run_one() {
  local name="$1"
  shift

  local -a tgs=() sorted=()
  local run value

  for ((run = 1; run <= N_RUNS; run++)); do
    # awk reads the whole stream (the launcher is never cut off by an early
    # exit) and keeps only the last match, so the result is a single token
    # even if several "decode :" lines are printed.
    value=$(
      env HCCL_ALGO=level0:ring HCCL_BUFFSIZE=200 \
        HCCL_OP_EXPANSION_MODE=AIV HCCL_OP_BASE_FFTS_MODE_ENABLE=1 \
        "$@" \
        "$LAUNCH" "$TP" "$BIN" --model-dir "$MODEL" \
        --prompt "$LONG_PROMPT" --n-predict "$N_PREDICT" \
        --vocab "$VOCAB" --seed 0 --no-stream 2>&1 \
        | awk '/decode :/ && NF >= 3 { v = $(NF - 2) } END { if (v != "") print v }'
    )
    tgs+=("${value:-0}")
  done

  if ((${#tgs[@]} == 0)); then
    printf 'run_one: no runs performed for "%s" (N_RUNS=%s)\n' \
      "$name" "$N_RUNS" >&2
    return 1
  fi

  # Ascending numeric order; LC_ALL=C keeps "." as the decimal separator.
  mapfile -t sorted < <(printf '%s\n' "${tgs[@]}" | LC_ALL=C sort -n)
  local best="${sorted[${#sorted[@]} - 1]}"
  # Middle element; for an even number of runs this is the upper of the two.
  local median="${sorted[${#sorted[@]} / 2]}"

  # Emit the four CSV fields directly so that spaces inside the label are
  # never turned into extra column separators.
  printf '%s,%s,%s,%s\n' "$name" "${tgs[*]}" "$best" "$median" >> "$OUT"
  printf " %-40s %s best=%s median=%s\n" "$name" "${tgs[*]}" "$best" "$median"
}

# --- Sweep -------------------------------------------------------------------
echo "Bench: AIV+FFTS baseline + single additional knob"
echo "$N_RUNS runs × $N_PREDICT tokens"
echo ""

# Reference configuration: no extra environment on top of run_one's defaults.
run_one "baseline (AIV + FFTS)"

# One additional NAME=VALUE setting per configuration; the CSV label is the
# setting itself prefixed with "+ ".
for knob in \
  TASK_QUEUE_ENABLE=1 \
  TASK_QUEUE_ENABLE=2 \
  HCCL_BUFFSIZE=256 \
  HCCL_DETERMINISTIC=false \
  HCCL_INTRA_ROCE_ENABLE=1 \
  HCCL_CLUSTER_TIMEOUT=600 \
  ASCEND_LAUNCH_BLOCKING=0; do
  run_one "+ ${knob}" "$knob"
done

# --- Summary -----------------------------------------------------------------
echo ""
echo "====== Sorted by best TG ======"
# Header line first, then the data rows ordered by the third CSV column only
# (-k3,3), compared as general numbers (g), largest first (r).
{
  head -n 1 "$OUT"
  tail -n +2 "$OUT" | LC_ALL=C sort -t, -k3,3gr
} | if command -v column > /dev/null 2>&1; then
  column -t -s,
else
  cat # column(1) not installed: show the raw CSV instead of nothing
fi
