#!/usr/bin/env bash
# bench_hccl_adv2.sh — layer 2 env knob exploration on top of AIV+FFTS=1 baseline.
# Target: break past 25 t/s MUST barrier.
# Treat references to unset variables as errors.
set -u

# Work from the parent of this script's directory so the relative paths
# below (BIN, LAUNCH, VOCAB) resolve; stop if that directory is unreachable,
# otherwise every run would fail and be recorded as 0.
cd "$(dirname "$0")/.." || {
  echo "error: cannot cd to $(dirname "$0")/.." >&2
  exit 1
}

# Model directory; override by exporting MODEL_DIR.
MODEL="${MODEL_DIR:-/path/to/Qwen3-235B-A22B-Instruct-2507-BF16}"
BIN="./build/qwen3-moe-aclnn"
LAUNCH="./scripts/tp_launch.sh"
TP=16
N_PREDICT=200
N_RUNS=3
LONG_PROMPT="Write a very long detailed essay about artificial intelligence, machine learning, deep learning and their applications in modern society. Include historical context, current state of the art, and future predictions."
VOCAB="tokenizer_data/vocab.bin"
OUT=/tmp/bench_hccl_adv2.csv

# Start a fresh results file that contains only the CSV header row.
echo "config,runs,best,median" > "$OUT"
#######################################
# Benchmark one configuration and record its decode throughput.
#
# Invokes the launcher N_RUNS times under the baseline HCCL environment
# (HCCL_ALGO=level0:ring, HCCL_BUFFSIZE=200, HCCL_OP_EXPANSION_MODE=AIV,
# HCCL_OP_BASE_FFTS_MODE_ENABLE=1) plus any extra assignments supplied by
# the caller, and extracts one throughput value per run from the output.
#
# Globals:
#   N_RUNS, LAUNCH, TP, BIN, MODEL, LONG_PROMPT, N_PREDICT, VOCAB (read)
#   OUT (read; one CSV row is appended to the file it names)
# Arguments:
#   $1   - label of the configuration; may contain spaces
#   $2.. - optional NAME=VALUE pairs passed to env(1) after the baseline
#          assignments, so a repeated NAME replaces the baseline value
# Outputs:
#   Appends "label,runs,best,median" to $OUT (runs are space-separated) and
#   prints a one-line summary to stdout.
#######################################
run_one() {
  local name="$1"
  shift
  local -a tgs=() sorted=()
  local out best median r

  for ((r = 1; r <= N_RUNS; r++)); do
    # The value is the third field from the end of the "decode :" line.
    # If several such lines are printed, the last one is used so that each
    # run contributes exactly one value to the statistics.
    out=$(env HCCL_ALGO=level0:ring HCCL_BUFFSIZE=200 \
      HCCL_OP_EXPANSION_MODE=AIV HCCL_OP_BASE_FFTS_MODE_ENABLE=1 \
      "$@" \
      "${LAUNCH}" "${TP}" "${BIN}" --model-dir "$MODEL" \
      --prompt "$LONG_PROMPT" --n-predict "$N_PREDICT" \
      --vocab "$VOCAB" --seed 0 --no-stream 2>&1 \
      | awk '/decode :/ { v = $(NF-2) } END { if (v != "") print v }')
    # A run that produced no decode line is recorded as 0 instead of
    # aborting the whole sweep.
    tgs+=("${out:-0}")
  done

  # Ascending numeric order: last element is the best, middle is the median
  # (upper-middle when the number of runs is even).
  mapfile -t sorted < <(printf '%s\n' "${tgs[@]}" | sort -n)
  best="${sorted[-1]}"
  median="${sorted[${#sorted[@]} / 2]}"

  # Emit the row field by field so spaces inside the label or between the
  # run values are never turned into extra CSV columns.
  printf '%s,%s,%s,%s\n' "$name" "${tgs[*]}" "$best" "$median" >> "$OUT"
  printf " %-40s %s best=%s median=%s\n" "$name" "${tgs[*]}" "$best" "$median"
}
# Driver: benchmark the baseline, then the baseline plus one extra
# environment assignment at a time. Each call appends one row to $OUT.
echo "Bench: AIV+FFTS baseline + single additional knob"
echo "$N_RUNS runs × $N_PREDICT tokens"
echo ""
run_one "baseline (AIV + FFTS)"
run_one "+ TASK_QUEUE_ENABLE=1" TASK_QUEUE_ENABLE=1
run_one "+ TASK_QUEUE_ENABLE=2" TASK_QUEUE_ENABLE=2
run_one "+ HCCL_BUFFSIZE=256" HCCL_BUFFSIZE=256
run_one "+ HCCL_DETERMINISTIC=false" HCCL_DETERMINISTIC=false
run_one "+ HCCL_INTRA_ROCE_ENABLE=1" HCCL_INTRA_ROCE_ENABLE=1
run_one "+ HCCL_CLUSTER_TIMEOUT=600" HCCL_CLUSTER_TIMEOUT=600
run_one "+ ASCEND_LAUNCH_BLOCKING=0" ASCEND_LAUNCH_BLOCKING=0
echo ""
echo "====== Sorted by best TG ======"
# Keep the header row first, then order the data rows by CSV field 3 alone
# (the `best` column named in the header), highest value first, and render
# the result as an aligned table.
{
  head -n 1 "$OUT"
  tail -n +2 "$OUT" | sort -t, -k3,3gr
} | column -t -s,