File size: 2,232 Bytes
4b9fefd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/env bash
# bench_hccl_adv.sh — advanced HCCL parameter tuning.
# Starting from the already-established ring:200 baseline, add further knobs
# such as OP_EXPANSION_MODE=AIV and compare the resulting decode throughput.
#
# Environment:
#   MODEL_DIR  optional; model directory handed to the binary via --model-dir.
#
# NOTE(review): only `set -u` is enabled. Turning on errexit together with
# pipefail would abort the whole script whenever a run prints no "decode :"
# line (grep exits non-zero), defeating the "record 0" fallback in run_one.
set -u

# Every relative path below (BIN, LAUNCH, VOCAB) is resolved from the parent of
# the directory holding this script, so refuse to continue if we cannot get there.
cd "$(dirname "$0")/.." || {
    printf 'error: cannot cd to %s/..\n' "$(dirname "$0")" >&2
    exit 1
}

MODEL="${MODEL_DIR:-/path/to/Qwen3-235B-A22B-Instruct-2507-BF16}"
BIN="./build/qwen3-moe-aclnn"
LAUNCH="./scripts/tp_launch.sh"
TP=16            # first argument to the launcher (presumably the tensor-parallel degree)
N_PREDICT=200    # passed as --n-predict
N_RUNS=2         # repetitions per configuration; the CSV header assumes exactly two
PROMPT="The history of artificial intelligence spans several decades and"
VOCAB="tokenizer_data/vocab.bin"

# Result file; truncated here and seeded with the column header.
OUT=/tmp/bench_hccl_adv.csv
printf '%s\n' "config,run1,run2,best,median" > "$OUT"

#######################################
# Benchmark one HCCL configuration and record its decode throughput.
#
# Runs the launcher N_RUNS times on top of the ring:200 baseline
# (HCCL_ALGO=level0:ring, HCCL_BUFFSIZE=200). The figure taken from each run
# is the third-from-last field of the "decode :" line found in the launcher's
# combined stdout/stderr; a run that prints no such line is recorded as 0.
#
# Globals:   N_RUNS, LAUNCH, TP, BIN, MODEL, PROMPT, N_PREDICT, VOCAB, OUT (read)
# Arguments: $1  - label for this configuration (first CSV column)
#            $2+ - extra KEY=VALUE environment assignments; they follow the
#                  baseline assignments on the env command line, so a repeated
#                  key overrides the baseline value
# Outputs:   appends "label,run1,run2,best,median" to $OUT and prints a
#            one-line summary to stdout
#######################################
run_one() {
    local name="$1"; shift
    local -a tgs=() sorted=()
    local out best median r

    for ((r = 1; r <= N_RUNS; r++)); do
        # "$@" stays quoted so each KEY=VALUE reaches env as exactly one word.
        out=$(env HCCL_ALGO=level0:ring HCCL_BUFFSIZE=200 "$@" \
              "$LAUNCH" "$TP" "$BIN" --model-dir "$MODEL" \
              --prompt "$PROMPT" --n-predict "$N_PREDICT" \
              --vocab "$VOCAB" --seed 0 2>&1 \
              | awk '/decode :/ { print $(NF - 2) }')
        # Best effort: a crashed or silent run counts as 0 instead of aborting.
        tgs+=("${out:-0}")
    done

    mapfile -t sorted < <(printf '%s\n' "${tgs[@]}" | sort -n)
    best="${sorted[-1]}"

    # True median: the middle value for an odd count, the mean of the two
    # middle values for an even count (picking index n/2 would just repeat
    # the best value when there are two runs).
    median=$(printf '%s\n' "${sorted[@]}" | awk '
        { v[NR] = $1 }
        END {
            if (NR % 2) print v[(NR + 1) / 2]
            else        print (v[NR / 2] + v[NR / 2 + 1]) / 2
        }')

    # The CSV schema has exactly two per-run columns; the second is left
    # empty (rather than tripping `set -u`) when only one run was made.
    printf '%s\n' "$name,${tgs[0]},${tgs[1]:-},$best,$median" >> "$OUT"
    printf "  %-40s  %s  best=%s median=%s\n" "$name" "${tgs[*]}" "$best" "$median"
}

# Driver: announce the benchmark, run every configuration, then print the
# collected CSV as a table ordered by the "best" column.
echo "Adv HCCL bench: baseline ring:200 + additional knobs"
echo "Results → $OUT"
echo ""

# Reference point: only the baseline settings that run_one always applies.
run_one "baseline (ring+200 only)"

# Each entry adds (or overrides) environment knobs on top of the baseline.
run_one "+ OP_EXPANSION_MODE=AIV"                        HCCL_OP_EXPANSION_MODE=AIV
run_one "+ OP_BASE_FFTS_MODE=1"                          HCCL_OP_BASE_FFTS_MODE_ENABLE=1
run_one "+ OP_EXPANSION=AIV + FFTS=1"                    HCCL_OP_EXPANSION_MODE=AIV HCCL_OP_BASE_FFTS_MODE_ENABLE=1
run_one "+ OP_EXPANSION=AIV + BUF=256"                   HCCL_OP_EXPANSION_MODE=AIV HCCL_BUFFSIZE=256
run_one "+ OP_EXPANSION=AIV + BUF=512"                   HCCL_OP_EXPANSION_MODE=AIV HCCL_BUFFSIZE=512
run_one "+ OP_EXPANSION=AIV + ALGO=fullmesh"             HCCL_OP_EXPANSION_MODE=AIV HCCL_ALGO=level0:fullmesh

echo ""
echo "====== Sorted by best TG ======"
# Keep the header row first and sort only the data rows, descending by field 4
# (best). The key is limited to that single field (-k4,4) so the trailing
# ",median" text is never part of what gets parsed, and LC_ALL=C keeps the
# decimal point handling independent of the caller's locale.
{
    head -n 1 "$OUT"
    tail -n +2 "$OUT" | LC_ALL=C sort -t, -k4,4gr
} | column -t -s,