llm_mutil_npu / scripts /bench_pld_k.sh
xianglarry's picture
Initial C++ aclnn EAGER inference for Qwen3-235B-A22B MoE on Ascend 910 × 16 NPU
4b9fefd
#!/usr/bin/env bash
# bench_pld_k.sh — isolated K sweep with FIXED K (no adaptive) to characterize raw K effect.
# Larger K = more draft candidates per verify. Peak observed accept=7.38 suggests K=8 not saturated.
#
# Usage:   [MODEL_DIR=/path/to/model] scripts/bench_pld_k.sh
# Output:  one CSV row per K in $OUT (k,runs,median,max,avg_accept), one progress
#          line per K on stdout, and a final table sorted by median (descending).
#          Diagnostics (failed runs, missing launcher) go to stderr.
set -u

MODEL="${MODEL_DIR:-/path/to/Qwen3-235B-A22B-Instruct-2507-BF16}"
BIN="./build/qwen3-moe-aclnn"
LAUNCH="./scripts/tp_launch.sh"
TP=16
N_PREDICT=200
N_RUNS=3
PROMPT="Write a long Python function that computes the Fibonacci sequence with memoization, extensive comments, and type hints."
VOCAB="tokenizer_data/vocab.bin"
OUT=/tmp/bench_pld_k.csv
K_VALUES=(4 6 8 10 12 16)

# Print the third-from-last field of the "decode :" line in the captured
# launcher output ($1); the sweep uses that field as the per-run throughput.
# If several lines match, only the last one is used so the result is always a
# single token (NOTE(review): assumes the last "decode :" line is the summary
# of interest — confirm against the binary's output). Prints nothing if no
# line matches.
extract_decode_tps() {
  awk '/decode :/ && NF >= 3 { v = $(NF - 2) } END { if (v != "") print v }' <<<"$1"
}

# Print the number following "avg=" on the "[pld]" line of the captured
# launcher output ($1). Last match wins; prints nothing if there is none.
extract_accept() {
  grep -F '[pld]' <<<"$1" | grep -oE 'avg=[0-9.]+' | tail -n 1 | cut -d= -f2
}

# Print the median of the numeric arguments. An odd count prints the middle
# value verbatim; an even count prints the mean of the two middle values with
# two decimals. Returns 1 when called without arguments.
median_of() {
  (($# > 0)) || return 1
  # LC_ALL=C keeps "." as the decimal separator regardless of the user's locale.
  printf '%s\n' "$@" | LC_ALL=C sort -g | LC_ALL=C awk '
    { v[NR] = $1 }
    END {
      if (NR % 2) {
        print v[(NR + 1) / 2]
      } else {
        printf "%.2f\n", (v[NR / 2] + v[NR / 2 + 1]) / 2
      }
    }'
}

# Print the largest of the numeric arguments. Returns 1 without arguments.
max_of() {
  (($# > 0)) || return 1
  printf '%s\n' "$@" | LC_ALL=C sort -g | tail -n 1
}

# Print the arithmetic mean of the arguments with two decimals.
# Returns 1 without arguments.
mean_of() {
  (($# > 0)) || return 1
  printf '%s\n' "$@" | LC_ALL=C awk '{ s += $1 } END { printf "%.2f\n", s / NR }'
}

# Join the remaining arguments with the single-character separator $1.
join_by() {
  local IFS=$1
  shift
  printf '%s\n' "$*"
}

# Run the sweep: N_RUNS launches for every K in K_VALUES, then summarize.
# Returns non-zero if the working directory, launcher, or output file is unusable.
main() {
  # All relative paths above (BIN, LAUNCH, VOCAB) are relative to the parent
  # of the directory containing this script.
  cd "$(dirname -- "$0")/.." || {
    printf 'error: cannot cd to %s/..\n' "$(dirname -- "$0")" >&2
    return 1
  }

  # Without this check every run would silently be recorded as 0.
  if [[ ! -x "$LAUNCH" ]]; then
    printf 'error: launcher %s is missing or not executable (cwd: %s)\n' "$LAUNCH" "$PWD" >&2
    return 1
  fi
  if [[ ! -d "$MODEL" ]]; then
    printf 'warning: model dir %s does not exist; set MODEL_DIR\n' "$MODEL" >&2
  fi

  printf '%s\n' "k,runs,median,max,avg_accept" >"$OUT" || return 1

  local k r out rc tg acc median max accs_avg
  local -a tgs accs
  for k in "${K_VALUES[@]}"; do
    tgs=()
    accs=()
    for ((r = 1; r <= N_RUNS; r++)); do
      # stderr is merged into the capture, as both metrics are grepped from it.
      out=$("$LAUNCH" "$TP" "$BIN" --model-dir "$MODEL" \
        --prompt "$PROMPT" --n-predict "$N_PREDICT" --max-seq 512 \
        --vocab "$VOCAB" --seed 0 --no-stream \
        --pld --pld-k "$k" --pld-ngram 1 --pld-fixed-k 2>&1)
      rc=$?
      tg=$(extract_decode_tps "$out")
      acc=$(extract_accept "$out")
      if [[ -z "$tg" ]]; then
        printf 'warning: K=%s run %s: no "decode :" line (launcher exit %s); recording 0\n' \
          "$k" "$r" "$rc" >&2
      fi
      # A failed run stays visible as 0 in the "runs" column.
      tgs+=("${tg:-0}")
      accs+=("${acc:-0}")
    done

    median=$(median_of "${tgs[@]}")
    max=$(max_of "${tgs[@]}")
    accs_avg=$(mean_of "${accs[@]}")

    # Runs are joined with "/" so they occupy a single CSV field.
    printf '%s,%s,%s,%s,%s\n' \
      "$k" "$(join_by / "${tgs[@]}")" "$median" "$max" "$accs_avg" >>"$OUT"
    printf " K=%-2d runs=[%s] median=%s max=%s accept=%s\n" \
      "$k" "${tgs[*]}" "$median" "$max" "$accs_avg"
  done

  echo ""
  echo "====== Sorted by median ======"
  # -k3,3 restricts the sort key to the median column only.
  if command -v column >/dev/null 2>&1; then
    {
      head -n 1 "$OUT"
      tail -n +2 "$OUT" | LC_ALL=C sort -t, -k3,3gr
    } | column -t -s,
  else
    head -n 1 "$OUT"
    tail -n +2 "$OUT" | LC_ALL=C sort -t, -k3,3gr
  fi
}

main "$@"