llm_mutil_npu / tests /test_chat_flow.sh
xianglarry's picture
Initial C++ aclnn EAGER inference for Qwen3-235B-A22B MoE on Ascend 910 × 16 NPU
4b9fefd
#!/usr/bin/env bash
# test_chat_flow.sh — end-to-end integration smoke test for the CLI.
#
# Exercises:
# - --prompt-file
# - Multi-turn --chat memory (remembers Alice's name in turn 2)
# - "reset" command typed into the REPL (clears cache + conversation)
# - --system prompt
# - EOS detection at <|im_end|>
#
# Requires TP=16 Ascend 910 setup. Run from the repo root.
#
# Exit: 0 on all-pass; otherwise the number of failed checks
# (1 if the binary or the launcher script is missing).
# Treat any reference to an unset variable as an error.
set -u

# Fixed paths, relative to the repo root the script is run from.
BIN="./build/qwen3-moe-aclnn"
LAUNCH="./scripts/tp_launch.sh"
VOCAB="tokenizer_data/vocab.bin"

# Overridable from the environment: MODEL_DIR (model directory) and
# TP_SIZE (first argument handed to the launcher).
MODEL="${MODEL_DIR:-/path/to/Qwen3-235B-A22B-Instruct-2507-BF16}"
TP="${TP_SIZE:-16}"

# Bail out early when either the CLI binary or the launcher is not executable.
if [ ! -x "$BIN" ]; then
  echo "FAIL: $BIN not built"
  exit 1
fi
if [ ! -x "$LAUNCH" ]; then
  echo "FAIL: $LAUNCH not found"
  exit 1
fi

# Running totals updated by check().
pass=0
fail=0
#######################################
# Record one assertion: passes when NEEDLE occurs anywhere in OUTPUT,
# compared as a case-insensitive fixed string (not a regex).
# Globals:
#   pass, fail - incremented according to the result
# Arguments:
#   $1 - human-readable name of the check
#   $2 - captured program output to search
#   $3 - literal text expected in the output
# Outputs:
#   A [PASS]/[FAIL] line on stdout; on failure also the last 20 lines
#   of the searched output to help diagnose the miss.
#######################################
check() {
  local name="$1"; shift
  local out="$1"; shift
  local needle="$1"; shift
  # `--` stops option parsing so a needle such as "--chat" is treated as the
  # pattern instead of an (invalid) grep option. The here-string feeds the
  # text without going through echo, which would mangle output that starts
  # with -n / -e.
  if grep -qiF -- "$needle" <<<"$out"; then
    printf '%s\n' " [PASS] $name (found: '$needle')"
    pass=$((pass+1))
  else
    printf '%s\n' " [FAIL] $name (did NOT find: '$needle')"
    fail=$((fail+1))
    printf '%s\n' " ---- output ----"
    printf '%s\n' "$out" | tail -20
    printf '%s\n' " ---- end ----"
  fi
}
#######################################
# Run the CLI through the launcher script with the shared --model-dir and
# --vocab options. Caller-supplied arguments are placed between the two so
# the final argument order matches a hand-written invocation.
# Globals:
#   LAUNCH, TP, BIN, MODEL, VOCAB (read)
# Arguments:
#   extra CLI flags for this scenario
# Outputs:
#   the command's stdout and stderr, merged onto stdout
#######################################
run_cli() {
  "$LAUNCH" "$TP" "$BIN" --model-dir "$MODEL" "$@" --vocab "$VOCAB" 2>&1
}

# Use a private, unpredictable temp file for the prompt rather than a fixed
# name under /tmp (which can collide between concurrent runs or be
# pre-created by another user). It is removed on every exit path.
prompt_file=$(mktemp "${TMPDIR:-/tmp}/chat_test_prompt.XXXXXX") \
  || { echo "FAIL: mktemp failed"; exit 1; }
trap 'rm -f -- "$prompt_file"' EXIT

echo "===== Test 1: --prompt-file + EOS ====="
printf '%s\n' "What is the capital of Japan?" > "$prompt_file"
OUT=$(run_cli --prompt-file "$prompt_file" \
  --chat --n-predict 50 --temperature 0)
check "prompt-file loaded" "$OUT" "capital of Japan"
check "answer mentions Tokyo" "$OUT" "Tokyo"
check "hit EOS" "$OUT" "hit EOS"
echo ""

echo "===== Test 2: multi-turn memory (remembers name) ====="
# Three REPL input lines: two user turns, then quit.
OUT=$(printf '%s\n' "My name is Alice." "What is my name?" "quit" \
  | run_cli --interactive --chat \
      --system "You are a concise assistant. Answer in one short sentence." \
      --temperature 0 --n-predict 40 --max-seq 512)
check "recalls Alice" "$OUT" "Alice"
# NOTE(review): "past_len=" would presumably also be printed after a single
# turn, so this does not strictly prove that two turns ran — confirm against
# the CLI's log format.
check "has 2 turns" "$OUT" "past_len="
echo ""

echo "===== Test 3: reset command clears memory ====="
OUT=$(printf '%s\n' "My name is Bob." "reset" "What is my name?" "quit" \
  | run_cli --interactive --chat \
      --system "Answer truthfully in one sentence." \
      --temperature 0 --n-predict 40 --max-seq 512)
check "reset acknowledged" "$OUT" "cache + conversation reset"
# After the reset the model should no longer know the name is Bob, but a
# negative ("does not mention Bob") cannot be checked reliably. Instead we
# only verify that the reset ran and that the session reached its end.
# NOTE(review): "bye" is presumably printed when the REPL handles "quit";
# verify that it cannot appear without the third turn having been processed.
check "turn 3 ran" "$OUT" "bye"
echo ""

echo "===== Summary: $pass passed, $fail failed ====="
# Exit status is the number of failed checks (0 == everything passed).
exit "$fail"