#!/usr/bin/env bash
# test_chat_flow.sh — end-to-end integration smoke test for the CLI.
#
# Exercises:
#   - --prompt-file
#   - Multi-turn --chat memory (remembers Alice's name in turn 2)
#   - `reset` command in the REPL
#   - --system prompt
#   - EOS detection at <|im_end|>
#
# Requires TP=16 Ascend 910 setup. Run from the repo root.
#
# Environment overrides:
#   MODEL_DIR  model directory passed as --model-dir
#   TP_SIZE    second argument handed to the launcher script (default 16)
#
# Exit: 0 on all-pass, nonzero with reason (the number of failed checks,
# or 1 when a prerequisite is missing).
set -u

BIN="./build/qwen3-moe-aclnn"
MODEL="${MODEL_DIR:-/path/to/Qwen3-235B-A22B-Instruct-2507-BF16}"
LAUNCH="./scripts/tp_launch.sh"
TP="${TP_SIZE:-16}"
VOCAB="tokenizer_data/vocab.bin"

# Preflight: both the CLI binary and the launcher must be executable.
[ -x "$BIN" ] || { echo "FAIL: $BIN not built"; exit 1; }
[ -x "$LAUNCH" ] || { echo "FAIL: $LAUNCH not found"; exit 1; }

pass=0
fail=0

#######################################
# Record one pass/fail result based on a case-insensitive, fixed-string
# search of captured output.
# Globals:   pass, fail (incremented)
# Arguments: $1 - human-readable check name
#            $2 - captured output to search
#            $3 - literal text expected somewhere in the output
# Outputs:   a [PASS]/[FAIL] line on stdout; on failure also the last
#            20 lines of the captured output
#######################################
check() {
  local name=$1
  local out=$2
  local needle=$3

  # Here-string instead of `echo |` so output such as "-n" is not eaten by
  # echo; `--` keeps a needle that starts with '-' from being read as an option.
  if grep -qiF -- "$needle" <<<"$out"; then
    printf '%s\n' " [PASS] $name (found: '$needle')"
    pass=$((pass + 1))
  else
    printf '%s\n' " [FAIL] $name (did NOT find: '$needle')"
    fail=$((fail + 1))
    printf '%s\n' " ---- output ----"
    tail -n 20 <<<"$out"
    printf '%s\n' " ---- end ----"
  fi
}

#######################################
# Run the CLI through the launcher with the shared model/vocab options.
# Globals:   LAUNCH, TP, BIN, MODEL, VOCAB (read)
# Arguments: extra CLI flags, placed between --model-dir and --vocab
# Outputs:   the command's stdout and stderr, merged onto stdout
# Stdin is inherited, so REPL input can be piped in by the caller.
#######################################
run_cli() {
  "$LAUNCH" "$TP" "$BIN" --model-dir "$MODEL" "$@" --vocab "$VOCAB" 2>&1
}

# Use a private temp file for the prompt rather than a fixed /tmp name, so
# concurrent runs do not clobber each other; remove it on any exit path.
prompt_file=$(mktemp "${TMPDIR:-/tmp}/chat_test_prompt.XXXXXX") \
  || { echo "FAIL: could not create temp prompt file"; exit 1; }
trap 'rm -f -- "$prompt_file"' EXIT

echo "===== Test 1: --prompt-file + EOS ====="
echo "What is the capital of Japan?" > "$prompt_file"
OUT=$(run_cli \
  --prompt-file "$prompt_file" \
  --chat --n-predict 50 --temperature 0)
check "prompt-file loaded" "$OUT" "capital of Japan"
check "answer mentions Tokyo" "$OUT" "Tokyo"
check "hit EOS" "$OUT" "hit EOS"

echo ""
echo "===== Test 2: multi-turn memory (remembers name) ====="
OUT=$(printf "My name is Alice.\nWhat is my name?\nquit\n" \
  | run_cli \
      --interactive --chat \
      --system "You are a concise assistant. Answer in one short sentence." \
      --temperature 0 --n-predict 40 --max-seq 512)
check "recalls Alice" "$OUT" "Alice"
check "has 2 turns" "$OUT" "past_len="

echo ""
echo "===== Test 3: reset command clears memory ====="
OUT=$(printf "My name is Bob.\nreset\nWhat is my name?\nquit\n" \
  | run_cli \
      --interactive --chat \
      --system "Answer truthfully in one sentence." \
      --temperature 0 --n-predict 40 --max-seq 512)
check "reset acknowledged" "$OUT" "cache + conversation reset"
# After reset, the model should NOT know the name is Bob (it probably says
# "don't know" or asks). Negation can't be checked reliably, so just verify
# that the reset ran and the session continued through to the final "bye".
check "turn 3 ran" "$OUT" "bye"

echo ""
echo "===== Summary: $pass passed, $fail failed ====="
exit "$fail"