[
  {
    "id": "demo_01_opening_and_talk",
    "title": "Opening + Talk Path",
    "purpose": "验证开场生成、自由输入解析、对话型剧情推进是否稳定。",
    "recommended_for": [
      "课堂演示",
      "日常回归"
    ],
    "steps": [
      "启动应用并输入角色名。",
      "等待开场故事生成完成。",
      "输入:和村长老伯谈谈最近森林里的怪事",
      "观察状态栏、剧情文本和下一组选项。"
    ],
    "expected_checks": [
      "开场文本能正常生成,不会卡在加载状态。",
      "NLU 应将输入识别为 TALK 或至少正确指向 村长老伯。",
      "剧情文本应与村庄/森林线索相关,而不是完全无关的泛化回复。",
      "页面仍然给出可继续操作的选项。"
    ],
    "use_logs": [
      "查看 logs/interactions 最新 JSONL,确认 input_source=text_input。",
      "确认 nlu_result.target 包含 村长老伯。",
      "确认 used_fallback 为 false 或至少 fallback_reason 可解释。"
    ]
  },
  {
    "id": "demo_02_invalid_action_guard",
    "title": "Invalid Action Guard",
    "purpose": "验证 pre_validate_action 是否能拦截明显非法动作,并保持 UI 可继续交互。",
    "recommended_for": [
      "课堂演示",
      "规则回归"
    ],
    "steps": [
      "在开场后直接输入:使用火把",
      "如果当前背包里没有 火把,系统应直接驳回。",
      "继续点击任意一个有效选项,确认会话没有被破坏。"
    ],
    "expected_checks": [
      "系统给出明确的驳回说明,而不是生成一段错误剧情。",
      "本轮不应出现异常状态变化。",
      "驳回后仍然保留可继续点击的选项。"
    ],
    "use_logs": [
      "确认 telemetry.engine_mode=pre_validation。",
      "确认 state_changes 为空。",
      "确认输出文本中包含驳回提示。"
    ]
  },
  {
    "id": "demo_03_branch_difference",
    "title": "Branch Difference Check",
    "purpose": "验证不同选择是否带来可观察的剧情和状态差异。",
    "recommended_for": [
      "课堂演示",
      "分支验证"
    ],
    "steps": [
      "第一次游玩输入:前往村庄旅店",
      "第二次重新开始后输入:探索一下村庄广场",
      "对比两次输出、状态栏和下一组选项。"
    ],
    "expected_checks": [
      "两次剧情文本不应高度重复。",
      "当前场景、事件氛围、后续选项至少有一项明显不同。",
      "日志中的 post_turn_snapshot.location 或 options 应存在差异。"
    ],
    "use_logs": [
      "对比两份 JSONL 中的 output_text。",
      "对比 options 和 post_turn_snapshot.location。"
    ]
  },
  {
    "id": "demo_04_resource_update",
    "title": "Resource Update Check",
    "purpose": "验证资源类操作后的状态更新和日志记录是否完整。",
    "recommended_for": [
      "日常回归",
      "报告案例"
    ],
    "steps": [
      "开场后输入:使用小型治疗药水",
      "再输入:休息一会儿",
      "观察 HP、士气、理智、饥饿度以及输出文本。"
    ],
    "expected_checks": [
      "如果药水/休息被正常处理,状态栏应发生可解释变化。",
      "即使触发 fallback,日志也应完整记录 latency、fallback_reason、output_text。",
      "不会出现 UI 卡住、按钮消失、会话丢失。"
    ],
    "use_logs": [
      "查看 change_log 和 post_turn_snapshot 中的属性变化。",
      "确认 turn_index 连续递增。"
    ]
  },
  {
    "id": "demo_05_failure_case_capture",
    "title": "Failure Case Capture",
    "purpose": "为报告准备失败案例,不追求系统成功,而是追求可解释、可记录。",
    "recommended_for": [
      "报告撰写",
      "失败案例收集"
    ],
    "steps": [
      "尝试一个偏自由、模糊或边界的输入,例如:我想扔石头试试看",
      "观察系统将其解析成什么意图,以及输出是否合理。",
      "记录这一轮日志,留作后续 failure case 分析。"
    ],
    "expected_checks": [
      "系统即使处理得不好,也应能给出可继续游玩的输出。",
      "日志里应保留 parser_source、fallback 信息和完整 output_text。",
      "这一轮适合在报告里分析 NLU 或分支控制的局限。"
    ],
    "use_logs": [
      "保存对应 JSONL 片段。",
      "重点关注 nlu_result.intent、parser_source、used_fallback。"
    ]
  }
]