Spaces:
Sleeping
Sleeping
PPP committed on
Commit ·
ef60390
1
Parent(s): e39651c
feat(demo): add scripted demo checklist for regression testing
Browse files- demo/scenarios.json +121 -0
- demo/show_demo_checklist.py +34 -0
demo/scenarios.json
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"id": "demo_01_opening_and_talk",
|
| 4 |
+
"title": "Opening + Talk Path",
|
| 5 |
+
"purpose": "验证开场生成、自由输入解析、对话型剧情推进是否稳定。",
|
| 6 |
+
"recommended_for": [
|
| 7 |
+
"课堂演示",
|
| 8 |
+
"日常回归"
|
| 9 |
+
],
|
| 10 |
+
"steps": [
|
| 11 |
+
"启动应用并输入角色名。",
|
| 12 |
+
"等待开场故事生成完成。",
|
| 13 |
+
"输入:和村长老伯谈谈最近森林里的怪事",
|
| 14 |
+
"观察状态栏、剧情文本和下一组选项。"
|
| 15 |
+
],
|
| 16 |
+
"expected_checks": [
|
| 17 |
+
"开场文本能正常生成,不会卡在加载状态。",
|
| 18 |
+
"NLU 应将输入识别为 TALK 或至少正确指向 村长老伯。",
|
| 19 |
+
"剧情文本应与村庄/森林线索相关,而不是完全无关的泛化回复。",
|
| 20 |
+
"页面仍然给出可继续操作的选项。"
|
| 21 |
+
],
|
| 22 |
+
"use_logs": [
|
| 23 |
+
"查看 logs/interactions 最新 JSONL,确认 input_source=text_input。",
|
| 24 |
+
"确认 nlu_result.target 包含 村长老伯。",
|
| 25 |
+
"确认 used_fallback 为 false 或至少 fallback_reason 可解释。"
|
| 26 |
+
]
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"id": "demo_02_invalid_action_guard",
|
| 30 |
+
"title": "Invalid Action Guard",
|
| 31 |
+
"purpose": "验证 pre_validate_action 是否能拦截明显非法动作,并保持 UI 可继续交互。",
|
| 32 |
+
"recommended_for": [
|
| 33 |
+
"课堂演示",
|
| 34 |
+
"规则回归"
|
| 35 |
+
],
|
| 36 |
+
"steps": [
|
| 37 |
+
"在开场后直接输入:使用火把",
|
| 38 |
+
"如果当前背包里没有 火把,系统应直接驳回。",
|
| 39 |
+
"继续点击任意一个有效选项,确认会话没有被破坏。"
|
| 40 |
+
],
|
| 41 |
+
"expected_checks": [
|
| 42 |
+
"系统给出明确的驳回说明,而不是生成一段错误剧情。",
|
| 43 |
+
"本轮不应出现异常状态变化。",
|
| 44 |
+
"驳回后仍然保留可继续点击的选项。"
|
| 45 |
+
],
|
| 46 |
+
"use_logs": [
|
| 47 |
+
"确认 telemetry.engine_mode=pre_validation。",
|
| 48 |
+
"确认 state_changes 为空。",
|
| 49 |
+
"确认输出文本中包含驳回提示。"
|
| 50 |
+
]
|
| 51 |
+
},
|
| 52 |
+
{
|
| 53 |
+
"id": "demo_03_branch_difference",
|
| 54 |
+
"title": "Branch Difference Check",
|
| 55 |
+
"purpose": "验证不同选择是否带来可观察的剧情和状态差异。",
|
| 56 |
+
"recommended_for": [
|
| 57 |
+
"课堂演示",
|
| 58 |
+
"分支验证"
|
| 59 |
+
],
|
| 60 |
+
"steps": [
|
| 61 |
+
"第一次游玩输入:前往村庄旅店",
|
| 62 |
+
"第二次重新开始后输入:探索一下村庄广场",
|
| 63 |
+
"对比两次输出、状态栏和下一组选项。"
|
| 64 |
+
],
|
| 65 |
+
"expected_checks": [
|
| 66 |
+
"两次剧情文本不应高度重复。",
|
| 67 |
+
"当前场景、事件氛围、后续选项至少有一项明显不同。",
|
| 68 |
+
"日志中的 post_turn_snapshot.location 或 options 应存在差异。"
|
| 69 |
+
],
|
| 70 |
+
"use_logs": [
|
| 71 |
+
"对比两份 JSONL 中的 output_text。",
|
| 72 |
+
"对比 options 和 post_turn_snapshot.location。"
|
| 73 |
+
]
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"id": "demo_04_resource_update",
|
| 77 |
+
"title": "Resource Update Check",
|
| 78 |
+
"purpose": "验证资源类操作后的状态更新和日志记录是否完整。",
|
| 79 |
+
"recommended_for": [
|
| 80 |
+
"日常回归",
|
| 81 |
+
"报告案例"
|
| 82 |
+
],
|
| 83 |
+
"steps": [
|
| 84 |
+
"开场后输入:使用小型治疗药水",
|
| 85 |
+
"再输入:休息一会儿",
|
| 86 |
+
"观察 HP、士气、理智、饥饿度以及输出文本。"
|
| 87 |
+
],
|
| 88 |
+
"expected_checks": [
|
| 89 |
+
"如果药水/休息被正常处理,状态栏应发生可解释变化。",
|
| 90 |
+
"即使触发 fallback,日志也应完整记录 latency、fallback_reason、output_text。",
|
| 91 |
+
"不会出现 UI 卡住、按钮消失、会话丢失。"
|
| 92 |
+
],
|
| 93 |
+
"use_logs": [
|
| 94 |
+
"查看 change_log 和 post_turn_snapshot 中的属性变化。",
|
| 95 |
+
"确认 turn_index 连续递增。"
|
| 96 |
+
]
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"id": "demo_05_failure_case_capture",
|
| 100 |
+
"title": "Failure Case Capture",
|
| 101 |
+
"purpose": "为报告准备失败案例,不追求系统成功,而是追求可解释、可记录。",
|
| 102 |
+
"recommended_for": [
|
| 103 |
+
"报告撰写",
|
| 104 |
+
"失败案例收集"
|
| 105 |
+
],
|
| 106 |
+
"steps": [
|
| 107 |
+
"尝试一个偏自由、模糊或边界的输入,例如:我想扔石头试试看",
|
| 108 |
+
"观察系统将其解析成什么意图,以及输出是否合理。",
|
| 109 |
+
"记录这一轮日志,留作后续 failure case 分析。"
|
| 110 |
+
],
|
| 111 |
+
"expected_checks": [
|
| 112 |
+
"系统即使处理得不好,也应能给出可继续游玩的输出。",
|
| 113 |
+
"日志里应保留 parser_source、fallback 信息和完整 output_text。",
|
| 114 |
+
"这一轮适合在报告里分析 NLU 或分支控制的局限。"
|
| 115 |
+
],
|
| 116 |
+
"use_logs": [
|
| 117 |
+
"保存对应 JSONL 片段。",
|
| 118 |
+
"重点关注 nlu_result.intent、parser_source、used_fallback。"
|
| 119 |
+
]
|
| 120 |
+
}
|
| 121 |
+
]
|
demo/show_demo_checklist.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
# Resolve scenarios.json relative to this script (same directory), so the
# checklist can be printed regardless of the current working directory.
SCENARIO_FILE = Path(__file__).with_name("scenarios.json")
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def main() -> None:
    """Print the demo scenarios from ``SCENARIO_FILE`` as a plain-text checklist.

    The file is read as UTF-8 and parsed as JSON; the parsed value is iterated
    as a sequence of scenario mappings. Each scenario must provide ``title``,
    ``id`` and ``purpose`` (a missing key raises ``KeyError``). The list
    fields ``recommended_for``, ``steps``, ``expected_checks`` and
    ``use_logs`` are optional and are treated as empty when absent.
    """
    raw_text = SCENARIO_FILE.read_text(encoding="utf-8")
    entries = json.loads(raw_text)

    print("StoryWeaver Demo Checklist")
    print("=" * 28)

    # Sections rendered as simple bullet lists: (heading, key in the scenario).
    bullet_sections = (
        ("Expected checks:", "expected_checks"),
        ("Use logs:", "use_logs"),
    )

    for number, entry in enumerate(entries, start=1):
        # Header: running number, human-readable title and the stable id.
        print(f"\n[{number}] {entry['title']} ({entry['id']})")
        print(f"Purpose: {entry['purpose']}")

        audiences = entry.get("recommended_for", [])
        print("Recommended for: " + ", ".join(audiences))

        # Steps are numbered because their order matters during the demo.
        print("Steps:")
        step_number = 0
        for step_text in entry.get("steps", []):
            step_number += 1
            print(f" {step_number}. {step_text}")

        for heading, key in bullet_sections:
            print(heading)
            for line in entry.get(key, []):
                print(f" - {line}")
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
if __name__ == "__main__":
|
| 34 |
+
main()
|