"""
对模型输出做轻量静态校验(不依赖外部库)。
目标:
- 提前发现“缺少 mGDL / 缺少核心模块 / 输出里存在占位符”等高频失败
- 为“最小修改修复”提供结构化的 FAIL 列表
"""
import re
import unicodedata
from typing import Dict, List, Optional, Tuple
# Fenced Markdown code block; group 1 is the text between the fences.
# The opening fence may carry any info string (language tag, trailing spaces,
# a "\r" from CRLF line endings) up to the end of its line.
_FENCE_RE = re.compile(r"```[^\n`]*\n(.*?)```", re.DOTALL)
# Regex fragment for a "greater than" sign, ASCII ">" or fullwidth U+FF1E.
# The fullwidth form is written as an escape so it survives width folding of
# this source file.
_CJK_GT = r"[>\uFF1E]"
def _extract_mgdl_block(text: str) -> str:
    """Return the part of a response that looks most like mGDL source.

    Fenced code blocks are inspected in order. The first block containing a
    ``(game`` root (the token followed by any whitespace) is returned
    stripped. If no block has that root, the last block starting with
    ``(define_game`` is returned instead. When the response has no fenced
    block at all, the whole text is returned if it contains the root.

    Args:
        text: Full model response; may be empty or ``None``.

    Returns:
        The selected mGDL text, or ``""`` when nothing qualifies.
    """
    if not text:
        return ""

    def has_game_root(candidate: str) -> bool:
        # Require whitespace after "(game" so "(game_variant" is not taken
        # for the root, while a root followed by a newline or tab still counts.
        return re.search(r"\(game\s", candidate) is not None

    blocks = _FENCE_RE.findall(text)
    if not blocks:
        # No fenced block: fall back to searching the whole response.
        return text if has_game_root(text) else ""

    fallback = ""
    for raw in blocks:
        candidate = (raw or "").strip()
        if has_game_root(candidate):
            return candidate
        if candidate.startswith("(define_game"):
            fallback = candidate
    return fallback
def _has_any(text: str, needles: List[str]) -> bool:
    """Report whether at least one of *needles* occurs in *text*.

    A falsy *text* (``None`` or ``""``) is searched as the empty string.
    """
    haystack = text if text else ""
    for needle in needles:
        if needle in haystack:
            return True
    return False
def _re_search(pattern: str, text: str) -> bool:
    """Report whether *pattern* matches anywhere in *text*.

    The search runs with ``re.DOTALL``, so ``.`` also matches newlines.
    A falsy *text* (``None`` or ``""``) is searched as the empty string.
    """
    subject = "" if not text else text
    return re.search(pattern, subject, re.DOTALL) is not None
def validate_mahjong_response(text: str) -> List[Dict[str, str]]:
    """Statically check a model response for common omissions.

    These are presence checks on the text, not logical proofs. Checks on the
    natural-language part are tried on the raw text and on an NFKC-folded
    copy, so fullwidth punctuation or digits do not cause spurious warnings.
    Checks on the mGDL source and on placeholders use the raw text only.

    Args:
        text: Full model response (natural-language rules plus mGDL).

    Returns:
        Findings in check order. Each is a dict with the keys ``code``,
        ``level`` (``"error"`` or ``"warning"``) and ``message``. An empty
        list means no check fired. Blank input yields a single ``EMPTY``
        error. When no mGDL is detected, the function returns right after
        ``NO_MGDL`` (preceded by ``PLACEHOLDER`` if that fired) and skips the
        remaining checks.
    """
    if not text or not text.strip():
        return [{"code": "EMPTY", "level": "error", "message": "模型返回空内容"}]

    issues: List[Dict[str, str]] = []

    def report(code: str, level: str, message: str) -> None:
        issues.append({"code": code, "level": level, "message": message})

    # Width-folded copy for the natural-language checks. The raw text is
    # always tried first, so folding can only remove warnings, never add one.
    folded = unicodedata.normalize("NFKC", text)
    variants = (text,) if folded == text else (text, folded)

    def mentions(needles: List[str]) -> bool:
        return any(_has_any(variant, needles) for variant in variants)

    def matches(pattern: str) -> bool:
        return any(_re_search(pattern, variant) for variant in variants)

    # 1) Unexpanded template placeholders.
    if _has_any(text, ["<PID>", "<NextPID>", "<Variant_Name>", "<custom>"]):
        report(
            "PLACEHOLDER",
            "error",
            "输出包含占位符(如 <PID>/<Variant_Name>),需展开为实际值(A1/A2/A3/A4 等)。",
        )

    # 2) The mGDL source must be present; nothing below is meaningful without it.
    mgdl = _extract_mgdl_block(text)
    if not mgdl:
        report("NO_MGDL", "error", "未检测到 mGDL 代码块或 (game ...) 根节点。")
        return issues

    # 3) Design-log section.
    if not mentions(["设计日志(创新推演摘要)"]):
        report(
            "NO_DESIGN_LOG",
            "warning",
            "未检测到“设计日志(创新推演摘要)”段落;Phase-1 建议补齐融合清单/冲突桥接/推演摘要/落地映射。",
        )

    # 4) Hand-conservation aids: the action/hand-delta table and the minimal
    #    turn walkthrough.
    if not mentions(["动作—手牌变化—轮次影响表", "动作-手牌变化-轮次影响表"]):
        report(
            "NO_HAND_DELTA_TABLE",
            "warning",
            "未检测到《动作—手牌变化—轮次影响表》;该表用于避免“出牌后手牌不变”等守恒错误,建议补齐。",
        )
    if not mentions(["最小回合推演"]):
        report(
            "NO_MIN_SIMULATION",
            "warning",
            "未检测到“最小回合推演”(普通/碰/杠三段);建议补齐以验证手牌守恒与轮次控制。",
        )

    # 5) Explicit statements of the baseline rules.
    if not matches(r"(起手|初始).*13\s*张"):
        report(
            "NO_START_HAND_13",
            "warning",
            "未显式声明“标准起手 13 张”(麻将机制说明的基础逻辑);建议在基础规则中补一句。",
        )
    if not matches(r"(摸|抓).*(14\s*张)"):
        report(
            "NO_DRAW_TO_14",
            "warning",
            "未显式声明“摸牌后手牌为 14 张”(基础逻辑);建议补充以便审计守恒。",
        )
    if not matches(r"(打|弃|出).*(回到|恢复|为).*(13\s*张)"):
        report(
            "NO_DISCARD_BACK_13",
            "warning",
            "未显式声明“打牌后手牌回到 13 张”(基础逻辑);建议补充以便审计守恒。",
        )

    # Response priority among win / pong / kong / chow.
    if not matches(rf"胡.*{_CJK_GT}.*碰.*{_CJK_GT}.*杠.*{_CJK_GT}.*吃"):
        report(
            "NO_PRIORITY_ORDER",
            "warning",
            "未显式声明“胡>碰>杠>吃”的响应优先级(基础逻辑);建议补齐以避免争议场景。",
        )

    # Chow restriction.
    if not mentions(["仅能吃上家", "只能吃上家", "只可吃上家"]):
        report(
            "NO_CHI_UPWIND_ONLY",
            "warning",
            "未显式声明“吃仅能吃上家牌”(基础逻辑);建议补齐。",
        )

    # Turn order and who discards after a claim.
    if not mentions(["庄家-下家-对家-上家", "庄家→下家→对家→上家", "庄家 → 下家 → 对家 → 上家"]):
        report(
            "NO_TURN_ORDER_BASE",
            "warning",
            "未显式声明“庄家-下家-对家-上家”的行牌顺序(基础逻辑);建议补齐。",
        )
    if not mentions(["由碰牌的玩家继续出牌", "由吃/碰者继续出牌", "碰后由碰者出牌"]):
        report(
            "NO_POST_PENG_RIGHTS",
            "warning",
            "未显式声明“碰/吃后由碰/吃者继续出牌”的出牌权规则(基础逻辑);建议补齐。",
        )
    if not mentions(["由杠牌的玩家摸牌后继续出牌", "杠后由杠者补牌后继续出牌", "杠后由杠者继续出牌"]):
        report(
            "NO_POST_KONG_RIGHTS",
            "warning",
            "未显式声明“杠后由杠者补牌/摸牌后继续出牌”的出牌权规则(基础逻辑);建议补齐。",
        )

    # Both winning triggers must be mentioned.
    if not (mentions(["自摸"]) and mentions(["点炮"])):
        report(
            "NO_ZIMO_DIANPAO",
            "warning",
            "未同时出现“自摸/点炮”两种胡牌触发方式(基础逻辑);建议补齐。",
        )

    # Mechanics that change the draw/discard rhythm need a walkthrough that
    # names the mechanic somewhere after the walkthrough heading.
    special_rhythm_terms = ["连续摸", "摸三打三", "海底漫游", "海捞阶段", "海捞区"]
    if mentions(special_rhythm_terms):
        rhythm_pattern = (
            r"(最小回合推演).*("
            + "|".join(map(re.escape, special_rhythm_terms))
            + ")"
        )
        if not matches(rhythm_pattern):
            report(
                "NO_SPECIAL_MIN_SIM",
                "warning",
                "检测到改变摸打节奏的机制(如 摸三打三/连续摸/海捞),但未看到对应机制的额外“最小回合推演”;建议补齐以验证守恒。",
            )

    # 6) Core mGDL modules, checked on the raw mGDL source.
    required_markers = [
        "(game_variant",
        "(players",
        "(tileset",
        "(extensions",
        "(seats",
        "(turn_order",
        "(setup",
        "(actions",
        "(win_rules",
        "(scoring",
        "(fan_table",
        "(settlement",
        "(invariants",
    ]
    missing = [marker for marker in required_markers if marker not in mgdl]
    if missing:
        report(
            "MISSING_MODULES",
            "error",
            "mGDL 缺少核心模块: {0}".format(", ".join(missing)),
        )

    # 7) special_mechanics referenced in the response but absent from the mGDL.
    if "(special_mechanics" not in mgdl and "extensions.special_mechanics" in text:
        report(
            "SPECIAL_MECH_MISMATCH",
            "warning",
            "自然语言提到 special_mechanics,但 mGDL 中未出现 (special_mechanics ...) 或 extensions.special_mechanics 结构。",
        )

    # 8) Presence of a (total ...) field; its value is not verified.
    if "(total" not in mgdl:
        report(
            "NO_TILE_TOTAL",
            "warning",
            "tileset 中未检测到 (total N),容易导致牌数不自洽。",
        )

    # 9) Invariant names expected in the mGDL.
    if "tile_conservation" not in mgdl:
        report(
            "NO_TILE_CONSERVATION",
            "warning",
            "(invariants ...) 中未检测到 tile_conservation;建议补齐以显式声明牌数守恒。",
        )
    if "hand_size_stable" not in mgdl:
        report(
            "NO_HAND_SIZE_STABLE",
            "warning",
            "(invariants ...) 中未检测到 hand_size_stable(或等价声明);建议补齐以约束回合结束手牌稳定值。",
        )

    return issues
def format_issues_for_llm(issues: List[Dict[str, str]]) -> str:
    """Render findings as a numbered list, one ``N. [code] message`` per line.

    Numbering starts at 1. A missing ``code`` or ``message`` key renders as
    ``None``. Leading and trailing whitespace of the joined result is
    stripped, and an empty *issues* list yields ``""``.
    """
    if not issues:
        return ""
    rendered = [
        f"{number}. [{issue.get('code')}] {issue.get('message')}"
        for number, issue in enumerate(issues, start=1)
    ]
    joined = "\n".join(rendered)
    return joined.strip()
|