Spaces:
Sleeping
Sleeping
| """ | |
| 优化的 GDL 和自然语言提取函数 | |
| 可以直接复制到 app.py 中使用 | |
| """ | |
| import re | |
def _find_first_marker(content, patterns):
    """Return the match for the highest-priority pattern found in *content*.

    Patterns are tried in list order; the first pattern that matches anywhere
    in the text wins, even if a later pattern would match at an earlier
    position. Matching is case-insensitive.

    Returns:
        The ``re.Match`` object, or ``None`` when no pattern matches.
    """
    for pattern in patterns:
        match = re.search(pattern, content, re.IGNORECASE)
        if match:
            return match
    return None


def extract_gdl_and_narrative(content):
    """Split AI-generated text into its GDL part and its natural-language part.

    Recognised section headings include:
        - ``## GDL描述`` / ``## GDL 描述`` / ``##GDL描述`` / ``GDL描述``
        - ``## 自然语言规则说明`` / ``##自然语言规则说明``
        - several other common variants (see the pattern lists below)

    Each returned section starts at its heading (the heading text is kept)
    and is stripped of surrounding whitespace. Progress and warnings are
    reported with ``print``.

    Args:
        content: The complete text produced by the model.

    Returns:
        tuple: ``(gdl_content, narrative_content)``. A section whose heading
        is missing is returned as ``""``. When neither heading is found the
        result of ``smart_split_content(content)`` is returned. Empty or
        ``None`` input yields ``("", "")``.
    """
    if not content:
        return "", ""

    # ---------- Heading patterns, ordered by priority ----------
    gdl_patterns = [
        r"##\s*GDL\s*描述",  # "## GDL描述" or "## GDL 描述" (preferred form)
        r"##\s*GDL描述",  # "##GDL描述" (already covered by the pattern above)
        r"GDL\s*描述",  # heading without the leading "##"
        r"##\s*GDL\s*Description",  # English heading
        r"##\s*GDL",  # shortened heading
    ]
    narrative_patterns = [
        r"##\s*自然语言规则说明",  # preferred form
        r"##\s*自然语言规则",
        r"自然语言规则说明",  # heading without the leading "##"
        r"自然语言规则",
        r"##\s*Natural\s*Language",  # English heading
        r"规则说明",  # shortened heading
    ]

    # ---------- Locate both headings ----------
    gdl_match = _find_first_marker(content, gdl_patterns)
    narrative_match = _find_first_marker(content, narrative_patterns)

    # ---------- Neither heading present: structural fallback ----------
    if gdl_match is None and narrative_match is None:
        print("❌ 未找到任何标记,尝试智能分割...")
        return smart_split_content(content)

    # ---------- Only one heading present ----------
    if narrative_match is None:
        print(f"⚠️ 仅找到GDL标记 '{gdl_match.group()}',将其后全部内容作为GDL")
        return content[gdl_match.start():].strip(), ""
    if gdl_match is None:
        print(f"⚠️ 仅找到自然语言标记 '{narrative_match.group()}',将其后全部内容作为自然语言")
        return "", content[narrative_match.start():].strip()

    # ---------- Both headings present ----------
    gdl_start = gdl_match.start()
    narrative_start = narrative_match.start()
    if gdl_start < narrative_start:
        # Expected order: GDL section first, narrative section up to the end.
        gdl_content = content[gdl_start:narrative_start].strip()
        narrative_content = content[narrative_start:].strip()
    else:
        # Reversed order: each section still runs from its OWN heading to the
        # next heading (or the end). Swapping only the indices would hand the
        # narrative text back as GDL and vice versa.
        print("⚠️ 警告: 标记顺序异常,已自动纠正")
        narrative_content = content[narrative_start:gdl_start].strip()
        gdl_content = content[gdl_start:].strip()

    # Suspiciously short sections are reported but still returned.
    if len(gdl_content) < 20 or len(narrative_content) < 20:
        print(f"⚠️ 警告: 提取内容过短 (GDL:{len(gdl_content)}, 自然语言:{len(narrative_content)})")
    print(f"✅ 提取成功: GDL({len(gdl_content)}字符), 自然语言({len(narrative_content)}字符)")
    return gdl_content, narrative_content
def _find_closing_paren(text, start, skip_strings):
    """Return the index of the ``)`` that balances the ``(`` at *start*.

    Args:
        text: The text to scan.
        start: Index at which scanning begins (expected to be a ``(``).
        skip_strings: When true, parentheses inside double-quoted strings are
            ignored so that text such as ``"smile :)"`` cannot unbalance the
            count.

    Returns:
        The index of the balancing ``)``, or ``None`` if the text ends first.
    """
    depth = 0
    in_string = False
    for index in range(start, len(text)):
        char = text[index]
        if skip_strings and char == '"':
            in_string = not in_string
        elif in_string:
            continue
        elif char == "(":
            depth += 1
        elif char == ")":
            depth -= 1
            if depth == 0:
                return index
    return None


def smart_split_content(content):
    """Split unmarked text by locating the GDL code block (fallback strategy).

    The GDL block is recognised by its opening ``(game "<name>"`` form and
    extends to the parenthesis that balances that opening ``(``. Whatever
    follows the block is returned as the natural-language part; text that
    precedes the block is not returned.

    Args:
        content: Text that contains no recognised section headings.

    Returns:
        tuple: ``(gdl_content, narrative_content)``, both stripped, or
        ``("", "")`` when no balanced ``(game "..." ...)`` block is found
        (including empty or ``None`` input).
    """
    match = re.search(r'\(game\s+"[^"]+?"', content) if content else None
    if match:
        gdl_start = match.start()
        # Prefer a scan that ignores parentheses inside quoted strings; if a
        # stray quote keeps that scan from finding an end, fall back to plain
        # parenthesis counting.
        gdl_end = _find_closing_paren(content, gdl_start, skip_strings=True)
        if gdl_end is None:
            gdl_end = _find_closing_paren(content, gdl_start, skip_strings=False)
        if gdl_end is not None:
            gdl_content = content[gdl_start:gdl_end + 1].strip()
            narrative_content = content[gdl_end + 1:].strip()
            print("🔍 智能分割成功: 识别到GDL代码块")
            return gdl_content, narrative_content

    print("❌ 智能分割失败")
    return "", ""
# ========== Self-test ==========
if __name__ == "__main__":
    # Sample model outputs, one per supported layout.
    samples = [
        # Case 1: standard headings
        """
## 游戏名称
测试游戏
## GDL描述
(game "TestGame"
(players 3)
)
## 自然语言规则说明
1. 这是规则
""",
        # Case 2: headings without a space after "##"
        """
##GDL描述
(game "TestGame"
(players 3)
)
##自然语言规则说明
1. 这是规则
""",
        # Case 3: heading with a space between "GDL" and "描述"
        """
## GDL 描述
(game "TestGame"
(players 3)
)
## 自然语言规则说明
1. 这是规则
""",
        # Case 4: no "##" headings at all
        """
(game "TestGame"
(players 3)
)
这是自然语言规则说明
""",
    ]

    banner = "=" * 60
    print(banner)
    print("测试 GDL 提取功能")
    print(banner)

    for number, sample in enumerate(samples, start=1):
        print(f"\n测试用例 {number}:")
        print("-" * 60)
        gdl_part, narrative_part = extract_gdl_and_narrative(sample)
        if not (gdl_part or narrative_part):
            # Both parts empty: nothing could be extracted.
            print("提取失败")
            continue
        print(f"GDL 长度: {len(gdl_part)}")
        print(f"自然语言长度: {len(narrative_part)}")

    print("\n" + banner)