|
|
"""测试和修复正则表达式问题""" |
|
|
|
|
|
import json |
|
|
import re |
|
|
|
|
|
|
|
|
# Fenced ```json block. Group 1 captures the JSON object between the fences;
# DOTALL lets the lazy `.*?` run across the line breaks inside the block.
TOOL_CALL_FENCE_PATTERN = re.compile(
    r"```json\s*"
    r"(\{.*?\})"
    r"\s*```",
    flags=re.DOTALL,
)

# Legacy inline matcher: an opening brace, up to 10000 non-brace characters,
# the literal "tool_calls", then lazily up to the *first* closing brace.
# Because it stops at the first `}`, it cannot span nested objects.
TOOL_CALL_INLINE_PATTERN_OLD = re.compile(
    r'(\{[^{}]{0,10000}\"tool_calls\".*?\})',
    flags=re.DOTALL,
)

# Regex-only attempt at a better inline matcher. It tolerates one level of
# nested braces on either side of the "tool_calls" array, and the array part
# ends at the first `]`.
# NOTE: the pattern has no `^`/`$` anchors, so re.MULTILINE does not change
# what it matches; the flag is kept unchanged for parity.
TOOL_CALL_INLINE_PATTERN_NEW = re.compile(
    r"\{"
    r"(?:[^{}]|\{[^{}]*\})*"
    r'"tool_calls"\s*:\s*'
    r"\[[^\]]*\]"
    r"(?:[^{}]|\{[^{}]*\})*"
    r"\}",
    flags=re.MULTILINE,
)
|
|
|
|
|
def remove_tool_json_content_old(text: str) -> str:
    """Strip tool-call JSON from *text* using the original regex-only approach.

    Kept as the baseline that the improved implementation is compared against.

    Steps:
      1. Every fenced ```json block whose body decodes to JSON containing a
         "tool_calls" key is removed; any other fenced block is left as is.
      2. Every match of the legacy inline pattern is removed without further
         checks. That pattern ends at the first closing brace after
         "tool_calls", so nested objects are only partially removed.

    Returns the remaining text with leading/trailing whitespace stripped.
    """

    def _drop_if_tool_call(fence: re.Match) -> str:
        # Decide from the decoded payload; undecodable blocks are kept verbatim.
        try:
            payload = json.loads(fence.group(1))
            is_tool_call = "tool_calls" in payload
        except (json.JSONDecodeError, AttributeError):
            is_tool_call = False
        return "" if is_tool_call else fence.group(0)

    without_fences = TOOL_CALL_FENCE_PATTERN.sub(_drop_if_tool_call, text)
    without_inline = TOOL_CALL_INLINE_PATTERN_OLD.sub("", without_fences)
    return without_inline.strip()
|
|
|
|
|
def _parse_json_object(candidate: str):
    """Return *candidate* decoded as a JSON object (``dict``), or ``None``.

    ``None`` is returned when the text is not valid JSON, is nested too deeply
    for the decoder, or decodes to something other than an object.
    """
    try:
        decoded = json.loads(candidate)
    except (json.JSONDecodeError, RecursionError):
        return None
    return decoded if isinstance(decoded, dict) else None


def _balanced_object_end(text: str, start: int) -> int:
    """Return the index just past the ``}`` closing the ``{`` at *start*.

    Braces inside double-quoted strings are ignored and a backslash escapes
    the character that follows it. Returns ``-1`` when the opening brace is
    never closed.
    """
    depth = 1
    in_string = False
    escaped = False
    for pos in range(start + 1, len(text)):
        char = text[pos]
        if escaped:
            # The previous character was a backslash: take this one literally.
            escaped = False
        elif char == "\\":
            escaped = True
        elif char == '"':
            in_string = not in_string
        elif not in_string:
            if char == "{":
                depth += 1
            elif char == "}":
                depth -= 1
                if depth == 0:
                    return pos + 1
    return -1


def remove_tool_json_content_new(text: str) -> str:
    """Strip tool-call JSON from *text* using brace balancing.

    Steps:
      1. Every fenced ```json block whose body decodes to a JSON object with
         a "tool_calls" key is removed; any other fenced block is left as is.
      2. The remaining text is scanned for ``{``. For each one, the matching
         ``}`` is located by counting braces (string- and escape-aware) and
         the enclosed span is decoded as JSON:
           * an object with a top-level "tool_calls" key is dropped;
           * any other valid JSON object is kept as a whole, so objects
             nested inside ordinary JSON are never cut out of it;
           * a span that is unbalanced or not valid JSON is treated as plain
             text, and scanning resumes right after the opening brace.

    Args:
        text: Text that may contain tool-call JSON.

    Returns:
        The text without tool-call JSON, stripped of leading and trailing
        whitespace.
    """

    def _drop_fenced_tool_call(match: re.Match) -> str:
        parsed = _parse_json_object(match.group(1))
        if parsed is not None and "tool_calls" in parsed:
            return ""
        return match.group(0)

    cleaned = TOOL_CALL_FENCE_PATTERN.sub(_drop_fenced_tool_call, text)

    kept = []
    pos = 0
    while True:
        brace = cleaned.find("{", pos)
        if brace == -1:
            # No more candidates: the rest is plain text.
            kept.append(cleaned[pos:])
            break
        kept.append(cleaned[pos:brace])

        end = _balanced_object_end(cleaned, brace)
        parsed = _parse_json_object(cleaned[brace:end]) if end != -1 else None
        if parsed is None:
            # Not a JSON object: keep the brace and rescan just after it,
            # since a tool-call object may still start further inside.
            kept.append("{")
            pos = brace + 1
            continue

        if "tool_calls" not in parsed:
            kept.append(cleaned[brace:end])
        pos = end

    return "".join(kept).strip()
|
|
|
|
|
|
|
|
# Fixtures for run_tests(). Each entry has a display "name", the raw "input"
# text, and the "expected" text once tool-call JSON has been removed and the
# result stripped. Multi-line texts are written as joined line tuples so the
# number of blank lines, which the exact comparison depends on, is explicit.
test_cases = [
    # Input is nothing but a tool-call object: everything is removed.
    {
        "name": "纯工具调用JSON",
        "input": (
            '{"tool_calls": [{"id": "call_1", "type": "function", '
            '"function": {"name": "test", "arguments": "{}"}}]}'
        ),
        "expected": "",
    },
    # Tool call inside a fenced ```json block surrounded by prose.
    {
        "name": "代码块中的工具调用",
        "input": "\n".join((
            "这是一些正常的文本内容。",
            "",
            "```json",
            "{",
            '"tool_calls": [',
            "{",
            '"id": "call_123",',
            '"type": "function",',
            '"function": {',
            '"name": "test_function",',
            '"arguments": "{\\"param\\": \\"value\\"}"',
            "}",
            "}",
            "]",
            "}",
            "```",
            "",
            "这部分内容应该被保留。",
        )),
        "expected": "\n".join((
            "这是一些正常的文本内容。",
            "",
            "",
            "",
            "这部分内容应该被保留。",
        )),
    },
    # Inline tool call on its own line between paragraphs of prose.
    {
        "name": "混合内容",
        "input": "\n".join((
            "让我为您执行一个函数调用:",
            "",
            '{"tool_calls": [{"id": "call_789", "type": "function", '
            '"function": {"name": "search", '
            '"arguments": "{\\"query\\": \\"test\\"}"}}]}',
            "",
            "函数执行结果如下:",
            "- 找到了相关内容",
            "- 处理完成",
            "",
            "这里还有其他重要信息需要保留。",
        )),
        "expected": "\n".join((
            "让我为您执行一个函数调用:",
            "",
            "",
            "",
            "函数执行结果如下:",
            "- 找到了相关内容",
            "- 处理完成",
            "",
            "这里还有其他重要信息需要保留。",
        )),
    },
    # Ordinary JSON without "tool_calls" must come through untouched.
    {
        "name": "普通JSON(应保留)",
        "input": "\n".join((
            "这是一个普通的 JSON 示例:",
            '{"data": {"result": "success"}}',
            "",
            "这不是工具调用,应该保留。",
        )),
        "expected": "\n".join((
            "这是一个普通的 JSON 示例:",
            '{"data": {"result": "success"}}',
            "",
            "这不是工具调用,应该保留。",
        )),
    },
    # Tool call whose arguments string itself contains nested braces, followed
    # by an ordinary nested JSON object that must be kept.
    {
        "name": "嵌套复杂JSON",
        "input": "\n".join((
            "开始文本",
            '{"tool_calls": [{"id": "call_1", "function": {"name": "test", '
            '"arguments": "{\\"nested\\": {\\"deep\\": \\"value\\"}}"}}]}',
            "中间文本",
            '{"normal": {"data": "keep this"}}',
            "结束文本",
        )),
        "expected": "\n".join((
            "开始文本",
            "",
            "中间文本",
            '{"normal": {"data": "keep this"}}',
            "结束文本",
        )),
    },
]
|
|
|
|
|
def run_tests():
    """Run every fixture through both removers and print a comparison report.

    For each entry of ``test_cases`` the input, the output of the original
    remover, the output of the improved remover and the expected text are
    printed with ``repr``. Only the improved remover's output is compared to
    the expected text; the original remover's output is shown for reference.
    A pass/fail summary is printed at the end. Nothing is returned.
    """
    banner = "=" * 80
    rule = "-" * 40

    print(banner)
    print("测试正则表达式处理")
    print(banner)

    outcomes = []  # one bool per fixture: did the improved remover match?

    for case in test_cases:
        print(f"\n测试案例: {case['name']}")
        print(rule)
        print("输入文本:")
        print(repr(case["input"]))

        print("\n使用原始函数处理后:")
        print(repr(remove_tool_json_content_old(case["input"])))

        print("\n使用改进函数处理后:")
        actual = remove_tool_json_content_new(case["input"])
        print(repr(actual))

        print("\n期望结果:")
        print(repr(case["expected"]))

        matched = actual == case["expected"]
        print("[PASS] 新函数通过测试" if matched else "[FAIL] 新函数测试失败")
        outcomes.append(matched)

        print(rule)

    passed = sum(outcomes)
    failed = len(outcomes) - passed
    print(f"\n\n总结: {passed} 个通过, {failed} 个失败")
|
|
|
|
|
if __name__ == "__main__":
    # Run the comparison report only when executed as a script, not on import.
    run_tests()