Spaces:
Sleeping
Sleeping
| import json | |
| import re | |
| from typing import Any, Dict, Optional, Iterable | |
def try_parse_tool_call(text: str) -> Optional[Dict[str, Any]]:
    """
    Return the tool-call dict found in ``text``, or ``None`` if there is none.

    A tool call is a JSON object (located via ``parse_json_dict``) that:
      * has ``"action"`` equal to ``"call_tool"``,
      * contains a ``"tool"`` key, and
      * has ``"arguments"`` either absent, ``None``, or a dict.

    Args:
        text: Raw text that may contain a JSON tool call.

    Returns:
        The parsed dict when every check passes, otherwise ``None``.
    """
    try:
        obj = parse_json_dict(text)
    except (ValueError, TypeError):
        # parse_json_dict raises ValueError when no JSON object can be found
        # and TypeError when the input is not a str. Catching only these
        # (instead of a bare ``except``) lets KeyboardInterrupt/SystemExit and
        # genuine programming errors propagate.
        return None

    if obj.get("action") != "call_tool":
        return None
    if "tool" not in obj:
        return None

    # A missing or null "arguments" is accepted; any other non-dict is not.
    args = obj.get("arguments", {})
    if args is not None and not isinstance(args, dict):
        return None
    return obj
| # 支持 ```json ... ``` 以及 ```jsonc ... ```(可按需删掉 jsonc) | |
| _CODE_FENCE_RE = re.compile( | |
| r"```(?:json|jsonc)\s*(.*?)\s*```", | |
| flags=re.IGNORECASE | re.DOTALL, | |
| ) | |
| def _strip_trailing_commas_once(s: str) -> str: | |
| """ | |
| 删除 JSON 文本中 '}' 或 ']' 前的多余逗号(单次 pass)。 | |
| 注意:会跳过字符串内部内容,不会误删字符串里的逗号。 | |
| """ | |
| out = [] | |
| in_str = False | |
| escape = False | |
| i = 0 | |
| n = len(s) | |
| while i < n: | |
| c = s[i] | |
| if in_str: | |
| out.append(c) | |
| if escape: | |
| escape = False | |
| elif c == "\\": | |
| escape = True | |
| elif c == '"': | |
| in_str = False | |
| i += 1 | |
| continue | |
| # not in string | |
| if c == '"': | |
| in_str = True | |
| out.append(c) | |
| i += 1 | |
| continue | |
| if c == ",": | |
| # look ahead to next non-whitespace | |
| j = i + 1 | |
| while j < n and s[j] in " \t\r\n": | |
| j += 1 | |
| if j < n and s[j] in "}]": | |
| # drop this comma | |
| i += 1 | |
| continue | |
| out.append(c) | |
| i += 1 | |
| return "".join(out) | |
| def _strip_trailing_commas(s: str, max_passes: int = 10) -> str: | |
| """ | |
| 多次 pass,处理类似 ',,}' / ', ,}' 这种需要多次清理才能干净的情况。 | |
| """ | |
| for _ in range(max_passes): | |
| s2 = _strip_trailing_commas_once(s) | |
| if s2 == s: | |
| return s2 | |
| s = s2 | |
| return s # best effort | |
| def _extract_balanced_object(text: str, start: int) -> Optional[str]: | |
| """ | |
| 从 text[start] == '{' 开始,提取一个括号平衡的 JSON object 子串 {...}。 | |
| 会正确跳过字符串中的花括号。 | |
| """ | |
| depth = 0 | |
| in_str = False | |
| escape = False | |
| for i in range(start, len(text)): | |
| c = text[i] | |
| if in_str: | |
| if escape: | |
| escape = False | |
| elif c == "\\": | |
| escape = True | |
| elif c == '"': | |
| in_str = False | |
| continue | |
| if c == '"': | |
| in_str = True | |
| continue | |
| if c == "{": | |
| depth += 1 | |
| elif c == "}": | |
| depth -= 1 | |
| if depth == 0: | |
| return text[start : i + 1] | |
| return None | |
def _iter_fenced_json_blocks(text: str) -> Iterable[str]:
    """
    Yield the whitespace-stripped body of each fenced block in ``text``.

    Blocks are located with ``_CODE_FENCE_RE`` and yielded in order of
    appearance.
    """
    bodies = (match.group(1) for match in _CODE_FENCE_RE.finditer(text))
    yield from (body.strip() for body in bodies if body is not None)
| def _iter_object_candidates(text: str) -> Iterable[str]: | |
| """ | |
| 在任意文本中枚举可能的 {...} 子串(按出现顺序)。 | |
| """ | |
| for idx, ch in enumerate(text): | |
| if ch == "{": | |
| cand = _extract_balanced_object(text, idx) | |
| if cand: | |
| yield cand | |
def _iter_space_candidates(space: str) -> Iterable[str]:
    """Lazily yield candidate ``{...}`` substrings of ``space`` in try-order."""
    # If the text (ignoring leading whitespace and a BOM) starts with '{',
    # try the balanced object at the very start first so trailing noise does
    # not interfere.
    stripped = space.lstrip().lstrip("\ufeff")
    if stripped.startswith("{"):
        first = _extract_balanced_object(stripped, 0)
        if first:
            yield first
    # Then every object that appears anywhere in the text.
    yield from _iter_object_candidates(space)


def parse_json_dict(text: str) -> Dict[str, Any]:
    """
    Parse a JSON object (dict) out of an arbitrary string.

    Supports:
      1) Markdown fenced JSON: ```json ... ```
      2) JSON surrounded by extra text
      3) Trailing commas before '}' / ']' (removed before parsing)

    Fenced blocks are searched first, then the whole text. Within each
    search space, candidates are produced lazily and the first one that
    parses to a dict is returned, so later candidates are never extracted
    once a match is found.

    Args:
        text: The input string.

    Returns:
        The first candidate that parses to a dict.

    Raises:
        ValueError: No candidate could be parsed into a dict. The last
            parsing error, if any, is chained as the cause.
        TypeError: ``text`` is not a str.
    """
    if not isinstance(text, str):
        raise TypeError(f"text must be str, got {type(text).__name__}")

    # Fenced blocks take priority; the full text is the fallback.
    search_spaces = list(_iter_fenced_json_blocks(text))
    search_spaces.append(text)

    last_err: Optional[Exception] = None
    for space in search_spaces:
        # Skip substrings already attempted within this search space.
        seen = set()
        for cand in _iter_space_candidates(space):
            if cand in seen:
                continue
            seen.add(cand)
            cleaned = _strip_trailing_commas(cand).strip()
            try:
                obj = json.loads(cleaned)
            except Exception as e:
                # Deliberately broad: any failure on one candidate (invalid
                # JSON, excessive nesting, ...) just moves on to the next.
                last_err = e
                continue
            if isinstance(obj, dict):
                return obj

    raise ValueError("No valid JSON object (dict) found in input") from last_err
# ----------------- Usage example -----------------
if __name__ == "__main__":
    # Demo input: prose followed by a ```json fence that is never closed,
    # containing an object with trailing commas. Because the fence has no
    # closing ```, the object is recovered by scanning the whole text.
    s1 = """这里是结果:
```json
{
"a": 1,
"b": [1, 2,],
}
"""
    print(parse_json_dict(s1))