import json
import re
from typing import Any, Dict, Iterable, Optional


def try_parse_tool_call(text: str) -> Optional[Dict[str, Any]]:
    """Return the tool-call dict encoded in *text*, or ``None``.

    *text* is accepted when a JSON object can be extracted from it (see
    ``parse_json_dict``) and that object:

    * has ``"action" == "call_tool"``,
    * contains a ``"tool"`` key, and
    * has ``"arguments"`` that is missing, ``null`` or a JSON object.

    Args:
        text: Raw text that may contain a tool-call JSON object.

    Returns:
        The parsed dict when all checks pass, otherwise ``None``.
    """
    try:
        obj = parse_json_dict(text)
    except (ValueError, TypeError):
        # ValueError: no parseable object; TypeError: text is not a str.
        # Anything else (e.g. KeyboardInterrupt) must propagate.
        return None

    if obj.get("action") != "call_tool":
        return None
    if "tool" not in obj:
        return None

    args = obj.get("arguments", {})
    if args is not None and not isinstance(args, dict):
        return None
    return obj


# Matches ```json ... ``` and ```jsonc ... ``` fences (drop "jsonc" if not
# needed). "jsonc" must come first in the alternation: with "json" first the
# regex engine never tries "jsonc", and the trailing "c" leaks into group 1.
_CODE_FENCE_RE = re.compile(
    r"```(?:jsonc|json)\s*(.*?)\s*```",
    flags=re.IGNORECASE | re.DOTALL,
)


def _strip_trailing_commas_once(s: str) -> str:
    """Remove redundant commas that directly precede ``}`` or ``]``.

    A whole run of commas (optionally separated by whitespace) in front of
    a closing bracket is removed in this single pass; the whitespace itself
    is kept. Content inside double-quoted strings is copied verbatim, so
    commas inside string values are never touched.

    Args:
        s: JSON-like text.

    Returns:
        The text with trailing commas removed.
    """
    out = []
    in_str = False
    escape = False
    i = 0
    n = len(s)
    while i < n:
        c = s[i]

        if in_str:
            out.append(c)
            if escape:
                # The character after a backslash never ends the string.
                escape = False
            elif c == "\\":
                escape = True
            elif c == '"':
                in_str = False
            i += 1
            continue

        if c == '"':
            in_str = True
            out.append(c)
            i += 1
            continue

        if c == ",":
            # Consume the whole run of commas/whitespace starting here. The
            # run cannot contain a quote, so jumping over it is safe.
            j = i + 1
            while j < n and s[j] in ", \t\r\n":
                j += 1
            run = s[i:j]
            if j < n and s[j] in "}]":
                # The run ends at a closing bracket: every comma in it is a
                # trailing comma. Keep only the whitespace.
                run = run.replace(",", "")
            out.append(run)
            i = j
            continue

        out.append(c)
        i += 1

    return "".join(out)


def _strip_trailing_commas(s: str, max_passes: int = 10) -> str:
    """Apply ``_strip_trailing_commas_once`` until the text stops changing.

    Args:
        s: JSON-like text.
        max_passes: Upper bound on the number of passes; with ``0`` the
            input is returned unchanged.

    Returns:
        The cleaned text (best effort if the pass limit is reached).
    """
    for _ in range(max_passes):
        s2 = _strip_trailing_commas_once(s)
        if s2 == s:
            return s2
        s = s2
    return s  # best effort


def _extract_balanced_object(text: str, start: int) -> Optional[str]:
    """Extract a brace-balanced ``{...}`` substring starting at *start*.

    Callers are expected to pass an index where ``text[start] == '{'``.
    Braces inside double-quoted strings are ignored.

    Args:
        text: Text to scan.
        start: Index of the opening brace.

    Returns:
        The balanced substring, or ``None`` if the braces never balance.
    """
    depth = 0
    in_str = False
    escape = False
    for i in range(start, len(text)):
        c = text[i]

        if in_str:
            if escape:
                escape = False
            elif c == "\\":
                escape = True
            elif c == '"':
                in_str = False
            continue

        if c == '"':
            in_str = True
            continue

        if c == "{":
            depth += 1
        elif c == "}":
            depth -= 1
            if depth == 0:
                return text[start : i + 1]
    return None


def _iter_fenced_json_blocks(text: str) -> Iterable[str]:
    """Yield the stripped content of every JSON code fence in *text*."""
    for m in _CODE_FENCE_RE.finditer(text):
        block = m.group(1)
        if block is not None:
            yield block.strip()


def _iter_object_candidates(text: str) -> Iterable[str]:
    """Yield every brace-balanced ``{...}`` substring, in order of appearance.

    Every ``{`` in *text* is tried as a starting point, so nested objects
    are yielded after the object that encloses them.
    """
    for idx, ch in enumerate(text):
        if ch == "{":
            cand = _extract_balanced_object(text, idx)
            if cand:
                yield cand


def parse_json_dict(text: str) -> Dict[str, Any]:
    """Parse a JSON object (dict) out of an arbitrary string.

    Supported input shapes:
      1. Markdown fenced JSON: ```json ... ```
      2. JSON surrounded by additional text
      3. Redundant (trailing) commas before ``}`` / ``]``

    Fenced blocks are searched first, then the whole text. Within each
    search space the candidates are tried in order of appearance and the
    first one that parses to a dict is returned.

    Args:
        text: Input string.

    Returns:
        The first JSON object that parses successfully.

    Raises:
        ValueError: No substring could be parsed into a dict.
        TypeError: *text* is not a ``str``.
    """
    if not isinstance(text, str):
        raise TypeError(f"text must be str, got {type(text).__name__}")

    # Fenced blocks take priority; the whole text is the fallback.
    search_spaces = list(_iter_fenced_json_blocks(text))
    search_spaces.append(text)

    last_err: Optional[Exception] = None
    # Shared across search spaces so a candidate that already failed inside
    # a fenced block is not parsed again during the whole-text pass.
    seen = set()

    for space in search_spaces:
        # Scanning starts at the first '{', so a leading BOM, whitespace or
        # prose in front of the object needs no special handling.
        for cand in _iter_object_candidates(space):
            if cand in seen:
                continue
            seen.add(cand)

            cleaned = _strip_trailing_commas(cand).strip()
            try:
                obj = json.loads(cleaned)
            except (ValueError, RecursionError) as e:
                # json.JSONDecodeError is a ValueError; RecursionError can
                # occur on pathologically deep nesting.
                last_err = e
                continue
            if isinstance(obj, dict):
                return obj

    raise ValueError("No valid JSON object (dict) found in input") from last_err


# ----------------- usage example -----------------
if __name__ == "__main__":
    # The fence below is never closed, so the fenced-block regex does not
    # match and the object is recovered by the whole-text fallback.
    s1 = """这里是结果:
```json
{
  "a": 1,
  "b": [1, 2,],
}
"""
    print(parse_json_dict(s1))