import json
import re
from typing import Any, Dict, Iterable, Optional


def try_parse_tool_call(text: str) -> Optional[Dict[str, Any]]:
    """
    Return the parsed dict if ``text`` contains a valid tool-call JSON object,
    otherwise return None.

    A valid tool call is a JSON object where:
      * ``"action"`` equals ``"call_tool"``
      * a ``"tool"`` key is present
      * ``"arguments"`` is absent, ``null``, or a JSON object

    Args:
        text: Raw text that may contain a JSON object (see ``parse_json_dict``
            for the accepted surrounding formats).

    Returns:
        The parsed dict, or None if no dict could be parsed or the dict does
        not satisfy the tool-call shape above.
    """
    try:
        obj = parse_json_dict(text)
    except (TypeError, ValueError):
        # parse_json_dict raises TypeError for non-str input and ValueError
        # when no JSON object can be found; anything else (KeyboardInterrupt,
        # SystemExit, ...) must not be swallowed here.
        return None

    if obj.get("action") != "call_tool":
        return None
    if "tool" not in obj:
        return None

    args = obj.get("arguments", {})
    if args is not None and not isinstance(args, dict):
        return None
    return obj


# Support ```json ... ``` and ```jsonc ... ``` (can remove jsonc if needed).
# "jsonc" must come before "json" in the alternation: regex alternation takes
# the first alternative that leads to a match, so with "json" first the
# trailing "c" of a ```jsonc fence would end up inside the captured block.
_CODE_FENCE_RE = re.compile(
    r"```(?:jsonc|json)\s*(.*?)\s*```",
    flags=re.IGNORECASE | re.DOTALL,
)


def _strip_trailing_commas_once(s: str) -> str:
    """
    Remove trailing commas before '}' or ']' in JSON text (single pass).

    A comma is dropped when the next non-whitespace character after it is
    '}' or ']'. Content inside double-quoted strings is copied verbatim, so
    commas within strings are never removed.
    """
    out = []
    in_str = False
    escape = False
    i = 0
    n = len(s)

    while i < n:
        c = s[i]

        if in_str:
            out.append(c)
            if escape:
                # Character following a backslash is always literal.
                escape = False
            elif c == "\\":
                escape = True
            elif c == '"':
                in_str = False
            i += 1
            continue

        # Not inside a string.
        if c == '"':
            in_str = True
            out.append(c)
            i += 1
            continue

        if c == ",":
            # Look ahead to the next non-whitespace character.
            j = i + 1
            while j < n and s[j] in " \t\r\n":
                j += 1
            if j < n and s[j] in "}]":
                # Drop this comma.
                i += 1
                continue

        out.append(c)
        i += 1

    return "".join(out)


def _strip_trailing_commas(s: str, max_passes: int = 10) -> str:
    """
    Remove extra commas before '}' or ']' in JSON text.

    Applies ``_strip_trailing_commas_once`` repeatedly until the text stops
    changing or ``max_passes`` passes have run. Repetition matters for runs
    of commas such as ``[1,,]``, where each pass removes only the comma that
    directly precedes the closing bracket.

    String content is skipped, so commas inside strings won't be removed.
    """
    for _ in range(max_passes):
        s2 = _strip_trailing_commas_once(s)
        if s2 == s:
            return s2
        s = s2
    return s  # best effort


def _extract_balanced_object(text: str, start: int) -> Optional[str]:
    """
    Extract a balanced JSON object substring {...} starting at ``text[start]``.

    Braces that appear inside double-quoted strings are ignored.

    Args:
        text: Text to scan.
        start: Index of the opening '{'.

    Returns:
        The substring from ``start`` through the matching '}', or None if
        ``text[start]`` is not '{' or the braces never balance.
    """
    if not 0 <= start < len(text) or text[start] != "{":
        return None

    depth = 0
    in_str = False
    escape = False

    for i in range(start, len(text)):
        c = text[i]

        if in_str:
            if escape:
                escape = False
            elif c == "\\":
                escape = True
            elif c == '"':
                in_str = False
            continue

        if c == '"':
            in_str = True
            continue

        if c == "{":
            depth += 1
        elif c == "}":
            depth -= 1
            if depth == 0:
                return text[start : i + 1]

    return None


def _iter_fenced_json_blocks(text: str) -> Iterable[str]:
    """Yield the stripped contents of each ```json / ```jsonc fenced block."""
    for m in _CODE_FENCE_RE.finditer(text):
        yield m.group(1).strip()


def _iter_object_candidates(text: str) -> Iterable[str]:
    """
    Enumerate all possible {...} substrings in arbitrary text
    (in order of appearance of their opening brace).
    """
    for idx, ch in enumerate(text):
        if ch == "{":
            cand = _extract_balanced_object(text, idx)
            if cand:
                yield cand


def parse_json_dict(text: str) -> Dict[str, Any]:
    """
    Parse a JSON object (dict) from arbitrary text.

    Supports:
      1) Markdown fenced JSON code blocks: ```json ... ```
      2) JSON surrounded by extra text
      3) Removing trailing commas before '}' or ']'

    Fenced blocks are searched first, then the entire text. Within each
    search space, balanced {...} candidates are tried in order of appearance
    and the first one that parses to a dict is returned.

    Args:
        text: Input string to parse

    Returns:
        Parsed dictionary

    Raises:
        ValueError: Cannot find a valid JSON dict to parse
        TypeError: Input text is not a string
    """
    if not isinstance(text, str):
        raise TypeError(f"text must be str, got {type(text).__name__}")

    # Try fenced blocks first, then the entire text.
    search_spaces = list(_iter_fenced_json_blocks(text))
    search_spaces.append(text)

    last_err: Optional[Exception] = None

    for space in search_spaces:
        # Candidates are consumed lazily so scanning stops at the first
        # success. An object at the very start of the space (after optional
        # whitespace / BOM) is naturally the first candidate, and it is
        # extracted as a balanced substring, so trailing noise and a leading
        # BOM never reach json.loads.
        seen = set()  # avoid re-parsing identical substrings
        for cand in _iter_object_candidates(space):
            if cand in seen:
                continue
            seen.add(cand)

            cleaned = _strip_trailing_commas(cand).strip()
            try:
                obj = json.loads(cleaned)
            except (ValueError, RecursionError) as e:
                # JSONDecodeError is a ValueError; RecursionError covers
                # pathologically deep nesting. Remember it and keep looking.
                last_err = e
                continue
            if isinstance(obj, dict):
                return obj

    raise ValueError("No valid JSON object (dict) found in input") from last_err