Spaces:

minipuding
/

test_agent

Sleeping

File size: 5,477 Bytes

5c11d93

import json
import re
from typing import Any, Dict, Optional, Iterable

def try_parse_tool_call(text:str) -> Optional[Dict[str, Any]]:
    """
    Return the tool-call dict encoded in ``text``, or None if there is none.

    ``text`` is handed to ``parse_json_dict``; the resulting dict is accepted
    only when all of the following hold:

      * ``obj["action"] == "call_tool"``
      * the key ``"tool"`` is present
      * ``"arguments"`` is absent, ``None``, or a dict

    The dict is returned unmodified (no default ``"arguments"`` is inserted).

    Args:
        text: Raw text that may contain a JSON tool call.

    Returns:
        The parsed dict when it looks like a tool call, otherwise None.
    """
    try:
        obj = parse_json_dict(text)
    except (TypeError, ValueError):
        # parse_json_dict signals "not a str" with TypeError and "no JSON
        # object found" with ValueError. A bare ``except:`` would also swallow
        # KeyboardInterrupt / SystemExit, so only these two are caught.
        return None

    if obj.get("action") != "call_tool":
        return None
    if "tool" not in obj:
        return None

    # Arguments are optional, but when given they must be a JSON object.
    args = obj.get("arguments", {})
    if args is not None and not isinstance(args, dict):
        return None

    return obj

# Matches Markdown fenced blocks tagged ```json or ```jsonc (case-insensitive)
# and captures the fence body in group 1. Remove "jsonc" if it is not needed.
# "jsonc" must be listed before "json": regex alternation takes the first
# branch that matches, so with "json" first a ```jsonc fence would match as
# "json" and leave a stray "c" at the start of the captured body.
_CODE_FENCE_RE = re.compile(
    r"```(?:jsonc|json)\s*(.*?)\s*```",
    flags=re.IGNORECASE | re.DOTALL,
)


def _strip_trailing_commas_once(s: str) -> str:
    """
    Drop every comma that is directly followed (ignoring whitespace) by
    '}' or ']', in a single left-to-right pass.

    Characters inside double-quoted strings are copied verbatim, so commas
    that belong to string content are never removed. Only one comma per
    closing bracket is removed in a pass: in ',,}' the first comma is
    followed by another comma and therefore survives this pass.
    """
    kept = []
    inside_string = False
    escaped = False
    length = len(s)

    for pos, ch in enumerate(s):
        if inside_string:
            kept.append(ch)
            if escaped:
                # The character after a backslash never ends the string.
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                inside_string = False
        elif ch == '"':
            inside_string = True
            kept.append(ch)
        elif ch == ",":
            # Find the next non-whitespace character after the comma.
            nxt = pos + 1
            while nxt < length and s[nxt] in " \t\r\n":
                nxt += 1
            followed_by_closer = nxt < length and s[nxt] in "}]"
            if not followed_by_closer:
                kept.append(ch)
        else:
            kept.append(ch)

    return "".join(kept)


def _strip_trailing_commas(s: str, max_passes: int = 10) -> str:
    """
    Apply ``_strip_trailing_commas_once`` repeatedly until the text stops
    changing or ``max_passes`` passes have run.

    Several passes are needed for inputs such as ',,}' or ', ,}', where one
    pass removes only the comma nearest the closing bracket. If the pass
    limit is reached first, the partially cleaned text is returned
    (best effort).
    """
    for _pass in range(max_passes):
        cleaned = _strip_trailing_commas_once(s)
        converged = cleaned == s
        s = cleaned
        if converged:
            break
    return s


def _extract_balanced_object(text: str, start: int) -> Optional[str]:
    """
    Return the brace-balanced substring ``{...}`` that begins at ``start``.

    The caller is expected to pass a ``start`` where ``text[start] == '{'``.
    Braces inside double-quoted strings are ignored, and backslash escapes
    are honoured so an escaped quote does not end a string.

    Returns None when the text ends before the braces balance.
    """
    nesting = 0
    quoted = False
    skip_next = False

    pos = start
    end = len(text)
    while pos < end:
        ch = text[pos]
        pos += 1  # pos now points one past the character being examined

        if quoted:
            if skip_next:
                skip_next = False
            elif ch == "\\":
                skip_next = True
            elif ch == '"':
                quoted = False
        elif ch == '"':
            quoted = True
        elif ch == "{":
            nesting += 1
        elif ch == "}":
            nesting -= 1
            if nesting == 0:
                return text[start:pos]

    return None


def _iter_fenced_json_blocks(text: str) -> Iterable[str]:
    """
    Yield, in order of appearance, the whitespace-stripped body captured by
    each match of ``_CODE_FENCE_RE`` in ``text``.
    """
    for match in _CODE_FENCE_RE.finditer(text):
        body = match.group(1)
        if body is None:
            continue
        yield body.strip()


def _iter_object_candidates(text: str) -> Iterable[str]:
    """
    Yield every brace-balanced ``{...}`` substring of ``text``, ordered by
    the position of its opening brace.

    Each '{' in the text is tried as a starting point, so nested objects are
    yielded after the object that encloses them.
    """
    pos = text.find("{")
    while pos != -1:
        candidate = _extract_balanced_object(text, pos)
        if candidate:
            yield candidate
        pos = text.find("{", pos + 1)


def parse_json_dict(text: str) -> Dict[str, Any]:
    """
    Parse a JSON object (dict) out of an arbitrary string.

    Handles:
      1) Markdown fenced JSON: ```json ... ```
      2) extra text before or after the JSON
      3) trailing commas before '}' / ']', which are removed before parsing

    Fenced blocks are searched first, then the whole text. Within each
    search region, brace-balanced candidates are tried in order and the
    first one that ``json.loads`` turns into a dict is returned.

    Args:
        text: The input string.

    Returns:
        The first dict that could be parsed.

    Raises:
        TypeError: ``text`` is not a str.
        ValueError: no candidate parsed into a dict; chained from the last
            parsing error, if there was one.
    """
    if not isinstance(text, str):
        raise TypeError(f"text must be str, got {type(text).__name__}")

    most_recent_failure: Optional[Exception] = None

    # Fenced blocks take priority; the raw text is the fallback region.
    for region in [*_iter_fenced_json_blocks(text), text]:
        attempts = []

        # When the region opens with '{' (after leading whitespace / BOM),
        # try the object starting there first so trailing noise is ignored.
        head = region.lstrip().lstrip("\ufeff")
        if head.startswith("{"):
            leading = _extract_balanced_object(head, 0)
            if leading:
                attempts.append(leading)

        # Then every object found anywhere in the region.
        attempts.extend(_iter_object_candidates(region))

        # Skip substrings that were already attempted in this region.
        tried = set()
        for snippet in attempts:
            if snippet in tried:
                continue
            tried.add(snippet)

            payload = _strip_trailing_commas(snippet).strip()
            try:
                parsed = json.loads(payload)
            except Exception as exc:
                most_recent_failure = exc
                continue
            if isinstance(parsed, dict):
                return parsed

    raise ValueError("No valid JSON object (dict) found in input") from most_recent_failure


# ----------------- Usage example -----------------
if __name__ == "__main__":
    # Demo input: explanatory text followed by JSON that contains trailing
    # commas. The string literal is flush-left because it is multi-line.
    sample = """这里是结果：
```json
{
  "a": 1,
  "b": [1, 2,],
}
"""
    parsed_sample = parse_json_dict(sample)
    print(parsed_sample)