Spaces:
Sleeping
Sleeping
File size: 5,477 Bytes
import json
import re
from typing import Any, Dict, Optional, Iterable
def try_parse_tool_call(text: str) -> Optional[Dict[str, Any]]:
    """Return the tool-call dict found in ``text``, or ``None``.

    ``text`` is handed to ``parse_json_dict``; the resulting dict is accepted
    as a tool call only when all of the following hold:

    * ``obj["action"] == "call_tool"``
    * the key ``"tool"`` is present
    * ``"arguments"`` is absent, ``None``, or a dict

    Args:
        text: Raw text that may contain a JSON tool-call object.

    Returns:
        The parsed dict when it is a well-formed tool call, otherwise ``None``.
    """
    try:
        obj = parse_json_dict(text)
    except Exception:
        # Any parsing failure simply means "not a tool call". Unlike a bare
        # ``except:``, this still lets KeyboardInterrupt / SystemExit through.
        return None

    if obj.get("action") != "call_tool":
        return None
    if "tool" not in obj:
        return None

    # A missing "arguments" key is treated like an empty argument dict.
    args = obj.get("arguments", {})
    if args is not None and not isinstance(args, dict):
        return None
    return obj
# Matches Markdown fences tagged ```json or ```jsonc (case-insensitive) and
# captures the fenced body with surrounding whitespace trimmed. Drop the
# optional "c" if jsonc support is not wanted.
# The tag is written as `jsonc?` rather than `(?:json|jsonc)`: with the
# alternation, the `json` branch matches first and the "c" of a jsonc tag
# ends up at the start of the captured body.
_CODE_FENCE_RE = re.compile(
    r"```jsonc?\s*(.*?)\s*```",
    flags=re.IGNORECASE | re.DOTALL,
)
def _strip_trailing_commas_once(s: str) -> str:
    """Remove commas that directly precede ``}`` or ``]`` in one pass.

    Whitespace (space, tab, CR, LF) between the comma and the closing bracket
    is allowed and is kept in the output. Text inside double-quoted strings is
    copied verbatim, honouring backslash escapes, so commas inside string
    values are never touched.
    """
    pieces = []
    inside_string = False
    escaped = False
    total = len(s)

    for pos, ch in enumerate(s):
        if inside_string:
            # Copy string contents untouched; only track where the string ends.
            pieces.append(ch)
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                inside_string = False
            continue

        if ch == '"':
            inside_string = True
        elif ch == ",":
            # Peek past whitespace to see what follows the comma.
            nxt = pos + 1
            while nxt < total and s[nxt] in " \t\r\n":
                nxt += 1
            if nxt < total and s[nxt] in "}]":
                continue  # trailing comma: skip it
        pieces.append(ch)

    return "".join(pieces)
def _strip_trailing_commas(s: str, max_passes: int = 10) -> str:
    """Apply ``_strip_trailing_commas_once`` until the text stops changing.

    Repeated passes are needed for inputs such as ``',,}'`` or ``', ,}'``,
    where each pass only removes the comma nearest the closing bracket.
    At most ``max_passes`` passes are run; if the text is still changing
    after that, the latest result is returned as a best effort.
    """
    current = s
    for _attempt in range(max_passes):
        previous, current = current, _strip_trailing_commas_once(current)
        if current == previous:
            break  # fixed point reached
    return current
def _extract_balanced_object(text: str, start: int) -> Optional[str]:
    """Return the brace-balanced ``{...}`` substring beginning at ``start``.

    The caller is expected to pass ``start`` such that ``text[start] == '{'``.
    Braces inside double-quoted strings (with backslash escapes honoured) do
    not affect the nesting depth.

    Returns:
        The substring from ``start`` through the matching ``}``, or ``None``
        when the end of ``text`` is reached before the braces balance.
    """
    nesting = 0
    inside_string = False
    escaped = False
    pos = start
    end = len(text)

    while pos < end:
        ch = text[pos]
        pos += 1  # pos now points one past the character being examined
        if inside_string:
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                inside_string = False
        elif ch == '"':
            inside_string = True
        elif ch == "{":
            nesting += 1
        elif ch == "}":
            nesting -= 1
            if nesting == 0:
                return text[start:pos]
    return None
def _iter_fenced_json_blocks(text: str) -> Iterable[str]:
    """Yield the stripped body of every fence matched by ``_CODE_FENCE_RE``, in order."""
    captured = (match.group(1) for match in _CODE_FENCE_RE.finditer(text))
    yield from (body.strip() for body in captured if body is not None)
def _iter_object_candidates(text: str) -> Iterable[str]:
    """Yield every brace-balanced ``{...}`` substring of ``text``, in order of appearance.

    Each ``{`` in the text is tried as a starting point; positions from which
    no balanced object can be extracted are skipped.
    """
    pos = text.find("{")
    while pos != -1:
        candidate = _extract_balanced_object(text, pos)
        if candidate:
            yield candidate
        pos = text.find("{", pos + 1)
def parse_json_dict(text: str) -> Dict[str, Any]:
    """Parse the first JSON object (dict) that can be recovered from ``text``.

    Handles:
      1) Markdown fenced JSON: ```json ... ```
      2) JSON surrounded by extra text
      3) Trailing commas before '}' / ']'

    Args:
        text: Input string.

    Returns:
        The first candidate substring that decodes to a dict.

    Raises:
        ValueError: No substring could be decoded into a dict.
        TypeError: ``text`` is not a ``str``.
    """
    if not isinstance(text, str):
        raise TypeError(f"text must be str, got {type(text).__name__}")

    # Fenced blocks are searched first; the full text is the fallback.
    regions = [*_iter_fenced_json_blocks(text), text]

    failure: Optional[Exception] = None
    for region in regions:
        # When the region starts with '{' (after whitespace / BOM), try the
        # object cut from the very beginning first so trailing noise is ignored.
        head = region.lstrip().lstrip("\ufeff")
        leading = _extract_balanced_object(head, 0) if head.startswith("{") else None
        pool = [leading] if leading else []
        # Then every object found anywhere in the region.
        pool += _iter_object_candidates(region)

        tried = set()  # avoid decoding the same substring twice
        for snippet in pool:
            if snippet in tried:
                continue
            tried.add(snippet)
            payload = _strip_trailing_commas(snippet).strip()
            try:
                parsed = json.loads(payload)
            except Exception as exc:
                failure = exc
                continue
            if isinstance(parsed, dict):
                return parsed

    raise ValueError("No valid JSON object (dict) found in input") from failure
# ----------------- Usage example -----------------
if __name__ == "__main__":
    # The ```json fence below is never closed, so no fenced block is matched
    # and the object is recovered by scanning the whole text; the trailing
    # commas are stripped before decoding.
    s1 = """这里是结果:
```json
{
"a": 1,
"b": [1, 2,],
}
"""
    print(parse_json_dict(s1))