ROCmPort-AI / backend /tools /json_utils.py
tazwarrrr's picture
fixv2
28263c0
import json
import re
from typing import Any, Optional
def extract_json_block(text: str) -> str:
"""
Extract the first continuous JSON-like block (starting with { and ending with }).
This helps skip LLM chatter before or after the JSON.
"""
# Find the first occurrences of { and the last occurrence of }
start = text.find('{')
end = text.rfind('}')
if start != -1 and end != -1 and end > start:
return text[start:end+1]
return text
def safe_json_loads(raw: str) -> dict:
"""
Safely load JSON from a string that may contain:
1. Markdown code blocks (```json ... ```)
2. Prefix/suffix text
3. Unescaped control characters (newlines, tabs) inside strings
"""
if not raw:
return {}
# 1. Strip markdown syntax if present
cleaned = re.sub(r"```json|```", "", raw).strip()
# 2. Extract only the JSON part
json_str = extract_json_block(cleaned)
try:
# 3. Parse with strict=False to allow unescaped control characters
return json.loads(json_str, strict=False)
except json.JSONDecodeError as e:
# 4. If it fails, try some common cleaning
try:
# Replace actual newlines within strings with \n (fragile but sometimes helps)
# This is a bit risky, so we only try it as a last resort
# Actually, strict=False should have handled most of this.
# Let's just log and raise for now to debug if strict=False isn't enough.
raise e
except Exception:
print(f"Failed to parse JSON: {raw[:200]}...")
return {}