File size: 1,632 Bytes
28263c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import json
import re
from typing import Any, Optional

def extract_json_block(text: str) -> str:
    """
    Extract the first JSON object embedded in ``text``.

    This helps skip LLM chatter before or after the JSON.

    The first ``{`` is taken as the start of the payload. If a complete JSON
    object can be decoded from that position, exactly that object's text is
    returned, so trailing chatter that happens to contain ``}`` (or a second
    object) does not end up in the result. Otherwise the function falls back
    to the widest ``{ ... }`` span (first ``{`` to last ``}``) and leaves any
    repair of the malformed JSON to the caller.

    Args:
        text: Arbitrary text that may contain a JSON object.

    Returns:
        The extracted substring, or ``text`` unchanged when it contains no
        ``{`` that is followed by a later ``}``.
    """
    start = text.find('{')
    if start == -1:
        return text

    # strict=False accepts raw control characters (newlines, tabs) inside
    # string values instead of rejecting the whole object.
    decoder = json.JSONDecoder(strict=False)
    try:
        _, stop = decoder.raw_decode(text, start)
    except (json.JSONDecodeError, RecursionError):
        # Not cleanly decodable from the first brace; use the wide span below.
        pass
    else:
        return text[start:stop]

    end = text.rfind('}')
    if end > start:
        return text[start:end + 1]
    return text

def safe_json_loads(raw: str) -> dict:
    """
    Safely load JSON from a string that may contain:
    1. Markdown code blocks (```json ... ```)
    2. Prefix/suffix text
    3. Unescaped control characters (newlines, tabs) inside strings

    Two attempts are made, in order:

    * The text is used as-is. Fences surrounding the payload are dropped by
      ``extract_json_block`` together with the rest of the prefix/suffix, so
      fence markers that are part of a JSON string value stay intact.
    * Every fence marker is removed before extraction. This covers input
      where a fence is the only thing preventing a successful parse.

    Args:
        raw: Text expected to contain a JSON object.

    Returns:
        The decoded value, or ``{}`` when ``raw`` is empty or neither attempt
        yields valid JSON (a short diagnostic is printed in the latter case).
        NOTE: the decoded value is returned without a type check, so text
        holding a bare non-object JSON value (e.g. a list) yields that value.
    """
    if not raw:
        return {}

    for strip_fences in (False, True):
        text = re.sub(r"```json|```", "", raw) if strip_fences else raw
        json_str = extract_json_block(text.strip())
        try:
            # strict=False allows unescaped control characters in strings.
            return json.loads(json_str, strict=False)
        except json.JSONDecodeError:
            # Move on to the next, more aggressive cleaning attempt.
            continue

    print(f"Failed to parse JSON: {raw[:200]}...")
    return {}