File size: 3,389 Bytes
5df8a73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/usr/bin/env python
"""
Robust JSON parsing utilities with automatic repair and markdown extraction.

Provides safe JSON parsing that handles:
- Markdown code block wrapping (```json...```)
- Malformed JSON (missing commas, trailing commas, etc.)
- Unescaped newlines and control characters
- Empty responses
"""

import json
import logging
import re
from typing import Any

try:
    from json_repair import repair_json
except ImportError:
    repair_json = None

logger = logging.getLogger(__name__)

_UNSET = object()


def parse_json_response(
    response: str,
    logger_instance: logging.Logger | None = None,
    fallback: Any = _UNSET,
) -> Any:
    """
    Safely parse JSON from LLM responses with automatic repair.

    Implements a three-tier parsing strategy:
    1. Extract JSON from markdown code blocks if present
    2. Direct JSON parsing
    3. Automated repair using json-repair library with fallback

    Args:
        response: Raw string response from LLM
        logger_instance: Logger instance for debugging (optional)
        fallback: Value to return if all parsing fails.
                  Pass ``None`` explicitly to get ``None`` on failure;
                  omit the argument (or leave default) to get ``{}``.

    Returns:
        Parsed JSON object, or fallback value if parsing fails

    Example:
        >>> response = '```json\\n{"key": "value"}\\n```'
        >>> data = parse_json_response(response)
        >>> data
        {'key': 'value'}
    """
    log = logger_instance or logger

    if fallback is _UNSET:
        fallback = {}

    # Handle empty response
    if not response or not response.strip():
        log.warning("LLM returned empty response")
        return fallback

    # Extract from markdown code blocks if present
    extracted_response = response
    if "```" in response:
        json_match = re.search(r"```(?:json)?\s*\n?(.*?)```", response, re.DOTALL)
        if json_match:
            extracted_response = json_match.group(1).strip()
            log.debug("Extracted JSON from markdown code block")

    # Strategy 1: Direct parsing
    try:
        return json.loads(extracted_response)
    except json.JSONDecodeError as parse_error:
        log.debug(f"Direct JSON parse failed: {parse_error}")

    # Strategy 2: Try json-repair if available
    if repair_json is None:
        log.warning("json-repair library not installed, cannot repair malformed JSON")
        log.debug(f"Response: {extracted_response[:200]}")
        return fallback

    try:
        log.debug("Attempting JSON repair")
        repaired = repair_json(extracted_response)
        result = json.loads(repaired)
        log.info("Successfully repaired malformed JSON")
        return result
    except Exception as repair_error:
        log.error(f"JSON repair failed: {repair_error}")
        log.debug(f"Response: {extracted_response[:200]}")
        return fallback


def safe_json_loads(data: str, fallback: Any = _UNSET) -> Any:
    """
    Simple wrapper for safe JSON loading.

    Unlike a bare ``json.loads``, this never raises for bad input: malformed
    JSON, undecodable bytes and non-string values such as ``None`` all yield
    the fallback.

    Args:
        data: JSON string
        fallback: Value to return on failure (default: {})

    Returns:
        Parsed JSON or fallback value
    """
    if fallback is _UNSET:
        # A fresh dict per call, so callers can mutate the result safely.
        fallback = {}
    try:
        return json.loads(data)
    except (TypeError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
        # TypeError covers None and other non-str/bytes input.
        logger.warning("JSON parse error: %s", e)
        return fallback