File size: 4,946 Bytes
61f8894
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
"""
JSON parsing utilities for handling malformed LLM outputs.
Provides robust parsing with fallback strategies.
"""

import json
import re
from typing import Any, Dict, Optional


def _load_json_container(candidate: str) -> Optional[Any]:
    """Decode ``candidate`` and return it only if it is a JSON object or array.

    Returns None when decoding fails or when the decoded value is a bare
    scalar (number, string, boolean, null).
    """
    try:
        value = json.loads(candidate)
    except (json.JSONDecodeError, RecursionError):
        # RecursionError: pathologically nested input; treat it as unparseable.
        return None
    return value if isinstance(value, (dict, list)) else None


def _delimited_span(text: str, opener: str, closer: str) -> Optional[str]:
    """Return the slice from the first ``opener`` to the last ``closer``, or None."""
    start = text.find(opener)
    end = text.rfind(closer)
    if start == -1 or end <= start:
        return None
    return text[start : end + 1]


def parse_llm_json(text: str) -> Optional[Dict[str, Any]]:
    """
    Parse JSON from LLM output with multiple fallback strategies.

    Strategies, tried in order until one succeeds:
    1. Decode the whole text as-is.
    2. Decode the span from the first "{" to the last "}".
    3. Decode the span from the first "[" to the last "]".
    4. Repair common defects (line breaks, trailing commas, single quotes)
       in the object span, then the array span, and decode again.
    5. Scrape flat ``key: value`` pairs with a regex.

    Args:
        text: Raw text from LLM that should contain JSON

    Returns:
        Parsed JSON dictionary, or None if parsing fails. When the payload is
        a JSON array, the decoded list is returned instead of a dictionary.
        Bare scalars (e.g. "42", "true") are not treated as a successful
        parse. Strategy 5 returns a flat dictionary whose values are ints,
        floats or stripped strings.
    """
    if not text or not isinstance(text, str):
        return None

    # Strategy 1: Try direct parsing
    parsed = _load_json_container(text)
    if parsed is not None:
        return parsed

    delimiters = (("{", "}"), ("[", "]"))

    # Strategies 2 and 3: extract the outermost {...} span, then [...]
    for opener, closer in delimiters:
        candidate = _delimited_span(text, opener, closer)
        if candidate is not None:
            parsed = _load_json_container(candidate)
            if parsed is not None:
                return parsed

    # Strategy 4: Fix common issues and retry
    # Collapse line breaks (including Windows "\r\n") and the indentation
    # that follows them into a single space.
    cleaned = re.sub(r"\r?\n\s*", " ", text)
    for opener, closer in delimiters:
        candidate = _delimited_span(cleaned, opener, closer)
        if candidate is None:
            continue
        # Try the least invasive repair first: rewriting every single quote
        # would corrupt apostrophes inside otherwise valid double-quoted
        # strings, so that heuristic is only the second attempt.
        for variant in (candidate, candidate.replace("'", '"')):
            # Remove trailing commas before } or ]
            repaired = re.sub(r",(\s*[}\]])", r"\1", variant)
            parsed = _load_json_container(repaired)
            if parsed is not None:
                return parsed

    # Strategy 5: Try to parse as key-value pairs using regex
    result: Dict[str, Any] = {}
    for key, value in re.findall(r'"?(\w+)"?\s*:\s*"?([^",}\]]+)"?', text):
        # Try to parse value as number if possible
        try:
            result[key] = float(value) if "." in value else int(value)
        except ValueError:
            result[key] = value.strip()

    return result or None


def parse_tool_input(input_str: str) -> Dict[str, Any]:
    """
    Parse tool input from LLM, handling both string and JSON inputs.

    Args:
        input_str: Input from LLM. Usually a string (JSON or a plain value);
            a dict is also accepted and returned unchanged.

    Returns:
        Dictionary with parsed values:
        - the input itself when it is already a dict;
        - the parsed object when the input contains a non-empty JSON object;
        - {"user_id": <value>} when the input is a plain, non-empty string
          without any of the characters "{", "}", "[", "]" or ":";
        - {} in every other case (empty input, non-string input, text that
          looks like JSON but did not yield an object).
    """
    # If it's already a dict, return it
    if isinstance(input_str, dict):
        return input_str

    # Try to parse as JSON. Only a non-empty dict counts as success: the
    # parser may hand back a list or another non-dict value, which would
    # break this function's Dict return contract.
    parsed = parse_llm_json(input_str)
    if isinstance(parsed, dict) and parsed:
        return parsed

    if not isinstance(input_str, str):
        return {}

    # Drop surrounding whitespace and quote characters
    candidate = input_str.strip().strip('"').strip("'")

    # Nothing left to use as an identifier
    if not candidate:
        return {}

    # Structural characters mean this was meant to be JSON but could not be
    # parsed into an object; an empty dict signals the failure.
    if any(char in candidate for char in "{}[]:"):
        return {}

    # A simple value is treated as user_id
    return {"user_id": candidate}


def extract_json_value(text: str, key: str, default: Any = None) -> Any:
    """
    Extract a specific value from text containing JSON.

    First attempts a full parse with parse_llm_json; if that yields a
    non-empty dictionary, the result of looking up ``key`` in it (or
    ``default`` when the key is absent) is returned. Otherwise a regex search
    for ``"key": value`` is used as a fallback.

    Args:
        text: Text containing JSON
        key: Key to extract
        default: Default value if key not found

    Returns:
        Extracted value or default. Values found by the regex fallback are
        returned as int or float when they parse as numbers, otherwise as a
        stripped string.
    """
    try:
        parsed = parse_llm_json(text)
        if parsed and isinstance(parsed, dict):
            return parsed.get(key, default)
    except Exception:
        # Best-effort contract: a parser failure must not escape; fall
        # through to the regex extraction below.
        pass

    # Try regex extraction as fallback
    if not isinstance(text, str):
        return default

    # Escape the key so regex metacharacters in it ("." "(" "*" ...) are
    # matched literally instead of changing or breaking the pattern.
    pattern = '"' + re.escape(f"{key}") + r'"\s*:\s*"?([^",}\]]+)"?'
    match = re.search(pattern, text)
    if match is None:
        return default

    value = match.group(1).strip()
    # Try to convert to number
    try:
        return float(value) if "." in value else int(value)
    except ValueError:
        return value