|
|
""" |
|
|
JSON parsing utilities for handling malformed LLM outputs. |
|
|
Provides robust parsing with fallback strategies. |
|
|
""" |
|
|
|
|
|
import json |
|
|
import re |
|
|
from typing import Any, Dict, Optional |
|
|
|
|
|
|
|
|
def _loads_container(candidate: str) -> Optional[Any]:
    """Decode ``candidate`` and return it only if it is a JSON object or array.

    Returns None when decoding fails or when the document is a bare scalar
    (number, string, boolean, null), since callers expect a container.
    """
    try:
        parsed = json.loads(candidate)
    except (ValueError, RecursionError):
        # JSONDecodeError subclasses ValueError; json.loads can also raise a
        # plain ValueError (integer digit limit) or RecursionError (very deep
        # nesting). All of them simply mean "this strategy did not work".
        return None
    return parsed if isinstance(parsed, (dict, list)) else None


def _outermost_span(text: str, opener: str, closer: str) -> Optional[str]:
    """Return the slice from the first ``opener`` to the last ``closer``, or None."""
    start = text.find(opener)
    end = text.rfind(closer)
    if start != -1 and end > start:
        return text[start : end + 1]
    return None


def _coerce_scalar(value: str) -> Any:
    """Convert a salvaged value to float/int when it looks numeric, else strip it."""
    try:
        return float(value) if "." in value else int(value)
    except ValueError:
        return value.strip()


def parse_llm_json(text: str) -> Optional[Dict[str, Any]]:
    """
    Parse JSON from LLM output with multiple fallback strategies.

    Strategies are tried in order and the first success wins:

    1. Decode the text as-is.
    2. Decode the span from the first ``{`` to the last ``}``.
    3. Decode the span from the first ``[`` to the last ``]``.
    4. Collapse line breaks, take the ``{...}`` span and remove trailing
       commas; if that still fails, additionally swap single quotes for
       double quotes.
    5. Salvage flat ``key: value`` pairs with a regular expression.

    Args:
        text: Raw text from LLM that should contain JSON

    Returns:
        Parsed JSON dictionary (a list when the payload is a JSON array),
        or None if the input is empty, not a string, or nothing could be
        recovered. Bare JSON scalars such as ``42`` or ``"abc"`` are not
        returned by the decoding strategies.
    """
    if not text or not isinstance(text, str):
        return None

    # Strategies 1-3: the raw text, then the outermost object, then the
    # outermost array.
    direct_candidates = (
        text,
        _outermost_span(text, "{", "}"),
        _outermost_span(text, "[", "]"),
    )
    for candidate in direct_candidates:
        if candidate is None:
            continue
        parsed = _loads_container(candidate)
        if parsed is not None:
            return parsed

    # Strategy 4: repair common syntax slips inside the outermost object.
    cleaned = re.sub(r"\n\s*", " ", text)
    span = _outermost_span(cleaned, "{", "}")
    if span is not None:
        # Try the least destructive repair first: swapping quotes corrupts
        # apostrophes inside otherwise valid strings ("it's"), so it is only
        # attempted when removing trailing commas alone was not enough.
        for variant in (span, span.replace("'", '"')):
            repaired = re.sub(r",(\s*[}\]])", r"\1", variant)
            parsed = _loads_container(repaired)
            if parsed is not None:
                return parsed

    # Strategy 5: last resort, pull flat key/value pairs out of the raw text.
    # Nested structure is lost; later duplicates of a key overwrite earlier ones.
    pairs = re.findall(r'"?(\w+)"?\s*:\s*"?([^",}\]]+)"?', text)
    if pairs:
        return {key: _coerce_scalar(value) for key, value in pairs}

    return None
|
|
|
|
|
|
|
|
def parse_tool_input(input_str: str) -> Dict[str, Any]:
    """
    Parse tool input from LLM, handling both string and JSON inputs.

    Args:
        input_str: Input string from LLM (may be JSON or plain string).
            A dict is accepted as well and returned unchanged.

    Returns:
        Dictionary with parsed values:

        - the dict itself when ``input_str`` is already a dict;
        - the non-empty dict recovered by ``parse_llm_json``;
        - ``{"user_id": <value>}`` for a plain string that contains none of
          the characters ``{ } [ ] :`` (surrounding whitespace and quotes
          are stripped first);
        - ``{}`` for anything else, including input that is neither a dict
          nor a string.
    """
    if isinstance(input_str, dict):
        return input_str

    # Anything that is neither a dict nor a string cannot be interpreted;
    # without this guard the membership tests below raise TypeError.
    if not isinstance(input_str, str):
        return {}

    parsed = parse_llm_json(input_str)
    # json.loads also yields scalars and lists ('"abc"' -> str, '7' -> int);
    # only a dict satisfies this function's return contract.
    if parsed and isinstance(parsed, dict):
        return parsed

    bare = input_str.strip().strip('"').strip("'")

    # Structural characters mean this was meant to be JSON that could not be
    # parsed; do not mistake it for a plain identifier.
    if any(char in bare for char in "{}[]:"):
        return {}

    return {"user_id": bare}
|
|
|
|
|
|
|
|
def extract_json_value(text: str, key: str, default: Any = None) -> Any:
    """
    Extract a specific value from JSON text.

    The text is first handed to ``parse_llm_json``; when that yields a
    non-empty dict the value is looked up there. Otherwise the raw text is
    scanned with a regular expression for ``"<key>": <value>``.

    Args:
        text: Text containing JSON
        key: Key to extract
        default: Default value if key not found

    Returns:
        Extracted value or default. Values found by the regex scan are
        converted to float (when they contain a ".") or int where possible
        and returned as a stripped string otherwise.
    """
    if not isinstance(text, str):
        return default

    # Best effort: any failure in the full parser (or in the lookup, e.g. an
    # unhashable key) falls through to the regex scan below.
    try:
        parsed = parse_llm_json(text)
        if parsed and isinstance(parsed, dict):
            return parsed.get(key, default)
    except Exception:
        pass

    # Escape the key so regex metacharacters in it (".", "(", "[", "+", ...)
    # are matched literally instead of being interpreted as pattern syntax.
    escaped_key = re.escape(str(key))
    pattern = rf'"{escaped_key}"\s*:\s*"?([^",}}\]]+)"?'
    match = re.search(pattern, text)
    if match is None:
        return default

    value = match.group(1).strip()
    try:
        return float(value) if "." in value else int(value)
    except ValueError:
        return value
|
|
|