#!/usr/bin/env python
# Scraped page header (not part of the module): Spaces: Sleeping; file size 3,389 bytes; revision 5df8a73.
"""
Robust JSON parsing utilities with automatic repair and markdown extraction.
Provides safe JSON parsing that handles:
- Markdown code block wrapping (```json...```)
- Malformed JSON (missing commas, trailing commas, etc.)
- Unescaped newlines and control characters
- Empty responses
"""
import json
import logging
import re
from typing import Any
try:
from json_repair import repair_json
except ImportError:
repair_json = None
# Module-level logger; parse_json_response falls back to it when no
# logger_instance is supplied, and safe_json_loads always logs through it.
logger = logging.getLogger(__name__)
# Sentinel default for the ``fallback`` parameters: lets the functions tell
# "argument omitted" (replaced by ``{}``) apart from an explicit ``None``.
_UNSET = object()
def parse_json_response(
    response: str,
    logger_instance: logging.Logger | None = None,
    fallback: Any = _UNSET,
) -> Any:
    """
    Safely parse JSON from LLM responses with automatic repair.

    Parsing is attempted in this order:
    1. Direct JSON parsing of the raw response
    2. Direct JSON parsing of the first markdown code block, if present
    3. Automated repair using json-repair library with fallback, applied to
       the code block content when one was found, otherwise to the raw
       response

    The raw response is tried before any markdown extraction because a valid
    JSON document may itself contain a code fence inside a string value;
    extracting first would cut such a document down to the fence interior.

    Args:
        response: Raw string response from LLM
        logger_instance: Logger instance for debugging (optional)
        fallback: Value to return if all parsing fails.
            Pass ``None`` explicitly to get ``None`` on failure;
            omit the argument (or leave default) to get ``{}``.

    Returns:
        Parsed JSON object, or fallback value if parsing fails

    Example:
        >>> response = '```json\\n{"key": "value"}\\n```'
        >>> data = parse_json_response(response)
        >>> data
        {'key': 'value'}
    """
    log = logger_instance or logger
    if fallback is _UNSET:
        # A fresh dict per call, so callers never share one mutable default.
        fallback = {}

    # Handle empty response
    if not response or not response.strip():
        log.warning("LLM returned empty response")
        return fallback

    # Texts to try, in order: the raw response, then the fenced content.
    candidates = [response]
    if "```" in response:
        json_match = re.search(r"```(?:json)?\s*\n?(.*?)```", response, re.DOTALL)
        if json_match:
            candidates.append(json_match.group(1).strip())
            log.debug("Extracted JSON from markdown code block")

    # Strategy 1: Direct parsing of each candidate
    for candidate in candidates:
        try:
            return json.loads(candidate)
        except json.JSONDecodeError as parse_error:
            log.debug("Direct JSON parse failed: %s", parse_error)

    # Repair operates on the most specific text available: the code block
    # content when one was extracted, otherwise the raw response.
    extracted_response = candidates[-1]

    # Strategy 2: Try json-repair if available
    if repair_json is None:
        log.warning("json-repair library not installed, cannot repair malformed JSON")
        log.debug("Response: %s", extracted_response[:200])
        return fallback

    try:
        log.debug("Attempting JSON repair")
        repaired = repair_json(extracted_response)
        result = json.loads(repaired)
    except Exception as repair_error:
        # Deliberately broad: any failure inside the third-party repair step
        # (or in parsing its output) must end in the fallback, not a crash.
        log.error("JSON repair failed: %s", repair_error)
        log.debug("Response: %s", extracted_response[:200])
        return fallback
    else:
        log.info("Successfully repaired malformed JSON")
        return result
def safe_json_loads(data: str, fallback: Any = _UNSET) -> Any:
    """
    Simple wrapper for safe JSON loading.

    Unlike a bare ``json.loads`` call, this never raises for bad input:
    malformed JSON, undecodable bytes, and non-string values such as ``None``
    all produce the fallback value and a warning log entry.

    Args:
        data: JSON string
        fallback: Value to return on failure (default: {}).
            Pass ``None`` explicitly to get ``None`` on failure.

    Returns:
        Parsed JSON or fallback value
    """
    if fallback is _UNSET:
        # A fresh dict per call, so callers never share one mutable default.
        fallback = {}
    try:
        return json.loads(data)
    except (TypeError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError (bad
        # bytes input); TypeError covers None and other non-string input.
        logger.warning("JSON parse error: %s", e)
        return fallback
|