Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python | |
| """ | |
| Robust JSON parsing utilities with automatic repair and markdown extraction. | |
| Provides safe JSON parsing that handles: | |
| - Markdown code block wrapping (```json...```) | |
| - Malformed JSON (missing commas, trailing commas, etc.) | |
| - Unescaped newlines and control characters | |
| - Empty responses | |
| """ | |
| import json | |
| import logging | |
| import re | |
| from typing import Any | |
| try: | |
| from json_repair import repair_json | |
| except ImportError: | |
| repair_json = None | |
| logger = logging.getLogger(__name__) | |
| _UNSET = object() | |
# Matches the first Markdown code fence. The "json" language tag is optional
# and matched case-insensitively (```JSON is common in LLM output). The closing
# fence is optional too, so a response truncated mid-block still yields its
# payload instead of being parsed with the fence characters attached.
_FENCED_BLOCK_RE = re.compile(
    r"```(?:json)?\s*\n?(.*?)(?:```|\Z)",
    re.DOTALL | re.IGNORECASE,
)


def _extract_fenced_block(text: str) -> str | None:
    """Return the stripped body of the first code fence in *text*, or None if it is absent or empty."""
    match = _FENCED_BLOCK_RE.search(text)
    if match is None:
        return None
    # An empty capture (e.g. a lone trailing fence) carries no payload; report
    # it as "nothing found" so the caller keeps working on the full text.
    return match.group(1).strip() or None


def parse_json_response(
    response: str,
    logger_instance: logging.Logger | None = None,
    fallback: Any = _UNSET,
) -> Any:
    """
    Safely parse JSON from LLM responses with automatic repair.

    Parsing proceeds in three steps:
    1. If the response contains a markdown code fence, use the content of
       the first fence (closed or unterminated) as the JSON candidate.
    2. Try ``json.loads`` on the candidate.
    3. If that fails and the optional json-repair library is available,
       repair the candidate and parse the repaired text.

    Args:
        response: Raw string response from LLM
        logger_instance: Logger to report progress on; the module-level
            logger is used when omitted.
        fallback: Value to return if all parsing fails.
            Pass ``None`` explicitly to get ``None`` on failure;
            omit the argument (or leave default) to get a new ``{}``.

    Returns:
        Parsed JSON object, or fallback value if the response is empty or
        cannot be parsed/repaired.

    Example:
        >>> response = '```json\\n{"key": "value"}\\n```'
        >>> data = parse_json_response(response)
        >>> data
        {'key': 'value'}
    """
    log = logger_instance or logger
    if fallback is _UNSET:
        # Built per call so callers never share one mutable default dict.
        fallback = {}

    # Handle empty response
    if not response or not response.strip():
        log.warning("LLM returned empty response")
        return fallback

    # Extract from markdown code blocks if present
    payload = response
    if "```" in response:
        fenced = _extract_fenced_block(response)
        if fenced is not None:
            payload = fenced
            log.debug("Extracted JSON from markdown code block")

    # Strategy 1: Direct parsing
    try:
        return json.loads(payload)
    except json.JSONDecodeError as parse_error:
        log.debug("Direct JSON parse failed: %s", parse_error)

    # Strategy 2: Try json-repair if available
    if repair_json is None:
        log.warning("json-repair library not installed, cannot repair malformed JSON")
        log.debug("Response: %s", payload[:200])
        return fallback

    try:
        log.debug("Attempting JSON repair")
        result = json.loads(repair_json(payload))
    except Exception as repair_error:
        # Deliberately broad: this is a best-effort boundary around a
        # third-party repair routine, and any failure maps to the fallback.
        log.error("JSON repair failed: %s", repair_error)
        log.debug("Response: %s", payload[:200])
        return fallback

    log.info("Successfully repaired malformed JSON")
    return result
def safe_json_loads(data: str, fallback: Any = _UNSET) -> Any:
    """
    Parse a JSON string, returning *fallback* instead of raising.

    Args:
        data: JSON text to decode.
        fallback: Value to return on failure. When omitted, a new empty
            dict is returned; pass ``None`` explicitly to get ``None``.

    Returns:
        The decoded JSON value, or *fallback* if *data* cannot be decoded.
    """
    if fallback is _UNSET:
        # Built per call so callers never share one mutable default dict.
        fallback = {}
    try:
        return json.loads(data)
    except (ValueError, TypeError) as e:
        # ValueError covers json.JSONDecodeError and undecodable byte input;
        # TypeError is what json.loads raises for non-string input such as
        # None. A "safe" loader should map all of these to the fallback.
        logger.warning("JSON parse error: %s", e)
        return fallback