"""
Response parser for AI agent LLM responses.
This module handles:
1. Parsing JSON responses from LLM
2. Validating response structure against schemas
3. Error handling and recovery
4. Fallback mechanisms for malformed responses
5. Logging all parsing attempts
"""
import json
import logging
import re
from typing import Dict, Any, Optional, Tuple
from dataclasses import dataclass
from enum import Enum
from pycatan.ai.schemas import (
ResponseType,
get_schema_for_response_type,
validate_action_parameters,
ACTIVE_TURN_RESPONSE_SCHEMA,
OBSERVING_RESPONSE_SCHEMA
)
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class ParseError(Enum):
    """Categories of failure that can be reported for a parsed LLM response.

    Each member's value is the lowercase, snake_case form of its name.
    """

    # The response text could not be decoded as JSON.
    INVALID_JSON = "invalid_json"

    # A field listed as required is absent.
    MISSING_REQUIRED_FIELD = "missing_required_field"

    # A field is present but has the wrong type.
    INVALID_FIELD_TYPE = "invalid_field_type"

    # The action part of the response did not pass validation.
    INVALID_ACTION = "invalid_action"

    # The response structure did not pass validation.
    VALIDATION_ERROR = "validation_error"
@dataclass
class ParseResult:
    """Outcome of a single attempt to parse an LLM response.

    Only ``success`` has to be supplied; every other field is optional and
    defaults to ``None`` (``False`` for ``fallback_used``).
    """

    # True when parsing produced usable data, False otherwise.
    success: bool

    # The decoded JSON object, when one is available.
    data: Optional[Dict[str, Any]] = None

    # Category of the failure; left as None when there is no error to report.
    error_type: Optional[ParseError] = None

    # Human-readable description of the failure.
    error_message: Optional[str] = None

    # The response text exactly as it was handed to the parser.
    raw_response: Optional[str] = None

    # Whether a fallback/repair mechanism was involved in producing this result.
    fallback_used: bool = False
class ResponseParser:
    """
    Parser for AI agent LLM responses with error handling and fallback mechanisms.

    A parse runs in four stages: extract a JSON candidate from the raw text,
    decode it, validate the structure against the schema for the response
    type, and (for active-turn responses that carry an ``action``) validate
    the action. Counters of attempts, successes and failures are kept on the
    instance and exposed through ``get_statistics``.
    """

    def __init__(self, enable_fallbacks: bool = True, strict_mode: bool = False):
        """
        Initialize the response parser.

        Args:
            enable_fallbacks: Whether to use fallback mechanisms for parsing errors
            strict_mode: If True, fail on any validation error. If False, be lenient.
        """
        self.enable_fallbacks = enable_fallbacks
        self.strict_mode = strict_mode
        self.parse_attempts = 0
        self.successful_parses = 0
        self.failed_parses = 0

    def parse(self,
              raw_response: str,
              response_type: ResponseType,
              allowed_actions: Optional[list] = None) -> ParseResult:
        """
        Parse and validate an LLM response.

        Args:
            raw_response: Raw string response from LLM
            response_type: Expected response type (active turn or observing)
            allowed_actions: List of allowed action types (for validation)

        Returns:
            ParseResult with success status and parsed data or error info.
            ``fallback_used`` is True when JSON repair or structure repair
            was applied along the way. A response that is not a string is
            reported as an ``INVALID_JSON`` failure rather than raising.
        """
        self.parse_attempts += 1
        logger.info(f"Parsing response (attempt #{self.parse_attempts})")

        # A missing or non-text response cannot be sliced or searched; report
        # it as a parse failure instead of letting a TypeError escape.
        if not isinstance(raw_response, str):
            return self._fail(
                ParseError.INVALID_JSON,
                f"Expected a string response, got {type(raw_response).__name__}",
                None if raw_response is None else str(raw_response),
            )
        logger.debug(f"Raw response: {raw_response[:200]}...")

        fallback_used = False

        # Step 1: Extract JSON from response
        json_str = self._extract_json(raw_response)
        if json_str is None:
            return self._fail(
                ParseError.INVALID_JSON,
                "Could not find valid JSON in response",
                raw_response,
            )

        # Step 2: Parse JSON
        try:
            data = json.loads(json_str)
        except json.JSONDecodeError as e:
            logger.error(f"JSON decode error: {e}")
            # Try to fix common JSON errors, but only when fallbacks are on.
            data = self._try_fix_json(json_str) if self.enable_fallbacks else None
            if data is None:
                return self._fail(
                    ParseError.INVALID_JSON,
                    f"JSON parse error: {str(e)}",
                    raw_response,
                )
            logger.warning("Used fallback JSON repair mechanism")
            fallback_used = True

        # Step 3: Validate structure
        is_valid, validation_error = self._validate_structure(data, response_type)
        if not is_valid:
            # Log validation errors (will appear in LLM Logger Console)
            logger.warning(f"Validation failed: {validation_error}")
            # The debug payloads are costly to build, so skip them entirely
            # unless debug logging is actually enabled.
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug(f"Data: {json.dumps(data, indent=2)}")
                logger.debug(f"Response Type: {response_type}")
                logger.debug(f"Schema required fields: {get_schema_for_response_type(response_type).get('required')}")

            repaired = None
            if self.enable_fallbacks and not self.strict_mode:
                # Try to repair structure
                repaired = self._try_repair_structure(data, response_type)
            if repaired is None:
                # Keep the decoded payload in the result so callers can
                # inspect what the model actually returned.
                return self._fail(
                    ParseError.VALIDATION_ERROR,
                    validation_error,
                    raw_response,
                    data=data,
                    fallback_used=fallback_used,
                )
            # NOTE: repair only fills in missing required fields; type or
            # length violations reported above are not corrected, and the
            # repaired data is not validated a second time.
            logger.warning("Used fallback structure repair mechanism")
            data = repaired
            fallback_used = True

        # Step 4: Validate action if present
        if response_type == ResponseType.ACTIVE_TURN and "action" in data:
            self._normalize_action_parameters(data)
            action_ok, action_error = self._validate_action(data["action"], allowed_actions)
            if not action_ok:
                if self.strict_mode:
                    return self._fail(
                        ParseError.INVALID_ACTION,
                        action_error,
                        raw_response,
                        data=data,
                        fallback_used=fallback_used,
                    )
                # Lenient mode: an invalid action is logged but not fatal.
                logger.warning(f"Action validation warning: {action_error}")

        # Success!
        self.successful_parses += 1
        logger.info("Successfully parsed and validated response")
        return ParseResult(
            success=True,
            data=data,
            raw_response=raw_response,
            fallback_used=fallback_used,
        )

    def _fail(self,
              error_type: ParseError,
              error_message: Optional[str],
              raw_response: Optional[str],
              data: Optional[Dict[str, Any]] = None,
              fallback_used: bool = False) -> ParseResult:
        """Count one failed parse and build the matching failure result."""
        self.failed_parses += 1
        return ParseResult(
            success=False,
            data=data,
            error_type=error_type,
            error_message=error_message,
            raw_response=raw_response,
            fallback_used=fallback_used,
        )

    def _normalize_action_parameters(self, data: Dict[str, Any]) -> None:
        """
        Accept models that return action.parameters as a JSON string.

        Mutates ``data["action"]["parameters"]`` in place: a string is decoded
        as JSON (anything that is not a JSON object becomes ``{}``), and
        ``None`` becomes ``{}``. Other values are left untouched, as is
        ``data`` when its ``action`` is not a dict.
        """
        action = data.get("action")
        if not isinstance(action, dict):
            return
        params = action.get("parameters")
        if params is None:
            action["parameters"] = {}
        elif isinstance(params, str):
            try:
                parsed = json.loads(params) if params.strip() else {}
            except json.JSONDecodeError:
                parsed = {}
            action["parameters"] = parsed if isinstance(parsed, dict) else {}

    def _extract_json(self, text: str) -> Optional[str]:
        """
        Extract JSON from text that may contain additional content.

        Handles cases where LLM returns JSON wrapped in markdown code blocks
        or with additional text before/after.

        Returns:
            The candidate JSON text, or None when no ``{...}`` span exists.
            The candidate is not guaranteed to be decodable.
        """
        # Try to find JSON in code blocks first
        fenced = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', text, re.DOTALL)
        if fenced:
            return fenced.group(1)

        first_brace = text.find('{')
        if first_brace == -1:
            return None

        # Prefer the object that actually decodes from the first brace. This
        # copes with trailing prose or a second object containing '}', which
        # would make the naive first-to-last-brace span undecodable.
        try:
            _, end = json.JSONDecoder().raw_decode(text, first_brace)
        except ValueError:
            pass
        else:
            return text[first_brace:end]

        # Otherwise hand back the first '{' .. last '}' span so that the
        # repair step still gets a chance at slightly malformed JSON.
        last_brace = text.rfind('}')
        if last_brace > first_brace:
            return text[first_brace:last_brace + 1]
        return None

    def _try_fix_json(self, json_str: str) -> Optional[Dict[str, Any]]:
        """
        Attempt to fix common JSON errors.

        Repairs tried:
        - Single quotes instead of double quotes
        - Trailing commas before ``}`` and/or ``]``
        - Both of the above combined

        Returns:
            The decoded object from the first repair that yields a JSON
            object, or None when none of them does.
        """
        def swap_quotes(s: str) -> str:
            # Blunt: also rewrites apostrophes inside string values.
            return s.replace("'", '"')

        def strip_object_commas(s: str) -> str:
            return re.sub(r',\s*}', '}', s)

        def strip_array_commas(s: str) -> str:
            return re.sub(r',\s*]', ']', s)

        def strip_all_commas(s: str) -> str:
            return strip_array_commas(strip_object_commas(s))

        # Single fixes come first, least invasive result wins; the combined
        # fixes cover input that has more than one kind of error.
        fixes = (
            swap_quotes,
            strip_object_commas,
            strip_array_commas,
            strip_all_commas,
            lambda s: strip_all_commas(swap_quotes(s)),
        )
        for fix in fixes:
            try:
                candidate = json.loads(fix(json_str))
            except json.JSONDecodeError:
                continue
            if isinstance(candidate, dict):
                return candidate
        return None

    def _validate_structure(self, data: Dict[str, Any], response_type: ResponseType) -> Tuple[bool, Optional[str]]:
        """
        Validate response structure against schema.

        Checks that every required field is present, then checks the
        ``string``/``object`` type and the ``minLength``/``maxLength``
        constraints of each field that the schema describes. Fields the
        schema does not describe are ignored.

        Returns:
            Tuple of (is_valid, error_message)
        """
        schema = get_schema_for_response_type(response_type)

        # Check required fields
        for field in schema.get("required", []):
            if field not in data:
                return False, f"Missing required field: '{field}'"

        # Validate field types and constraints
        properties = schema.get("properties", {})
        for field, value in data.items():
            if field not in properties:
                continue
            field_schema = properties[field]

            # Check type
            expected_type = field_schema.get("type")
            if expected_type == "string" and not isinstance(value, str):
                return False, f"Field '{field}' must be a string"
            if expected_type == "object" and not isinstance(value, dict):
                return False, f"Field '{field}' must be an object"

            # Check string constraints
            if isinstance(value, str):
                min_length = field_schema.get("minLength")
                max_length = field_schema.get("maxLength")
                if min_length and len(value) < min_length:
                    return False, f"Field '{field}' must be at least {min_length} characters"
                if max_length and len(value) > max_length:
                    return False, f"Field '{field}' must be at most {max_length} characters"
        return True, None

    def _validate_action(self, action: Dict[str, Any], allowed_actions: Optional[list]) -> Tuple[bool, Optional[str]]:
        """
        Validate action structure and parameters.

        The action must be a dict with ``type`` and ``parameters`` keys, its
        type must be in ``allowed_actions`` when that list is non-empty, and
        its parameters must be a dict accepted by
        ``validate_action_parameters``.

        Returns:
            Tuple of (is_valid, error_message)
        """
        if not isinstance(action, dict):
            return False, "Action must be an object"
        for key in ("type", "parameters"):
            if key not in action:
                return False, f"Action missing '{key}' field"

        action_type = action["type"]
        # Check if action is in allowed list (None or empty means no restriction)
        if allowed_actions and action_type not in allowed_actions:
            return False, f"Action type '{action_type}' not in allowed actions: {allowed_actions}"

        # Validate parameters
        parameters = action["parameters"]
        if not isinstance(parameters, dict):
            return False, "Action parameters must be an object"

        # Validate parameter schema
        param_valid, param_error = validate_action_parameters(action_type, parameters)
        if not param_valid:
            return False, param_error
        return True, None

    def _try_repair_structure(self, data: Dict[str, Any], response_type: ResponseType) -> Optional[Dict[str, Any]]:
        """
        Attempt to repair a response that is missing required fields.

        Works on a shallow copy of ``data``. A missing ``internal_thinking``
        gets a placeholder string and a missing ``action`` gets a
        ``wait_for_response`` action with empty parameters. Existing fields
        are never modified, so wrong types or lengths are not repaired.

        Returns:
            The repaired copy, or None when a required field other than the
            two above is missing.
        """
        schema = get_schema_for_response_type(response_type)
        repaired = data.copy()

        # Add missing required fields with defaults
        for field in schema.get("required", []):
            if field in repaired:
                continue
            if field == "internal_thinking":
                repaired[field] = "[No reasoning provided]"
            elif field == "action":
                # Built here so every repaired response gets its own dicts.
                repaired[field] = {"type": "wait_for_response", "parameters": {}}
            else:
                return None  # Can't repair
        return repaired

    def get_statistics(self) -> Dict[str, Any]:
        """
        Get parser statistics.

        Returns:
            Dict with ``total_attempts``, ``successful``, ``failed`` and
            ``success_rate`` (0.0 when nothing has been parsed yet).
        """
        attempts = self.parse_attempts
        return {
            "total_attempts": attempts,
            "successful": self.successful_parses,
            "failed": self.failed_parses,
            "success_rate": (
                self.successful_parses / attempts
                if attempts > 0
                else 0.0
            )
        }