File size: 13,609 Bytes
21fb2c3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e36cfa2
 
 
 
 
21fb2c3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
525124a
21fb2c3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
525124a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21fb2c3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f898356
 
 
 
 
 
21fb2c3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
"""
Response parser for AI agent LLM responses.

This module handles:
1. Parsing JSON responses from LLM
2. Validating response structure against schemas
3. Error handling and recovery
4. Fallback mechanisms for malformed responses
5. Logging all parsing attempts
"""

import json
import logging
import re
from typing import Dict, Any, Optional, Tuple
from dataclasses import dataclass
from enum import Enum

from pycatan.ai.schemas import (
    ResponseType,
    get_schema_for_response_type,
    validate_action_parameters,
    ACTIVE_TURN_RESPONSE_SCHEMA,
    OBSERVING_RESPONSE_SCHEMA
)


# Module-level logger named after this module; every parsing attempt, repair
# and failure in this file is reported through it.
logger = logging.getLogger(__name__)


class ParseError(Enum):
    """Categories of failure reported in ``ParseResult.error_type``.

    The string values are stable identifiers suitable for logs or serialized
    results.
    """
    # No JSON object could be extracted from, or decoded out of, the response.
    INVALID_JSON = "invalid_json"
    # A field required by the schema is absent.
    MISSING_REQUIRED_FIELD = "missing_required_field"
    # A field is present but has the wrong JSON type.
    INVALID_FIELD_TYPE = "invalid_field_type"
    # The "action" object failed validation (reported in strict mode).
    INVALID_ACTION = "invalid_action"
    # The response failed schema structure validation.
    VALIDATION_ERROR = "validation_error"


@dataclass
class ParseResult:
    """Outcome of a single ``ResponseParser.parse`` call.

    On success ``data`` holds the parsed JSON object; on failure
    ``error_type`` and ``error_message`` describe what went wrong.
    """
    # True when the response was parsed and accepted.
    success: bool
    # Parsed JSON object; may also be set on validation failures so callers
    # can inspect what the model actually returned.
    data: Optional[Dict[str, Any]] = None
    # Failure category; None on success.
    error_type: Optional[ParseError] = None
    # Human-readable failure description; None on success.
    error_message: Optional[str] = None
    # The unmodified text that was handed to the parser.
    raw_response: Optional[str] = None
    # Flag for results that relied on a fallback repair; defaults to False.
    fallback_used: bool = False


class ResponseParser:
    """
    Parser for AI agent LLM responses with error handling and fallback mechanisms.
    """
    
    def __init__(self, enable_fallbacks: bool = True, strict_mode: bool = False):
        """
        Initialize the response parser.
        
        Args:
            enable_fallbacks: Whether to use fallback mechanisms for parsing errors
            strict_mode: If True, fail on any validation error. If False, be lenient.
        """
        self.enable_fallbacks = enable_fallbacks
        self.strict_mode = strict_mode
        self.parse_attempts = 0
        self.successful_parses = 0
        self.failed_parses = 0
    
    def parse(self,
              raw_response: str,
              response_type: ResponseType,
              allowed_actions: Optional[list] = None) -> ParseResult:
        """
        Parse and validate an LLM response.

        The pipeline is: extract a JSON candidate from the text, decode it
        (optionally repairing common syntax errors), validate the structure
        against the schema for ``response_type`` (optionally filling in
        missing required fields), and finally validate the action of an
        active-turn response.

        Args:
            raw_response: Raw string response from LLM
            response_type: Expected response type (active turn or observing)
            allowed_actions: List of allowed action types (for validation)

        Returns:
            ParseResult with success status and parsed data or error info.
            ``fallback_used`` is True when a JSON or structure repair was
            needed to produce the returned data.
        """
        self.parse_attempts += 1
        fallback_used = False

        logger.info(f"Parsing response (attempt #{self.parse_attempts})")

        # A missing or non-text completion cannot be sliced or searched;
        # report it as a normal parse failure instead of crashing.
        if not isinstance(raw_response, str):
            return self._failure(
                ParseError.INVALID_JSON,
                f"Response is not a string (got {type(raw_response).__name__})",
                None if raw_response is None else str(raw_response),
            )

        logger.debug(f"Raw response: {raw_response[:200]}...")

        # Step 1: Extract JSON from response
        json_str = self._extract_json(raw_response)
        if json_str is None:
            return self._failure(
                ParseError.INVALID_JSON,
                "Could not find valid JSON in response",
                raw_response,
            )

        # Step 2: Parse JSON
        try:
            data = json.loads(json_str)
        except json.JSONDecodeError as e:
            logger.error(f"JSON decode error: {e}")

            # Try to fix common JSON errors, but only when fallbacks are on
            data = self._try_fix_json(json_str) if self.enable_fallbacks else None
            if data is None:
                return self._failure(
                    ParseError.INVALID_JSON,
                    f"JSON parse error: {str(e)}",
                    raw_response,
                )
            logger.warning("Used fallback JSON repair mechanism")
            fallback_used = True

        # Everything below indexes the payload by field name
        if not isinstance(data, dict):
            return self._failure(
                ParseError.INVALID_JSON,
                "Response JSON must be an object",
                raw_response,
            )

        # Step 3: Validate structure
        is_valid, validation_error = self._validate_structure(data, response_type)
        if not is_valid:
            # Log validation errors (will appear in LLM Logger Console)
            logger.warning(f"Validation failed: {validation_error}")
            if logger.isEnabledFor(logging.DEBUG):
                # Serialising the payload and fetching the schema is only
                # worth doing when the debug output is actually emitted.
                logger.debug(f"Data: {json.dumps(data, indent=2)}")
                logger.debug(f"Response Type: {response_type}")
                logger.debug(f"Schema required fields: {get_schema_for_response_type(response_type).get('required')}")

            repaired = None
            if self.enable_fallbacks and not self.strict_mode:
                # Try to repair structure
                repaired = self._try_repair_structure(data, response_type)
            if repaired is None:
                # Keep the parsed payload on the result so callers can see
                # what the model actually produced.
                return self._failure(
                    ParseError.VALIDATION_ERROR,
                    validation_error,
                    raw_response,
                    data,
                )
            logger.warning("Used fallback structure repair mechanism")
            data = repaired
            fallback_used = True

        # Step 4: Validate action if present
        if response_type == ResponseType.ACTIVE_TURN and "action" in data:
            self._normalize_action_parameters(data)
            action_ok, action_error = self._validate_action(data["action"], allowed_actions)
            if not action_ok:
                if self.strict_mode:
                    return self._failure(
                        ParseError.INVALID_ACTION,
                        action_error,
                        raw_response,
                        data,
                    )
                # Lenient mode: surface the problem but still accept the response
                logger.warning(f"Action validation warning: {action_error}")

        # Success!
        self.successful_parses += 1
        logger.info("Successfully parsed and validated response")

        return ParseResult(
            success=True,
            data=data,
            raw_response=raw_response,
            fallback_used=fallback_used
        )

    def _failure(self,
                 error_type: ParseError,
                 error_message: Optional[str],
                 raw_response: Optional[str],
                 data: Optional[Dict[str, Any]] = None) -> ParseResult:
        """Count a failed parse and build the corresponding ParseResult."""
        self.failed_parses += 1
        return ParseResult(
            success=False,
            data=data,
            error_type=error_type,
            error_message=error_message,
            raw_response=raw_response
        )

    def _normalize_action_parameters(self, data: Dict[str, Any]) -> None:
        """Accept models that return action.parameters as a JSON string."""
        action = data.get("action")
        if not isinstance(action, dict):
            return

        params = action.get("parameters")
        if isinstance(params, str):
            try:
                parsed = json.loads(params) if params.strip() else {}
            except json.JSONDecodeError:
                parsed = {}
            action["parameters"] = parsed if isinstance(parsed, dict) else {}
        elif params is None:
            action["parameters"] = {}
    
    def _extract_json(self, text: str) -> Optional[str]:
        """
        Extract JSON from text that may contain additional content.
        
        Handles cases where LLM returns JSON wrapped in markdown code blocks
        or with additional text before/after.
        """
        # Try to find JSON in code blocks first
        code_block_pattern = r'```(?:json)?\s*(\{.*?\})\s*```'
        matches = re.findall(code_block_pattern, text, re.DOTALL)
        if matches:
            return matches[0]
        
        # If text looks like pure JSON, return as is
        stripped = text.strip()
        if stripped.startswith('{') and stripped.endswith('}'):
            return stripped
        
        # Try to find the first '{' and last '}' - simple but effective
        first_brace = text.find('{')
        last_brace = text.rfind('}')
        if first_brace != -1 and last_brace != -1 and last_brace > first_brace:
            return text[first_brace:last_brace + 1]
        
        return None
    
    def _try_fix_json(self, json_str: str) -> Optional[Dict[str, Any]]:
        """
        Attempt to fix common JSON errors.
        
        Common issues:
        - Missing closing quotes
        - Trailing commas
        - Single quotes instead of double quotes
        """
        fixes = [
            # Replace single quotes with double quotes (careful with apostrophes)
            lambda s: s.replace("'", '"'),
            # Remove trailing commas
            lambda s: re.sub(r',\s*}', '}', s),
            lambda s: re.sub(r',\s*]', ']', s),
        ]
        
        for fix in fixes:
            try:
                fixed = fix(json_str)
                return json.loads(fixed)
            except (json.JSONDecodeError, Exception):
                continue
        
        return None
    
    def _validate_structure(self, data: Dict[str, Any], response_type: ResponseType) -> Tuple[bool, Optional[str]]:
        """
        Check a parsed response against the schema for ``response_type``.

        The schema is read as a mapping with an optional ``"required"`` list
        and a ``"properties"`` mapping of field name to a spec carrying
        ``"type"``, ``"minLength"`` and ``"maxLength"``. Only ``"string"``
        and ``"object"`` types are enforced, fields not named in the schema
        are ignored, and validation stops at the first problem.

        Returns:
            Tuple of (is_valid, error_message); the message is None when
            the data is valid.
        """
        schema = get_schema_for_response_type(response_type)

        # Every required field must be present
        for required_name in schema.get("required", []):
            if required_name in data:
                continue
            return False, f"Missing required field: '{required_name}'"

        # Type and length checks for the fields the schema knows about
        for name, value in data.items():
            if name not in schema["properties"]:
                continue
            spec = schema["properties"][name]

            declared_type = spec.get("type")
            value_is_text = isinstance(value, str)

            if declared_type == "string" and not value_is_text:
                return False, f"Field '{name}' must be a string"
            if declared_type == "object" and not isinstance(value, dict):
                return False, f"Field '{name}' must be an object"

            if not value_is_text:
                continue

            # Length limits apply to any string value; a falsy limit
            # (absent or 0) means "no limit".
            lower = spec.get("minLength")
            upper = spec.get("maxLength")
            size = len(value)
            if lower and size < lower:
                return False, f"Field '{name}' must be at least {lower} characters"
            if upper and size > upper:
                return False, f"Field '{name}' must be at most {upper} characters"

        return True, None
    
    def _validate_action(self, action: Dict[str, Any], allowed_actions: Optional[list]) -> Tuple[bool, Optional[str]]:
        """
        Validate action structure and parameters.
        
        Returns:
            Tuple of (is_valid, error_message)
        """
        if not isinstance(action, dict):
            return False, "Action must be an object"
        
        if "type" not in action:
            return False, "Action missing 'type' field"
        
        if "parameters" not in action:
            return False, "Action missing 'parameters' field"
        
        action_type = action["type"]
        
        # Check if action is in allowed list
        if allowed_actions:
            if action_type not in allowed_actions:
                return False, f"Action type '{action_type}' not in allowed actions: {allowed_actions}"
        
        # Validate parameters
        parameters = action["parameters"]
        if not isinstance(parameters, dict):
            return False, "Action parameters must be an object"
        
        # Validate parameter schema
        param_valid, param_error = validate_action_parameters(action_type, parameters)
        if not param_valid:
            return False, param_error
        
        return True, None
    
    def _try_repair_structure(self, data: Dict[str, Any], response_type: ResponseType) -> Optional[Dict[str, Any]]:
        """
        Fill in missing required fields with safe defaults.

        Works on a shallow copy, so ``data`` itself is not modified. Only
        two fields have a default:
        - ``internal_thinking``: a placeholder string
        - ``action``: a ``wait_for_response`` action with no parameters

        Existing fields are never altered (no type conversion is done).

        Returns:
            The completed copy, or None when a required field without a
            known default is missing.
        """
        schema = get_schema_for_response_type(response_type)

        # Built per call so the returned action dict is never shared
        defaults = {
            "internal_thinking": "[No reasoning provided]",
            "action": {"type": "wait_for_response", "parameters": {}},
        }

        repaired = data.copy()
        for name in schema.get("required", []):
            if name in repaired:
                continue
            if name not in defaults:
                return None  # Can't repair
            repaired[name] = defaults[name]

        return repaired
    
    def get_statistics(self) -> Dict[str, Any]:
        """Get parser statistics."""
        return {
            "total_attempts": self.parse_attempts,
            "successful": self.successful_parses,
            "failed": self.failed_parses,
            "success_rate": (
                self.successful_parses / self.parse_attempts 
                if self.parse_attempts > 0 
                else 0.0
            )
        }