import re
import json

# Default marker separating a model's free-form reasoning from its answer.
# NOTE(review): the original code tested for and split on an empty string
# (''), which raises "ValueError: empty separator" on every call; the marker
# text appears to have been lost. '</think>' is an assumption based on the
# "thought" naming used below -- confirm against the producer of the text.
_DEFAULT_THOUGHT_DELIMITER = '</think>'

# A double-quoted string literal. Backslash escapes (such as \") are consumed
# as a unit so an escaped quote does not terminate the match early.
_QUOTED_STRING_RE = re.compile(r'"((?:[^"\\]|\\.)*)"', re.DOTALL)

# A candidate is only tried as JSON if it opens with a brace followed by a
# quote character (i.e. it looks like an object with a quoted key).
_OBJECT_START_RE = re.compile(r'^\{\s*["\']')


def parse_expected_output_fields(expected_output_text):
    """
    Parse expected_output_text into a list of (key, description) tuples.

    Each line is split at its first ':'; the part before becomes the key and
    the part after becomes the description, both stripped of surrounding
    whitespace. Lines without a ':' are ignored.

    Args:
        expected_output_text (str): Text with one "key: description" per line.
            None or an empty string yields an empty list.

    Returns:
        list[tuple[str, str]]: (key, description) pairs in input order.
    """
    if not expected_output_text:
        return []

    fields = []
    for line in expected_output_text.strip().splitlines():
        key, separator, description = line.partition(':')
        if not separator:
            # No colon on this line, so it does not describe a field.
            continue
        fields.append((key.strip(), description.strip()))
    return fields


def extract_fields_from_expected_output(expected_output_text):
    """
    Return just the list of keys (field names) from expected_output_text.

    Args:
        expected_output_text (str): Text with one "key: description" per line.

    Returns:
        list[str]: The keys, in input order.
    """
    return [key for key, _ in parse_expected_output_fields(expected_output_text)]


def _escape_newlines_in_strings(text):
    """Replace raw line breaks inside double-quoted strings with a literal \\n."""
    def _escape(match):
        content = match.group(1)
        # Collapse CRLF first so one line break yields exactly one "\n".
        content = content.replace('\r\n', '\n').replace('\r', '\n')
        return '"' + content.replace('\n', '\\n') + '"'

    return _QUOTED_STRING_RE.sub(_escape, text)


def split_json_string(text, thought_delimiter=_DEFAULT_THOUGHT_DELIMITER):
    """
    Split text into a 'thought' part and a JSON part.

    The text is first split at the first occurrence of thought_delimiter (if
    present). Every '{' in the remainder is then tried, left to right, as the
    start of a JSON document; the first candidate that json.loads accepts is
    returned. Before parsing, raw newlines inside quoted strings are escaped
    and a leading "{{" ... "}}" pair is reduced to single braces.

    Args:
        text (str): Raw text, optionally containing a thought section
            followed by the delimiter and then the answer.
        thought_delimiter (str): Marker that ends the thought section. An
            empty value disables the split.

    Returns:
        tuple: (thought_part, json_part). thought_part is the stripped text
        before the delimiter, or None when the delimiter is absent.
        json_part is the cleaned candidate that parsed successfully; if no
        candidate parses, it is the full stripped remainder, unmodified.
    """
    # Step 1: separate the thought section from the rest, if the marker exists.
    # The truthiness check guards against str.split/partition rejecting ''.
    if thought_delimiter and thought_delimiter in text:
        before, _, after = text.partition(thought_delimiter)
        thought_part = before.strip()
        possible_json_part = after.strip()
    else:
        thought_part = None
        possible_json_part = text.strip()

    # Step 2: try each '{' as a potential start of the JSON payload.
    for brace in re.finditer(r'\{', possible_json_part):
        candidate = possible_json_part[brace.start():].strip()
        candidate = _escape_newlines_in_strings(candidate)

        # Undo doubled braces (one pair only), e.g. from template escaping.
        if candidate.startswith("{{") and "}}" in candidate:
            candidate = candidate.replace("{{", "{", 1).replace("}}", "}", 1)

        if not _OBJECT_START_RE.match(candidate):
            continue  # does not look like an object with a quoted key

        try:
            json.loads(candidate)
        except json.JSONDecodeError:
            continue  # try the next brace position
        return thought_part, candidate

    # No valid JSON found: hand back the whole remainder untouched.
    return thought_part, possible_json_part


# NOTE(review): the definition of extract_and_parse_json starts here in the
# original file, but the text is cut off in the middle of a regex literal, so
# it cannot be reconstructed. The visible fragment is kept verbatim below.
#
# def extract_and_parse_json(result_text):
#     """
#     Extracts and parses JSON output, handling cases where JSON is enclosed in triple backticks (```json ... ```)
#     or already correctly formatted `{}`.
#
#     Args:
#         result_text (str): The raw text output containing JSON data.
#
#     Returns:
#         dict or None: Parsed JSON object if successful, None otherwise.
#     """
#     if not result_text:
#         print("🚨 No result text data received.")
#         return None
#
#     # 🛠 Clean unescaped line breaks that often break LLM JSON output
#     def clean_json_formatting(text):
#         # Replace unescaped newlines with a space
#         return re.sub(r'(?