Spaces:
Running
Running
| import json | |
| import logging | |
| from typing import List, Any | |
| logger = logging.getLogger("json-extractor") | |
def find_balanced_closing_index(text: str, start_index: int) -> int:
    """Return the index of the bracket that closes the one at ``start_index``.

    The scan is a small state machine that skips over anything that should
    not influence nesting depth:

    * string literals delimited by ``"``, ``'`` or a backtick,
    * ``// line`` comments (terminated by a newline),
    * ``/* block */`` comments,
    * a backslash together with the character that follows it.

    Only brackets of the same kind as the opening one are counted, i.e. when
    starting on ``{`` the ``[``/``]`` characters do not affect the depth.

    Args:
        text: The text to scan.
        start_index: Position of the opening ``{`` or ``[`` in ``text``.

    Returns:
        The index of the matching closing bracket, or ``-1`` when there is
        none. ``-1`` is also returned when ``start_index`` is outside
        ``text`` (negative indices included) or does not point at ``{`` or
        ``[``.
    """
    length = len(text)

    # Guard the precondition instead of raising IndexError (empty text or
    # out-of-range index) or pairing an arbitrary character with ']'.
    if not 0 <= start_index < length:
        return -1
    start_char = text[start_index]
    if start_char == '{':
        end_char = '}'
    elif start_char == '[':
        end_char = ']'
    else:
        return -1

    quote_chars = ('"', "'", '`')
    active_quote = ''  # Delimiter of the string being scanned; '' if none.
    in_line_comment = False
    in_block_comment = False
    depth = 0

    i = start_index
    while i < length:
        char = text[i]

        # Inside a comment nothing matters except its terminator.
        if in_line_comment:
            if char == '\n':
                in_line_comment = False
            i += 1
            continue
        if in_block_comment:
            if text.startswith('*/', i):
                in_block_comment = False
                i += 2
            else:
                i += 1
            continue

        # A backslash (inside or outside a string) hides the next character,
        # so an escaped quote such as \" does not end the string.
        if char == '\\':
            i += 2
            continue

        # Inside a string only the matching delimiter is significant.
        if active_quote:
            if char == active_quote:
                active_quote = ''
            i += 1
            continue

        # Comment openers are only recognised outside of strings.
        if char == '/':
            following = text[i + 1:i + 2]
            if following == '/':
                in_line_comment = True
                i += 2
                continue
            if following == '*':
                in_block_comment = True
                i += 2
                continue

        if char in quote_chars:
            active_quote = char
        elif char == start_char:
            depth += 1
        elif char == end_char:
            depth -= 1
            if depth == 0:
                return i
        i += 1

    return -1
def extract_json_from_content(content: str) -> List[Any]:
    """Collect every JSON object or array embedded in ``content``.

    The text is walked left to right. At each ``{`` or ``[`` the matching
    closing bracket is looked up with ``find_balanced_closing_index`` and the
    enclosed slice is handed to ``json.loads``. A slice that parses is
    recorded and the walk resumes right after it; otherwise the walk moves on
    by a single character, so JSON nested inside an unparseable region can
    still be picked up.

    Args:
        content: Arbitrary text that may contain JSON fragments.

    Returns:
        The parsed values in order of appearance. Empty when ``content`` is
        falsy or not a string.
    """
    if not (content and isinstance(content, str)):
        return []

    results = []
    total = len(content)
    pos = 0

    while pos < total:
        if content[pos] in ('{', '['):
            close_at = find_balanced_closing_index(content, pos)
            if close_at != -1:
                snippet = content[pos:close_at + 1]
                try:
                    value = json.loads(snippet)
                except json.JSONDecodeError:
                    # Balanced but not valid JSON: retry from the next char.
                    pass
                else:
                    results.append(value)
                    pos = close_at + 1
                    continue
        pos += 1

    return results