File size: 3,476 Bytes
a2e3298
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import json
import logging
from typing import List, Any

logger = logging.getLogger("json-extractor")

def find_balanced_closing_index(text: str, start_index: int) -> int:
    """
    Return the index of the bracket that closes the one at ``start_index``.

    The scan is lenient (JavaScript-like): brackets are ignored while inside
    double-quoted, single-quoted or backtick-quoted strings, inside ``//``
    line comments and inside ``/* ... */`` block comments. A backslash
    outside of a comment escapes the character that follows it. Only
    brackets of the same kind as the opening one are counted.

    Args:
        text: The text to scan.
        start_index: Position of the opening ``{`` or ``[`` in ``text``.

    Returns:
        The index of the matching closing bracket, or ``-1`` when there is
        no match, when ``start_index`` is outside ``text`` (negative indexes
        included), or when the character at ``start_index`` is not an
        opening bracket.
    """
    closers = {'{': '}', '[': ']'}

    length = len(text)
    # Reject positions outside the text instead of raising IndexError or
    # silently wrapping around on negative indexes.
    if not 0 <= start_index < length:
        return -1

    start_char = text[start_index]
    end_char = closers.get(start_char)
    if end_char is None:
        # Not an opening bracket: there is nothing to balance.
        return -1

    depth = 0
    quote = ''  # Active string delimiter, or '' when outside any string.
    in_line_comment = False
    in_block_comment = False
    is_escaped = False

    i = start_index
    while i < length:
        char = text[i]
        next_char = text[i + 1] if i + 1 < length else ''

        # Escapes: the character after a backslash is always skipped.
        if is_escaped:
            is_escaped = False
            i += 1
            continue
        if char == '\\' and not in_line_comment and not in_block_comment:
            is_escaped = True
            i += 1
            continue

        # Inside a comment: only look for its terminator.
        if in_line_comment:
            if char == '\n':
                in_line_comment = False
            i += 1
            continue
        if in_block_comment:
            if char == '*' and next_char == '/':
                in_block_comment = False
                i += 2
            else:
                i += 1
            continue

        # Inside a string: only the matching delimiter ends it.
        if quote:
            if char == quote:
                quote = ''
            i += 1
            continue

        # Outside strings and comments: detect comment and string starts.
        if char == '/' and next_char == '/':
            in_line_comment = True
            i += 2
            continue
        if char == '/' and next_char == '*':
            in_block_comment = True
            i += 2
            continue
        if char in ('"', "'", '`'):
            quote = char
            i += 1
            continue

        # Bracket counting; the opening bracket itself brings depth to 1.
        if char == start_char:
            depth += 1
        elif char == end_char:
            depth -= 1
            if depth == 0:
                return i

        i += 1

    return -1

def extract_json_from_content(content: str) -> List[Any]:
    """
    Extract every top-level JSON object or array embedded in ``content``.

    The text is scanned left to right. At each ``{`` or ``[`` the standard
    library decoder tries to parse a JSON value starting at that position.
    On success the parsed value is collected and scanning resumes right
    after it, so values nested inside an extracted value are not reported
    again. On failure the scan moves on by one character, which lets valid
    JSON nested inside an invalid outer bracket pair still be found.

    Args:
        content: Arbitrary text that may contain JSON fragments.

    Returns:
        The parsed objects/arrays in order of appearance. An empty list is
        returned when ``content`` is empty or is not a string.
    """
    if not content or not isinstance(content, str):
        return []

    decoder = json.JSONDecoder()
    found_blocks: List[Any] = []
    cursor = 0
    length = len(content)

    while cursor < length:
        if content[cursor] not in ('{', '['):
            cursor += 1
            continue

        try:
            # raw_decode parses in place and reports where the value ends,
            # so no separate bracket matching or slice copy is needed.
            parsed, end = decoder.raw_decode(content, cursor)
        except (ValueError, RecursionError):
            # ValueError covers json.JSONDecodeError as well as the integer
            # digit-limit error; RecursionError covers excessive nesting.
            # Either way this opener does not start usable JSON.
            cursor += 1
            continue

        found_blocks.append(parsed)
        cursor = end

    return found_blocks