File size: 9,070 Bytes
66180d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
import json
import xml.etree.ElementTree as ET
from xml.dom import minidom

def get_visible_cell_coords(table_properties, table_x, table_y, scale=2):
    """Compute the bounding box of every visible cell of a table.

    A cell is skipped when it is flagged in ``hiddenCells`` or when it lies
    inside a merged region without being that region's top-left origin.

    Args:
        table_properties: Mapping that may contain ``rows``, ``columns``,
            ``columnWidths`` / ``rowHeights`` (keyed by the stringified
            index), ``mergedCells`` (keyed ``"row-col"``, values holding
            ``rowspan`` / ``colspan``) and ``hiddenCells`` (keyed
            ``"row-col"``). Missing or ``None`` entries fall back to
            empty/zero defaults.
        table_x: Horizontal offset of the table, added to every cell x.
        table_y: Vertical offset of the table, added to every cell y.
        scale: Factor applied to all returned values. Defaults to 2, the
            value that used to be hard-coded (presumably a pixel-ratio
            between the JSON layout and the target image; confirm).

    Returns:
        Dict mapping ``(row, col)`` to ``{"x", "y", "width", "height"}``.
    """
    # ``or`` rather than a .get() default: a JSON ``null`` must also fall
    # back to the empty value instead of crashing on ``None.get`` later.
    rows = table_properties.get("rows") or 0
    columns = table_properties.get("columns") or 0
    column_widths = table_properties.get("columnWidths") or {}
    row_heights = table_properties.get("rowHeights") or {}
    merged_cells = table_properties.get("mergedCells") or {}
    hidden_cells = table_properties.get("hiddenCells") or {}

    DEFAULT_WIDTH = 100
    DEFAULT_HEIGHT = 30

    def get_col_width(col):
        return column_widths.get(str(col), DEFAULT_WIDTH)

    def get_row_height(row):
        return row_heights.get(str(row), DEFAULT_HEIGHT)

    # Every cell covered by a merged region, except the region's origin.
    merged_spanned_cells = set()
    for key, merge_info in merged_cells.items():
        base_row, base_col = map(int, key.split('-'))
        rowspan = merge_info.get('rowspan', 1)
        colspan = merge_info.get('colspan', 1)
        for r in range(base_row, base_row + rowspan):
            for c in range(base_col, base_col + colspan):
                if (r, c) != (base_row, base_col):
                    merged_spanned_cells.add((r, c))

    # Offsets depend only on the column/row index, so compute them once
    # instead of once per cell. Each offset is still produced by a plain
    # sum() over the preceding sizes so the values are unchanged.
    col_offsets = [
        sum(get_col_width(c) for c in range(col)) for col in range(columns)
    ]
    row_offsets = [
        sum(get_row_height(r) for r in range(row)) for row in range(rows)
    ]

    result = {}

    for row in range(rows):
        for col in range(columns):
            coord_key = f"{row}-{col}"
            if hidden_cells.get(coord_key):
                continue  # Skip hidden cells
            if (row, col) in merged_spanned_cells:
                continue  # Skip cells covered by merged cells

            # A merge origin spans several columns/rows; others span one.
            merge_info = merged_cells.get(coord_key)
            if merge_info is not None:
                colspan = merge_info.get("colspan", 1)
                rowspan = merge_info.get("rowspan", 1)
            else:
                colspan = 1
                rowspan = 1

            width = sum(get_col_width(c) for c in range(col, col + colspan))
            height = sum(get_row_height(r) for r in range(row, row + rowspan))

            result[(row, col)] = {
                "x": scale * (col_offsets[col] + table_x),
                "y": scale * (row_offsets[row] + table_y),
                "width": scale * width,
                "height": scale * height,
            }

    return result

def _border_width_is_positive(raw_width):
    """Return True when a border width value denotes a visible border.

    Accepts numbers, ``None`` (treated as no border) and strings such as
    ``"2"`` or ``"2px"``. A non-numeric string raises ``ValueError``.
    """
    if raw_width is None:
        return False
    if isinstance(raw_width, str):
        text = raw_width.strip().lower()
        if text.endswith("px"):
            text = text[:-2]
        if not text:
            return False
        return float(text) > 0
    return raw_width > 0


def get_cell_borders(cell_data, table_properties):
    """Determine which of a cell's four borders are drawn.

    Every side starts from the table-wide ``cellBorders["all"]`` flag. A
    side is then overridden only if the cell's ``cellStyle`` defines that
    side's ``border<Side>Width``: the border is on when the width is
    positive, unless the matching ``border<Side>Style`` is ``"none"``.
    A style given without a width does not override the table default.

    Args:
        cell_data: Per-cell mapping (may be empty or ``None``); only its
            ``cellStyle`` entry is read.
        table_properties: Table mapping; only ``cellBorders`` is read.

    Returns:
        Tuple ``(top, bottom, left, right)`` of 0/1 flags.
    """
    # ``or {}`` also covers JSON ``null`` values for these entries.
    cell_borders = table_properties.get("cellBorders") or {}
    default_flag = 1 if cell_borders.get("all", False) else 0

    sides = ("top", "bottom", "left", "right")
    borders = {side: default_flag for side in sides}

    cell_style = (cell_data or {}).get("cellStyle") or {}

    for side in sides:
        width_key = f"border{side.capitalize()}Width"
        if width_key not in cell_style:
            continue  # no explicit width: keep the table-wide default

        has_border = _border_width_is_positive(cell_style[width_key])

        # An explicit "none" style suppresses the border regardless of width.
        style_key = width_key.replace("Width", "Style")
        if cell_style.get(style_key) == "none":
            has_border = False

        borders[side] = 1 if has_border else 0

    return borders["top"], borders["bottom"], borders["left"], borders["right"]

def convert_json_to_xml(json_data, filename="table.jpg"):
    """Build an XML tree describing a table and its visible cells.

    ``json_data`` may be a JSON string, a table mapping, or a list of
    table mappings (only the first entry is used). The result is a
    ``<document filename=...>`` element holding one ``<table>`` with a
    ``<Coords>`` rectangle, followed by one ``<cell>`` per visible cell
    carrying its row/column span, its ``<Coords>`` rectangle and a
    ``<Lines>`` element with the four border flags.

    NOTE(review): the table rectangle is built from the raw x/y/width/height
    values, while the cell rectangles come from get_visible_cell_coords,
    which applies its own scaling - confirm both are meant to share one
    coordinate space.
    """

    def rect_points(left, top, width, height):
        # Corner order: top-left, top-right, bottom-right, bottom-left.
        x_min, y_min = int(left), int(top)
        x_max, y_max = int(left + width), int(top + height)
        return f"{x_min},{y_min} {x_max},{y_min} {x_max},{y_max} {x_min},{y_max}"

    # Normalise the input down to a single table mapping.
    parsed = json.loads(json_data) if isinstance(json_data, str) else json_data
    table = parsed[0] if isinstance(parsed, list) else parsed

    props = table.get("properties", {})
    origin_x = table.get("x", 0)
    origin_y = table.get("y", 0)
    # Size on the table itself wins; otherwise fall back to the properties.
    total_width = table.get("width", props.get("width", 0))
    total_height = table.get("height", props.get("height", 0))

    root = ET.Element("document", filename=filename)
    table_elem = ET.SubElement(root, "table")
    ET.SubElement(
        table_elem,
        "Coords",
        points=rect_points(origin_x, origin_y, total_width, total_height),
    )

    visible_cells = get_visible_cell_coords(props, origin_x, origin_y)
    per_cell_data = props.get("cellData", {})
    merges = props.get("mergedCells", {})

    for (row, col), box in visible_cells.items():
        key = f"{row}-{col}"
        this_cell = per_cell_data.get(key, {})

        # A non-merged cell behaves like a 1x1 span.
        span = merges.get(key, {})
        last_row = row + span.get("rowspan", 1) - 1
        last_col = col + span.get("colspan", 1) - 1

        cell_elem = ET.SubElement(
            table_elem,
            "cell",
            {
                "start-row": str(row),
                "end-row": str(last_row),
                "start-col": str(col),
                "end-col": str(last_col),
            },
        )

        ET.SubElement(
            cell_elem,
            "Coords",
            points=rect_points(box["x"], box["y"], box["width"], box["height"]),
        )

        flags = get_cell_borders(this_cell, props)
        ET.SubElement(
            cell_elem,
            "Lines",
            **{
                side: str(flag)
                for side, flag in zip(("top", "bottom", "left", "right"), flags)
            },
        )

    return root

def save_xml_to_file(xml_root, output_path):
    """Write an ElementTree element to ``output_path`` as indented XML.

    The element is serialised, re-parsed with minidom for four-space
    pretty-printing, stripped of whitespace-only lines and written as
    UTF-8 without a trailing newline.
    """
    serialized = ET.tostring(xml_root, encoding='unicode')
    indented = minidom.parseString(serialized).toprettyxml(indent="    ")

    # Keep only lines that contain something other than whitespace.
    kept_lines = filter(str.strip, indented.split('\n'))
    output_text = '\n'.join(kept_lines)

    with open(output_path, 'w', encoding='utf-8') as handle:
        handle.write(output_text)

def convert_json_file_to_xml(json_file_path, xml_file_path, filename="table.jpg"):
    """Convert one JSON file on disk into an XML file.

    Loads ``json_file_path``, takes the value under its ``items`` key,
    converts it with convert_json_to_xml and writes the result to
    ``xml_file_path``. Any exception is reported on stdout rather than
    propagated.

    Returns:
        True on success, False if any step raised.
    """
    try:
        with open(json_file_path, 'r', encoding='utf-8') as source:
            payload = json.load(source)

        # The table description(s) live under the top-level "items" key.
        items = payload.get('items')

        save_xml_to_file(convert_json_to_xml(items, filename), xml_file_path)

        print(f"✅ Successfully converted {json_file_path} to {xml_file_path}")
        return True

    except Exception as e:
        print(f"❌ Error converting file: {e}")
        return False

# Example usage and testing
if __name__ == "__main__":

    import os
    import sys

    # Input folder: first command-line argument if given, otherwise the
    # original development default.
    default_folder = "/Users/tuvn18/Desktop/tuvn18/dev/KIAI/dev/trace/test_json"
    folder = sys.argv[1] if len(sys.argv) > 1 else default_folder

    for name in os.listdir(folder):
        # Split off the real extension so only "*.json" files match and
        # only the trailing suffix is replaced.
        stem, ext = os.path.splitext(name)
        if ext != '.json':
            continue
        json_name = os.path.join(folder, name)
        # The XML is written relative to the current working directory;
        # the matching ".png" name is recorded as the document filename.
        xml_name = stem + '.xml'
        convert_json_file_to_xml(json_name, xml_name, stem + '.png')