import json
import xml.etree.ElementTree as ET
from xml.dom import minidom


def get_visible_cell_coords(table_properties, table_x, table_y, scale=2):
    """Compute the rectangle of every visible cell in a table.

    A cell is skipped when its ``"row-col"`` key has a truthy entry in
    ``hiddenCells`` or when it is covered by a merged cell (any cell of a
    merge span other than its top-left origin).

    Args:
        table_properties: Mapping that may contain ``rows``, ``columns``,
            ``columnWidths``, ``rowHeights``, ``mergedCells`` and
            ``hiddenCells``. Widths/heights are looked up by the stringified
            index; missing entries fall back to 100 (width) and 30 (height).
        table_x: X offset of the table, added to every cell's x.
        table_y: Y offset of the table, added to every cell's y.
        scale: Factor applied to x, y, width and height of every cell.
            Defaults to 2, the factor that used to be hard-coded.

    Returns:
        Dict mapping ``(row, col)`` to a dict with ``x``, ``y``, ``width``
        and ``height``, in row-major insertion order.
    """
    rows = table_properties.get("rows", 0)
    columns = table_properties.get("columns", 0)
    column_widths = table_properties.get("columnWidths", {})
    row_heights = table_properties.get("rowHeights", {})
    merged_cells = table_properties.get("mergedCells", {})
    hidden_cells = table_properties.get("hiddenCells", {})

    DEFAULT_WIDTH = 100
    DEFAULT_HEIGHT = 30

    def get_col_width(col):
        return column_widths.get(str(col), DEFAULT_WIDTH)

    def get_row_height(row):
        return row_heights.get(str(row), DEFAULT_HEIGHT)

    # Every cell covered by a merge span, excluding the span's top-left origin.
    merged_spanned_cells = set()
    for key, merge_info in merged_cells.items():
        base_row, base_col = map(int, key.split('-'))
        rowspan = merge_info.get('rowspan', 1)
        colspan = merge_info.get('colspan', 1)
        for r in range(base_row, base_row + rowspan):
            for c in range(base_col, base_col + colspan):
                if (r, c) != (base_row, base_col):
                    merged_spanned_cells.add((r, c))

    # The x offset of a column does not depend on the row, so compute it once
    # per column instead of once per cell. Each offset is still a plain
    # sum() over the preceding widths, so the values match a per-cell sum.
    col_widths = [get_col_width(c) for c in range(columns)]
    x_offsets = [sum(col_widths[:col]) for col in range(columns)]

    result = {}
    for row in range(rows):
        # Likewise the y offset is shared by every cell of the row.
        y = sum(get_row_height(r) for r in range(row))
        for col in range(columns):
            coord_key = f"{row}-{col}"
            if hidden_cells.get(coord_key):
                continue  # Skip hidden cells
            if (row, col) in merged_spanned_cells:
                continue  # Skip cells covered by merged cells

            # A merge origin spans several columns/rows; any other cell is 1x1.
            merge_info = merged_cells.get(coord_key, {})
            colspan = merge_info.get("colspan", 1)
            rowspan = merge_info.get("rowspan", 1)

            # Summed through the getters (not col_widths) because a span may
            # reach past the declared column/row count.
            width = sum(get_col_width(c) for c in range(col, col + colspan))
            height = sum(get_row_height(r) for r in range(row, row + rowspan))

            result[(row, col)] = {
                "x": scale * (x_offsets[col] + table_x),
                "y": scale * (y + table_y),
                "width": scale * width,
                "height": scale * height,
            }
    return result


def get_cell_borders(cell_data, table_properties):
    """Return the border flags of a cell as ``(top, bottom, left, right)``.

    Every side starts at 1 when ``table_properties["cellBorders"]["all"]`` is
    truthy, otherwise 0. A side is then overridden only if the cell's
    ``cellStyle`` defines the matching ``border<Side>Width``: the side is 1
    when that width is greater than 0 and the matching ``border<Side>Style``
    (if present) is not ``"none"``, else 0.

    Args:
        cell_data: Per-cell mapping (may be empty or ``None``) that may hold a
            ``cellStyle`` mapping.
        table_properties: Table-level mapping that may hold ``cellBorders``.

    Returns:
        Tuple ``(top, bottom, left, right)`` of 0/1 integers.
    """
    cell_borders = table_properties.get("cellBorders", {})
    default_flag = 1 if cell_borders.get("all", False) else 0
    borders = {
        "top": default_flag,
        "bottom": default_flag,
        "left": default_flag,
        "right": default_flag,
    }

    if cell_data and "cellStyle" in cell_data:
        cell_style = cell_data["cellStyle"]
        border_mappings = {
            "borderTopWidth": "top",
            "borderBottomWidth": "bottom",
            "borderLeftWidth": "left",
            "borderRightWidth": "right",
        }
        for width_key, border_side in border_mappings.items():
            if width_key not in cell_style:
                continue  # No explicit width: keep the table-level default
            has_border = cell_style[width_key] > 0
            style_key = width_key.replace("Width", "Style")
            if cell_style.get(style_key) == "none":
                has_border = False
            borders[border_side] = 1 if has_border else 0

    return borders["top"], borders["bottom"], borders["left"], borders["right"]


def convert_json_to_xml(json_data, filename="table.jpg", scale=2):
    """Convert a JSON table description into an XML element tree.

    Args:
        json_data: A JSON string, a table mapping, or a list of table
            mappings (only the first element of a list is converted).
        filename: Value stored in the ``filename`` attribute of the root
            ``document`` element.
        scale: Factor forwarded to ``get_visible_cell_coords`` for the cell
            rectangles. Defaults to 2.

    Returns:
        The root ``document`` element. It contains one ``table`` element with
        a ``Coords`` child for the table rectangle, followed by one ``cell``
        element per visible cell, each holding ``Coords`` and ``Lines``.
    """
    # Parse JSON if it's a string
    if isinstance(json_data, str):
        data = json.loads(json_data)
    else:
        data = json_data

    # Handle list of tables (take first one)
    if isinstance(data, list):
        table_data = data[0]
    else:
        table_data = data

    # Width/height are read from the table itself, falling back to properties.
    properties = table_data.get("properties", {})
    table_x = table_data.get("x", 0)
    table_y = table_data.get("y", 0)
    table_width = table_data.get("width", properties.get("width", 0))
    table_height = table_data.get("height", properties.get("height", 0))

    root = ET.Element("document", filename=filename)
    table_elem = ET.SubElement(root, "table")

    # Table rectangle as four corner points, clockwise from top-left.
    # NOTE(review): the table rectangle is emitted unscaled while the cell
    # rectangles below are multiplied by `scale` -- confirm this is intended.
    x1, y1 = int(table_x), int(table_y)
    x2, y2 = int(table_x + table_width), int(table_y + table_height)
    table_coords = f"{x1},{y1} {x2},{y1} {x2},{y2} {x1},{y2}"
    ET.SubElement(table_elem, "Coords", points=table_coords)

    cell_coords = get_visible_cell_coords(properties, table_x, table_y, scale)
    cell_data = properties.get("cellData", {})
    merged_cells = properties.get("mergedCells", {})

    for (row, col), coords in cell_coords.items():
        cell_key = f"{row}-{col}"
        current_cell_data = cell_data.get(cell_key, {})

        # A merge origin ends at the last row/column of its span (inclusive).
        merge_info = merged_cells.get(cell_key, {})
        end_row = row + merge_info.get("rowspan", 1) - 1
        end_col = col + merge_info.get("colspan", 1) - 1

        cell_elem = ET.SubElement(table_elem, "cell")
        cell_elem.set("start-row", str(row))
        cell_elem.set("end-row", str(end_row))
        cell_elem.set("start-col", str(col))
        cell_elem.set("end-col", str(end_col))

        # Cell rectangle as four corner points, truncated to integers.
        x1 = int(coords["x"])
        y1 = int(coords["y"])
        x2 = int(coords["x"] + coords["width"])
        y2 = int(coords["y"] + coords["height"])
        cell_coord_str = f"{x1},{y1} {x2},{y1} {x2},{y2} {x1},{y2}"
        ET.SubElement(cell_elem, "Coords", points=cell_coord_str)

        top, bottom, left, right = get_cell_borders(current_cell_data, properties)
        ET.SubElement(
            cell_elem,
            "Lines",
            top=str(top),
            bottom=str(bottom),
            left=str(left),
            right=str(right),
        )

    return root


def save_xml_to_file(xml_root, output_path):
    """Write an XML element tree to ``output_path`` as indented UTF-8 text.

    Args:
        xml_root: Root element to serialise.
        output_path: Destination file path; the file is overwritten.
    """
    rough_string = ET.tostring(xml_root, encoding='unicode')
    reparsed = minidom.parseString(rough_string)
    pretty_xml = reparsed.toprettyxml(indent=" ")

    # Drop the whitespace-only lines from the pretty-printed output.
    lines = [line for line in pretty_xml.split('\n') if line.strip()]
    pretty_xml = '\n'.join(lines)

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(pretty_xml)


def convert_json_file_to_xml(json_file_path, xml_file_path, filename="table.jpg"):
    """Convert a JSON file to an XML file.

    The table data is read from the ``items`` entry of the JSON document.

    Args:
        json_file_path: Path of the JSON file to read.
        xml_file_path: Path of the XML file to write.
        filename: Value for the ``filename`` attribute of the XML root.

    Returns:
        ``True`` on success. On any exception the error is printed and
        ``False`` is returned instead of raising.
    """
    try:
        with open(json_file_path, 'r', encoding='utf-8') as f:
            json_data = json.load(f)
        json_data = json_data.get('items')

        xml_root = convert_json_to_xml(json_data, filename)
        save_xml_to_file(xml_root, xml_file_path)

        print(f"✅ Successfully converted {json_file_path} to {xml_file_path}")
        return True
    except Exception as e:
        # Deliberate catch-all: this is the per-file boundary of a batch run,
        # so one bad file is reported and does not abort the others.
        print(f"❌ Error converting file: {e}")
        return False


def output_names_for(json_name):
    """Derive the XML and image file names for a ``.json`` file name.

    Only the trailing ``.json`` suffix is swapped, so names that merely
    contain ``.json`` elsewhere keep the rest of their text intact.

    Args:
        json_name: File name (no directory part needed).

    Returns:
        ``(xml_name, png_name)``, or ``None`` when ``json_name`` does not end
        with ``.json``.
    """
    suffix = ".json"
    if not json_name.endswith(suffix):
        return None
    stem = json_name[:-len(suffix)]
    return stem + ".xml", stem + ".png"


# Example usage and testing
if __name__ == "__main__":
    import os
    import sys

    # The folder may be given as the first command-line argument; otherwise
    # the original hard-coded location is used.
    default_folder = "/Users/tuvn18/Desktop/tuvn18/dev/KIAI/dev/trace/test_json"
    folder = sys.argv[1] if len(sys.argv) > 1 else default_folder

    for name in os.listdir(folder):
        names = output_names_for(name)
        if names is None:
            continue
        xml_name, image_name = names
        # The XML file is written relative to the current working directory.
        convert_json_file_to_xml(os.path.join(folder, name), xml_name, image_name)