# trace_src / json2xml.py
# kiaisoft -- "Upload 7 files" (commit 66180d7, verified)
import json
import xml.etree.ElementTree as ET
from xml.dom import minidom
def get_visible_cell_coords(table_properties, table_x, table_y, scale=2):
    """Calculate the bounding box of every visible cell of a table.

    A cell is visible when it is not flagged in ``hiddenCells`` and is not
    covered by another cell's merge span (the top-left origin of a merge
    stays visible and receives the size of the whole span).

    Args:
        table_properties: Mapping read for the keys ``rows``, ``columns``,
            ``columnWidths``, ``rowHeights``, ``mergedCells`` and
            ``hiddenCells``. Width/height maps are keyed by the stringified
            index; merge/hidden maps are keyed by ``"<row>-<col>"``.
            Missing or ``None`` entries are treated as empty.
        table_x: Horizontal offset added to every cell position.
        table_y: Vertical offset added to every cell position.
        scale: Factor applied to every returned value. Defaults to 2, the
            factor that used to be hard-coded.

    Returns:
        dict mapping ``(row, col)`` tuples to a dict with the keys ``x``,
        ``y``, ``width`` and ``height``.
    """
    # ``or`` fallbacks also cover an explicit JSON ``null``.
    rows = table_properties.get("rows") or 0
    columns = table_properties.get("columns") or 0
    column_widths = table_properties.get("columnWidths") or {}
    row_heights = table_properties.get("rowHeights") or {}
    merged_cells = table_properties.get("mergedCells") or {}
    hidden_cells = table_properties.get("hiddenCells") or {}

    DEFAULT_WIDTH = 100
    DEFAULT_HEIGHT = 30

    def get_col_width(col):
        return column_widths.get(str(col), DEFAULT_WIDTH)

    def get_row_height(row):
        return row_heights.get(str(row), DEFAULT_HEIGHT)

    def leading_offsets(count, size_of):
        """Return, for each index, the summed size of all earlier indices."""
        offsets = []
        running = 0
        for index in range(count):
            offsets.append(running)
            running += size_of(index)
        return offsets

    # Computed once instead of re-summing every preceding column/row for
    # each individual cell.
    col_offsets = leading_offsets(columns, get_col_width)
    row_offsets = leading_offsets(rows, get_row_height)

    # Every cell covered by a merge span, excluding the span's top-left origin.
    merged_spanned_cells = set()
    for key, merge_info in merged_cells.items():
        base_row, base_col = map(int, key.split('-'))
        rowspan = merge_info.get('rowspan', 1)
        colspan = merge_info.get('colspan', 1)
        for r in range(base_row, base_row + rowspan):
            for c in range(base_col, base_col + colspan):
                if (r, c) != (base_row, base_col):
                    merged_spanned_cells.add((r, c))

    result = {}
    for row in range(rows):
        for col in range(columns):
            coord_key = f"{row}-{col}"
            if hidden_cells.get(coord_key):
                continue  # Skip hidden cells
            if (row, col) in merged_spanned_cells:
                continue  # Skip cells covered by merged cells

            # A merge origin takes the size of its whole span.
            if coord_key in merged_cells:
                colspan = merged_cells[coord_key].get("colspan", 1)
                rowspan = merged_cells[coord_key].get("rowspan", 1)
            else:
                colspan = 1
                rowspan = 1

            # Summed directly because a span may reach past the declared
            # column/row count, where the default sizes apply.
            width = sum(get_col_width(c) for c in range(col, col + colspan))
            height = sum(get_row_height(r) for r in range(row, row + rowspan))

            result[(row, col)] = {
                "x": scale * (col_offsets[col] + table_x),
                "y": scale * (row_offsets[row] + table_y),
                "width": scale * width,
                "height": scale * height,
            }
    return result
def _border_width_is_positive(value):
    """Tell whether a border width is greater than zero.

    Numbers are compared directly. Strings are parsed as a number with an
    optional ``px`` suffix. Returns ``None`` when the value cannot be
    interpreted (for example ``None`` or a non-numeric string).
    """
    if isinstance(value, str):
        text = value.strip().lower()
        if text.endswith("px"):
            text = text[:-2]
        try:
            value = float(text)
        except ValueError:
            return None
    try:
        return bool(value > 0)
    except TypeError:
        return None


def get_cell_borders(cell_data, table_properties):
    """Extract border information for a cell.

    Every side starts at 1 when ``table_properties["cellBorders"]["all"]``
    is truthy and at 0 otherwise. A side is then overridden only if the
    cell's ``cellStyle`` defines the matching ``border<Side>Width``: the
    side is on when that width is positive, and always off when the
    matching ``border<Side>Style`` equals ``"none"``. A width that cannot
    be interpreted leaves the table-wide default in place.

    Args:
        cell_data: Per-cell mapping (may be empty or ``None``); only its
            ``cellStyle`` entry is read.
        table_properties: Table mapping; only ``cellBorders`` is read.

    Returns:
        Tuple ``(top, bottom, left, right)`` of 0/1 flags.
    """
    # ``or {}`` also covers an explicit JSON ``null``.
    cell_borders = table_properties.get("cellBorders") or {}
    default_flag = 1 if cell_borders.get("all", False) else 0
    borders = {
        "top": default_flag,
        "bottom": default_flag,
        "left": default_flag,
        "right": default_flag,
    }

    cell_style = None
    if cell_data and "cellStyle" in cell_data:
        cell_style = cell_data["cellStyle"]

    if cell_style:
        border_mappings = {
            "borderTopWidth": "top",
            "borderBottomWidth": "bottom",
            "borderLeftWidth": "left",
            "borderRightWidth": "right",
        }
        for width_key, border_side in border_mappings.items():
            if width_key not in cell_style:
                continue  # No explicit width: keep the table-wide default.

            is_positive = _border_width_is_positive(cell_style[width_key])
            if is_positive is not None:
                borders[border_side] = 1 if is_positive else 0

            # An explicit "none" style suppresses the border regardless of width.
            style_key = width_key.replace("Width", "Style")
            if style_key in cell_style and cell_style[style_key] == "none":
                borders[border_side] = 0

    return borders["top"], borders["bottom"], borders["left"], borders["right"]
def convert_json_to_xml(json_data, filename="table.jpg"):
    """Build an XML element tree describing one table.

    Args:
        json_data: A JSON string, a table mapping, or a list of table
            mappings. When a list is given only its first entry is used.
        filename: Value stored in the ``filename`` attribute of the root
            ``<document>`` element.

    Returns:
        The root ``<document>`` element. It holds a single ``<table>`` with
        a ``<Coords>`` polygon followed by one ``<cell>`` per visible cell;
        each cell carries its row/column span, its own ``<Coords>`` polygon
        and a ``<Lines>`` element with the four border flags.
    """

    def rect_points(left, top, right, bottom):
        # Corner order: top-left, top-right, bottom-right, bottom-left.
        return f"{left},{top} {right},{top} {right},{bottom} {left},{bottom}"

    # Accept either raw JSON text or already-decoded data.
    data = json.loads(json_data) if isinstance(json_data, str) else json_data
    # A list of tables is reduced to its first entry.
    table_data = data[0] if isinstance(data, list) else data

    properties = table_data.get("properties", {})
    table_x = table_data.get("x", 0)
    table_y = table_data.get("y", 0)
    table_width = table_data.get("width", properties.get("width", 0))
    table_height = table_data.get("height", properties.get("height", 0))

    root = ET.Element("document", filename=filename)
    table_elem = ET.SubElement(root, "table")

    # NOTE(review): the table polygon uses the raw table geometry, while the
    # cell boxes returned by get_visible_cell_coords are multiplied by a
    # factor of 2 -- confirm that this mismatch is intended.
    table_points = rect_points(
        int(table_x),
        int(table_y),
        int(table_x + table_width),
        int(table_y + table_height),
    )
    ET.SubElement(table_elem, "Coords", points=table_points)

    cell_coords = get_visible_cell_coords(properties, table_x, table_y)
    cell_data = properties.get("cellData", {})
    merged_cells = properties.get("mergedCells", {})

    for (row, col), box in cell_coords.items():
        cell_key = f"{row}-{col}"
        current_cell_data = cell_data.get(cell_key, {})

        # Unmerged cells fall back to an empty mapping, i.e. a 1x1 span.
        span = merged_cells.get(cell_key, {})
        end_row = row + span.get("rowspan", 1) - 1
        end_col = col + span.get("colspan", 1) - 1

        cell_elem = ET.SubElement(
            table_elem,
            "cell",
            {
                "start-row": str(row),
                "end-row": str(end_row),
                "start-col": str(col),
                "end-col": str(end_col),
            },
        )

        cell_points = rect_points(
            int(box["x"]),
            int(box["y"]),
            int(box["x"] + box["width"]),
            int(box["y"] + box["height"]),
        )
        ET.SubElement(cell_elem, "Coords", points=cell_points)

        top, bottom, left, right = get_cell_borders(current_cell_data, properties)
        ET.SubElement(
            cell_elem,
            "Lines",
            top=str(top),
            bottom=str(bottom),
            left=str(left),
            right=str(right),
        )

    return root
def save_xml_to_file(xml_root, output_path):
    """Write an element tree to ``output_path`` as indented XML.

    Args:
        xml_root: Root element to serialize.
        output_path: Destination file, written as UTF-8 text.
    """
    # Round-trip through minidom, which provides the pretty printer.
    serialized = ET.tostring(xml_root, encoding='unicode')
    document = minidom.parseString(serialized)
    indented = document.toprettyxml(indent=" ")

    # Drop whitespace-only lines from the pretty-printed text.
    kept_lines = filter(str.strip, indented.split('\n'))

    with open(output_path, 'w', encoding='utf-8') as handle:
        handle.write('\n'.join(kept_lines))
def convert_json_file_to_xml(json_file_path, xml_file_path, filename="table.jpg"):
    """Convert a JSON file to an XML file.

    The table data is read from the ``items`` entry of the JSON document.
    A document whose top level is already a list is passed on as-is.

    Args:
        json_file_path: Path of the JSON file to read (UTF-8).
        xml_file_path: Path of the XML file to write.
        filename: Forwarded to ``convert_json_to_xml``.

    Returns:
        True on success. On any failure the error is printed and False is
        returned; no exception propagates to the caller.
    """
    try:
        # Read JSON file
        with open(json_file_path, 'r', encoding='utf-8') as f:
            document = json.load(f)

        # A top-level list has no ``.get``; treat it as the table list itself.
        if isinstance(document, dict):
            tables = document.get('items')
        else:
            tables = document

        # Fail with a readable message rather than an obscure AttributeError
        # or IndexError raised further down.
        if not tables:
            raise ValueError("no table data found (expected a non-empty 'items' entry)")

        # Convert to XML
        xml_root = convert_json_to_xml(tables, filename)
        # Save XML file
        save_xml_to_file(xml_root, xml_file_path)
        print(f"✅ Successfully converted {json_file_path} to {xml_file_path}")
        return True
    except Exception as e:
        # Deliberate catch-all: this is the per-file boundary, so one bad
        # file is reported instead of raising.
        print(f"❌ Error converting file: {e}")
        return False
# Example usage and testing: convert every ``.json`` file of a folder.
if __name__ == "__main__":
    import os
    import sys

    # The folder may be passed as the first command-line argument; without
    # one, the original development path is used.
    if len(sys.argv) > 1:
        folder = sys.argv[1]
    else:
        folder = "/Users/tuvn18/Desktop/tuvn18/dev/KIAI/dev/trace/test_json"

    for name in sorted(os.listdir(folder)):
        # Match the real extension: a bare ``endswith('json')`` also accepts
        # names such as "datajson", whose output name would equal the input.
        stem, extension = os.path.splitext(name)
        if extension != '.json':
            continue
        json_name = os.path.join(folder, name)
        # Output goes to the current working directory, as before.
        xml_name = stem + '.xml'
        convert_json_file_to_xml(json_name, xml_name, stem + '.png')