trace_src / vis_json_cell.py
kiaisoft's picture
Upload 7 files
66180d7 verified
import json
import cv2
import os
def get_visible_cell_coords(table_properties, table_x, table_y):
    """Compute the bounding box of every visible cell of a table.

    A cell is visible when it is not flagged in ``hiddenCells`` and is not
    covered by a merged cell (the top-left origin of a merge stays visible
    and takes the size of the whole merged span).

    Args:
        table_properties: Mapping with the optional keys ``rows``,
            ``columns``, ``columnWidths``, ``rowHeights``, ``mergedCells``
            and ``hiddenCells``. Width/height maps are keyed by the
            stringified column/row index; merged/hidden maps are keyed by
            ``"<row>-<col>"``. Missing or ``None`` entries (and a ``None``
            mapping) are treated as empty.
        table_x: Horizontal offset added to every cell's ``x``.
        table_y: Vertical offset added to every cell's ``y``.

    Returns:
        Dict mapping ``(row, col)`` to a dict with integer ``x``, ``y``,
        ``width`` and ``height`` values.
    """
    # `.get(key, default)` still yields None when the key is present with a
    # JSON null value, so normalise with `or` instead of relying on defaults.
    table_properties = table_properties or {}
    rows = table_properties.get("rows") or 0
    columns = table_properties.get("columns") or 0
    column_widths = table_properties.get("columnWidths") or {}
    row_heights = table_properties.get("rowHeights") or {}
    merged_cells = table_properties.get("mergedCells") or {}
    hidden_cells = table_properties.get("hiddenCells") or {}

    DEFAULT_WIDTH = 100
    DEFAULT_HEIGHT = 30

    def get_col_width(col):
        return column_widths.get(str(col), DEFAULT_WIDTH)

    def get_row_height(row):
        return row_heights.get(str(row), DEFAULT_HEIGHT)

    # Every cell covered by a merge except the merge's top-left origin.
    merged_spanned_cells = set()
    for key, merge_info in merged_cells.items():
        base_row, base_col = map(int, key.split('-'))
        rowspan = merge_info.get('rowspan', 1)
        colspan = merge_info.get('colspan', 1)
        for r in range(base_row, base_row + rowspan):
            for c in range(base_col, base_col + colspan):
                if (r, c) != (base_row, base_col):
                    merged_spanned_cells.add((r, c))

    # A cell's x depends only on its column and its y only on its row, so
    # compute the offsets once per column/row instead of once per cell.
    col_offsets = [
        sum(get_col_width(c) for c in range(col)) for col in range(columns)
    ]
    row_offsets = [
        sum(get_row_height(r) for r in range(row)) for row in range(rows)
    ]

    result = {}
    for row in range(rows):
        y = row_offsets[row]
        for col in range(columns):
            coord_key = f"{row}-{col}"
            if hidden_cells.get(coord_key):
                continue  # Skip hidden cells
            if (row, col) in merged_spanned_cells:
                continue  # Skip cells covered by merged cells

            # A merge origin spans several columns/rows; any other cell is 1x1.
            merge_info = merged_cells.get(coord_key) or {}
            colspan = merge_info.get("colspan", 1)
            rowspan = merge_info.get("rowspan", 1)

            width = sum(get_col_width(c) for c in range(col, col + colspan))
            height = sum(get_row_height(r) for r in range(row, row + rowspan))

            result[(row, col)] = {
                "x": int(col_offsets[col] + table_x),
                "y": int(y + table_y),
                "width": int(width),
                "height": int(height),
            }
    return result
# Batch visualisation: for every "<stem>.json" annotation in `folder_path`,
# load the sibling "<stem>.png", outline each visible cell of every table
# item in green, and write the result to `save_folder` under the same name.
folder_path = "/Users/tuvn18/Desktop/tuvn18/dev/KIAI/dev/trace/train_table_1209"
save_folder = "cell_vis"
os.makedirs(save_folder, exist_ok=True)

for name in os.listdir(folder_path):
    if not name.endswith(".json"):
        continue

    json_file = os.path.join(folder_path, name)
    with open(json_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    # splitext keeps any extra dots in the stem ("a.b.json" -> "a.b.png").
    img_name = os.path.splitext(name)[0] + ".png"
    print(img_name)

    # cv2.imread signals a missing/unreadable file by returning None rather
    # than raising; skip such files so one bad image does not abort the batch.
    image = cv2.imread(os.path.join(folder_path, img_name))
    if image is None:
        print(f"Skipping {img_name}: image not found or unreadable")
        continue

    # 'items' may be absent or null in the annotation file.
    for table in data.get('items') or []:
        if table.get('type') != 'table':
            continue
        table_x = table.get('x')
        table_y = table.get('y')
        # A null 'properties' entry is treated as a table with no cells.
        table_prob = table.get('properties') or {}
        boxes = get_visible_cell_coords(table_prob, table_x, table_y)
        for (row, col), box in boxes.items():
            x, y, w, h = int(box["x"]), int(box["y"]), int(box["width"]), int(box["height"])
            top_left = (x, y)
            bottom_right = (x + w, y + h)
            cv2.rectangle(image, top_left, bottom_right, (0, 255, 0), 2)
            # Optional debug label with the cell's grid position:
            # cv2.putText(image, f"{row},{col}", (x + 5, y + 20),
            #             cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)

    # Save the annotated page once all of its tables have been drawn.
    cv2.imwrite(os.path.join(save_folder, img_name), image)