Spaces:

nassimb0u
/

chart-text-role-classification-demo

Running

App Files Files Community

chart-text-role-classification-demo / utils.py

nassimb0u

add more examples from test datasets

749215f verified 10 months ago

raw

history blame contribute delete

5.16 kB

	import json

	import cv2
	from PIL import ImageDraw
	from pytesseract import Output, image_to_data
	import numpy as np


	def load_annots(annots_file, annot_format):
	    """Load text annotations from a JSON file into parallel text/bbox lists.

	    Args:
	        annots_file: Path of the JSON annotation file to read.
	        annot_format: Layout of the file; one of ``"STD"``, ``"ICPR22"`` or
	            ``"EconBiz & CHIMIE-R"``.

	    Returns:
	        A dict with two lists of equal length: ``"text"`` (the text of each
	        block) and ``"bbox"`` (the box of each block). ``"STD"`` boxes are
	        taken verbatim from the file; the other formats go through
	        ``quad_to_box``.

	    Raises:
	        ValueError: If ``annot_format`` is not one of the supported names.
	    """
	    # Binary mode lets json.load detect UTF-8/16/32 (BOM included) by itself
	    # rather than depending on the platform's default text encoding.
	    with open(annots_file, "rb") as f:
	        data = json.load(f)

	    annots = {"text": [], "bbox": []}
	    if annot_format == "STD":
	        for text_block in data:
	            annots["text"].append(text_block["text"])
	            annots["bbox"].append(text_block["bbox"])
	    elif annot_format == "ICPR22":
	        for text_block in data["task2"]["output"]["text_blocks"]:
	            annots["text"].append(text_block["text"])
	            annots["bbox"].append(quad_to_box(text_block["polygon"]))
	    elif annot_format == "EconBiz & CHIMIE-R":
	        # Boxes are stored as center/size/orientation; get_quad also needs
	        # the image dimensions recorded in the same file.
	        img_width, img_height = data["width"], data["height"]
	        for text_block in data["textelements"]:
	            quad = get_quad(text_block["boundingbox"], img_width, img_height)
	            annots["text"].append(text_block["content"])
	            annots["bbox"].append(quad_to_box(quad))
	    else:
	        raise ValueError(f"Unknown annotation format: {annot_format}")

	    return annots


	def draw_annot_image(image, labeled_annots_data):
	    """Outline every labeled box on ``image`` in red and write its label.

	    Drawing goes through ``ImageDraw.Draw(image)``, and the same ``image``
	    object that was passed in is returned.

	    Args:
	        image: Image to draw on (whatever ``ImageDraw.Draw`` accepts).
	        labeled_annots_data: Dict with parallel ``"bbox"`` and ``"labels"``
	            sequences; each bbox unpacks as ``(x0, y0, x1, y1)``.

	    Returns:
	        The ``image`` argument.
	    """
	    draw = ImageDraw.Draw(image)

	    boxes = labeled_annots_data["bbox"]
	    labels = labeled_annots_data["labels"]
	    for box, label in zip(boxes, labels):
	        x0, y0, x1, y1 = box
	        # All-zero boxes are skipped rather than drawn.
	        if (x0, y0, x1, y1) == (0, 0, 0, 0):
	            continue
	        draw.rectangle([x0, y0, x1, y1], outline="red", width=2)
	        # The label sits 10 px above the box; clamp so boxes touching the
	        # top edge do not get their label drawn off-canvas.
	        draw.text((x0, max(0, y0 - 10)), label, fill="red")

	    return image


	def normalize_bbox(bbox, size, type=None):
	    """Rescale a bounding box to integer coordinates on a 0-1000 grid.

	    Args:
	        bbox: For ``type="box"``, a mapping with ``"x0"``, ``"y0"``,
	            ``"width"`` and ``"height"``; for ``type="polygon"``, a sequence
	            whose first four items are left, top, right, bottom.
	        size: Pair whose first item divides the x coordinates and whose
	            second item divides the y coordinates.
	        type: ``"box"`` or ``"polygon"``. The name shadows the builtin but
	            is kept because callers may pass it by keyword.

	    Returns:
	        ``[left, top, right, bottom]`` as ints, each scaled by 1000 / size.

	    Raises:
	        ValueError: If ``type`` is neither ``"box"`` nor ``"polygon"``
	            (this includes the default ``None``).
	    """
	    if type == "box":
	        height = int(bbox["height"])
	        width = int(bbox["width"])
	        # The origin is clamped to zero before the extent is added.
	        left = max(0, bbox["x0"])
	        top = max(0, bbox["y0"])
	        right = left + width
	        bottom = top + height
	    elif type == "polygon":
	        left, top, right, bottom = bbox[0], bbox[1], bbox[2], bbox[3]
	    else:
	        # Without this branch the return below failed with UnboundLocalError.
	        raise ValueError(f"Unknown bbox type: {type}")

	    return [
	        int(1000 * left / size[0]),
	        int(1000 * top / size[1]),
	        int(1000 * right / size[0]),
	        int(1000 * bottom / size[1]),
	    ]


	def quad_to_box(quad):
	    """Reduce a quadrilateral to an ordered ``(x, y, x, y)`` tuple.

	    Only corners 0 and 2 of ``quad`` are read. Corner 0 is clamped at zero,
	    then each axis is put in ascending order so that the smaller value comes
	    first in the returned tuple.
	    """
	    near_x = max(0, quad["x0"])
	    near_y = max(0, quad["y0"])
	    far_x = quad["x2"]
	    far_y = quad["y2"]

	    # Swap only on a strict "less than", so equal values keep their slots.
	    if far_y < near_y:
	        near_y, far_y = far_y, near_y
	    if far_x < near_x:
	        near_x, far_x = far_x, near_x

	    return (near_x, near_y, far_x, far_y)


	def get_quad(bbox, width, height):
	    """Turn a center/size/orientation box into the four corners of a quad.

	    Args:
	        bbox: Mapping with ``"center_x"``, ``"center_y"``, ``"width"``,
	            ``"height"`` and ``"orientation"``.
	        width: Image width; rotated x coordinates are clamped to
	            ``[0, width]`` and the rotation pivot uses ``int(width / 2)``.
	        height: Image height; used the same way for y coordinates.

	    Returns:
	        A dict with keys ``"x0".."x3"`` and ``"y0".."y3"``. Before rotation
	        the corners run (left, top), (right, top), (right, bottom),
	        (left, bottom). When ``orientation`` is 0 they are returned as is,
	        without clamping.
	    """
	    half_w = bbox["width"] / 2
	    half_h = bbox["height"] / 2
	    left = int(bbox["center_x"] - half_w)
	    right = int(bbox["center_x"] + half_w)
	    top = int(bbox["center_y"] - half_h)
	    bottom = int(bbox["center_y"] + half_h)

	    corners = [(left, top), (right, top), (right, bottom), (left, bottom)]

	    if bbox["orientation"] != 0:
	        # Rotate every corner about the image center. The orientation value
	        # is handed to cv2.getRotationMatrix2D as its angle argument.
	        # The matrix does not depend on the corner, so build it once.
	        pivot = (int(width / 2), int(height / 2))
	        rotation = cv2.getRotationMatrix2D(pivot, bbox["orientation"], 1.0)

	        flat = []
	        for x, y in corners:
	            rotated = np.matmul(rotation, [x, y, 1])
	            flat.extend((int(rotated[0]), int(rotated[1])))

	        # Keep the rotated coordinates inside the image: even slots are x
	        # (bounded by width), odd slots are y (bounded by height).
	        for i, value in enumerate(flat):
	            upper = width if i % 2 == 0 else height
	            if value > upper:
	                flat[i] = upper
	            elif value < 0:
	                flat[i] = 0

	        corners = list(zip(flat[0::2], flat[1::2]))

	    return {
	        "x0": corners[0][0],
	        "x1": corners[1][0],
	        "x2": corners[2][0],
	        "x3": corners[3][0],
	        "y0": corners[0][1],
	        "y1": corners[1][1],
	        "y2": corners[2][1],
	        "y3": corners[3][1],
	    }


	def auto_annotate(image):
	    """OCR ``image`` with Tesseract and return line-level text and boxes.

	    Words reported by ``image_to_data`` (run with ``--psm 12``) are filtered,
	    then merged per (block, paragraph, line) into one text string and one
	    enclosing box.

	    Args:
	        image: Image passed straight to ``pytesseract.image_to_data``.

	    Returns:
	        A dict with two parallel lists: ``"text"`` (words of a line joined
	        with spaces) and ``"bbox"`` (``(left, top, right, bottom)`` tuples).
	        Both lists are empty when no usable word was found.
	    """
	    data = image_to_data(image, output_type=Output.DATAFRAME, config="--psm 12")

	    # Keep only rows that carry text and have a positive confidence.
	    words = data.dropna(subset=["text"])
	    words = words[words["conf"] > 0]
	    # NOTE(review): the text column may come back numeric when every word
	    # looks like a number (confirm against the installed pytesseract
	    # version); cast so the string operations below cannot fail.
	    words = words.assign(text=words["text"].astype(str))
	    words = words[words["text"].str.strip() != ""]

	    # Nothing recognized: return the empty shape instead of running the
	    # grouping on an empty frame.
	    if words.empty:
	        return {"text": [], "bbox": []}

	    # assign() returns a new frame, so no column is written onto a slice.
	    words = words.assign(
	        right=words["left"] + words["width"],
	        bottom=words["top"] + words["height"],
	    )

	    # Group words by block, paragraph, and line number.
	    lines = words.groupby(
	        ["block_num", "par_num", "line_num"], as_index=False
	    ).agg(
	        {
	            "text": lambda x: " ".join(x),
	            "left": "min",
	            "top": "min",
	            "right": "max",
	            "bottom": "max",
	        }
	    )

	    boxes = list(
	        zip(
	            lines["left"].tolist(),
	            lines["top"].tolist(),
	            lines["right"].tolist(),
	            lines["bottom"].tolist(),
	        )
	    )
	    return {"text": lines["text"].tolist(), "bbox": boxes}