| import json |
|
|
| import cv2 |
| from PIL import ImageDraw |
| from pytesseract import Output, image_to_data |
| import numpy as np |
|
|
|
|
def load_annots(annots_file, annot_format):
    """Load text annotations from a JSON file into a common structure.

    Args:
        annots_file: Path of the JSON annotation file.
        annot_format: Layout of the file. One of:
            ``"STD"`` - a list of objects with ``"text"`` and ``"bbox"``
            entries, copied through unchanged;
            ``"ICPR22"`` - text blocks under
            ``data["task2"]["output"]["text_blocks"]`` whose ``"polygon"``
            is converted with ``quad_to_box``;
            ``"EconBiz & CHIMIE-R"`` - entries under ``data["textelements"]``
            whose ``"boundingbox"`` is converted with ``get_quad`` (using the
            file's ``"width"`` / ``"height"``) and then ``quad_to_box``.

    Returns:
        A dict ``{"text": [...], "bbox": [...]}`` with one entry per text
        block, in file order.

    Raises:
        ValueError: If ``annot_format`` is not one of the values above.
    """
    # JSON is exchanged as UTF-8; being explicit keeps non-ASCII text intact
    # on platforms whose default locale encoding is something else.
    with open(annots_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    if annot_format == "STD":
        texts = [block["text"] for block in data]
        boxes = [block["bbox"] for block in data]
    elif annot_format == "ICPR22":
        blocks = data["task2"]["output"]["text_blocks"]
        texts = [block["text"] for block in blocks]
        boxes = [quad_to_box(block["polygon"]) for block in blocks]
    elif annot_format == "EconBiz & CHIMIE-R":
        blocks = data["textelements"]
        texts = [block["content"] for block in blocks]
        boxes = [
            quad_to_box(
                get_quad(block["boundingbox"], data["width"], data["height"])
            )
            for block in blocks
        ]
    else:
        raise ValueError(f"Unknown annotation format: {annot_format}")

    return {"text": texts, "bbox": boxes}
|
|
|
|
def draw_annot_image(image, labeled_annots_data):
    """Outline each labelled box on ``image`` in red and return the image.

    Args:
        image: Image object accepted by ``ImageDraw.Draw``; it is drawn on
            in place.
        labeled_annots_data: Mapping with parallel sequences under ``"bbox"``
            (four values ``x0, y0, x1, y1`` per entry) and ``"labels"``.

    Returns:
        The same ``image`` object that was passed in.
    """
    canvas = ImageDraw.Draw(image)
    boxes = labeled_annots_data["bbox"]
    labels = labeled_annots_data["labels"]

    for box, label in zip(boxes, labels):
        x0, y0, x1, y1 = box
        # Boxes whose four coordinates are all zero are not drawn.
        if (x0, y0, x1, y1) != (0, 0, 0, 0):
            canvas.rectangle([x0, y0, x1, y1], outline="red", width=2)
            # The label is placed 10 pixels above the box's top-left corner.
            canvas.text((x0, y0 - 10), label, fill="red")

    return image
|
|
|
|
def normalize_bbox(bbox, size, type=None):
    """Rescale a bounding box to integer coordinates on a 0-1000 grid.

    Args:
        bbox: For ``type="box"``, a mapping with ``"x0"``, ``"y0"``,
            ``"width"`` and ``"height"``; ``x0`` / ``y0`` are clamped to be
            non-negative before the width / height are added. For
            ``type="polygon"``, an indexable whose items 0..3 are
            left, top, right, bottom.
        size: Indexable where ``size[0]`` divides the horizontal coordinates
            and ``size[1]`` divides the vertical ones.
        type: ``"box"`` or ``"polygon"``. The name shadows the builtin but is
            kept so keyword callers keep working.

    Returns:
        ``[left, top, right, bottom]`` as ints, each computed as
        ``int(1000 * coordinate / size[axis])``.

    Raises:
        ValueError: If ``type`` is neither ``"box"`` nor ``"polygon"``
            (including the default ``None``). Previously this surfaced as an
            ``UnboundLocalError``.
    """
    if type == "box":
        height = int(bbox["height"])
        width = int(bbox["width"])
        left = max(0, bbox["x0"])
        top = max(0, bbox["y0"])
        right = left + width
        bottom = top + height
    elif type == "polygon":
        left, top, right, bottom = bbox[0], bbox[1], bbox[2], bbox[3]
    else:
        raise ValueError(
            f"Unknown bbox type: {type!r}; expected 'box' or 'polygon'"
        )

    x_extent, y_extent = size[0], size[1]
    return [
        int(1000 * left / x_extent),
        int(1000 * top / y_extent),
        int(1000 * right / x_extent),
        int(1000 * bottom / y_extent),
    ]
|
|
|
|
def quad_to_box(quad):
    """Convert a quadrilateral to an ordered ``(left, top, right, bottom)`` box.

    Only the ``"x0"``/``"y0"`` and ``"x2"``/``"y2"`` corners are read. They are
    clamped to be non-negative and then ordered so that ``left <= right`` and
    ``top <= bottom``.

    Clamping happens before ordering, and is applied to both corners, so a
    negative ``x2`` / ``y2`` can no longer end up in the result (previously
    only ``x0`` / ``y0`` were clamped, and a swap could move a negative value
    into the left / top slot).

    NOTE(review): corners 1 and 3 are ignored, so for a rotated quad this is
    the box spanned by one diagonal, not necessarily the enclosing box -
    confirm that is intended.

    Args:
        quad: Mapping with at least ``"x0"``, ``"y0"``, ``"x2"``, ``"y2"``.

    Returns:
        A 4-tuple ``(left, top, right, bottom)``.
    """
    xa, xb = max(0, quad["x0"]), max(0, quad["x2"])
    ya, yb = max(0, quad["y0"]), max(0, quad["y2"])
    return (min(xa, xb), min(ya, yb), max(xa, xb), max(ya, yb))
|
|
|
|
def get_quad(bbox, width, height):
    """Expand a centre/size/orientation box into its four corner points.

    The corners are produced in the order top-left, top-right, bottom-right,
    bottom-left (indices 0..3) and truncated to ints.

    When ``bbox["orientation"]`` is non-zero, every corner is transformed
    with the matrix from ``cv2.getRotationMatrix2D`` built around the centre
    of the ``width`` x ``height`` area, then clamped to ``[0, width]``
    horizontally and ``[0, height]`` vertically. Unrotated corners are
    returned without clamping.

    NOTE(review): the rotation pivots on the image centre rather than the
    box centre - confirm this matches the annotation convention.

    Args:
        bbox: Mapping with ``"center_x"``, ``"center_y"``, ``"width"``,
            ``"height"`` and ``"orientation"`` (passed to OpenCV as the
            rotation angle).
        width: Horizontal extent used for the rotation centre and clamping.
        height: Vertical extent used for the rotation centre and clamping.

    Returns:
        A dict with keys ``"x0".."x3"`` and ``"y0".."y3"``.
    """
    half_w = bbox["width"] / 2
    half_h = bbox["height"] / 2
    left = int(bbox["center_x"] - half_w)
    right = int(bbox["center_x"] + half_w)
    top = int(bbox["center_y"] - half_h)
    bottom = int(bbox["center_y"] + half_h)

    corners = [(left, top), (right, top), (right, bottom), (left, bottom)]

    if bbox["orientation"] != 0:

        def clamp(value, upper):
            # Same precedence as before: the upper bound is checked first.
            if value > upper:
                return upper
            if value < 0:
                return 0
            return value

        pivot = (int(width / 2), int(height / 2))
        # The matrix depends only on the pivot and angle, so build it once
        # instead of once per corner.
        rotation = cv2.getRotationMatrix2D(pivot, bbox["orientation"], 1.0)

        rotated = []
        for x, y in corners:
            rx, ry = np.matmul(rotation, [x, y, 1])
            rotated.append((clamp(int(rx), width), clamp(int(ry), height)))
        corners = rotated

    (x0, y0), (x1, y1), (x2, y2), (x3, y3) = corners
    return {
        "x0": x0,
        "x1": x1,
        "x2": x2,
        "x3": x3,
        "y0": y0,
        "y1": y1,
        "y2": y2,
        "y3": y3,
    }
|
|
|
|
def auto_annotate(image):
    """Run Tesseract on ``image`` and return one text/box entry per line.

    Word-level OCR rows are obtained with ``image_to_data`` (config
    ``"--psm 12"``), filtered, and merged by
    ``(block_num, par_num, line_num)``: the words of a line are joined with
    single spaces and the line's box is the union of its word boxes.

    Args:
        image: Image passed straight to ``pytesseract.image_to_data``.

    Returns:
        A dict ``{"text": [...], "bbox": [...]}`` where each ``bbox`` is a
        ``(left, top, right, bottom)`` tuple. Both lists are empty when no
        usable text was recognised.
    """
    data = image_to_data(image, output_type=Output.DATAFRAME, config="--psm 12")

    # Keep only rows that carry text, have a positive confidence, and are
    # not whitespace-only.
    data = data.dropna(subset=["text"])
    data = data[data["conf"] > 0]
    # Cast before using ``.str``: an all-NaN text column is parsed as float,
    # and the accessor would raise even though no rows remain.
    words = data["text"].astype(str)
    data = data[words.str.strip() != ""]

    # Nothing recognised: return the empty structure instead of running the
    # aggregation on an empty frame.
    if data.empty:
        return {"text": [], "bbox": []}

    # ``assign`` builds a new frame, so the derived columns are not written
    # onto a filtered slice (which triggers SettingWithCopyWarning).
    data = data.assign(
        text=data["text"].astype(str),
        right=data["left"] + data["width"],
        bottom=data["top"] + data["height"],
    )

    lines = data.groupby(
        ["block_num", "par_num", "line_num"], as_index=False
    ).agg(
        {
            "text": " ".join,
            "left": "min",
            "top": "min",
            "right": "max",
            "bottom": "max",
        }
    )

    boxes = list(
        zip(lines["left"], lines["top"], lines["right"], lines["bottom"])
    )
    return {"text": lines["text"].tolist(), "bbox": boxes}
|
|