"""Helpers for testing, (de)serializing and visualizing OCR bounding boxes."""

from pathlib import Path

from PIL import Image, ImageDraw, ImageFont
import pymupdf

from docgenie.generation.models import OCRBox
from docgenie.generation.models._bbox import LayoutBox


def is_in_rect(
    rect: dict, bbox: OCRBox, threshold: float, document_id: str | None = None
) -> bool:
    """Return whether *bbox* lies fully inside *rect* grown by *threshold*.

    :param rect: Mapping with the keys ``"x"``, ``"y"``, ``"width"`` and
        ``"height"`` describing the rectangle (top-left corner plus size).
    :param bbox: Box whose ``x0``/``y0``/``x2``/``y2`` corners are tested.
        NOTE(review): assumed to be in the same coordinate space as *rect*;
        no unit conversion happens here.
    :param threshold: Margin added to every side of *rect* before testing.
    :param document_id: Unused; kept so existing callers keep working.
    :return: ``True`` if all four edges of *bbox* are within the grown rect.
    """
    # Grow the rectangle by ``threshold`` on each side.
    r_x0 = rect["x"] - threshold
    r_y0 = rect["y"] - threshold
    r_x2 = r_x0 + rect["width"] + 2 * threshold
    r_y2 = r_y0 + rect["height"] + 2 * threshold

    return (
        bbox.x0 >= r_x0
        and bbox.y0 >= r_y0
        and bbox.x2 <= r_x2
        and bbox.y2 <= r_y2
    )


def save_bboxes(
    bboxes: list[OCRBox],
    bbox_path: Path,
):
    """Write one ``as_string()`` line per box to *bbox_path* (UTF-8).

    Missing parent directories are created. Lines are separated by a newline
    and no trailing newline is written after the last box.

    :param bboxes: Boxes to serialize.
    :param bbox_path: Destination file; overwritten if it already exists.
    """
    bbox_path.parent.mkdir(exist_ok=True, parents=True)
    content = "\n".join(box.as_string() for box in bboxes)
    with bbox_path.open(mode="w", encoding="utf-8") as f:
        f.write(content)


def read_syn_dataset_bbox_str(line: str) -> OCRBox:
    """Parse one ``x0,y0,x2,y2,text,block_no,line_no,word_no`` line.

    The text field may itself contain commas: the four leading coordinates
    are split off from the left and the three trailing integers from the
    right, and whatever remains in between is taken as the text.

    :param line: A single serialized bounding-box line.
    :return: The parsed ``OCRBox``.
    :raises IndexError: If the line has too few comma-separated fields.
    :raises ValueError: If a numeric field cannot be converted.
    """
    parts = line.split(",", 4)
    x0 = float(parts[0])
    y0 = float(parts[1])
    x2 = float(parts[2])
    y2 = float(parts[3])

    # Peel the three integer fields off the right-hand side of the remainder.
    tail = parts[4].rsplit(",", 3)
    txt = tail[0]
    block_no = int(tail[1])
    line_no = int(tail[2])
    word_no = int(tail[3])

    return OCRBox(
        x0=x0,
        y0=y0,
        x2=x2,
        y2=y2,
        text=txt,
        block_no=block_no,
        line_no=line_no,
        word_no=word_no,
    )


def read_syn_dataset_bboxes(box_path: Path) -> list[OCRBox]:
    """Read bounding boxes from a synthetic-dataset box file.

    :param box_path: UTF-8 text file with one serialized box per line.
    :return: The parsed boxes, in file order.
    """
    lines = box_path.read_text(encoding="utf-8").splitlines()
    return [read_syn_dataset_bbox_str(line) for line in lines]


def draw_pdf_bboxes_on_pdf(pdf_path, outpath: Path):
    """Outline every word PyMuPDF extracts from a PDF and save a copy.

    Each word's coordinates are rounded to integers, printed to stdout as
    ``x0,y0,x1,y1`` and drawn as a red rectangle on its page.

    :param pdf_path: PDF to open.
    :param outpath: Where the annotated PDF is saved.
    """
    # The context manager closes the document even if drawing/saving fails.
    with pymupdf.open(pdf_path) as doc:
        for page in doc.pages():
            for word in page.get_text("words"):
                x0, y0, x1, y1 = word[:4]
                coords = (round(x0), round(y0), round(x1), round(y1))
                print(",".join(str(c) for c in coords))
                page.draw_rect(pymupdf.Rect(coords), color=(1, 0, 0))  # Red box
        doc.save(outpath)


def draw_bboxes_on_pdf(
    pdf_path: Path, outpath: Path, bboxes: list[OCRBox], color=(1, 0, 0)
):
    """Draw the given boxes onto a PDF and save a copy.

    All *bboxes* are drawn on every page of the document; coordinates are
    rounded to integers before drawing.

    :param pdf_path: PDF to open.
    :param outpath: Where the annotated PDF is saved.
    :param bboxes: Boxes to outline.
    :param color: Stroke color passed to ``page.draw_rect`` (default: red).
    """
    # The context manager closes the document even if drawing/saving fails.
    with pymupdf.open(pdf_path) as doc:
        for page in doc.pages():
            for bbox in bboxes:
                coords = (
                    round(bbox.x0),
                    round(bbox.y0),
                    round(bbox.x2),
                    round(bbox.y2),
                )
                page.draw_rect(pymupdf.Rect(coords), color=color)
        doc.save(outpath)


def draw_bboxes_on_image(
    image: Image.Image, bboxes: list[OCRBox], color="red", width=3, show_text=True
) -> Image.Image:
    """
    Draws bounding boxes on a given Pillow image.

    The image is modified in place and also returned.

    :param image: Pillow Image object
    :param bboxes: List of OCRBox objects (corners ``x0, y0, x2, y2``)
    :param color: Color of the bounding box (default: red)
    :param width: Line width (default: 3)
    :param show_text: If true, also draw each box's ``text`` in magenta at
        the box's top-left corner (default: True)
    :return: Image with bounding boxes
    """
    draw = ImageDraw.Draw(image)
    # Load the font once, and only when some text will actually be drawn.
    font = ImageFont.load_default(32) if show_text and bboxes else None

    bbox: OCRBox
    for bbox in bboxes:
        box = (bbox.x0, bbox.y0, bbox.x2, bbox.y2)
        draw.rectangle(box, outline=color, width=width)
        if show_text:
            # ``ImageDraw.text`` takes an (x, y) anchor, not a 4-tuple.
            draw.text((bbox.x0, bbox.y0), bbox.text, (255, 0, 255), font=font)
    return image