File size: 2,196 Bytes
dc4e6da | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 | from pathlib import Path
import pymupdf
from docgenie.generation.models._bbox import OCRBox
from docgenie.generation.utils.bboxes import draw_bboxes_on_pdf
def draw_geos_on_pdf(geos: list[dict], pdf_in: Path, pdf_out: Path):
    """Draw the rectangle of every geo entry onto a PDF.

    Each element of ``geos`` is read through ``g["rect"]`` and its ``"x"``,
    ``"y"``, ``"width"`` and ``"height"`` entries, all of which are passed
    through ``float``.  The rectangle is turned into an ``OCRBox`` with empty
    text and ``-1`` for the block/line/word numbers, and the resulting list is
    handed to ``draw_bboxes_on_pdf`` together with the input and output paths.

    Args:
        geos: Geo dictionaries, each carrying a ``"rect"`` mapping.
        pdf_in: Path passed to ``draw_bboxes_on_pdf`` as ``pdf_path``.
        pdf_out: Path passed to ``draw_bboxes_on_pdf`` as ``outpath``.
    """

    def _as_box(geo: dict) -> OCRBox:
        # Corner form (x0, y0, x2, y2) derived from origin + size.
        rect = geo["rect"]
        left = float(rect["x"])
        top = float(rect["y"])
        right = left + float(rect["width"])
        bottom = top + float(rect["height"])
        return OCRBox(
            x0=left,
            y0=top,
            x2=right,
            y2=bottom,
            text="",
            block_no=-1,
            line_no=-1,
            word_no=-1,
        )

    geo_boxes = [_as_box(geo) for geo in geos]
    draw_bboxes_on_pdf(pdf_path=pdf_in, outpath=pdf_out, bboxes=geo_boxes)
def draw_geos_and_bboxes_on_pdf(
    geos: list[dict], bboxes_: list[OCRBox], pdf_in: Path, pdf_out: Path, verbose: bool
):
    """Outline geo rectangles in red and OCR boxes in green on a PDF.

    Every geo rectangle and every box in ``bboxes_`` is drawn on *every* page
    of the document; neither input is filtered by page here.  Coordinates are
    rounded to the nearest integer before drawing.  The annotated document is
    written to ``pdf_out``.

    Args:
        geos: Geo dictionaries; each is read through ``g["rect"]`` and its
            ``"x"``, ``"y"``, ``"width"`` and ``"height"`` entries.
        bboxes_: Boxes exposing ``x0``, ``y0``, ``x2`` and ``y2`` attributes.
        pdf_in: PDF opened with ``pymupdf.open``.
        pdf_out: Destination passed to ``Document.save``.
        verbose: When true, print each box as it is drawn (once per page).
    """
    geo_boxes = []
    for g in geos:
        rect = g["rect"]
        x0 = float(rect["x"])
        y0 = float(rect["y"])
        geo_boxes.append(
            OCRBox(
                x0=x0,
                y0=y0,
                x2=x0 + float(rect["width"]),
                y2=y0 + float(rect["height"]),
                text="",
                block_no=-1,
                line_no=-1,
                word_no=-1,
            )
        )

    # (boxes, stroke colour) layers, drawn in this order on each page:
    # geos in red first, then the supplied bboxes in green.
    layers = (
        (geo_boxes, (1, 0, 0)),
        (bboxes_, (0, 1, 0)),
    )

    # The context manager closes the document even if drawing or saving raises.
    with pymupdf.open(pdf_in) as doc:
        for page in doc.pages():
            for boxes, color in layers:
                for bbox in boxes:
                    corners = (
                        round(bbox.x0),
                        round(bbox.y0),
                        round(bbox.x2),
                        round(bbox.y2),
                    )
                    page.draw_rect(pymupdf.Rect(corners), color=color)
                    if verbose:
                        print(bbox)
        doc.save(pdf_out)
|