| from __future__ import annotations
|
|
|
| from dataclasses import dataclass
|
|
|
|
|
@dataclass(frozen=True)
class OCRBox:
    """Immutable box holding corner coordinates, text, and block/line/word indices.

    ``width`` and ``height`` are derived from the corners as ``x2 - x0`` and
    ``y2 - y0``. Every transforming method returns a new ``OCRBox``; the
    instance itself is frozen.
    """

    x0: float
    y0: float
    x2: float
    y2: float
    text: str
    block_no: int
    line_no: int
    word_no: int

    @property
    def key(self):
        """Return the ``(block_no, line_no, word_no)`` tuple of this box."""
        return self.block_no, self.line_no, self.word_no

    def as_string(self) -> str:
        """Return all fields, in declaration order, joined by commas.

        ``text`` is inserted verbatim; commas inside it are not escaped.
        """
        corners = f"{self.x0},{self.y0},{self.x2},{self.y2}"
        indices = f"{self.block_no},{self.line_no},{self.word_no}"
        return f"{corners},{self.text},{indices}"

    @property
    def width(self):
        """Return the horizontal extent, ``x2 - x0``."""
        return self.x2 - self.x0

    @property
    def height(self):
        """Return the vertical extent, ``y2 - y0``."""
        return self.y2 - self.y0

    def _resized(self, x_factor, y_factor):
        """Return a copy with x coordinates times *x_factor*, y times *y_factor*."""
        return OCRBox(
            x0=self.x0 * x_factor,
            y0=self.y0 * y_factor,
            x2=self.x2 * x_factor,
            y2=self.y2 * y_factor,
            text=self.text,
            block_no=self.block_no,
            line_no=self.line_no,
            word_no=self.word_no,
        )

    def unnormalize(self, width_px, height_px):
        """Return a copy with x values multiplied by *width_px* and y by *height_px*.

        Text and the block/line/word indices are carried over unchanged.
        """
        return self._resized(width_px, height_px)

    def scale(self, scale):
        """Return a copy with all four coordinates multiplied by *scale*.

        Text and the block/line/word indices are carried over unchanged.
        """
        return self._resized(scale, scale)
|
|
|
|
|
@dataclass(frozen=True)
class LayoutBox:
    """Immutable labelled rectangle described by two corners.

    The static helpers below treat ``(x0, y0)`` as the minimum corner and
    ``(x2, y2)`` as the maximum corner.
    """

    x0: float
    y0: float
    x2: float
    y2: float
    label: str

    @staticmethod
    def box_contains(outer: LayoutBox, inner: LayoutBox) -> bool:
        """Return True when *inner* lies entirely within *outer*.

        Edges that coincide count as contained (comparisons are inclusive).
        """
        spans_horizontally = outer.x0 <= inner.x0 and outer.x2 >= inner.x2
        spans_vertically = outer.y0 <= inner.y0 and outer.y2 >= inner.y2
        return spans_horizontally and spans_vertically

    @staticmethod
    def calculate_overlap_ratio(box1: LayoutBox, box2: LayoutBox) -> float:
        """Return intersection area divided by the area of the smaller box.

        The result is 0.0 when the boxes do not overlap (touching edges
        included) or when the smaller area is not positive.
        """
        # Corners of the candidate intersection rectangle.
        left = max(box1.x0, box2.x0)
        top = max(box1.y0, box2.y0)
        right = min(box1.x2, box2.x2)
        bottom = min(box1.y2, box2.y2)

        # An empty or inverted intersection means there is no overlap.
        if right <= left or bottom <= top:
            return 0.0

        shared_area = (right - left) * (bottom - top)
        first_area = (box1.x2 - box1.x0) * (box1.y2 - box1.y0)
        second_area = (box2.x2 - box2.x0) * (box2.y2 - box2.y0)

        denominator = min(first_area, second_area)
        if denominator > 0:
            return shared_area / denominator
        return 0.0

    @staticmethod
    def normalize_to_pdf(bbox: LayoutBox, width_pt: float, height_pt: float, dpi: float) -> LayoutBox:
        """Express *bbox* as fractions of a page measuring *width_pt* x *height_pt*.

        Box coordinates and page size are both multiplied by ``dpi / 72``
        before dividing, so the same factor sits in numerator and
        denominator. With plain Python numbers, a zero *width_pt*,
        *height_pt* or *dpi* makes the division raise ``ZeroDivisionError``.

        Returns a new ``LayoutBox`` that keeps the label of *bbox*.
        """
        px_per_pt = dpi / 72

        # Page extent after applying the same factor as the box corners.
        page_w_px = width_pt * px_per_pt
        page_h_px = height_pt * px_per_pt

        return LayoutBox(
            x0=(bbox.x0 * px_per_pt) / page_w_px,
            y0=(bbox.y0 * px_per_pt) / page_h_px,
            x2=(bbox.x2 * px_per_pt) / page_w_px,
            y2=(bbox.y2 * px_per_pt) / page_h_px,
            label=bbox.label,
        )
|
|
|