Ahadhassan-2003
deploy: update HF Space
dc4e6da
from __future__ import annotations
from dataclasses import dataclass
@dataclass(frozen=True)
class OCRBox:
    """Immutable bounding box for a single OCR token.

    ``width`` and ``height`` are computed as ``x2 - x0`` and ``y2 - y0``,
    so (x0, y0) and (x2, y2) are presumably opposite corners of the box
    (top-left / bottom-right) -- TODO confirm against the producer.
    """

    x0: float
    y0: float
    x2: float
    y2: float
    text: str
    # Indices of the token; presumably block / line / word positions as
    # reported by the OCR engine -- verify against the caller.
    block_no: int
    line_no: int
    word_no: int

    @property
    def key(self):
        """Return the ``(block_no, line_no, word_no)`` tuple for this box."""
        return self.block_no, self.line_no, self.word_no

    def as_string(self) -> str:
        """Return every field, in declaration order, joined by commas.

        The text is inserted verbatim; commas inside ``text`` are not escaped.
        """
        parts = (
            self.x0,
            self.y0,
            self.x2,
            self.y2,
            self.text,
            self.block_no,
            self.line_no,
            self.word_no,
        )
        return ",".join(f"{part}" for part in parts)

    @property
    def width(self):
        """Return the horizontal extent, ``x2 - x0``."""
        return self.x2 - self.x0

    @property
    def height(self):
        """Return the vertical extent, ``y2 - y0``."""
        return self.y2 - self.y0

    def _rescaled(self, factor_x, factor_y):
        """Return a new box with x values times *factor_x*, y values times *factor_y*."""
        return OCRBox(
            self.x0 * factor_x,
            self.y0 * factor_y,
            self.x2 * factor_x,
            self.y2 * factor_y,
            text=self.text,
            block_no=self.block_no,
            line_no=self.line_no,
            word_no=self.word_no,
        )

    def unnormalize(self, width_px, height_px):
        """Return a new box with x values multiplied by *width_px* and y values by *height_px*.

        Presumably used to map normalized [0, 1] coordinates to pixels --
        TODO confirm. Text and indices are carried over unchanged.
        """
        return self._rescaled(width_px, height_px)

    def scale(self, scale):
        """Return a new box with all four coordinates multiplied by *scale*.

        Text and indices are carried over unchanged.
        """
        return self._rescaled(scale, scale)
@dataclass(frozen=True)
class LayoutBox:
    """Immutable labelled rectangle described by two corner points.

    The helpers below treat (x0, y0) as the minimum corner and (x2, y2)
    as the maximum corner of the rectangle.
    """

    x0: float
    y0: float
    x2: float
    y2: float
    label: str

    @staticmethod
    def box_contains(outer: LayoutBox, inner: LayoutBox) -> bool:
        """Return True when *inner* lies entirely within *outer*.

        Edges that coincide count as contained.
        """
        spans_x = outer.x0 <= inner.x0 and inner.x2 <= outer.x2
        spans_y = outer.y0 <= inner.y0 and inner.y2 <= outer.y2
        return spans_x and spans_y

    @staticmethod
    def calculate_overlap_ratio(box1: LayoutBox, box2: LayoutBox) -> float:
        """Return intersection area divided by the area of the smaller box.

        Returns 0.0 when the boxes do not overlap (touching edges count as
        no overlap) or when the smaller area is not positive.
        """
        # Edges of the candidate intersection rectangle.
        left = max(box1.x0, box2.x0)
        top = max(box1.y0, box2.y0)
        right = min(box1.x2, box2.x2)
        bottom = min(box1.y2, box2.y2)

        if right <= left or bottom <= top:
            return 0.0

        overlap = (right - left) * (bottom - top)
        reference = min(
            (box1.x2 - box1.x0) * (box1.y2 - box1.y0),
            (box2.x2 - box2.x0) * (box2.y2 - box2.y0),
        )
        if reference > 0:
            return overlap / reference
        return 0.0

    @staticmethod
    def normalize_to_pdf(bbox: LayoutBox, width_pt: float, height_pt: float, dpi: float) -> LayoutBox:
        """Convert a bounding box from PDF points to normalized image coordinates.

        Both the box and the page size (*width_pt*, *height_pt*) are scaled
        from points to pixels with ``dpi / 72``; each pixel coordinate is
        then divided by the matching page dimension in pixels. The label
        is copied unchanged.

        With plain Python numbers a zero *width_pt*, *height_pt* or *dpi*
        makes a divisor zero and raises ``ZeroDivisionError``.

        NOTE(review): the same scale factor multiplies numerator and
        denominator, so *dpi* only affects floating-point rounding.
        """
        px_per_pt = dpi / 72

        # Page size expressed in pixels.
        page_w_px = width_pt * px_per_pt
        page_h_px = height_pt * px_per_pt

        return LayoutBox(
            x0=(bbox.x0 * px_per_pt) / page_w_px,
            y0=(bbox.y0 * px_per_pt) / page_h_px,
            x2=(bbox.x2 * px_per_pt) / page_w_px,
            y2=(bbox.y2 * px_per_pt) / page_h_px,
            label=bbox.label,
        )