File size: 3,219 Bytes
dc4e6da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
from __future__ import annotations

from dataclasses import dataclass


@dataclass(frozen=True)
class OCRBox:
    """Immutable record for one OCR word: a rectangle, its text and its position indices.

    ``(x0, y0)`` and ``(x2, y2)`` are two opposite corners of the rectangle;
    ``width`` and ``height`` are computed as ``x2 - x0`` and ``y2 - y0``.
    """

    x0: float
    y0: float
    x2: float
    y2: float
    text: str
    block_no: int
    line_no: int
    word_no: int

    def _stretched(self, factor_x, factor_y) -> OCRBox:
        """Build a new box with x coordinates times ``factor_x`` and y coordinates times ``factor_y``."""
        return OCRBox(
            self.x0 * factor_x,
            self.y0 * factor_y,
            self.x2 * factor_x,
            self.y2 * factor_y,
            text=self.text,
            block_no=self.block_no,
            line_no=self.line_no,
            word_no=self.word_no,
        )

    @property
    def key(self) -> tuple[int, int, int]:
        """The ``(block_no, line_no, word_no)`` triple for this word."""
        return self.block_no, self.line_no, self.word_no

    def as_string(self) -> str:
        """Serialize every field, in declaration order, as one comma-separated string.

        The text is emitted verbatim (no quoting or escaping).
        """
        parts = (
            self.x0, self.y0, self.x2, self.y2,
            self.text,
            self.block_no, self.line_no, self.word_no,
        )
        return ",".join(f"{part}" for part in parts)

    @property
    def width(self) -> float:
        """Horizontal extent of the box (``x2 - x0``)."""
        return self.x2 - self.x0

    @property
    def height(self) -> float:
        """Vertical extent of the box (``y2 - y0``)."""
        return self.y2 - self.y0

    def unnormalize(self, width_px, height_px) -> OCRBox:
        """Return a copy whose x values are multiplied by ``width_px`` and y values by ``height_px``.

        NOTE(review): presumably the stored coordinates are fractions of the
        image size and this maps them to pixels -- confirm against callers.
        """
        return self._stretched(width_px, height_px)

    def scale(self, scale) -> OCRBox:
        """Return a copy with all four coordinates multiplied by the same ``scale`` factor."""
        return self._stretched(scale, scale)


@dataclass(frozen=True)
class LayoutBox:
    """Immutable labelled rectangle with geometry helpers.

    ``(x0, y0)`` is treated as the minimum corner and ``(x2, y2)`` as the
    maximum corner by every helper below.
    """

    x0: float
    y0: float
    x2: float
    y2: float
    label: str

    @staticmethod
    def box_contains(outer: LayoutBox, inner: LayoutBox) -> bool:
        """Return True when ``inner`` lies entirely inside ``outer``.

        Edges that touch count as contained (the comparisons are inclusive).
        """
        return (outer.x0 <= inner.x0 and outer.y0 <= inner.y0 and
                outer.x2 >= inner.x2 and outer.y2 >= inner.y2)

    @staticmethod
    def calculate_overlap_ratio(box1: LayoutBox, box2: LayoutBox) -> float:
        """Return the intersection area divided by the area of the smaller box.

        Returns:
            A ratio in which 1.0 means the smaller box is fully covered by the
            other one, and 0.0 means the boxes do not overlap (boxes that only
            share an edge or corner count as non-overlapping) or the smaller
            box has no positive area.
        """
        x_left = max(box1.x0, box2.x0)
        y_top = max(box1.y0, box2.y0)
        x_right = min(box1.x2, box2.x2)
        y_bottom = min(box1.y2, box2.y2)

        # An empty or degenerate intersection rectangle means no overlap.
        if x_right <= x_left or y_bottom <= y_top:
            return 0.0

        intersection_area = (x_right - x_left) * (y_bottom - y_top)

        box1_area = (box1.x2 - box1.x0) * (box1.y2 - box1.y0)
        box2_area = (box2.x2 - box2.x0) * (box2.y2 - box2.y0)

        smaller_area = min(box1_area, box2_area)
        # Guard the division so a zero- or negative-area box yields 0.0.
        return intersection_area / smaller_area if smaller_area > 0 else 0.0

    @staticmethod
    def normalize_to_pdf(bbox: LayoutBox, width_pt: float, height_pt: float, dpi: float) -> LayoutBox:
        """Express a box given in PDF points as fractions of the page size.

        Args:
            bbox: Box whose coordinates are in PDF points.
            width_pt: Page width in points.
            height_pt: Page height in points.
            dpi: Rendering resolution. Kept only for signature compatibility:
                converting both the box and the page to pixels multiplies the
                numerator and denominator by the same ``dpi / 72`` factor, so
                it has no effect on the normalized result.

        Returns:
            A new ``LayoutBox`` with x values divided by ``width_pt``, y values
            divided by ``height_pt``, and the same label.

        Raises:
            ZeroDivisionError: If ``width_pt`` or ``height_pt`` is zero.
        """
        # Divide directly instead of scaling both sides to pixels first: the
        # intermediate multiplication only added floating-point rounding error
        # and made dpi == 0 fail with 0 / 0.
        return LayoutBox(
            x0=bbox.x0 / width_pt, y0=bbox.y0 / height_pt,
            x2=bbox.x2 / width_pt, y2=bbox.y2 / height_pt,
            label=bbox.label,
        )