File size: 1,974 Bytes
5539271
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
"""Bounding box coordinate normalization for Docling output.

Docling's BoundingBox uses two possible coordinate origins:
- TOPLEFT:    y=0 at top,    t < b  (t is smaller, closer to origin)
- BOTTOMLEFT: y=0 at bottom, t > b  (t is larger, further from origin)

The frontend canvas uses TOPLEFT coordinates. This module ensures all
bboxes are normalized to TOPLEFT [left, top, right, bottom] before
being sent to the frontend.
"""

import logging
import math

from docling_core.types.doc.base import BoundingBox

logger = logging.getLogger(__name__)

# Sentinel value returned when a bbox is invalid or degenerate.
# A zero-area rect is safe: the frontend draws nothing and hit-testing ignores it.
# to_topleft_list hands out a fresh copy of this list, so callers mutating
# their result cannot corrupt the shared constant.
EMPTY_BBOX: list[float] = [0.0, 0.0, 0.0, 0.0]


def to_topleft_list(bbox: BoundingBox, page_height: float) -> list[float]:
    """Convert a Docling BoundingBox to a [l, t, r, b] list in TOPLEFT origin.

    The bbox is first normalized with ``bbox.to_top_left_origin(page_height)``
    and then validated. A result is only returned as-is when all four
    coordinates are finite numbers and ``left < right`` and ``top < bottom``.
    Anything else (zero or negative area, NaN, or infinite coordinates)
    yields a copy of EMPTY_BBOX so the frontend silently skips it instead of
    rendering a broken rectangle.

    Args:
        bbox: Docling BoundingBox (any origin).
        page_height: Height of the page (needed for BOTTOMLEFT conversion).

    Returns:
        [left, top, right, bottom] in TOPLEFT coordinates, or a fresh copy of
        EMPTY_BBOX if the bbox is degenerate or contains non-finite values.
    """
    normalized = bbox.to_top_left_origin(page_height)
    coords = [normalized.l, normalized.t, normalized.r, normalized.b]
    left, top, right, bottom = coords

    # Non-finite values have to be rejected explicitly: every ordering
    # comparison against NaN evaluates to False, so the `<=` checks below
    # would otherwise let a NaN coordinate through as a "valid" bbox.
    # Infinite extents are not drawable rectangles either.
    has_non_finite = not all(math.isfinite(value) for value in coords)

    # Degenerate bbox: zero or negative dimensions — skip silently.
    # This can happen with corrupted PDFs or edge-case Docling outputs.
    if has_non_finite or right <= left or bottom <= top:
        logger.debug(
            "Degenerate bbox skipped: [%.1f, %.1f, %.1f, %.1f] (page_height=%.1f)",
            left,
            top,
            right,
            bottom,
            page_height,
        )
        # Return a copy so the module-level sentinel can never be mutated.
        return list(EMPTY_BBOX)

    return coords