File size: 1,207 Bytes
d7c9ee5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
"""Mapping from PP-DocLayout-V3 label names to docling DocItemLabel values.

Every label produced here must exist in
``docling.utils.layout_postprocessor.LayoutPostprocessor.CONFIDENCE_THRESHOLDS``
so that the postprocessor can apply confidence filtering without a ``KeyError``.
"""

from __future__ import annotations

from docling_core.types.doc import DocItemLabel

# Lookup table from a PP-DocLayout-V3 label name (key) to the docling
# ``DocItemLabel`` assigned to it (value).
#
# The mapping is many-to-one: several source labels collapse onto the same
# target (TEXT, PICTURE and FOOTNOTE each have more than one key).
# Keys are listed in alphabetical order.
#
# NOTE(review): when adding or changing an entry, confirm the target label is
# present in ``LayoutPostprocessor.CONFIDENCE_THRESHOLDS`` -- the module
# docstring states this is required to avoid a ``KeyError`` downstream.
LABEL_MAP: dict[str, DocItemLabel] = {
    "abstract": DocItemLabel.TEXT,
    "algorithm": DocItemLabel.CODE,
    "aside_text": DocItemLabel.TEXT,
    "chart": DocItemLabel.PICTURE,  # same target as "image" and "seal"
    "content": DocItemLabel.TEXT,
    "doc_title": DocItemLabel.TITLE,
    "figure_title": DocItemLabel.CAPTION,
    "footer": DocItemLabel.PAGE_FOOTER,
    "footnote": DocItemLabel.FOOTNOTE,
    "formula": DocItemLabel.FORMULA,
    "formula_number": DocItemLabel.TEXT,  # mapped to TEXT, not FORMULA
    "header": DocItemLabel.PAGE_HEADER,
    "image": DocItemLabel.PICTURE,
    "number": DocItemLabel.TEXT,
    "paragraph_title": DocItemLabel.SECTION_HEADER,
    "reference": DocItemLabel.TEXT,
    "reference_content": DocItemLabel.TEXT,
    "seal": DocItemLabel.PICTURE,
    "table": DocItemLabel.TABLE,
    "text": DocItemLabel.TEXT,
    "vision_footnote": DocItemLabel.FOOTNOTE,  # same target as "footnote"
}