Spaces:
Running on Zero
Running on Zero
File size: 1,207 Bytes
"""Mapping from PP-DocLayout-V3 label names to docling DocItemLabel values.

Every label produced here must exist in
``docling.utils.layout_postprocessor.LayoutPostprocessor.CONFIDENCE_THRESHOLDS``
so that the postprocessor can apply confidence filtering without a ``KeyError``.
"""
from __future__ import annotations
from docling_core.types.doc import DocItemLabel
# Lookup table keyed by PP-DocLayout-V3 label name; each value is the docling
# ``DocItemLabel`` member that the label maps to. The mapping is many-to-one:
# several source labels share a single target (e.g. TEXT, PICTURE, FOOTNOTE).
LABEL_MAP: dict[str, DocItemLabel] = {
    "abstract": DocItemLabel.TEXT,
    "algorithm": DocItemLabel.CODE,
    "aside_text": DocItemLabel.TEXT,
    "chart": DocItemLabel.PICTURE,
    "content": DocItemLabel.TEXT,
    "doc_title": DocItemLabel.TITLE,
    "figure_title": DocItemLabel.CAPTION,
    "footer": DocItemLabel.PAGE_FOOTER,
    "footnote": DocItemLabel.FOOTNOTE,
    "formula": DocItemLabel.FORMULA,
    "formula_number": DocItemLabel.TEXT,
    "header": DocItemLabel.PAGE_HEADER,
    "image": DocItemLabel.PICTURE,
    "number": DocItemLabel.TEXT,
    "paragraph_title": DocItemLabel.SECTION_HEADER,
    "reference": DocItemLabel.TEXT,
    "reference_content": DocItemLabel.TEXT,
    "seal": DocItemLabel.PICTURE,
    "table": DocItemLabel.TABLE,
    "text": DocItemLabel.TEXT,
    "vision_footnote": DocItemLabel.FOOTNOTE,
}
|