Spaces:
Running on Zero
Running on Zero
Upload 9 files
Browse files
- app.py +38 -0
- docling_pp_doc_layout/__init__.py +3 -0
- docling_pp_doc_layout/label_mapping.py +34 -0
- docling_pp_doc_layout/model.py +225 -0
- docling_pp_doc_layout/options.py +110 -0
- docling_pp_doc_layout/plugin.py +12 -0
- docling_pp_doc_layout/py.typed +0 -0
- requirements.txt +5 -1
app.py
CHANGED
|
@@ -1,3 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import spaces
|
| 3 |
from docling.datamodel.base_models import InputFormat
|
|
|
|
| 1 |
+
# ---------------------------------------------------------------------------
|
| 2 |
+
# Plugin registration
|
| 3 |
+
# ---------------------------------------------------------------------------
|
| 4 |
+
# docling-pp-doc-layout requires Python >=3.12 on PyPI, but the code itself
|
| 5 |
+
# is compatible with Python 3.10 (all annotations are guarded by
|
| 6 |
+
# `from __future__ import annotations`). Instead of installing the package,
|
| 7 |
+
# we bundle the source directly and register the model with docling's factory
|
| 8 |
+
# by monkey-patching BaseFactory.load_from_plugins so that every new
|
| 9 |
+
# LayoutFactory instance automatically includes PPDocLayoutV3Model.
|
| 10 |
+
from docling.models.factories.base_factory import BaseFactory
|
| 11 |
+
from docling.models.factories.layout_factory import LayoutFactory
|
| 12 |
+
from docling_pp_doc_layout.model import PPDocLayoutV3Model
|
| 13 |
+
|
| 14 |
+
_orig_load = BaseFactory.load_from_plugins
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def _load_with_pp_doc_layout(
    self, plugin_name=None, allow_external_plugins=False
):
    """Run the original plugin loader, then add PP-DocLayout-V3 to layout factories.

    Installed in place of ``BaseFactory.load_from_plugins``. The original
    loader (captured as ``_orig_load``) always runs first with the same
    arguments; only ``LayoutFactory`` instances get the extra registration.
    """
    _orig_load(
        self,
        plugin_name=plugin_name,
        allow_external_plugins=allow_external_plugins,
    )

    # Other factory kinds are left exactly as the original loader built them.
    if not isinstance(self, LayoutFactory):
        return

    registration = (
        PPDocLayoutV3Model,
        "docling-pp-doc-layout",
        "docling_pp_doc_layout.model",
    )
    try:
        self.register(*registration)
    except ValueError:
        # Deliberately ignored: the model was already registered on a
        # previous factory creation.
        pass
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
BaseFactory.load_from_plugins = _load_with_pp_doc_layout
|
| 37 |
+
|
| 38 |
+
# ---------------------------------------------------------------------------
|
| 39 |
import gradio as gr
|
| 40 |
import spaces
|
| 41 |
from docling.datamodel.base_models import InputFormat
|
docling_pp_doc_layout/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""A Docling plugin for PaddlePaddle PP-DocLayout-V3 model document layout detection."""
|
| 2 |
+
|
| 3 |
+
__version__ = "0.1.0"
|
docling_pp_doc_layout/label_mapping.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Mapping from PP-DocLayout-V3 label names to docling DocItemLabel values.
|
| 2 |
+
|
| 3 |
+
Every label produced here must exist in
|
| 4 |
+
``docling.utils.layout_postprocessor.LayoutPostprocessor.CONFIDENCE_THRESHOLDS``
|
| 5 |
+
so that the postprocessor can apply confidence filtering without a ``KeyError``.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
from docling_core.types.doc import DocItemLabel
|
| 11 |
+
|
| 12 |
+
# Keys are the raw PP-DocLayout-V3 label names; values are the docling labels
# they collapse to. Several distinct model labels intentionally share one
# docling label because docling has no finer-grained equivalent here.
LABEL_MAP: dict[str, DocItemLabel] = {
    "abstract": DocItemLabel.TEXT,
    "algorithm": DocItemLabel.CODE,
    "aside_text": DocItemLabel.TEXT,
    "chart": DocItemLabel.PICTURE,  # charts are treated as pictures
    "content": DocItemLabel.TEXT,
    "doc_title": DocItemLabel.TITLE,
    "figure_title": DocItemLabel.CAPTION,
    "footer": DocItemLabel.PAGE_FOOTER,
    "footnote": DocItemLabel.FOOTNOTE,
    "formula": DocItemLabel.FORMULA,
    "formula_number": DocItemLabel.TEXT,  # the number itself is plain text
    "header": DocItemLabel.PAGE_HEADER,
    "image": DocItemLabel.PICTURE,
    "number": DocItemLabel.TEXT,
    "paragraph_title": DocItemLabel.SECTION_HEADER,
    "reference": DocItemLabel.TEXT,
    "reference_content": DocItemLabel.TEXT,
    "seal": DocItemLabel.PICTURE,  # seals/stamps are treated as pictures
    "table": DocItemLabel.TABLE,
    "text": DocItemLabel.TEXT,
    "vision_footnote": DocItemLabel.FOOTNOTE,
}
|
docling_pp_doc_layout/model.py
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""PP-DocLayout-V3 layout model for the docling standard pipeline.
|
| 2 |
+
|
| 3 |
+
Runs PaddlePaddle PP-DocLayout-V3 locally via HuggingFace ``transformers``
|
| 4 |
+
to detect document layout elements and returns ``LayoutPrediction`` objects
|
| 5 |
+
that docling merges with its standard-pipeline output.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import logging
|
| 11 |
+
import warnings
|
| 12 |
+
from typing import TYPE_CHECKING
|
| 13 |
+
|
| 14 |
+
import numpy as np
|
| 15 |
+
import torch
|
| 16 |
+
from docling.datamodel.base_models import BoundingBox, Cluster, LayoutPrediction, Page
|
| 17 |
+
from docling.models.base_layout_model import BaseLayoutModel
|
| 18 |
+
from docling.utils.accelerator_utils import decide_device
|
| 19 |
+
from docling.utils.layout_postprocessor import LayoutPostprocessor
|
| 20 |
+
from docling.utils.profiling import TimeRecorder
|
| 21 |
+
from docling_core.types.doc import DocItemLabel
|
| 22 |
+
from transformers import AutoImageProcessor, AutoModelForObjectDetection
|
| 23 |
+
|
| 24 |
+
from docling_pp_doc_layout.label_mapping import LABEL_MAP
|
| 25 |
+
from docling_pp_doc_layout.options import PPDocLayoutV3Options
|
| 26 |
+
|
| 27 |
+
if TYPE_CHECKING:
|
| 28 |
+
from collections.abc import Sequence
|
| 29 |
+
from pathlib import Path
|
| 30 |
+
|
| 31 |
+
from docling.datamodel.accelerator_options import AcceleratorOptions
|
| 32 |
+
from docling.datamodel.document import ConversionResult
|
| 33 |
+
from docling.datamodel.pipeline_options import BaseLayoutOptions
|
| 34 |
+
from PIL import Image
|
| 35 |
+
|
| 36 |
+
logger = logging.getLogger(__name__)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
class PPDocLayoutV3Model(BaseLayoutModel):
    """Layout engine using PP-DocLayout-V3 via HuggingFace transformers.

    The model and its image processor are loaded once in ``__init__`` and
    reused for every call to :meth:`predict_layout`.
    """

    # Result keys that may carry per-detection polygons, in lookup order.
    _POLYGON_KEYS = ("polygons", "polygon_points")

    def __init__(
        self,
        artifacts_path: Path | None,
        accelerator_options: AcceleratorOptions,
        options: PPDocLayoutV3Options,
        *,
        enable_remote_services: bool = False,  # noqa: ARG002
    ) -> None:
        """Load the image processor and detection model named by *options*.

        Args:
            artifacts_path: Stored on the instance.
                NOTE(review): it is not consulted when loading; the weights
                are always resolved from ``options.model_name``.
            accelerator_options: Its ``device`` is passed to ``decide_device``
                to pick the torch device.
            options: Engine options (model name, thresholds, batch size).
            enable_remote_services: Accepted for interface compatibility and
                unused.
        """
        self.options = options
        self.artifacts_path = artifacts_path
        self.accelerator_options = accelerator_options

        self._device = decide_device(accelerator_options.device)
        logger.info(
            "Loading PP-DocLayout-V3 model %s on device=%s",
            options.model_name,
            self._device,
        )

        self._image_processor = AutoImageProcessor.from_pretrained(
            options.model_name,
        )
        self._model = AutoModelForObjectDetection.from_pretrained(
            options.model_name,
        ).to(self._device)
        self._model.eval()

        self._id2label: dict[int, str] = self._model.config.id2label
        logger.info("PP-DocLayout-V3 model loaded successfully")

    @classmethod
    def get_options_type(cls) -> type[BaseLayoutOptions]:
        """Return the options class for this layout model."""
        return PPDocLayoutV3Options

    @staticmethod
    def _polygon_bounds(poly: object) -> tuple[float, float, float, float] | None:
        """Return ``(l, t, r, b)`` enclosing *poly*, or ``None`` if unusable.

        Accepts a flat ``[x0, y0, x1, y1, ...]`` sequence or any nested
        layout whose last axis holds ``(x, y)``, given as a list, numpy array
        or torch tensor. ``None`` tells the caller to fall back to the
        detector's box.
        """
        if poly is None:
            return None
        if isinstance(poly, torch.Tensor):
            # np.asarray cannot read tensors that live on an accelerator.
            poly = poly.detach().cpu().numpy()
        try:
            points = np.asarray(poly, dtype=float)
        except (TypeError, ValueError):
            # Ragged or non-numeric polygon data.
            return None

        if points.ndim == 1:
            xs, ys = points[0::2], points[1::2]
        elif points.ndim >= 2 and points.shape[-1] >= 2:
            points = points.reshape(-1, points.shape[-1])
            xs, ys = points[:, 0], points[:, 1]
        else:
            return None

        if xs.size == 0 or ys.size == 0:
            return None
        return float(xs.min()), float(ys.min()), float(xs.max()), float(ys.max())

    @classmethod
    def _result_polygons(cls, result: dict) -> object | None:
        """Return the first non-empty polygon collection in *result*, if any.

        Uses explicit ``None``/length checks instead of ``or`` so that array
        or tensor values are never evaluated for truthiness.
        """
        for key in cls._POLYGON_KEYS:
            value = result.get(key)
            if value is not None and len(value) > 0:
                return value
        return None

    def _run_inference(
        self,
        images: list[Image.Image],
    ) -> list[list[dict]]:
        """Run PP-DocLayout-V3 on a batch of PIL images.

        Returns a list (per image) of lists of detection dicts with keys
        ``label``, ``confidence``, ``l``, ``t``, ``r``, ``b``. Coordinates are
        plain Python floats taken from the detection polygon when one is
        available, otherwise from the detection box.

        Raises:
            ValueError: If the scores, labels, boxes and polygons of a result
                do not all have the same length (``zip(..., strict=True)``).
        """
        inputs = self._image_processor(images=images, return_tensors="pt")
        inputs = {k: v.to(self._device) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = self._model(**inputs)

        target_sizes = [img.size[::-1] for img in images]  # (height, width)
        results = self._image_processor.post_process_object_detection(
            outputs,
            target_sizes=target_sizes,
            threshold=self.options.confidence_threshold,
        )

        batch_detections: list[list[dict]] = []
        for result in results:
            polys = self._result_polygons(result)
            if polys is None:
                # No polygons for this image: pair every detection with None
                # so the box is used instead.
                polys = [None] * len(result["scores"])

            detections: list[dict] = []
            for score, label_id, box, poly in zip(
                result["scores"],
                result["labels"],
                result["boxes"],
                polys,
                strict=True,
            ):
                # Unknown ids and unmapped names both degrade to plain text.
                raw_label = self._id2label.get(label_id.item(), "text")
                doc_label = LABEL_MAP.get(raw_label, DocItemLabel.TEXT)

                bounds = self._polygon_bounds(poly)
                if bounds is None:
                    bounds = tuple(box.tolist())
                x_min, y_min, x_max, y_max = bounds

                detections.append({
                    "label": doc_label,
                    "confidence": score.item(),
                    "l": x_min,
                    "t": y_min,
                    "r": x_max,
                    "b": y_max,
                })
            batch_detections.append(detections)

        return batch_detections

    def predict_layout(
        self,
        conv_res: ConversionResult,
        pages: Sequence[Page],
    ) -> Sequence[LayoutPrediction]:
        """Detect layout regions for a batch of document pages.

        Pages without a valid backend, a size, or a renderable image are
        skipped for inference; for those the page's existing layout
        prediction (or an empty one) is returned. The result has one entry
        per input page, in input order. Layout and OCR confidence scores are
        written to ``conv_res.confidence.pages`` for every processed page.
        """
        pages = list(pages)

        valid_images: list[Image.Image] = []
        is_page_valid: list[bool] = []

        for page in pages:
            page_image = None
            backend = page._backend  # noqa: SLF001
            if backend is not None and backend.is_valid() and page.size is not None:
                page_image = page.get_image(scale=1.0)

            is_page_valid.append(page_image is not None)
            if page_image is not None:
                valid_images.append(page_image)

        batch_detections: list[list[dict]] = []
        if valid_images:
            with TimeRecorder(conv_res, "layout"):
                # Clamp so a non-positive batch size cannot produce a zero
                # or negative range step.
                bs = max(1, self.options.batch_size)
                for i in range(0, len(valid_images), bs):
                    batch = valid_images[i : i + bs]
                    batch_detections.extend(self._run_inference(batch))

        layout_predictions: list[LayoutPrediction] = []
        valid_idx = 0

        for idx, page in enumerate(pages):
            if not is_page_valid[idx]:
                existing = page.predictions.layout or LayoutPrediction()
                layout_predictions.append(existing)
                continue

            # batch_detections only holds entries for valid pages, so it is
            # indexed by its own counter rather than by the page index.
            detections = batch_detections[valid_idx]
            valid_idx += 1

            clusters: list[Cluster] = [
                Cluster(
                    id=ix,
                    label=det["label"],
                    confidence=det["confidence"],
                    bbox=BoundingBox(
                        l=det["l"],
                        t=det["t"],
                        r=det["r"],
                        b=det["b"],
                    ),
                    cells=[],
                )
                for ix, det in enumerate(detections)
            ]

            processed_clusters, processed_cells = LayoutPostprocessor(page, clusters, self.options).postprocess()

            # np.mean of an empty list yields NaN and emits RuntimeWarnings;
            # those specific warnings are silenced here.
            with warnings.catch_warnings():
                warnings.filterwarnings(
                    "ignore",
                    "Mean of empty slice|invalid value encountered in scalar divide",
                    RuntimeWarning,
                    "numpy",
                )
                conv_res.confidence.pages[page.page_no].layout_score = float(
                    np.mean([c.confidence for c in processed_clusters])
                )
                conv_res.confidence.pages[page.page_no].ocr_score = float(
                    np.mean([c.confidence for c in processed_cells if c.from_ocr])
                )

            prediction = LayoutPrediction(clusters=processed_clusters)
            layout_predictions.append(prediction)

        return layout_predictions
|
docling_pp_doc_layout/options.py
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Configuration model for the PP-DocLayout-V3 layout engine."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import os
|
| 6 |
+
from typing import Annotated, ClassVar, Literal
|
| 7 |
+
|
| 8 |
+
from docling.datamodel.pipeline_options import LayoutOptions
|
| 9 |
+
from pydantic import ConfigDict, Field
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def _parse_bool(value: str) -> bool:
|
| 13 |
+
"""Parse a string environment variable value as a boolean.
|
| 14 |
+
|
| 15 |
+
Args:
|
| 16 |
+
value: The string to parse. Case-insensitive ``"true"``, ``"1"``,
|
| 17 |
+
and ``"yes"`` are truthy; everything else is falsy.
|
| 18 |
+
|
| 19 |
+
Returns:
|
| 20 |
+
``True`` if *value* is a recognised truthy string, ``False`` otherwise.
|
| 21 |
+
"""
|
| 22 |
+
return value.lower() in ("true", "1", "yes")
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class PPDocLayoutV3Options(LayoutOptions):
    """Options for the PP-DocLayout-V3 layout detection engine.

    Uses a HuggingFace-hosted PP-DocLayout-V3 model to detect document
    layout elements (text, tables, figures, headers, etc.) in page images.

    All options fall back to environment variables when not set explicitly,
    allowing configuration without code changes (e.g. in Docker / Compose
    deployments). Environment-derived defaults are validated against the
    same constraints as explicit values, so an out-of-range environment
    value fails at construction time instead of later during inference.

    Attributes:
        model_name: HuggingFace model repository ID.
            Falls back to the ``PP_DOC_LAYOUT_MODEL_NAME`` env var.
        confidence_threshold: Minimum confidence score for detections.
            Falls back to the ``PP_DOC_LAYOUT_CONFIDENCE_THRESHOLD`` env var.
        batch_size: Number of pages per inference batch.
            Falls back to the ``PP_DOC_LAYOUT_BATCH_SIZE`` env var.
        create_orphan_clusters: Create clusters for orphaned elements.
            Falls back to the ``PP_DOC_LAYOUT_CREATE_ORPHAN_CLUSTERS`` env var.
        keep_empty_clusters: Retain empty clusters in results.
            Falls back to the ``PP_DOC_LAYOUT_KEEP_EMPTY_CLUSTERS`` env var.
        skip_cell_assignment: Skip table-cell assignment during layout analysis.
            Falls back to the ``PP_DOC_LAYOUT_SKIP_CELL_ASSIGNMENT`` env var.
    """

    kind: ClassVar[Literal["ppdoclayout-v3"]] = "ppdoclayout-v3"

    model_name: Annotated[
        str,
        Field(description="HuggingFace model repository ID for PP-DocLayout-V3."),
    ] = Field(
        default_factory=lambda: os.environ.get(
            "PP_DOC_LAYOUT_MODEL_NAME",
            "PaddlePaddle/PP-DocLayoutV3_safetensors",
        )
    )

    # Pydantic skips validation of defaults unless asked, so without
    # validate_default=True an env value such as 1.5 would bypass ge/le.
    confidence_threshold: Annotated[
        float,
        Field(
            ge=0.0,
            le=1.0,
            description="Minimum confidence score to keep a detection.",
        ),
    ] = Field(
        default_factory=lambda: float(os.environ.get("PP_DOC_LAYOUT_CONFIDENCE_THRESHOLD", "0.5")),
        validate_default=True,
    )

    # Same reasoning: PP_DOC_LAYOUT_BATCH_SIZE=0 must be rejected by gt=0.
    batch_size: Annotated[
        int,
        Field(
            gt=0,
            description="Batch size for layout inference.",
        ),
    ] = Field(
        default_factory=lambda: int(os.environ.get("PP_DOC_LAYOUT_BATCH_SIZE", "8")),
        validate_default=True,
    )

    # Override inherited boolean fields to add environment-variable support.
    create_orphan_clusters: Annotated[
        bool,
        Field(
            description=(
                "Create clusters for orphaned elements not assigned to any structure. "
                "Falls back to PP_DOC_LAYOUT_CREATE_ORPHAN_CLUSTERS env var."
            )
        ),
    ] = Field(default_factory=lambda: _parse_bool(os.environ.get("PP_DOC_LAYOUT_CREATE_ORPHAN_CLUSTERS", "true")))

    keep_empty_clusters: Annotated[
        bool,
        Field(
            description=(
                "Retain empty clusters in layout analysis results. "
                "Falls back to PP_DOC_LAYOUT_KEEP_EMPTY_CLUSTERS env var."
            )
        ),
    ] = Field(default_factory=lambda: _parse_bool(os.environ.get("PP_DOC_LAYOUT_KEEP_EMPTY_CLUSTERS", "false")))

    skip_cell_assignment: Annotated[
        bool,
        Field(
            description=(
                "Skip assignment of cells to table structures during layout analysis. "
                "Falls back to PP_DOC_LAYOUT_SKIP_CELL_ASSIGNMENT env var."
            )
        ),
    ] = Field(default_factory=lambda: _parse_bool(os.environ.get("PP_DOC_LAYOUT_SKIP_CELL_ASSIGNMENT", "false")))

    # Reject unknown option names instead of silently ignoring typos.
    model_config = ConfigDict(extra="forbid")
|
docling_pp_doc_layout/plugin.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Docling plugin entry point registering the PP-DocLayout-V3 layout engine."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from typing import Any
|
| 6 |
+
|
| 7 |
+
from docling_pp_doc_layout.model import PPDocLayoutV3Model
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def layout_engines() -> dict[str, Any]:
    """Expose this plugin's layout engine classes under the ``layout_engines`` key."""
    engines = [PPDocLayoutV3Model]
    return {"layout_engines": engines}
|
docling_pp_doc_layout/py.typed
ADDED
|
File without changes
|
requirements.txt
CHANGED
|
@@ -1,2 +1,6 @@
|
|
| 1 |
-
docling-pp-doc-layout
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
spaces
|
|
|
|
| 1 |
+
# docling-pp-doc-layout is bundled as a local package (docling_pp_doc_layout/)
|
| 2 |
+
# because its PyPI releases require Python >=3.12 and ZeroGPU runs Python 3.10.
|
| 3 |
+
# Its dependencies are listed here directly instead.
|
| 4 |
+
docling>=2.73
|
| 5 |
+
transformers>=5.1.0
|
| 6 |
spaces
|