File size: 5,175 Bytes
77089b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fabca3e
 
 
 
 
d7c9ee5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dbe48bf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8ac770e
dbe48bf
 
 
 
 
 
 
 
8ac770e
 
 
 
 
 
 
 
dbe48bf
8ac770e
dbe48bf
 
 
 
 
 
 
 
 
 
 
8ac770e
 
 
 
 
 
 
 
 
 
 
 
dbe48bf
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# ---------------------------------------------------------------------------
# Force-upgrade transformers to >=5.1.0 before any other import.
#
# Why: PP-DocLayoutV3's custom model classes (PPDocLayoutV3ImageProcessor,
# PPDocLayoutV3ForObjectDetection) were added to the transformers library in
# version 5.1.0.  docling-ibm-models caps transformers<5.0.0 (conservative
# pinning), so pip resolves transformers ~4.x at build time.  We upgrade it
# here at runtime, before any docling/transformers import, so the correct
# classes are available.  docling-ibm-models' usage (AutoModel, pipeline API)
# remains compatible with transformers 5.x.
# ---------------------------------------------------------------------------
import subprocess
import sys

# Invoke pip through the interpreter that is running this script so the
# upgrade lands in the same environment.  A non-zero pip exit status raises
# CalledProcessError and stops start-up here.
_TRANSFORMERS_UPGRADE_CMD = [
    sys.executable,
    "-m",
    "pip",
    "install",
    "transformers>=5.1.0",
    "--quiet",
]
subprocess.check_call(_TRANSFORMERS_UPGRADE_CMD)

# `spaces` MUST be imported before any package that touches CUDA (torch,
# transformers, docling …).  ZeroGPU intercepts the CUDA initialisation; if
# anything else triggers it first the import raises RuntimeError.
import spaces  # noqa: E402

# ---------------------------------------------------------------------------
# Plugin registration
# ---------------------------------------------------------------------------
# docling-pp-doc-layout requires Python >=3.12 on PyPI, but the code itself
# is compatible with Python 3.10 (all annotations are guarded by
# `from __future__ import annotations`).  Instead of installing the package,
# we bundle the source directly and register the model with docling's factory
# by monkey-patching BaseFactory.load_from_plugins so that every new
# LayoutFactory instance automatically includes PPDocLayoutV3Model.
from docling.models.factories.base_factory import BaseFactory
from docling.models.factories.layout_factory import LayoutFactory
from docling_pp_doc_layout.model import PPDocLayoutV3Model

# Keep a reference to the stock method so the replacement can delegate to it.
_orig_load = BaseFactory.load_from_plugins

# Identification strings handed to ``register`` alongside the model class.
_PP_PLUGIN_NAME = "docling-pp-doc-layout"
_PP_PLUGIN_MODULE = "docling_pp_doc_layout.model"


def _load_with_pp_doc_layout(
    self, plugin_name=None, allow_external_plugins=False
):
    """Run the stock plugin loading, then add PPDocLayoutV3Model.

    Installed as a replacement for ``BaseFactory.load_from_plugins``.  Both
    arguments are forwarded unchanged to the original method.  The extra
    registration only happens when ``self`` is a ``LayoutFactory``; every
    other factory type gets the original behaviour and nothing else.
    """
    _orig_load(
        self,
        plugin_name=plugin_name,
        allow_external_plugins=allow_external_plugins,
    )

    if not isinstance(self, LayoutFactory):
        return

    try:
        self.register(PPDocLayoutV3Model, _PP_PLUGIN_NAME, _PP_PLUGIN_MODULE)
    except ValueError:
        # Deliberately ignored: treated as "model already registered".
        pass


# Patch the base class so the wrapper applies to all factory instances.
BaseFactory.load_from_plugins = _load_with_pp_doc_layout

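# ---------------------------------------------------------------------------
# Application: document converter and Gradio UI
# ---------------------------------------------------------------------------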
import gradio as gr
from docling.datamodel.base_models import InputFormat
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling_pp_doc_layout.options import PPDocLayoutV3Options

# Module-level configuration objects.  Only options are built here; the
# pipeline itself is constructed lazily on the first convert() call, which
# runs inside @spaces.GPU, so decide_device() correctly resolves "cuda:0"
# once the H200 has been allocated.
_layout_options = PPDocLayoutV3Options(
    batch_size=2,
    confidence_threshold=0.5,
)
pipeline_options = PdfPipelineOptions(layout_options=_layout_options)

# PDF inputs are routed through the pipeline options defined above.
_pdf_format_option = PdfFormatOption(pipeline_options=pipeline_options)
converter = DocumentConverter(
    format_options={InputFormat.PDF: _pdf_format_option}
)


@spaces.GPU(duration=120)
def infer_layout(file_path: str | None):
    """Convert an uploaded document and list the items that were extracted.

    Args:
        file_path: Path of the uploaded file, or ``None`` / empty when
            nothing was uploaded.

    Returns:
        A ``(payload, json_path)`` tuple.  On success ``payload`` is a list
        of ``{"type": ..., "content": ...}`` dicts, one per item yielded by
        ``result.document.iterate_items()``, and ``json_path`` is the path
        of a temporary ``.json`` file containing the same data.  When no
        file was supplied, or any step raises, ``payload`` is a single-key
        error dict and ``json_path`` is ``None``.
    """
    import json
    import tempfile

    if not file_path:
        return {"error": "No file uploaded"}, None
    try:
        result = converter.convert(file_path)
        structured_data = [
            {
                "type": type(item).__name__,
                # Items without a ``text`` attribute get a placeholder.
                "content": getattr(item, "text", "No text mapping"),
            }
            for item, _level in result.document.iterate_items()
        ]
        # Serialise *before* creating the file: if the data cannot be
        # encoded, no orphaned, half-written temp file is left on disk.
        serialized = json.dumps(structured_data, ensure_ascii=False, indent=2)
        # delete=False keeps the file after the handle is closed so Gradio
        # can serve it as a download; the ``with`` block guarantees the
        # handle is closed even if the write fails.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".json", delete=False, encoding="utf-8"
        ) as tmp:
            tmp.write(serialized)
        return structured_data, tmp.name
    except Exception as e:
        # UI boundary: report the failure in the JSON panel instead of
        # letting the exception propagate to the caller.
        return {"runtime_exception": str(e)}, None


def run_and_reveal(file_path):
    """Click handler: run inference and update the download button.

    Returns the data for the JSON panel together with a ``DownloadButton``
    that points at the generated file and is visible only when a file path
    was produced.
    """
    data, path = infer_layout(file_path)
    has_download = path is not None
    return data, gr.DownloadButton(value=path, visible=has_download)


# Introductory text rendered at the top of the page.
_INTRO_MARKDOWN = (
    "## Layout Detection Inference\n"
    "Upload a PDF to parse structural components through the "
    "PaddlePaddle PP-DocLayoutV3 model."
)

with gr.Blocks(title="PP-DocLayoutV3 Empirical Parser") as interface:
    gr.Markdown(_INTRO_MARKDOWN)

    # Upload widget and result panel are created inside the same Row.
    with gr.Row():
        pdf_input = gr.File(label="Source Document", file_types=[".pdf"])
        json_output = gr.JSON(label="Structured Extraction Matrix")

    # Created hidden; the click handler's return value replaces it.
    download_btn = gr.DownloadButton(label="Download JSON", visible=False)
    execute_btn = gr.Button("Run Layout Detection")

    execute_btn.click(
        fn=run_and_reveal,
        inputs=pdf_input,
        outputs=[json_output, download_btn],
    )

# Launch the Blocks app only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    interface.launch()