Spaces:
Running on Zero
Running on Zero
File size: 5,175 Bytes
# ---------------------------------------------------------------------------
# Force-upgrade transformers to >=5.1.0 before any other import.
#
# Why: PP-DocLayoutV3's custom model classes (PPDocLayoutV3ImageProcessor,
# PPDocLayoutV3ForObjectDetection) were added to the transformers library in
# version 5.1.0. docling-ibm-models caps transformers<5.0.0 (conservative
# pinning), so pip resolves transformers ~4.x at build time. We upgrade it
# here at runtime, before any docling/transformers import, so the correct
# classes are available. docling-ibm-models' usage (AutoModel, pipeline API)
# remains compatible with transformers 5.x.
# ---------------------------------------------------------------------------
import subprocess
import sys
# Invoke pip through the running interpreter (`sys.executable -m pip`) so the
# upgrade targets this interpreter; check=True makes a non-zero pip exit
# raise instead of continuing with the older transformers.
_pip_upgrade_cmd = [sys.executable, "-m", "pip", "install"]
_pip_upgrade_cmd += ["transformers>=5.1.0", "--quiet"]
subprocess.run(_pip_upgrade_cmd, check=True)
# `spaces` MUST be imported before any package that touches CUDA (torch,
# transformers, docling …). ZeroGPU intercepts the CUDA initialisation; if
# anything else triggers it first the import raises RuntimeError.
import spaces # noqa: E402
# ---------------------------------------------------------------------------
# Plugin registration
# ---------------------------------------------------------------------------
# docling-pp-doc-layout requires Python >=3.12 on PyPI, but the code itself
# is compatible with Python 3.10 (all annotations are guarded by
# `from __future__ import annotations`). Instead of installing the package,
# we bundle the source directly and register the model with docling's factory
# by monkey-patching BaseFactory.load_from_plugins so that every new
# LayoutFactory instance automatically includes PPDocLayoutV3Model.
from docling.models.factories.base_factory import BaseFactory
from docling.models.factories.layout_factory import LayoutFactory
from docling_pp_doc_layout.model import PPDocLayoutV3Model
# Keep a handle on the unpatched method so the wrapper can delegate to it.
_orig_load = BaseFactory.load_from_plugins


def _load_with_pp_doc_layout(
    self, plugin_name=None, allow_external_plugins=False
):
    """Run the stock plugin loading, then register PPDocLayoutV3Model.

    Delegates to the original ``BaseFactory.load_from_plugins`` with the
    same arguments. The extra registration is applied only when ``self`` is
    a ``LayoutFactory``; any other factory is left exactly as the original
    call left it.
    """
    _orig_load(
        self,
        plugin_name=plugin_name,
        allow_external_plugins=allow_external_plugins,
    )
    if not isinstance(self, LayoutFactory):
        return
    registration = (
        PPDocLayoutV3Model,
        "docling-pp-doc-layout",
        "docling_pp_doc_layout.model",
    )
    try:
        self.register(*registration)
    except ValueError:
        # Already registered on a previous factory creation.
        return


# Install the wrapper on the base class so every factory goes through it.
BaseFactory.load_from_plugins = _load_with_pp_doc_layout
# ---------------------------------------------------------------------------
import gradio as gr
from docling.datamodel.base_models import InputFormat
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling_pp_doc_layout.options import PPDocLayoutV3Options
# Module-level set-up. Creating the converter here does not build the
# pipeline: that is deferred to the first convert() call, which runs inside
# the @spaces.GPU function, so decide_device() resolves "cuda:0" once the
# H200 has been allocated.
_layout_options = PPDocLayoutV3Options(batch_size=2, confidence_threshold=0.5)
pipeline_options = PdfPipelineOptions(layout_options=_layout_options)
_pdf_format_option = PdfFormatOption(pipeline_options=pipeline_options)
converter = DocumentConverter(
    format_options={InputFormat.PDF: _pdf_format_option},
)
def _write_json_tempfile(payload):
    """Serialise *payload* to a new ``.json`` temp file and return its path.

    The file is created with ``delete=False`` so it outlives this call and
    can be offered as a download. If serialisation fails, the handle is
    closed and the partially written file is removed before the exception
    propagates, so failed runs leave neither an open handle nor a stray file.
    """
    # Imported locally so this helper is self-contained.
    import json
    import os
    import tempfile

    tmp = tempfile.NamedTemporaryFile(
        mode="w", suffix=".json", delete=False, encoding="utf-8"
    )
    try:
        # The context manager closes the handle on success and on failure.
        with tmp:
            json.dump(payload, tmp, ensure_ascii=False, indent=2)
    except Exception:
        try:
            os.unlink(tmp.name)
        except OSError:
            pass  # best-effort cleanup; the original error matters more
        raise
    return tmp.name


@spaces.GPU(duration=120)
def infer_layout(file_path: str | None):
    """Convert an uploaded document and return its items plus a JSON file.

    Args:
        file_path: Path of the uploaded document; a falsy value means
            nothing was uploaded.

    Returns:
        A ``(data, json_path)`` pair:

        * on success, ``data`` is a list of ``{"type", "content"}`` dicts
          (one per item yielded by ``result.document.iterate_items()``) and
          ``json_path`` is the temp file holding the same data as JSON;
        * with no upload, ``({"error": "No file uploaded"}, None)``;
        * on any exception, ``({"runtime_exception": str(e)}, None)``.
    """
    if not file_path:
        return {"error": "No file uploaded"}, None
    try:
        result = converter.convert(file_path)
        structured_data = [
            {
                "type": type(item).__name__,
                # Items without a `text` attribute get a placeholder string.
                "content": getattr(item, "text", "No text mapping"),
            }
            for item, _level in result.document.iterate_items()
        ]
        # Written to a temp file so Gradio can serve it as a download.
        return structured_data, _write_json_tempfile(structured_data)
    except Exception as e:
        # UI boundary: report the failure in the JSON panel instead of raising.
        return {"runtime_exception": str(e)}, None
def run_and_reveal(file_path):
    """Run ``infer_layout`` and show the download button only on success.

    Returns the data for the JSON panel together with a ``gr.DownloadButton``
    update whose value is the JSON file path and whose visibility depends on
    whether a path was produced.
    """
    data, path = infer_layout(file_path)
    has_download = path is not None
    return data, gr.DownloadButton(value=path, visible=has_download)


with gr.Blocks(title="PP-DocLayoutV3 Empirical Parser") as interface:
    gr.Markdown(
        "## Layout Detection Inference\n"
        "Upload a PDF to parse structural components through the "
        "PaddlePaddle PP-DocLayoutV3 model."
    )

    # NOTE(review): row membership was reconstructed from a listing with no
    # indentation; input and output are assumed to share the row - confirm.
    with gr.Row():
        pdf_input = gr.File(label="Source Document", file_types=[".pdf"])
        json_output = gr.JSON(label="Structured Extraction Matrix")

    # Hidden until a run produces a file to download.
    download_btn = gr.DownloadButton(label="Download JSON", visible=False)
    execute_btn = gr.Button("Run Layout Detection")

    click_outputs = [json_output, download_btn]
    execute_btn.click(
        fn=run_and_reveal,
        inputs=pdf_input,
        outputs=click_outputs,
    )
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()