DocLayout-YOLO: Enhancing Document Layout Analysis through Diverse Synthetic Data and Global-to-Local Adaptive Perception
Paper
•
2410.12628
•
Published
•
41
Document layout detection model based on DocLayout-YOLO.
Supported classes: title, plain_text, abandon, figure, figure_caption, table, table_caption, table_footnote, isolate_formula, formula_caption

from huggingface_hub import snapshot_download
import sys
# Download model (includes code + weights). The returned value is used below
# as the local directory that holds the downloaded repository files.
repo_path = snapshot_download("anyformat-ai/doclayout-yolo-docstructbench")

# Import and use. The `doclayout_yolo` module is loaded from the downloaded
# repository, so its directory has to be on sys.path before the import.
sys.path.insert(0, repo_path)
from doclayout_yolo import DocLayoutModel

model = DocLayoutModel(f"{repo_path}/model.pt")
results = model.predict("document.png")

# Each detection is read by its 'class_name', 'confidence' and 'bbox' keys.
for det in results:
    print(f"{det['class_name']}: {det['confidence']:.2f} at {det['bbox']}")
import onnxruntime as ort
import numpy as np
from huggingface_hub import hf_hub_download
import json
# Download ONNX model and config
model_path = hf_hub_download("anyformat-ai/doclayout-yolo-docstructbench", "model.onnx")
config_path = hf_hub_download("anyformat-ai/doclayout-yolo-docstructbench", "config.json")

# Read the config with an explicit encoding so parsing does not depend on the
# platform's default locale encoding.
with open(config_path, encoding="utf-8") as f:
    config = json.load(f)

session = ort.InferenceSession(model_path)

# Preprocess image to (1, 3, 1024, 1024) float32, normalized to [0, 1]
# Run inference and post-process outputs
ultralytics
huggingface-hub
onnxruntime # for ONNX inference
@article{zhao2024doclayout,
title={DocLayout-YOLO: Enhancing Document Layout Analysis through Diverse Synthetic Data and Global-to-Local Adaptive Perception},
author={Zhao, Zhiyuan and Kang, Hengrui and Wang, Bin and He, Conghui},
journal={arXiv preprint arXiv:2410.12628},
year={2024}
}
Apache 2.0