|
|
from pathlib import Path |
|
|
import gradio as gr |
|
|
import pymupdf |
|
|
from ultralytics import YOLO |
|
|
from PIL import Image |
|
|
from huggingface_hub import hf_hub_download |
|
|
|
|
|
# Directory with the bundled example images, resolved relative to this file.
SAMPLES = Path(__file__).parent / "samples"

# Example image paths: image1.png through image4.png inside SAMPLES.
IMAGE_SAMPLES = [SAMPLES / f"image{index}.png" for index in range(1, 5)]
|
|
|
|
|
# Hugging Face repository id shared by every checkpoint listed below.
_MODEL_REPO = "Armaggheddon/yolo11-document-layout"

# Maps a model name to the (repo_id, filename) pair used to download it.
AVAILABLE_MODELS = {
    f"yolo11{size}": (_MODEL_REPO, f"yolo11{size}_doc_layout.pt")
    for size in ("n", "s", "m")
}

# Model name that load_model() compares the requested name against.
current_model = "yolo11n"
# Loaded YOLO instance; None until load_model() has created one.
model = None
|
|
|
|
|
def load_model(selected_model):
    """Make the module-level ``model`` hold the checkpoint named ``selected_model``.

    The weights file is fetched with ``hf_hub_download`` and wrapped in a
    ``YOLO`` object. Nothing is done when a model is already loaded and its
    name matches ``selected_model``.

    Args:
        selected_model: A key of ``AVAILABLE_MODELS``.

    Raises:
        KeyError: If ``selected_model`` is not a key of ``AVAILABLE_MODELS``.
    """
    global model, current_model

    if model is not None and current_model == selected_model:
        return

    repo_id, filename = AVAILABLE_MODELS[selected_model]
    model_path = hf_hub_download(repo_id=repo_id, filename=filename)
    model = YOLO(model_path)
    # Record the loaded variant only after a successful load. Without this
    # update the name check above compares against a stale value and can keep
    # the wrong weights active after the user switches models.
    current_model = selected_model
|
|
|
|
|
def model_runner(image, conf=0.25, iou=0.45, imgsz=1280):
    """Run the loaded model on one image and return the annotated picture.

    Args:
        image: Value passed to ``model.predict`` as ``source``; callers in
            this file pass a PIL image or an image file path.
        conf: Confidence threshold forwarded to ``predict``.
        iou: IoU threshold forwarded to ``predict``.
        imgsz: Inference image size forwarded to ``predict``.

    Returns:
        The annotated first result as an array with channels in RGB order.

    Raises:
        RuntimeError: If no model has been loaded yet.
    """
    if model is None:
        raise RuntimeError("No model loaded; call load_model() first.")

    results = model.predict(
        source=image,
        save=False,
        verbose=False,
        conf=conf,
        iou=iou,
        imgsz=imgsz,
    )
    # Ultralytics' Results.plot() returns a BGR array, whereas the Gradio
    # gallery interprets arrays as RGB; reverse the channel axis so colours
    # are displayed correctly.
    return results[0].plot()[..., ::-1]
|
|
|
|
|
def process_input(selected_model, pdf_input, image_input, conf=0.25, iou=0.45):
    """Run layout detection on an uploaded PDF or image.

    A PDF takes precedence when both inputs are usable: every page is
    rendered at 200 dpi and passed through the model. Otherwise the single
    image is processed.

    Args:
        selected_model: Key of ``AVAILABLE_MODELS`` naming the model to use.
        pdf_input: Path of the uploaded PDF as a string, or ``None``.
        image_input: Path of the uploaded image as a string, or ``None``.
        conf: Confidence threshold forwarded to ``model_runner``.
        iou: IoU threshold forwarded to ``model_runner``.

    Returns:
        A list of ``(annotated_image, "Page N")`` tuples, one per page.

    Raises:
        gr.Error: If no file was provided or the file type is unsupported.
    """
    if pdf_input is None and image_input is None:
        # gr.Error is an exception: it has to be raised to reach the user.
        raise gr.Error("Please upload a PDF or an image file.")

    # Compare extensions case-insensitively so "SCAN.PDF" or "photo.JPG" work.
    is_pdf = pdf_input is not None and pdf_input.lower().endswith(".pdf")
    is_image = image_input is not None and image_input.lower().endswith(
        (".png", ".jpg", ".jpeg")
    )
    if not (is_pdf or is_image):
        raise gr.Error(
            "Unsupported file type. Please upload a PDF or an image file "
            "with .pdf, .png, .jpg or .jpeg extension."
        )

    load_model(selected_model)

    pages = []
    if is_pdf:
        # The context manager closes the document even if inference fails.
        with pymupdf.open(pdf_input) as doc:
            for page in doc:
                pix = page.get_pixmap(dpi=200)
                pages.append(model_runner(pix.pil_image(), conf=conf, iou=iou))
    else:
        pages.append(model_runner(image_input, conf=conf, iou=iou))

    # Return a concrete list rather than a one-shot generator.
    return [(page, f"Page {number}") for number, page in enumerate(pages, start=1)]
|
|
|
|
|
def _run_example(image_path, selected_model):
    """Adapt the (image, model) example columns to process_input's argument order."""
    return process_input(selected_model, None, image_path)


# Gradio UI: inputs on the left, gallery and model settings on the right.
# NOTE(review): the widget nesting below was reconstructed from statement
# order because the source indentation was lost; confirm the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# YOLO11 Document Layout ππ")
    gr.Markdown(
        """
        Detects layout elements in documents (PDFs or images) using YOLOv11 models and the Ultralytics library.
        Upload a PDF or an image, select a model size, and click "Run" to see the detected layout elements.
        - Finetuned models available at [Armaggheddon/yolo11-document-layout](https://huggingface.co/Armaggheddon/yolo11-document-layout)
        - More available in the [GitHub Repository](https://github.com/Armaggheddon/yolo11_doc_layout)
        """
    )
    with gr.Row():
        with gr.Column():
            pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"], file_count="single")
            image_input = gr.Image(label="Upload Image", type="filepath")
            clear_button = gr.Button("Clear")
            run_button = gr.Button("Run", variant="primary")
        with gr.Column():
            outputs = gr.Gallery(label="Output Image")
            with gr.Group():
                model_name = gr.Dropdown(
                    list(AVAILABLE_MODELS.keys()),
                    value="yolo11n",
                    label="Model size",
                )
                conf = gr.Slider(0, 1, value=0.25, step=0.01, label="Confidence threshold")
                iou = gr.Slider(0, 1, value=0.45, step=0.01, label="IOU threshold")

    # Clicking an example fills the image and model inputs. The fn is an
    # adapter because the example columns are (image, model), which is not
    # the positional order process_input expects.
    examples = gr.Examples(
        examples=[[str(p), "yolo11n"] for p in IMAGE_SAMPLES],
        inputs=[image_input, model_name],
        cache_examples=False,
        fn=_run_example,
        outputs=outputs,
    )

    run_button.click(
        fn=process_input,
        inputs=[model_name, pdf_input, image_input, conf, iou],
        outputs=outputs,
    )

    # Reset both upload widgets and the gallery.
    clear_button.click(
        fn=lambda: (None, None, None),
        inputs=[],
        outputs=[pdf_input, image_input, outputs],
    )

demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|