Instantnewdesign's picture
Create app.py
1648a50 verified
raw
history blame
1.17 kB
import gradio as gr
from PIL import Image
from mineru_vl_utils import MinerUClient
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
# Load the MinerU model
model_path = "opendatalab/MinerU2.5-2509-1.2B"

# Both the dtype and the device placement are left on "auto".
model = Qwen2VLForConditionalGeneration.from_pretrained(
    model_path, device_map="auto", torch_dtype="auto"
)

processor = AutoProcessor.from_pretrained(model_path, use_fast=True)

# The client is handed the locally loaded model and processor and is
# configured with the "transformers" backend.
client = MinerUClient(model=model, processor=processor, backend="transformers")
def _block_text(block):
    """Return the text carried by one extracted block, or None if it has none."""
    # A ``text`` attribute is still honoured first so that block types which
    # provide one keep behaving exactly as before.
    text = getattr(block, "text", None)
    if text is None:
        # NOTE(review): mineru_vl_utils blocks appear to expose their text as
        # ``content`` rather than ``text`` -- confirm against the installed
        # version of the library.
        text = getattr(block, "content", None)
    if text is None and isinstance(block, dict):
        text = block.get("content")
    return text


def extract_from_image(image):
    """Extract the text of an image with the MinerU client.

    Args:
        image: A ``PIL.Image.Image``, any array accepted by
            ``Image.fromarray``, or ``None`` when no image was supplied.

    Returns:
        The text of every extracted block that carries some, joined with
        newlines. An empty string when ``image`` is ``None`` or when no
        block carries text.
    """
    # Gradio invokes the handler with None when the image input is empty;
    # there is nothing to convert or extract in that case.
    if image is None:
        return ""

    # Convert to a PIL image if necessary
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    # Extraction
    blocks = client.two_step_extract(image)

    # Blocks without text are skipped; a None entry would make str.join raise.
    texts = (_block_text(block) for block in blocks)
    return "\n".join(text for text in texts if text is not None)
# Gradio interface
demo = gr.Interface(
    title="MinerU2.5 - Document Extract",
    description="Upload an image or PDF page and extract structured text with MinerU2.5",
    fn=extract_from_image,
    inputs=gr.Image(type="pil"),
    outputs="text",
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()