Spaces:
Sleeping
Sleeping
File size: 1,167 Bytes
1648a50 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
import gradio as gr
from PIL import Image
from mineru_vl_utils import MinerUClient
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
# Load the MinerU model. The dtype and the device placement are both left to
# the library's "auto" settings.
model_path = "opendatalab/MinerU2.5-2509-1.2B"
model = Qwen2VLForConditionalGeneration.from_pretrained(
    model_path,
    device_map="auto",
    torch_dtype="auto",
)

# Processor loaded from the same checkpoint as the model.
processor = AutoProcessor.from_pretrained(model_path, use_fast=True)

# MinerU client that runs extraction through the in-process transformers model.
client = MinerUClient(backend="transformers", model=model, processor=processor)
def _block_text(block):
    """Return the text carried by one extracted block, or None if it has none.

    The ``content`` field is tried first, then ``text`` as a fallback. Both
    attribute access and mapping access are attempted so that plain dicts and
    objects exposing properties are handled alike.
    """
    for field in ("content", "text"):
        value = getattr(block, field, None)
        if value is None and isinstance(block, dict):
            value = block.get(field)
        if value is not None:
            return value
    return None


def extract_from_image(image):
    """Extract the text of a document image with MinerU.

    Args:
        image: A ``PIL.Image.Image``, or any array accepted by
            ``Image.fromarray``. ``None`` is accepted and yields an empty
            result.

    Returns:
        The text of every extracted block that carries text, joined with
        newlines. Returns ``""`` when ``image`` is ``None`` or when no block
        carries text.
    """
    # The UI may invoke the handler without an uploaded image; there is
    # nothing to extract in that case.
    if image is None:
        return ""

    # Convert to a PIL image if necessary.
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    # Extraction.
    blocks = client.two_step_extract(image)

    # Return the concatenated text, skipping blocks without textual content
    # (NOTE(review): presumably image-only blocks — confirm against MinerU).
    texts = (_block_text(block) for block in blocks)
    return "\n".join(text for text in texts if text is not None)
# Gradio interface: one image input (delivered to the handler as a PIL image)
# and one text output.
demo = gr.Interface(
    title="MinerU2.5 - Document Extract",
    description="Upload an image or PDF page and extract structured text with MinerU2.5",
    fn=extract_from_image,
    inputs=gr.Image(type="pil"),
    outputs="text",
)
# Start the Gradio app only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    demo.launch()