Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -16,8 +16,6 @@ import cv2
|
|
| 16 |
from transformers import (
|
| 17 |
Qwen2VLForConditionalGeneration,
|
| 18 |
Qwen2_5_VLForConditionalGeneration,
|
| 19 |
-
Qwen3VLForConditionalGeneration,
|
| 20 |
-
AutoModelForImageTextToText,
|
| 21 |
AutoProcessor,
|
| 22 |
TextIteratorStreamer,
|
| 23 |
)
|
|
@@ -237,15 +235,23 @@ model_x = Qwen2VLForConditionalGeneration.from_pretrained(
|
|
| 237 |
torch_dtype=torch.float16
|
| 238 |
).to(device).eval()
|
| 239 |
|
| 240 |
-
# Model
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
|
| 250 |
# Model W: olmOCR-7B-0725
|
| 251 |
MODEL_ID_W = "allenai/olmOCR-7B-0725"
|
|
@@ -294,9 +300,12 @@ def generate_image(model_name: str, text: str, image: Image.Image,
|
|
| 294 |
elif model_name == "Nanonets-OCR2-3B":
|
| 295 |
processor = processor_v
|
| 296 |
model = model_v
|
| 297 |
-
elif model_name == "
|
| 298 |
-
|
| 299 |
-
|
|
|
|
|
|
|
|
|
|
| 300 |
elif model_name == "olmOCR-7B-0725":
|
| 301 |
processor = processor_w
|
| 302 |
model = model_w
|
|
@@ -352,6 +361,11 @@ image_examples = [
|
|
| 352 |
["Convert this page to docling", "examples/3.jpg"],
|
| 353 |
]
|
| 354 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 355 |
with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
|
| 356 |
gr.Markdown("# **Multimodal OCR**", elem_id="main-title")
|
| 357 |
with gr.Row():
|
|
@@ -379,8 +393,7 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
|
|
| 379 |
markdown_output = gr.Markdown(label="(Result.Md)")
|
| 380 |
|
| 381 |
model_choice = gr.Radio(
|
| 382 |
-
choices=
|
| 383 |
-
"Chhagan-DocVL-Qwen3", "Qwen2-VL-OCR-2B"],
|
| 384 |
label="Select Model",
|
| 385 |
value="Nanonets-OCR2-3B"
|
| 386 |
)
|
|
|
|
| 16 |
from transformers import (
|
| 17 |
Qwen2VLForConditionalGeneration,
|
| 18 |
Qwen2_5_VLForConditionalGeneration,
|
|
|
|
|
|
|
| 19 |
AutoProcessor,
|
| 20 |
TextIteratorStreamer,
|
| 21 |
)
|
|
|
|
| 235 |
torch_dtype=torch.float16
|
| 236 |
).to(device).eval()
|
| 237 |
|
| 238 |
+
# Model P: PaddleOCR-VL (NEW - More stable than Qwen3)
|
| 239 |
+
MODEL_ID_P = "PaddlePaddle/PaddleOCR-VL"
|
| 240 |
+
try:
|
| 241 |
+
processor_p = AutoProcessor.from_pretrained(MODEL_ID_P, trust_remote_code=True)
|
| 242 |
+
model_p = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
| 243 |
+
MODEL_ID_P,
|
| 244 |
+
attn_implementation="flash_attention_2",
|
| 245 |
+
trust_remote_code=True,
|
| 246 |
+
torch_dtype=torch.float16
|
| 247 |
+
).to(device).eval()
|
| 248 |
+
PADDLE_AVAILABLE = True
|
| 249 |
+
print("✓ PaddleOCR-VL model loaded successfully")
|
| 250 |
+
except Exception as e:
|
| 251 |
+
print(f"✗ PaddleOCR-VL model not available: {e}")
|
| 252 |
+
PADDLE_AVAILABLE = False
|
| 253 |
+
processor_p = None
|
| 254 |
+
model_p = None
|
| 255 |
|
| 256 |
# Model W: olmOCR-7B-0725
|
| 257 |
MODEL_ID_W = "allenai/olmOCR-7B-0725"
|
|
|
|
| 300 |
elif model_name == "Nanonets-OCR2-3B":
|
| 301 |
processor = processor_v
|
| 302 |
model = model_v
|
| 303 |
+
elif model_name == "PaddleOCR-VL":
|
| 304 |
+
if not PADDLE_AVAILABLE:
|
| 305 |
+
yield "PaddleOCR-VL model is not available.", "PaddleOCR-VL model is not available."
|
| 306 |
+
return
|
| 307 |
+
processor = processor_p
|
| 308 |
+
model = model_p
|
| 309 |
elif model_name == "olmOCR-7B-0725":
|
| 310 |
processor = processor_w
|
| 311 |
model = model_w
|
|
|
|
| 361 |
["Convert this page to docling", "examples/3.jpg"],
|
| 362 |
]
|
| 363 |
|
| 364 |
+
# Build model choices dynamically
|
| 365 |
+
model_choices = ["Nanonets-OCR2-3B", "olmOCR-7B-0725", "RolmOCR-7B", "Qwen2-VL-OCR-2B"]
|
| 366 |
+
if PADDLE_AVAILABLE:
|
| 367 |
+
model_choices.append("PaddleOCR-VL")
|
| 368 |
+
|
| 369 |
with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
|
| 370 |
gr.Markdown("# **Multimodal OCR**", elem_id="main-title")
|
| 371 |
with gr.Row():
|
|
|
|
| 393 |
markdown_output = gr.Markdown(label="(Result.Md)")
|
| 394 |
|
| 395 |
model_choice = gr.Radio(
|
| 396 |
+
choices=model_choices,
|
|
|
|
| 397 |
label="Select Model",
|
| 398 |
value="Nanonets-OCR2-3B"
|
| 399 |
)
|