Chhagan005 committed on
Commit
5982d54
·
verified ·
1 Parent(s): 641a587

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -16
app.py CHANGED
@@ -16,8 +16,6 @@ import cv2
16
  from transformers import (
17
  Qwen2VLForConditionalGeneration,
18
  Qwen2_5_VLForConditionalGeneration,
19
- Qwen3VLForConditionalGeneration,
20
- AutoModelForImageTextToText,
21
  AutoProcessor,
22
  TextIteratorStreamer,
23
  )
@@ -237,15 +235,23 @@ model_x = Qwen2VLForConditionalGeneration.from_pretrained(
237
  torch_dtype=torch.float16
238
  ).to(device).eval()
239
 
240
- # Model C: Chhagan-DocVL-Qwen3 (NEW)
241
- MODEL_ID_C = "Chhagan005/Chhagan-DocVL-Qwen3"
242
- processor_c = AutoProcessor.from_pretrained(MODEL_ID_C, trust_remote_code=True)
243
- model_c = Qwen3VLForConditionalGeneration.from_pretrained(
244
- MODEL_ID_C,
245
- attn_implementation="flash_attention_2",
246
- trust_remote_code=True,
247
- torch_dtype=torch.float16
248
- ).to(device).eval()
 
 
 
 
 
 
 
 
249
 
250
  # Model W: olmOCR-7B-0725
251
  MODEL_ID_W = "allenai/olmOCR-7B-0725"
@@ -294,9 +300,12 @@ def generate_image(model_name: str, text: str, image: Image.Image,
294
  elif model_name == "Nanonets-OCR2-3B":
295
  processor = processor_v
296
  model = model_v
297
- elif model_name == "Chhagan-DocVL-Qwen3":
298
- processor = processor_c
299
- model = model_c
 
 
 
300
  elif model_name == "olmOCR-7B-0725":
301
  processor = processor_w
302
  model = model_w
@@ -352,6 +361,11 @@ image_examples = [
352
  ["Convert this page to docling", "examples/3.jpg"],
353
  ]
354
 
 
 
 
 
 
355
  with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
356
  gr.Markdown("# **Multimodal OCR**", elem_id="main-title")
357
  with gr.Row():
@@ -379,8 +393,7 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
379
  markdown_output = gr.Markdown(label="(Result.Md)")
380
 
381
  model_choice = gr.Radio(
382
- choices=["Nanonets-OCR2-3B", "olmOCR-7B-0725", "RolmOCR-7B",
383
- "Chhagan-DocVL-Qwen3", "Qwen2-VL-OCR-2B"],
384
  label="Select Model",
385
  value="Nanonets-OCR2-3B"
386
  )
 
16
  from transformers import (
17
  Qwen2VLForConditionalGeneration,
18
  Qwen2_5_VLForConditionalGeneration,
 
 
19
  AutoProcessor,
20
  TextIteratorStreamer,
21
  )
 
235
  torch_dtype=torch.float16
236
  ).to(device).eval()
237
 
238
+ # Model P: PaddleOCR-VL (NEW - More stable than Qwen3)
239
+ MODEL_ID_P = "PaddlePaddle/PaddleOCR-VL"
240
+ try:
241
+ processor_p = AutoProcessor.from_pretrained(MODEL_ID_P, trust_remote_code=True)
242
+ model_p = Qwen2_5_VLForConditionalGeneration.from_pretrained(
243
+ MODEL_ID_P,
244
+ attn_implementation="flash_attention_2",
245
+ trust_remote_code=True,
246
+ torch_dtype=torch.float16
247
+ ).to(device).eval()
248
+ PADDLE_AVAILABLE = True
249
+ print("✓ PaddleOCR-VL model loaded successfully")
250
+ except Exception as e:
251
+ print(f"✗ PaddleOCR-VL model not available: {e}")
252
+ PADDLE_AVAILABLE = False
253
+ processor_p = None
254
+ model_p = None
255
 
256
  # Model W: olmOCR-7B-0725
257
  MODEL_ID_W = "allenai/olmOCR-7B-0725"
 
300
  elif model_name == "Nanonets-OCR2-3B":
301
  processor = processor_v
302
  model = model_v
303
+ elif model_name == "PaddleOCR-VL":
304
+ if not PADDLE_AVAILABLE:
305
+ yield "PaddleOCR-VL model is not available.", "PaddleOCR-VL model is not available."
306
+ return
307
+ processor = processor_p
308
+ model = model_p
309
  elif model_name == "olmOCR-7B-0725":
310
  processor = processor_w
311
  model = model_w
 
361
  ["Convert this page to docling", "examples/3.jpg"],
362
  ]
363
 
364
+ # Build model choices dynamically
365
+ model_choices = ["Nanonets-OCR2-3B", "olmOCR-7B-0725", "RolmOCR-7B", "Qwen2-VL-OCR-2B"]
366
+ if PADDLE_AVAILABLE:
367
+ model_choices.append("PaddleOCR-VL")
368
+
369
  with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
370
  gr.Markdown("# **Multimodal OCR**", elem_id="main-title")
371
  with gr.Row():
 
393
  markdown_output = gr.Markdown(label="(Result.Md)")
394
 
395
  model_choice = gr.Radio(
396
+ choices=model_choices,
 
397
  label="Select Model",
398
  value="Nanonets-OCR2-3B"
399
  )