Spaces:

Chhagan005
/

Multi_ML_OCR

Sleeping

App Files Community

Chhagan005 committed 19 days ago

Commit

5982d54

verified ·

1 Parent(s): 641a587

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -16

app.py CHANGED Viewed

@@ -16,8 +16,6 @@ import cv2
 from transformers import (
     Qwen2VLForConditionalGeneration,
     Qwen2_5_VLForConditionalGeneration,
-    Qwen3VLForConditionalGeneration,
-    AutoModelForImageTextToText,
     AutoProcessor,
     TextIteratorStreamer,
 )
@@ -237,15 +235,23 @@ model_x = Qwen2VLForConditionalGeneration.from_pretrained(
     torch_dtype=torch.float16
 ).to(device).eval()
-# Model C: Chhagan-DocVL-Qwen3 (NEW)
-MODEL_ID_C = "Chhagan005/Chhagan-DocVL-Qwen3"
-processor_c = AutoProcessor.from_pretrained(MODEL_ID_C, trust_remote_code=True)
-model_c = Qwen3VLForConditionalGeneration.from_pretrained(
-    MODEL_ID_C,
-    attn_implementation="flash_attention_2",
-    trust_remote_code=True,
-    torch_dtype=torch.float16
-).to(device).eval()
 # Model W: olmOCR-7B-0725
 MODEL_ID_W = "allenai/olmOCR-7B-0725"
@@ -294,9 +300,12 @@ def generate_image(model_name: str, text: str, image: Image.Image,
     elif model_name == "Nanonets-OCR2-3B":
         processor = processor_v
         model = model_v
-    elif model_name == "Chhagan-DocVL-Qwen3":
-        processor = processor_c
-        model = model_c
     elif model_name == "olmOCR-7B-0725":
         processor = processor_w
         model = model_w
@@ -352,6 +361,11 @@ image_examples = [
     ["Convert this page to docling", "examples/3.jpg"],
 ]
 with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
     gr.Markdown("# **Multimodal OCR**", elem_id="main-title")
     with gr.Row():
@@ -379,8 +393,7 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
                 markdown_output = gr.Markdown(label="(Result.Md)")
             model_choice = gr.Radio(
-                choices=["Nanonets-OCR2-3B", "olmOCR-7B-0725", "RolmOCR-7B",
-                         "Chhagan-DocVL-Qwen3", "Qwen2-VL-OCR-2B"],
                 label="Select Model",
                 value="Nanonets-OCR2-3B"
             )

 from transformers import (
     Qwen2VLForConditionalGeneration,
     Qwen2_5_VLForConditionalGeneration,
     AutoProcessor,
     TextIteratorStreamer,
 )
     torch_dtype=torch.float16
 ).to(device).eval()
+# Model P: PaddleOCR-VL (NEW - More stable than Qwen3)
+MODEL_ID_P = "PaddlePaddle/PaddleOCR-VL"
+try:
+    processor_p = AutoProcessor.from_pretrained(MODEL_ID_P, trust_remote_code=True)
+    model_p = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+        MODEL_ID_P,
+        attn_implementation="flash_attention_2",
+        trust_remote_code=True,
+        torch_dtype=torch.float16
+    ).to(device).eval()
+    PADDLE_AVAILABLE = True
+    print("✓ PaddleOCR-VL model loaded successfully")
+except Exception as e:
+    print(f"✗ PaddleOCR-VL model not available: {e}")
+    PADDLE_AVAILABLE = False
+    processor_p = None
+    model_p = None
 # Model W: olmOCR-7B-0725
 MODEL_ID_W = "allenai/olmOCR-7B-0725"
     elif model_name == "Nanonets-OCR2-3B":
         processor = processor_v
         model = model_v
+    elif model_name == "PaddleOCR-VL":
+        if not PADDLE_AVAILABLE:
+            yield "PaddleOCR-VL model is not available.", "PaddleOCR-VL model is not available."
+            return
+        processor = processor_p
+        model = model_p
     elif model_name == "olmOCR-7B-0725":
         processor = processor_w
         model = model_w
     ["Convert this page to docling", "examples/3.jpg"],
 ]
+# Build model choices dynamically
+model_choices = ["Nanonets-OCR2-3B", "olmOCR-7B-0725", "RolmOCR-7B", "Qwen2-VL-OCR-2B"]
+if PADDLE_AVAILABLE:
+    model_choices.append("PaddleOCR-VL")
 with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
     gr.Markdown("# **Multimodal OCR**", elem_id="main-title")
     with gr.Row():
                 markdown_output = gr.Markdown(label="(Result.Md)")
             model_choice = gr.Radio(
+                choices=model_choices,
                 label="Select Model",
                 value="Nanonets-OCR2-3B"
             )