Spaces:

prithivMLmods
/

Multimodal-OCR3

Running on Zero

prithivMLmods committed on Oct 18

Commit

30151c4

verified ·

1 Parent(s): 7abceaa

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -96,8 +96,8 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-# Load Nanonets-OCR-s
-MODEL_ID_M = "nanonets/Nanonets-OCR-s"
 processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
 model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_M,
@@ -125,7 +125,7 @@ def generate_image(model_name: str, text: str, image: Image.Image,
                    top_k: int = 50,
                    repetition_penalty: float = 1.2):
     """Generate responses for image input using the selected model."""
-    if model_name == "Nanonets-OCR-s":
         processor, model = processor_m, model_m
     elif model_name == "Dots.OCR":
         processor, model = processor_d, model_d
@@ -197,9 +197,9 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
                 formatted_output = gr.Markdown(label="Formatted Result")
             model_choice = gr.Radio(
-                choices=["Nanonets-OCR-s", "Dots.OCR"],
                 label="Select Model",
-                value="Nanonets-OCR-s"
             )
     image_submit.click(

 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+# Load Nanonets-OCR2-3B
+MODEL_ID_M = "nanonets/Nanonets-OCR2-3B"
 processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
 model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_M,
                    top_k: int = 50,
                    repetition_penalty: float = 1.2):
     """Generate responses for image input using the selected model."""
+    if model_name == "Nanonets-OCR2-3B":
         processor, model = processor_m, model_m
     elif model_name == "Dots.OCR":
         processor, model = processor_d, model_d
                 formatted_output = gr.Markdown(label="Formatted Result")
             model_choice = gr.Radio(
+                choices=["Nanonets-OCR2-3B", "Dots.OCR"],
                 label="Select Model",
+                value="Nanonets-OCR2-3B"
             )
     image_submit.click(