OmniParser-v2

Running on Zero

App Files Community

callmeumer committed on Jul 10

Commit

106fe7e

verified ·

1 Parent(s): c72c681

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -9

app.py CHANGED Viewed

@@ -1,10 +1,11 @@
-from typing import Optional
 import spaces
 import gradio as gr
 import numpy as np
 import torch
 from PIL import Image
 import io
 import base64, os
 from util.utils import check_ocr_box, get_yolo_model, get_caption_model_processor, get_som_labeled_img
@@ -47,7 +48,7 @@ def process(
     iou_threshold,
     use_paddleocr,
     imgsz
-) -> Optional[Image.Image]:
     box_overlay_ratio = image_input.size[0] / 3200
     draw_bbox_config = {
@@ -80,11 +81,13 @@ def process(
         imgsz=imgsz
     )
-    image = Image.open(io.BytesIO(base64.b64decode(dino_labled_img)))
     print('finish processing')
     parsed_content_list = '\n'.join([f'icon {i}: ' + str(v) for i, v in enumerate(parsed_content_list)])
-    return image, str(parsed_content_list)
 # Create interface with simplified component definitions
 with gr.Blocks() as demo:
@@ -133,14 +136,14 @@ with gr.Blocks() as demo:
             )
         with gr.Column():
-            image_output_component = gr.Image(
-                type='pil',
-                label='Image Output'
-            )
             text_output_component = gr.Textbox(
                 label='Parsed screen elements',
                 placeholder='Text Output'
             )
     submit_button_component.click(
         fn=process,
@@ -151,7 +154,7 @@ with gr.Blocks() as demo:
             use_paddleocr_component,
             imgsz_component
         ],
-        outputs=[image_output_component, text_output_component]
     )
 # Try launching with different configurations

+from typing import Optional, Tuple
 import spaces
 import gradio as gr
 import numpy as np
 import torch
 from PIL import Image
 import io
+import json
 import base64, os
 from util.utils import check_ocr_box, get_yolo_model, get_caption_model_processor, get_som_labeled_img
     iou_threshold,
     use_paddleocr,
     imgsz
+) -> Tuple[str, str]:
     box_overlay_ratio = image_input.size[0] / 3200
     draw_bbox_config = {
         imgsz=imgsz
     )
     print('finish processing')
     parsed_content_list = '\n'.join([f'icon {i}: ' + str(v) for i, v in enumerate(parsed_content_list)])
+    # Convert label_coordinates to JSON string for API consumption
+    label_coordinates_json = json.dumps(label_coordinates)
+    return str(parsed_content_list), label_coordinates_json
 # Create interface with simplified component definitions
 with gr.Blocks() as demo:
             )
         with gr.Column():
             text_output_component = gr.Textbox(
                 label='Parsed screen elements',
                 placeholder='Text Output'
             )
+            coordinates_output_component = gr.Textbox(
+                label='Label Coordinates (JSON)',
+                placeholder='Coordinates JSON Output'
+            )
     submit_button_component.click(
         fn=process,
             use_paddleocr_component,
             imgsz_component
         ],
+        outputs=[text_output_component, coordinates_output_component]
     )
 # Try launching with different configurations