callmeumer committed on
Commit
106fe7e
·
verified ·
1 Parent(s): c72c681

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -9
app.py CHANGED
@@ -1,10 +1,11 @@
1
- from typing import Optional
2
  import spaces
3
  import gradio as gr
4
  import numpy as np
5
  import torch
6
  from PIL import Image
7
  import io
 
8
 
9
  import base64, os
10
  from util.utils import check_ocr_box, get_yolo_model, get_caption_model_processor, get_som_labeled_img
@@ -47,7 +48,7 @@ def process(
47
  iou_threshold,
48
  use_paddleocr,
49
  imgsz
50
- ) -> Optional[Image.Image]:
51
 
52
  box_overlay_ratio = image_input.size[0] / 3200
53
  draw_bbox_config = {
@@ -80,11 +81,13 @@ def process(
80
  imgsz=imgsz
81
  )
82
 
83
- image = Image.open(io.BytesIO(base64.b64decode(dino_labled_img)))
84
  print('finish processing')
85
  parsed_content_list = '\n'.join([f'icon {i}: ' + str(v) for i, v in enumerate(parsed_content_list)])
86
 
87
- return image, str(parsed_content_list)
 
 
 
88
 
89
  # Create interface with simplified component definitions
90
  with gr.Blocks() as demo:
@@ -133,14 +136,14 @@ with gr.Blocks() as demo:
133
  )
134
 
135
  with gr.Column():
136
- image_output_component = gr.Image(
137
- type='pil',
138
- label='Image Output'
139
- )
140
  text_output_component = gr.Textbox(
141
  label='Parsed screen elements',
142
  placeholder='Text Output'
143
  )
 
 
 
 
144
 
145
  submit_button_component.click(
146
  fn=process,
@@ -151,7 +154,7 @@ with gr.Blocks() as demo:
151
  use_paddleocr_component,
152
  imgsz_component
153
  ],
154
- outputs=[image_output_component, text_output_component]
155
  )
156
 
157
  # Try launching with different configurations
 
1
+ from typing import Optional, Tuple
2
  import spaces
3
  import gradio as gr
4
  import numpy as np
5
  import torch
6
  from PIL import Image
7
  import io
8
+ import json
9
 
10
  import base64, os
11
  from util.utils import check_ocr_box, get_yolo_model, get_caption_model_processor, get_som_labeled_img
 
48
  iou_threshold,
49
  use_paddleocr,
50
  imgsz
51
+ ) -> Tuple[str, str]:
52
 
53
  box_overlay_ratio = image_input.size[0] / 3200
54
  draw_bbox_config = {
 
81
  imgsz=imgsz
82
  )
83
 
 
84
  print('finish processing')
85
  parsed_content_list = '\n'.join([f'icon {i}: ' + str(v) for i, v in enumerate(parsed_content_list)])
86
 
87
+ # Convert label_coordinates to JSON string for API consumption
88
+ label_coordinates_json = json.dumps(label_coordinates)
89
+
90
+ return str(parsed_content_list), label_coordinates_json
91
 
92
  # Create interface with simplified component definitions
93
  with gr.Blocks() as demo:
 
136
  )
137
 
138
  with gr.Column():
 
 
 
 
139
  text_output_component = gr.Textbox(
140
  label='Parsed screen elements',
141
  placeholder='Text Output'
142
  )
143
+ coordinates_output_component = gr.Textbox(
144
+ label='Label Coordinates (JSON)',
145
+ placeholder='Coordinates JSON Output'
146
+ )
147
 
148
  submit_button_component.click(
149
  fn=process,
 
154
  use_paddleocr_component,
155
  imgsz_component
156
  ],
157
+ outputs=[text_output_component, coordinates_output_component]
158
  )
159
 
160
  # Try launching with different configurations