prithivMLmods committed on
Commit
f14125e
·
verified ·
1 Parent(s): a4fae6f

update app

Browse files
Files changed (1) hide show
  1. app.py +8 -8
app.py CHANGED
@@ -146,13 +146,13 @@ def process_ocr_task(image, model_size, task_type, ref_text):
146
 
147
  with tempfile.TemporaryDirectory() as output_path:
148
  # Build the prompt
149
- if task_type == "πŸ“ Free OCR":
150
  prompt = "<image>\nFree OCR."
151
- elif task_type == "πŸ“„ Convert to Markdown":
152
  prompt = "<image>\n<|grounding|>Convert the document to markdown."
153
- elif task_type == "πŸ“ˆ Parse Figure":
154
  prompt = "<image>\nParse the figure."
155
- elif task_type == "πŸ” Locate Object by Reference":
156
  if not ref_text or ref_text.strip() == "":
157
  raise gr.Error("For the 'Locate' task, you must provide the reference text to find!")
158
  prompt = f"<image>\nLocate <|ref|>{ref_text.strip()}<|/ref|> in the image."
@@ -225,9 +225,9 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
225
  with gr.Row():
226
  with gr.Column(scale=1):
227
  image_input = gr.Image(type="pil", label="Upload Image", sources=["upload", "clipboard"])
228
- model_size = gr.Dropdown(choices=["Tiny", "Small", "Base", "Large", "Gundam (Recommended)"], value="Gundam (Recommended)", label="βš™οΈ Resolution Size")
229
- task_type = gr.Dropdown(choices=["πŸ“ Free OCR", "πŸ“„ Convert to Markdown", "πŸ“ˆ Parse Figure", "πŸ” Locate Object by Reference"], value="πŸ“„ Convert to Markdown", label="πŸš€ Task Type")
230
- ref_text_input = gr.Textbox(label="πŸ“ Reference Text (for Locate task)", placeholder="e.g., the teacher, 20-10, a red car...", visible=False)
231
  submit_btn = gr.Button("Process Image", variant="primary")
232
 
233
  with gr.Column(scale=2):
@@ -236,7 +236,7 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
236
 
237
  # --- UI Interaction Logic ---
238
  def toggle_ref_text_visibility(task):
239
- return gr.Textbox(visible=True) if task == "πŸ” Locate Object by Reference" else gr.Textbox(visible=False)
240
 
241
  task_type.change(fn=toggle_ref_text_visibility, inputs=task_type, outputs=ref_text_input)
242
  submit_btn.click(fn=process_ocr_task, inputs=[image_input, model_size, task_type, ref_text_input], outputs=[output_text, output_image])
 
146
 
147
  with tempfile.TemporaryDirectory() as output_path:
148
  # Build the prompt
149
+ if task_type == "Free OCR":
150
  prompt = "<image>\nFree OCR."
151
+ elif task_type == "Convert to Markdown":
152
  prompt = "<image>\n<|grounding|>Convert the document to markdown."
153
+ elif task_type == "Parse Figure":
154
  prompt = "<image>\nParse the figure."
155
+ elif task_type == "Locate Object by Reference":
156
  if not ref_text or ref_text.strip() == "":
157
  raise gr.Error("For the 'Locate' task, you must provide the reference text to find!")
158
  prompt = f"<image>\nLocate <|ref|>{ref_text.strip()}<|/ref|> in the image."
 
225
  with gr.Row():
226
  with gr.Column(scale=1):
227
  image_input = gr.Image(type="pil", label="Upload Image", sources=["upload", "clipboard"])
228
+ model_size = gr.Dropdown(choices=["Tiny", "Small", "Base", "Large", "Gundam (Recommended)"], value="Gundam (Recommended)", label="Resolution Size")
229
+ task_type = gr.Dropdown(choices=["Free OCR", "Convert to Markdown", "Parse Figure", "Locate Object by Reference"], value="Convert to Markdown", label="Task Type")
230
+ ref_text_input = gr.Textbox(label="Reference Text (for Locate task)", placeholder="e.g., the teacher, 20-10, a red car...", visible=False)
231
  submit_btn = gr.Button("Process Image", variant="primary")
232
 
233
  with gr.Column(scale=2):
 
236
 
237
  # --- UI Interaction Logic ---
238
  def toggle_ref_text_visibility(task):
239
+ return gr.Textbox(visible=True) if task == "Locate Object by Reference" else gr.Textbox(visible=False)
240
 
241
  task_type.change(fn=toggle_ref_text_visibility, inputs=task_type, outputs=ref_text_input)
242
  submit_btn.click(fn=process_ocr_task, inputs=[image_input, model_size, task_type, ref_text_input], outputs=[output_text, output_image])