Spaces: Running on Zero
update app
Browse files
app.py
CHANGED
|
@@ -146,13 +146,13 @@ def process_ocr_task(image, model_size, task_type, ref_text):
|
|
| 146 |
|
| 147 |
with tempfile.TemporaryDirectory() as output_path:
|
| 148 |
# Build the prompt
|
| 149 |
-
if task_type == "
|
| 150 |
prompt = "<image>\nFree OCR."
|
| 151 |
-
elif task_type == "
|
| 152 |
prompt = "<image>\n<|grounding|>Convert the document to markdown."
|
| 153 |
-
elif task_type == "
|
| 154 |
prompt = "<image>\nParse the figure."
|
| 155 |
-
elif task_type == "
|
| 156 |
if not ref_text or ref_text.strip() == "":
|
| 157 |
raise gr.Error("For the 'Locate' task, you must provide the reference text to find!")
|
| 158 |
prompt = f"<image>\nLocate <|ref|>{ref_text.strip()}<|/ref|> in the image."
|
|
@@ -225,9 +225,9 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
|
|
| 225 |
with gr.Row():
|
| 226 |
with gr.Column(scale=1):
|
| 227 |
image_input = gr.Image(type="pil", label="Upload Image", sources=["upload", "clipboard"])
|
| 228 |
-
model_size = gr.Dropdown(choices=["Tiny", "Small", "Base", "Large", "Gundam (Recommended)"], value="Gundam (Recommended)", label="
|
| 229 |
-
task_type = gr.Dropdown(choices=["
|
| 230 |
-
ref_text_input = gr.Textbox(label="
|
| 231 |
submit_btn = gr.Button("Process Image", variant="primary")
|
| 232 |
|
| 233 |
with gr.Column(scale=2):
|
|
@@ -236,7 +236,7 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
|
|
| 236 |
|
| 237 |
# --- UI Interaction Logic ---
|
| 238 |
def toggle_ref_text_visibility(task):
|
| 239 |
-
return gr.Textbox(visible=True) if task == "
|
| 240 |
|
| 241 |
task_type.change(fn=toggle_ref_text_visibility, inputs=task_type, outputs=ref_text_input)
|
| 242 |
submit_btn.click(fn=process_ocr_task, inputs=[image_input, model_size, task_type, ref_text_input], outputs=[output_text, output_image])
|
|
|
|
| 146 |
|
| 147 |
with tempfile.TemporaryDirectory() as output_path:
|
| 148 |
# Build the prompt
|
| 149 |
+
if task_type == "Free OCR":
|
| 150 |
prompt = "<image>\nFree OCR."
|
| 151 |
+
elif task_type == "Convert to Markdown":
|
| 152 |
prompt = "<image>\n<|grounding|>Convert the document to markdown."
|
| 153 |
+
elif task_type == "Parse Figure":
|
| 154 |
prompt = "<image>\nParse the figure."
|
| 155 |
+
elif task_type == "Locate Object by Reference":
|
| 156 |
if not ref_text or ref_text.strip() == "":
|
| 157 |
raise gr.Error("For the 'Locate' task, you must provide the reference text to find!")
|
| 158 |
prompt = f"<image>\nLocate <|ref|>{ref_text.strip()}<|/ref|> in the image."
|
|
|
|
| 225 |
with gr.Row():
|
| 226 |
with gr.Column(scale=1):
|
| 227 |
image_input = gr.Image(type="pil", label="Upload Image", sources=["upload", "clipboard"])
|
| 228 |
+
model_size = gr.Dropdown(choices=["Tiny", "Small", "Base", "Large", "Gundam (Recommended)"], value="Gundam (Recommended)", label="Resolution Size")
|
| 229 |
+
task_type = gr.Dropdown(choices=["Free OCR", "Convert to Markdown", "Parse Figure", "Locate Object by Reference"], value="Convert to Markdown", label="Task Type")
|
| 230 |
+
ref_text_input = gr.Textbox(label="Reference Text (for Locate task)", placeholder="e.g., the teacher, 20-10, a red car...", visible=False)
|
| 231 |
submit_btn = gr.Button("Process Image", variant="primary")
|
| 232 |
|
| 233 |
with gr.Column(scale=2):
|
|
|
|
| 236 |
|
| 237 |
# --- UI Interaction Logic ---
|
| 238 |
def toggle_ref_text_visibility(task):
|
| 239 |
+
return gr.Textbox(visible=True) if task == "Locate Object by Reference" else gr.Textbox(visible=False)
|
| 240 |
|
| 241 |
task_type.change(fn=toggle_ref_text_visibility, inputs=task_type, outputs=ref_text_input)
|
| 242 |
submit_btn.click(fn=process_ocr_task, inputs=[image_input, model_size, task_type, ref_text_input], outputs=[output_text, output_image])
|