DeepSeek-OCR-experimental

Running on Zero

App Files Files Community

prithivMLmods committed on Oct 29

Commit

f14125e

verified ·

1 Parent(s): a4fae6f

update app

Browse files

Files changed (1) hide show

app.py +8 -8

app.py CHANGED Viewed

@@ -146,13 +146,13 @@ def process_ocr_task(image, model_size, task_type, ref_text):
     with tempfile.TemporaryDirectory() as output_path:
         # Build the prompt
-        if task_type == "📝 Free OCR":
             prompt = "<image>\nFree OCR."
-        elif task_type == "📄 Convert to Markdown":
             prompt = "<image>\n<|grounding|>Convert the document to markdown."
-        elif task_type == "📈 Parse Figure":
             prompt = "<image>\nParse the figure."
-        elif task_type == "🔍 Locate Object by Reference":
             if not ref_text or ref_text.strip() == "":
                 raise gr.Error("For the 'Locate' task, you must provide the reference text to find!")
             prompt = f"<image>\nLocate <|ref|>{ref_text.strip()}<|/ref|> in the image."
@@ -225,9 +225,9 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
     with gr.Row():
         with gr.Column(scale=1):
             image_input = gr.Image(type="pil", label="Upload Image", sources=["upload", "clipboard"])
-            model_size = gr.Dropdown(choices=["Tiny", "Small", "Base", "Large", "Gundam (Recommended)"], value="Gundam (Recommended)", label="⚙️ Resolution Size")
-            task_type = gr.Dropdown(choices=["📝 Free OCR", "📄 Convert to Markdown", "📈 Parse Figure", "🔍 Locate Object by Reference"], value="📄 Convert to Markdown", label="🚀 Task Type")
-            ref_text_input = gr.Textbox(label="📝 Reference Text (for Locate task)", placeholder="e.g., the teacher, 20-10, a red car...", visible=False)
             submit_btn = gr.Button("Process Image", variant="primary")
         with gr.Column(scale=2):
@@ -236,7 +236,7 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
     # --- UI Interaction Logic ---
     def toggle_ref_text_visibility(task):
-        return gr.Textbox(visible=True) if task == "🔍 Locate Object by Reference" else gr.Textbox(visible=False)
     task_type.change(fn=toggle_ref_text_visibility, inputs=task_type, outputs=ref_text_input)
     submit_btn.click(fn=process_ocr_task, inputs=[image_input, model_size, task_type, ref_text_input], outputs=[output_text, output_image])

     with tempfile.TemporaryDirectory() as output_path:
         # Build the prompt
+        if task_type == "Free OCR":
             prompt = "<image>\nFree OCR."
+        elif task_type == "Convert to Markdown":
             prompt = "<image>\n<|grounding|>Convert the document to markdown."
+        elif task_type == "Parse Figure":
             prompt = "<image>\nParse the figure."
+        elif task_type == "Locate Object by Reference":
             if not ref_text or ref_text.strip() == "":
                 raise gr.Error("For the 'Locate' task, you must provide the reference text to find!")
             prompt = f"<image>\nLocate <|ref|>{ref_text.strip()}<|/ref|> in the image."
     with gr.Row():
         with gr.Column(scale=1):
             image_input = gr.Image(type="pil", label="Upload Image", sources=["upload", "clipboard"])
+            model_size = gr.Dropdown(choices=["Tiny", "Small", "Base", "Large", "Gundam (Recommended)"], value="Gundam (Recommended)", label="Resolution Size")
+            task_type = gr.Dropdown(choices=["Free OCR", "Convert to Markdown", "Parse Figure", "Locate Object by Reference"], value="Convert to Markdown", label="Task Type")
+            ref_text_input = gr.Textbox(label="Reference Text (for Locate task)", placeholder="e.g., the teacher, 20-10, a red car...", visible=False)
             submit_btn = gr.Button("Process Image", variant="primary")
         with gr.Column(scale=2):
     # --- UI Interaction Logic ---
     def toggle_ref_text_visibility(task):
+        return gr.Textbox(visible=True) if task == "Locate Object by Reference" else gr.Textbox(visible=False)
     task_type.change(fn=toggle_ref_text_visibility, inputs=task_type, outputs=ref_text_input)
     submit_btn.click(fn=process_ocr_task, inputs=[image_input, model_size, task_type, ref_text_input], outputs=[output_text, output_image])