DeepSeek-OCR-experimental

Running on Zero

prithivMLmods committed on Oct 29

Commit

a5879e9

verified ·

1 Parent(s): b66a251

update app

Files changed (1) hide show

app.py CHANGED Viewed

@@ -107,7 +107,7 @@ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 model = AutoModel.from_pretrained(
     model_name,
-    _attn_implementation="flash_attention_2",
     trust_remote_code=True,
     use_safetensors=True,
 ).to(device).eval() # Move to device and set to eval mode
@@ -208,13 +208,16 @@ def process_ocr_task(image, model_size, task_type, ref_text):
         return text_result, result_image_pil
 with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
     gr.Markdown("# **DeepSeek OCR [exp]**", elem_id="main-title")
     with gr.Row():
         with gr.Column(scale=1):
-            image_input = gr.Image(type="pil", label="Upload Image", sources=["upload", "clipboard"])
             model_size = gr.Dropdown(choices=["Tiny", "Small", "Base", "Large", "Gundam (Recommended)"], value="Large", label="Resolution Size")
             task_type = gr.Dropdown(choices=["Free OCR", "Convert to Markdown", "Parse Figure", "Locate Object by Reference"], value="Convert to Markdown", label="Task Type")
             ref_text_input = gr.Textbox(label="Reference Text (for Locate task)", placeholder="e.g., the teacher, 20-10, a red car...", visible=False)

 model = AutoModel.from_pretrained(
     model_name,
+    #_attn_implementation="flash_attention_2",
     trust_remote_code=True,
     use_safetensors=True,
 ).to(device).eval() # Move to device and set to eval mode
         return text_result, result_image_pil
+url = "https://huggingface.co/spaces/prithivMLmods/Multimodal-OCR3/resolve/main/examples/3.jpg?download=true"
+example_image = Image.open(requests.get(url, stream=True).raw).convert("RGB")
 with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
     gr.Markdown("# **DeepSeek OCR [exp]**", elem_id="main-title")
     with gr.Row():
         with gr.Column(scale=1):
+            image_input = gr.Image(type="pil", label="Upload Image", sources=["upload", "clipboard"], value=example_image, height=290)
             model_size = gr.Dropdown(choices=["Tiny", "Small", "Base", "Large", "Gundam (Recommended)"], value="Large", label="Resolution Size")
             task_type = gr.Dropdown(choices=["Free OCR", "Convert to Markdown", "Parse Figure", "Locate Object by Reference"], value="Convert to Markdown", label="Task Type")
             ref_text_input = gr.Textbox(label="Reference Text (for Locate task)", placeholder="e.g., the teacher, 20-10, a red car...", visible=False)