prithivMLmods committed on
Commit
a5879e9
·
verified ·
1 Parent(s): b66a251

update app

Browse files
Files changed (1) hide show
  1. app.py +5 -2
app.py CHANGED
@@ -107,7 +107,7 @@ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
107
 
108
  model = AutoModel.from_pretrained(
109
  model_name,
110
- _attn_implementation="flash_attention_2",
111
  trust_remote_code=True,
112
  use_safetensors=True,
113
  ).to(device).eval() # Move to device and set to eval mode
@@ -208,13 +208,16 @@ def process_ocr_task(image, model_size, task_type, ref_text):
208
 
209
  return text_result, result_image_pil
210
 
 
 
 
211
  with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
212
  gr.Markdown("# **DeepSeek OCR [exp]**", elem_id="main-title")
213
 
214
 
215
  with gr.Row():
216
  with gr.Column(scale=1):
217
- image_input = gr.Image(type="pil", label="Upload Image", sources=["upload", "clipboard"])
218
  model_size = gr.Dropdown(choices=["Tiny", "Small", "Base", "Large", "Gundam (Recommended)"], value="Large", label="Resolution Size")
219
  task_type = gr.Dropdown(choices=["Free OCR", "Convert to Markdown", "Parse Figure", "Locate Object by Reference"], value="Convert to Markdown", label="Task Type")
220
  ref_text_input = gr.Textbox(label="Reference Text (for Locate task)", placeholder="e.g., the teacher, 20-10, a red car...", visible=False)
 
107
 
108
  model = AutoModel.from_pretrained(
109
  model_name,
110
+ #_attn_implementation="flash_attention_2",
111
  trust_remote_code=True,
112
  use_safetensors=True,
113
  ).to(device).eval() # Move to device and set to eval mode
 
208
 
209
  return text_result, result_image_pil
210
 
211
+ url = "https://huggingface.co/spaces/prithivMLmods/Multimodal-OCR3/resolve/main/examples/3.jpg?download=true"
212
+ example_image = Image.open(requests.get(url, stream=True).raw).convert("RGB")
213
+
214
  with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
215
  gr.Markdown("# **DeepSeek OCR [exp]**", elem_id="main-title")
216
 
217
 
218
  with gr.Row():
219
  with gr.Column(scale=1):
220
+ image_input = gr.Image(type="pil", label="Upload Image", sources=["upload", "clipboard"], value=example_image, height=290)
221
  model_size = gr.Dropdown(choices=["Tiny", "Small", "Base", "Large", "Gundam (Recommended)"], value="Large", label="Resolution Size")
222
  task_type = gr.Dropdown(choices=["Free OCR", "Convert to Markdown", "Parse Figure", "Locate Object by Reference"], value="Convert to Markdown", label="Task Type")
223
  ref_text_input = gr.Textbox(label="Reference Text (for Locate task)", placeholder="e.g., the teacher, 20-10, a red car...", visible=False)