ricklon committed on
Commit
fce2f1f
·
1 Parent(s): bc8cf96

Refactor UI into workflow-first layout with larger workspace

Browse files
Files changed (1) hide show
  1. app.py +54 -14
app.py CHANGED
@@ -1113,6 +1113,31 @@ def select_boxes(task):
1113
  return gr.update(selected="tab_boxes")
1114
  return gr.update()
1115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1116
  def get_pdf_page_count(file_path):
1117
  if not file_path or not file_path.lower().endswith('.pdf'):
1118
  return 1
@@ -1172,10 +1197,16 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
1172
  workspace_base_size = gr.State(None)
1173
  workspace_base_image = gr.State(None)
1174
  selected_regions_state = gr.State([])
 
1175
  with gr.Row():
1176
- with gr.Column(scale=1):
1177
  file_in = gr.File(label="Upload Image or PDF", file_types=["image", ".pdf"], type="filepath")
 
1178
  page_selector = gr.Number(label="Select Page", value=1, minimum=1, step=1, visible=False)
 
 
 
 
1179
  gr.Markdown("**Image Workspace (full page + region selection)**")
1180
  if HAS_IMAGE_EDITOR:
1181
  editor_kwargs = {}
@@ -1203,35 +1234,42 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
1203
  region_editor = gr.ImageEditor(
1204
  label="Main image workspace. Recommended: freehand/highlight the target area, then click Add Region. (Crop tool for rectangles is optional.)",
1205
  type="pil",
1206
- height=300,
1207
  **editor_kwargs,
1208
  )
1209
  else:
1210
  gr.Markdown("Region drawing requires a newer Gradio version with `ImageEditor` support.")
1211
  region_editor = gr.State(None)
 
 
 
 
1212
  input_scope = gr.Radio(["Entire Page", "Selected Region"], value="Entire Page", label="Input Scope")
1213
- selection_controls = gr.Row()
 
1214
  with selection_controls:
1215
  add_region_btn = gr.Button("Add Region", variant="secondary")
1216
  clear_regions_btn = gr.Button("Clear Regions")
1217
- selection_status = gr.Textbox(label="Region Selection Status", value="No saved regions.", interactive=False)
1218
- selected_regions_gallery = gr.Gallery(label="Selected Regions", show_label=True, columns=3, height=170)
1219
- task = gr.Dropdown(list(TASK_PROMPTS.keys()), value="📋 Markdown", label="Task")
1220
- equation_zoom = gr.Checkbox(label="Equation Zoom (multipass)", value=False)
1221
- separate_eq_lines = gr.Checkbox(label="Detect Equation Lines Separately", value=False)
 
1222
  prompt = gr.Textbox(label="Prompt", lines=2, visible=False)
1223
  btn = gr.Button("Extract", variant="primary", size="lg")
1224
-
1225
- with gr.Column(scale=2):
 
1226
  with gr.Tabs() as tabs:
1227
  with gr.Tab("Text", id="tab_text"):
1228
  text_out = gr.Textbox(lines=20, buttons=["copy"], show_label=False)
1229
  with gr.Tab("Markdown Preview", id="tab_markdown"):
1230
  md_out = gr.HTML("")
1231
  with gr.Tab("Boxes", id="tab_boxes"):
1232
- img_out = gr.Image(type="pil", height=500, show_label=False)
1233
  with gr.Tab("Cropped Images", id="tab_crops"):
1234
- gallery = gr.Gallery(show_label=False, columns=3, height=400)
1235
  with gr.Tab("Raw Text", id="tab_raw"):
1236
  raw_out = gr.Textbox(lines=20, buttons=["copy"], show_label=False)
1237
  download_btn = gr.DownloadButton("Download Markdown", visible=False, variant="secondary")
@@ -1272,7 +1310,7 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
1272
  1024 base + 768 patches with dynamic cropping (2-6 patches). 144 tokens per patch + 256 base tokens.
1273
 
1274
  ### Faculty Quick Workflow
1275
- 1. Choose a task (`Markdown`, `Free OCR`, or `Locate`).
1276
  2. Choose **Input Scope**:
1277
  - `Entire Page` for the full page.
1278
  - `Selected Region` for a specific area.
@@ -1284,6 +1322,7 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
1284
  Then click **Extract**.
1285
  4. Use **Clear Regions** to reset multi-select state.
1286
  5. Review **Cropped Images** and **Boxes**: both are labeled `Region 1`, `Region 2`, etc.
 
1287
 
1288
  ### Tasks
1289
  - **Markdown**: Convert document to structured markdown with layout detection (grounding ✅)
@@ -1312,6 +1351,7 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
1312
  file_in.change(update_page_selector, [file_in], [page_selector])
1313
  task.change(toggle_prompt, [task], [prompt])
1314
  task.change(select_boxes, [task], [tabs])
 
1315
  if HAS_IMAGE_EDITOR and region_editor is not None:
1316
  file_in.change(load_image_with_size, [file_in, page_selector], [region_editor, workspace_base_size, workspace_base_image])
1317
  page_selector.change(load_image_with_size, [file_in, page_selector], [region_editor, workspace_base_size, workspace_base_image])
@@ -1398,7 +1438,7 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
1398
  [file_in, task, prompt, page_selector, equation_zoom, separate_eq_lines, input_scope, region_editor, workspace_base_size, workspace_base_image, selected_regions_state],
1399
  [text_out, md_out, raw_out, img_out, gallery, download_btn]
1400
  )
1401
- submit_event.then(select_boxes, [task], [tabs])
1402
 
1403
  if __name__ == "__main__":
1404
  # server_name="0.0.0.0" is needed locally (WSL2 → Windows access)
 
1113
  return gr.update(selected="tab_boxes")
1114
  return gr.update()
1115
 
1116
+ def toggle_scope_ui(scope):
1117
+ if scope == "Selected Region":
1118
+ hint = (
1119
+ "**Selected Region mode:** Draw/highlight on the workspace, click **Add Region** "
1120
+ "for each target area, then click **Extract**."
1121
+ )
1122
+ return (
1123
+ gr.update(value=hint),
1124
+ gr.update(visible=True),
1125
+ gr.update(visible=True),
1126
+ gr.update(visible=True),
1127
+ )
1128
+ hint = "**Entire Page mode:** No drawing needed. Click **Extract** to process the full page."
1129
+ return (
1130
+ gr.update(value=hint),
1131
+ gr.update(visible=False),
1132
+ gr.update(visible=False),
1133
+ gr.update(visible=False),
1134
+ )
1135
+
1136
+ def select_post_extract_tab(task, scope):
1137
+ if scope == "Selected Region" or task == "📍 Locate":
1138
+ return gr.update(selected="tab_boxes")
1139
+ return gr.update(selected="tab_text")
1140
+
1141
  def get_pdf_page_count(file_path):
1142
  if not file_path or not file_path.lower().endswith('.pdf'):
1143
  return 1
 
1197
  workspace_base_size = gr.State(None)
1198
  workspace_base_image = gr.State(None)
1199
  selected_regions_state = gr.State([])
1200
+
1201
  with gr.Row():
1202
+ with gr.Column(scale=3):
1203
  file_in = gr.File(label="Upload Image or PDF", file_types=["image", ".pdf"], type="filepath")
1204
+ with gr.Column(scale=1):
1205
  page_selector = gr.Number(label="Select Page", value=1, minimum=1, step=1, visible=False)
1206
+
1207
+ with gr.Row():
1208
+ with gr.Column(scale=3):
1209
+ workspace_hint = gr.Markdown("**Entire Page mode:** No drawing needed. Click **Extract** to process the full page.")
1210
  gr.Markdown("**Image Workspace (full page + region selection)**")
1211
  if HAS_IMAGE_EDITOR:
1212
  editor_kwargs = {}
 
1234
  region_editor = gr.ImageEditor(
1235
  label="Main image workspace. Recommended: freehand/highlight the target area, then click Add Region. (Crop tool for rectangles is optional.)",
1236
  type="pil",
1237
+ height=640,
1238
  **editor_kwargs,
1239
  )
1240
  else:
1241
  gr.Markdown("Region drawing requires a newer Gradio version with `ImageEditor` support.")
1242
  region_editor = gr.State(None)
1243
+
1244
+ with gr.Column(scale=1):
1245
+ gr.Markdown("### OCR Workflow")
1246
+ task = gr.Dropdown(list(TASK_PROMPTS.keys()), value="📋 Markdown", label="Task")
1247
  input_scope = gr.Radio(["Entire Page", "Selected Region"], value="Entire Page", label="Input Scope")
1248
+
1249
+ selection_controls = gr.Row(visible=False)
1250
  with selection_controls:
1251
  add_region_btn = gr.Button("Add Region", variant="secondary")
1252
  clear_regions_btn = gr.Button("Clear Regions")
1253
+ selection_status = gr.Textbox(label="Region Selection Status", value="No saved regions.", interactive=False, visible=False)
1254
+ selected_regions_gallery = gr.Gallery(label="Selected Regions", show_label=True, columns=2, height=190, visible=False)
1255
+
1256
+ with gr.Accordion("Advanced Options", open=False):
1257
+ equation_zoom = gr.Checkbox(label="Equation Zoom (multipass)", value=False)
1258
+ separate_eq_lines = gr.Checkbox(label="Detect Equation Lines Separately", value=False)
1259
  prompt = gr.Textbox(label="Prompt", lines=2, visible=False)
1260
  btn = gr.Button("Extract", variant="primary", size="lg")
1261
+
1262
+ with gr.Row():
1263
+ with gr.Column(scale=1):
1264
  with gr.Tabs() as tabs:
1265
  with gr.Tab("Text", id="tab_text"):
1266
  text_out = gr.Textbox(lines=20, buttons=["copy"], show_label=False)
1267
  with gr.Tab("Markdown Preview", id="tab_markdown"):
1268
  md_out = gr.HTML("")
1269
  with gr.Tab("Boxes", id="tab_boxes"):
1270
+ img_out = gr.Image(type="pil", height=560, show_label=False)
1271
  with gr.Tab("Cropped Images", id="tab_crops"):
1272
+ gallery = gr.Gallery(show_label=False, columns=3, height=420)
1273
  with gr.Tab("Raw Text", id="tab_raw"):
1274
  raw_out = gr.Textbox(lines=20, buttons=["copy"], show_label=False)
1275
  download_btn = gr.DownloadButton("Download Markdown", visible=False, variant="secondary")
 
1310
  1024 base + 768 patches with dynamic cropping (2-6 patches). 144 tokens per patch + 256 base tokens.
1311
 
1312
  ### Faculty Quick Workflow
1313
+ 1. Upload a page/image, then confirm **Task**.
1314
  2. Choose **Input Scope**:
1315
  - `Entire Page` for the full page.
1316
  - `Selected Region` for a specific area.
 
1322
  Then click **Extract**.
1323
  4. Use **Clear Regions** to reset multi-select state.
1324
  5. Review **Cropped Images** and **Boxes**: both are labeled `Region 1`, `Region 2`, etc.
1325
+ 6. Use **Advanced Options** only when needed (Equation Zoom / line-by-line equation OCR).
1326
 
1327
  ### Tasks
1328
  - **Markdown**: Convert document to structured markdown with layout detection (grounding ✅)
 
1351
  file_in.change(update_page_selector, [file_in], [page_selector])
1352
  task.change(toggle_prompt, [task], [prompt])
1353
  task.change(select_boxes, [task], [tabs])
1354
+ input_scope.change(toggle_scope_ui, [input_scope], [workspace_hint, selection_controls, selection_status, selected_regions_gallery])
1355
  if HAS_IMAGE_EDITOR and region_editor is not None:
1356
  file_in.change(load_image_with_size, [file_in, page_selector], [region_editor, workspace_base_size, workspace_base_image])
1357
  page_selector.change(load_image_with_size, [file_in, page_selector], [region_editor, workspace_base_size, workspace_base_image])
 
1438
  [file_in, task, prompt, page_selector, equation_zoom, separate_eq_lines, input_scope, region_editor, workspace_base_size, workspace_base_image, selected_regions_state],
1439
  [text_out, md_out, raw_out, img_out, gallery, download_btn]
1440
  )
1441
+ submit_event.then(select_post_extract_tab, [task, input_scope], [tabs])
1442
 
1443
  if __name__ == "__main__":
1444
  # server_name="0.0.0.0" is needed locally (WSL2 → Windows access)