ricklon committed on
Commit
be5abde
·
1 Parent(s): 37739bf

Simplify zoom workflow and prefer ImageEditor hand/pan path

Browse files
Files changed (1) hide show
  1. app.py +11 -21
app.py CHANGED
@@ -52,6 +52,7 @@ model = AutoModel.from_pretrained(MODEL_NAME, _attn_implementation=_attn_impl, t
52
  BASE_SIZE = 1024
53
  IMAGE_SIZE = 768
54
  CROP_MODE = True
 
55
  GROUNDING_PATTERN = re.compile(r'<\|ref\|>(.*?)<\|/ref\|><\|det\|>(.*?)<\|/det\|>', re.DOTALL)
56
  INFER_DEBUG_FILTERS = ['PATCHES', '====', 'BASE:', 'directly resize', 'NO PATCHES', 'torch.Size', '%|']
57
  EQUATION_ZOOM_PROMPT = "<image>\n<|grounding|>Locate each individual equation or math line."
@@ -1352,7 +1353,7 @@ def _scale_workspace_image(img, workspace_scale):
1352
  try:
1353
  scale = max(60, min(220, int(workspace_scale)))
1354
  except Exception:
1355
- scale = 89
1356
  if scale == 100:
1357
  return img
1358
  ratio = scale / 100.0
@@ -1361,7 +1362,7 @@ def _scale_workspace_image(img, workspace_scale):
1361
  resample = Image.Resampling.BILINEAR if hasattr(Image, "Resampling") else Image.BILINEAR
1362
  return img.resize((new_w, new_h), resample)
1363
 
1364
- def load_image_with_size(file_path, page_num=1, workspace_scale=100):
1365
  img = load_image(file_path, page_num)
1366
  if img is None:
1367
  return None, None, None
@@ -1444,9 +1445,9 @@ with gr.Blocks(**blocks_kwargs) as demo:
1444
  editor_kwargs["eraser"] = gr.Eraser(default_size=26)
1445
  except TypeError:
1446
  editor_kwargs["eraser"] = gr.Eraser()
1447
- if HAS_PAINT:
1448
  try:
1449
- region_editor = gr.Paint(
1450
  label="Image Workspace",
1451
  show_label=False,
1452
  type="pil",
@@ -1455,20 +1456,20 @@ with gr.Blocks(**blocks_kwargs) as demo:
1455
  )
1456
  except TypeError:
1457
  try:
1458
- region_editor = gr.Paint(
1459
  label="Image Workspace",
1460
  show_label=False,
1461
  height=640,
1462
  **editor_kwargs,
1463
  )
1464
  except TypeError:
1465
- region_editor = gr.Paint(
1466
  label="Image Workspace",
1467
  show_label=False,
1468
  height=640,
1469
  )
1470
  else:
1471
- region_editor = gr.ImageEditor(
1472
  label="Image Workspace",
1473
  show_label=False,
1474
  type="pil",
@@ -1483,14 +1484,6 @@ with gr.Blocks(**blocks_kwargs) as demo:
1483
  gr.Markdown("### OCR Workflow")
1484
  task = gr.Dropdown(list(TASK_PROMPTS.keys()), value="📋 Markdown", label="Task")
1485
  input_scope = gr.Radio(["Entire Page", "Selected Region"], value="Entire Page", label="Input Scope")
1486
- workspace_scale = gr.Slider(
1487
- minimum=60,
1488
- maximum=220,
1489
- step=1,
1490
- value=89,
1491
- label="Workspace Scale (%)",
1492
- info="Use this instead of in-canvas zoom for smoother live drawing.",
1493
- )
1494
 
1495
  selection_controls = gr.Row(visible=False)
1496
  with selection_controls:
@@ -1567,7 +1560,7 @@ with gr.Blocks(**blocks_kwargs) as demo:
1567
  2. Choose **Input Scope**:
1568
  - `Entire Page` for the full page.
1569
  - `Selected Region` for a specific area.
1570
- 2a. For small math, increase **Workspace Scale** instead of using in-canvas zoom (better live drawing feedback).
1571
  3. For `Selected Region`, use the **Image Workspace**:
1572
  - Recommended: freehand selection (draw/highlight target); app uses an automatic bounding box around your marks.
1573
  - Optional rectangle selection: use the **Crop** tool.
@@ -1608,16 +1601,13 @@ with gr.Blocks(**blocks_kwargs) as demo:
1608
  task.change(select_boxes, [task], [tabs])
1609
  input_scope.change(toggle_scope_ui, [input_scope], [workspace_hint, selection_controls, selection_status, selected_regions_gallery])
1610
  if HAS_REGION_WORKSPACE and region_editor is not None:
1611
- file_in.change(load_image_with_size, [file_in, page_selector, workspace_scale], [region_editor, workspace_base_size, workspace_base_image])
1612
- page_selector.change(load_image_with_size, [file_in, page_selector, workspace_scale], [region_editor, workspace_base_size, workspace_base_image])
1613
- workspace_scale.change(load_image_with_size, [file_in, page_selector, workspace_scale], [region_editor, workspace_base_size, workspace_base_image])
1614
  region_editor.change(sync_workspace_state, [region_editor, workspace_base_image], [workspace_base_size, workspace_base_image])
1615
  file_in.change(_reset_selected_regions, outputs=[selected_regions_state, selected_regions_gallery, selection_status])
1616
  page_selector.change(_reset_selected_regions, outputs=[selected_regions_state, selected_regions_gallery, selection_status])
1617
- workspace_scale.change(_reset_selected_regions, outputs=[selected_regions_state, selected_regions_gallery, selection_status])
1618
  file_in.change(_reset_drawn_mask, outputs=[drawn_mask_state])
1619
  page_selector.change(_reset_drawn_mask, outputs=[drawn_mask_state])
1620
- workspace_scale.change(_reset_drawn_mask, outputs=[drawn_mask_state])
1621
 
1622
  add_region_btn.click(
1623
  add_selected_region,
 
52
  BASE_SIZE = 1024
53
  IMAGE_SIZE = 768
54
  CROP_MODE = True
55
+ WORKSPACE_DEFAULT_SCALE = 89
56
  GROUNDING_PATTERN = re.compile(r'<\|ref\|>(.*?)<\|/ref\|><\|det\|>(.*?)<\|/det\|>', re.DOTALL)
57
  INFER_DEBUG_FILTERS = ['PATCHES', '====', 'BASE:', 'directly resize', 'NO PATCHES', 'torch.Size', '%|']
58
  EQUATION_ZOOM_PROMPT = "<image>\n<|grounding|>Locate each individual equation or math line."
 
1353
  try:
1354
  scale = max(60, min(220, int(workspace_scale)))
1355
  except Exception:
1356
+ scale = WORKSPACE_DEFAULT_SCALE
1357
  if scale == 100:
1358
  return img
1359
  ratio = scale / 100.0
 
1362
  resample = Image.Resampling.BILINEAR if hasattr(Image, "Resampling") else Image.BILINEAR
1363
  return img.resize((new_w, new_h), resample)
1364
 
1365
+ def load_image_with_size(file_path, page_num=1, workspace_scale=WORKSPACE_DEFAULT_SCALE):
1366
  img = load_image(file_path, page_num)
1367
  if img is None:
1368
  return None, None, None
 
1445
  editor_kwargs["eraser"] = gr.Eraser(default_size=26)
1446
  except TypeError:
1447
  editor_kwargs["eraser"] = gr.Eraser()
1448
+ if HAS_IMAGE_EDITOR:
1449
  try:
1450
+ region_editor = gr.ImageEditor(
1451
  label="Image Workspace",
1452
  show_label=False,
1453
  type="pil",
 
1456
  )
1457
  except TypeError:
1458
  try:
1459
+ region_editor = gr.ImageEditor(
1460
  label="Image Workspace",
1461
  show_label=False,
1462
  height=640,
1463
  **editor_kwargs,
1464
  )
1465
  except TypeError:
1466
+ region_editor = gr.ImageEditor(
1467
  label="Image Workspace",
1468
  show_label=False,
1469
  height=640,
1470
  )
1471
  else:
1472
+ region_editor = gr.Paint(
1473
  label="Image Workspace",
1474
  show_label=False,
1475
  type="pil",
 
1484
  gr.Markdown("### OCR Workflow")
1485
  task = gr.Dropdown(list(TASK_PROMPTS.keys()), value="📋 Markdown", label="Task")
1486
  input_scope = gr.Radio(["Entire Page", "Selected Region"], value="Entire Page", label="Input Scope")
 
 
 
 
 
 
 
 
1487
 
1488
  selection_controls = gr.Row(visible=False)
1489
  with selection_controls:
 
1560
  2. Choose **Input Scope**:
1561
  - `Entire Page` for the full page.
1562
  - `Selected Region` for a specific area.
1563
+ 2a. Workspace opens at **89% scale** by default to keep small math readable while avoiding extra zoom adjustments.
1564
  3. For `Selected Region`, use the **Image Workspace**:
1565
  - Recommended: freehand selection (draw/highlight target); app uses an automatic bounding box around your marks.
1566
  - Optional rectangle selection: use the **Crop** tool.
 
1601
  task.change(select_boxes, [task], [tabs])
1602
  input_scope.change(toggle_scope_ui, [input_scope], [workspace_hint, selection_controls, selection_status, selected_regions_gallery])
1603
  if HAS_REGION_WORKSPACE and region_editor is not None:
1604
+ file_in.change(load_image_with_size, [file_in, page_selector], [region_editor, workspace_base_size, workspace_base_image])
1605
+ page_selector.change(load_image_with_size, [file_in, page_selector], [region_editor, workspace_base_size, workspace_base_image])
 
1606
  region_editor.change(sync_workspace_state, [region_editor, workspace_base_image], [workspace_base_size, workspace_base_image])
1607
  file_in.change(_reset_selected_regions, outputs=[selected_regions_state, selected_regions_gallery, selection_status])
1608
  page_selector.change(_reset_selected_regions, outputs=[selected_regions_state, selected_regions_gallery, selection_status])
 
1609
  file_in.change(_reset_drawn_mask, outputs=[drawn_mask_state])
1610
  page_selector.change(_reset_drawn_mask, outputs=[drawn_mask_state])
 
1611
 
1612
  add_region_btn.click(
1613
  add_selected_region,