Spaces:
Running on Zero
Running on Zero
Simplify zoom workflow and prefer ImageEditor hand/pan path
Browse files
app.py
CHANGED
|
@@ -52,6 +52,7 @@ model = AutoModel.from_pretrained(MODEL_NAME, _attn_implementation=_attn_impl, t
|
|
| 52 |
BASE_SIZE = 1024
|
| 53 |
IMAGE_SIZE = 768
|
| 54 |
CROP_MODE = True
|
|
|
|
| 55 |
GROUNDING_PATTERN = re.compile(r'<\|ref\|>(.*?)<\|/ref\|><\|det\|>(.*?)<\|/det\|>', re.DOTALL)
|
| 56 |
INFER_DEBUG_FILTERS = ['PATCHES', '====', 'BASE:', 'directly resize', 'NO PATCHES', 'torch.Size', '%|']
|
| 57 |
EQUATION_ZOOM_PROMPT = "<image>\n<|grounding|>Locate each individual equation or math line."
|
|
@@ -1352,7 +1353,7 @@ def _scale_workspace_image(img, workspace_scale):
|
|
| 1352 |
try:
|
| 1353 |
scale = max(60, min(220, int(workspace_scale)))
|
| 1354 |
except Exception:
|
| 1355 |
-
scale =
|
| 1356 |
if scale == 100:
|
| 1357 |
return img
|
| 1358 |
ratio = scale / 100.0
|
|
@@ -1361,7 +1362,7 @@ def _scale_workspace_image(img, workspace_scale):
|
|
| 1361 |
resample = Image.Resampling.BILINEAR if hasattr(Image, "Resampling") else Image.BILINEAR
|
| 1362 |
return img.resize((new_w, new_h), resample)
|
| 1363 |
|
| 1364 |
-
def load_image_with_size(file_path, page_num=1, workspace_scale=
|
| 1365 |
img = load_image(file_path, page_num)
|
| 1366 |
if img is None:
|
| 1367 |
return None, None, None
|
|
@@ -1444,9 +1445,9 @@ with gr.Blocks(**blocks_kwargs) as demo:
|
|
| 1444 |
editor_kwargs["eraser"] = gr.Eraser(default_size=26)
|
| 1445 |
except TypeError:
|
| 1446 |
editor_kwargs["eraser"] = gr.Eraser()
|
| 1447 |
-
if
|
| 1448 |
try:
|
| 1449 |
-
region_editor = gr.
|
| 1450 |
label="Image Workspace",
|
| 1451 |
show_label=False,
|
| 1452 |
type="pil",
|
|
@@ -1455,20 +1456,20 @@ with gr.Blocks(**blocks_kwargs) as demo:
|
|
| 1455 |
)
|
| 1456 |
except TypeError:
|
| 1457 |
try:
|
| 1458 |
-
region_editor = gr.
|
| 1459 |
label="Image Workspace",
|
| 1460 |
show_label=False,
|
| 1461 |
height=640,
|
| 1462 |
**editor_kwargs,
|
| 1463 |
)
|
| 1464 |
except TypeError:
|
| 1465 |
-
region_editor = gr.
|
| 1466 |
label="Image Workspace",
|
| 1467 |
show_label=False,
|
| 1468 |
height=640,
|
| 1469 |
)
|
| 1470 |
else:
|
| 1471 |
-
region_editor = gr.
|
| 1472 |
label="Image Workspace",
|
| 1473 |
show_label=False,
|
| 1474 |
type="pil",
|
|
@@ -1483,14 +1484,6 @@ with gr.Blocks(**blocks_kwargs) as demo:
|
|
| 1483 |
gr.Markdown("### OCR Workflow")
|
| 1484 |
task = gr.Dropdown(list(TASK_PROMPTS.keys()), value="📋 Markdown", label="Task")
|
| 1485 |
input_scope = gr.Radio(["Entire Page", "Selected Region"], value="Entire Page", label="Input Scope")
|
| 1486 |
-
workspace_scale = gr.Slider(
|
| 1487 |
-
minimum=60,
|
| 1488 |
-
maximum=220,
|
| 1489 |
-
step=1,
|
| 1490 |
-
value=89,
|
| 1491 |
-
label="Workspace Scale (%)",
|
| 1492 |
-
info="Use this instead of in-canvas zoom for smoother live drawing.",
|
| 1493 |
-
)
|
| 1494 |
|
| 1495 |
selection_controls = gr.Row(visible=False)
|
| 1496 |
with selection_controls:
|
|
@@ -1567,7 +1560,7 @@ with gr.Blocks(**blocks_kwargs) as demo:
|
|
| 1567 |
2. Choose **Input Scope**:
|
| 1568 |
- `Entire Page` for the full page.
|
| 1569 |
- `Selected Region` for a specific area.
|
| 1570 |
-
2a.
|
| 1571 |
3. For `Selected Region`, use the **Image Workspace**:
|
| 1572 |
- Recommended: freehand selection (draw/highlight target); app uses an automatic bounding box around your marks.
|
| 1573 |
- Optional rectangle selection: use the **Crop** tool.
|
|
@@ -1608,16 +1601,13 @@ with gr.Blocks(**blocks_kwargs) as demo:
|
|
| 1608 |
task.change(select_boxes, [task], [tabs])
|
| 1609 |
input_scope.change(toggle_scope_ui, [input_scope], [workspace_hint, selection_controls, selection_status, selected_regions_gallery])
|
| 1610 |
if HAS_REGION_WORKSPACE and region_editor is not None:
|
| 1611 |
-
file_in.change(load_image_with_size, [file_in, page_selector, workspace_scale], [region_editor, workspace_base_size, workspace_base_image])
|
| 1612 |
-
page_selector.change(load_image_with_size, [file_in, page_selector, workspace_scale], [region_editor, workspace_base_size, workspace_base_image])
|
| 1613 |
-
workspace_scale.change(load_image_with_size, [file_in, page_selector, workspace_scale], [region_editor, workspace_base_size, workspace_base_image])
|
| 1614 |
region_editor.change(sync_workspace_state, [region_editor, workspace_base_image], [workspace_base_size, workspace_base_image])
|
| 1615 |
file_in.change(_reset_selected_regions, outputs=[selected_regions_state, selected_regions_gallery, selection_status])
|
| 1616 |
page_selector.change(_reset_selected_regions, outputs=[selected_regions_state, selected_regions_gallery, selection_status])
|
| 1617 |
-
workspace_scale.change(_reset_selected_regions, outputs=[selected_regions_state, selected_regions_gallery, selection_status])
|
| 1618 |
file_in.change(_reset_drawn_mask, outputs=[drawn_mask_state])
|
| 1619 |
page_selector.change(_reset_drawn_mask, outputs=[drawn_mask_state])
|
| 1620 |
-
workspace_scale.change(_reset_drawn_mask, outputs=[drawn_mask_state])
|
| 1621 |
|
| 1622 |
add_region_btn.click(
|
| 1623 |
add_selected_region,
|
|
|
|
| 52 |
BASE_SIZE = 1024
|
| 53 |
IMAGE_SIZE = 768
|
| 54 |
CROP_MODE = True
|
| 55 |
+
WORKSPACE_DEFAULT_SCALE = 89
|
| 56 |
GROUNDING_PATTERN = re.compile(r'<\|ref\|>(.*?)<\|/ref\|><\|det\|>(.*?)<\|/det\|>', re.DOTALL)
|
| 57 |
INFER_DEBUG_FILTERS = ['PATCHES', '====', 'BASE:', 'directly resize', 'NO PATCHES', 'torch.Size', '%|']
|
| 58 |
EQUATION_ZOOM_PROMPT = "<image>\n<|grounding|>Locate each individual equation or math line."
|
|
|
|
| 1353 |
try:
|
| 1354 |
scale = max(60, min(220, int(workspace_scale)))
|
| 1355 |
except Exception:
|
| 1356 |
+
scale = WORKSPACE_DEFAULT_SCALE
|
| 1357 |
if scale == 100:
|
| 1358 |
return img
|
| 1359 |
ratio = scale / 100.0
|
|
|
|
| 1362 |
resample = Image.Resampling.BILINEAR if hasattr(Image, "Resampling") else Image.BILINEAR
|
| 1363 |
return img.resize((new_w, new_h), resample)
|
| 1364 |
|
| 1365 |
+
def load_image_with_size(file_path, page_num=1, workspace_scale=WORKSPACE_DEFAULT_SCALE):
|
| 1366 |
img = load_image(file_path, page_num)
|
| 1367 |
if img is None:
|
| 1368 |
return None, None, None
|
|
|
|
| 1445 |
editor_kwargs["eraser"] = gr.Eraser(default_size=26)
|
| 1446 |
except TypeError:
|
| 1447 |
editor_kwargs["eraser"] = gr.Eraser()
|
| 1448 |
+
if HAS_IMAGE_EDITOR:
|
| 1449 |
try:
|
| 1450 |
+
region_editor = gr.ImageEditor(
|
| 1451 |
label="Image Workspace",
|
| 1452 |
show_label=False,
|
| 1453 |
type="pil",
|
|
|
|
| 1456 |
)
|
| 1457 |
except TypeError:
|
| 1458 |
try:
|
| 1459 |
+
region_editor = gr.ImageEditor(
|
| 1460 |
label="Image Workspace",
|
| 1461 |
show_label=False,
|
| 1462 |
height=640,
|
| 1463 |
**editor_kwargs,
|
| 1464 |
)
|
| 1465 |
except TypeError:
|
| 1466 |
+
region_editor = gr.ImageEditor(
|
| 1467 |
label="Image Workspace",
|
| 1468 |
show_label=False,
|
| 1469 |
height=640,
|
| 1470 |
)
|
| 1471 |
else:
|
| 1472 |
+
region_editor = gr.Paint(
|
| 1473 |
label="Image Workspace",
|
| 1474 |
show_label=False,
|
| 1475 |
type="pil",
|
|
|
|
| 1484 |
gr.Markdown("### OCR Workflow")
|
| 1485 |
task = gr.Dropdown(list(TASK_PROMPTS.keys()), value="📋 Markdown", label="Task")
|
| 1486 |
input_scope = gr.Radio(["Entire Page", "Selected Region"], value="Entire Page", label="Input Scope")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1487 |
|
| 1488 |
selection_controls = gr.Row(visible=False)
|
| 1489 |
with selection_controls:
|
|
|
|
| 1560 |
2. Choose **Input Scope**:
|
| 1561 |
- `Entire Page` for the full page.
|
| 1562 |
- `Selected Region` for a specific area.
|
| 1563 |
+
2a. Workspace opens at **89% scale** by default to keep small math readable while avoiding extra zoom adjustments.
|
| 1564 |
3. For `Selected Region`, use the **Image Workspace**:
|
| 1565 |
- Recommended: freehand selection (draw/highlight target); app uses an automatic bounding box around your marks.
|
| 1566 |
- Optional rectangle selection: use the **Crop** tool.
|
|
|
|
| 1601 |
task.change(select_boxes, [task], [tabs])
|
| 1602 |
input_scope.change(toggle_scope_ui, [input_scope], [workspace_hint, selection_controls, selection_status, selected_regions_gallery])
|
| 1603 |
if HAS_REGION_WORKSPACE and region_editor is not None:
|
| 1604 |
+
file_in.change(load_image_with_size, [file_in, page_selector], [region_editor, workspace_base_size, workspace_base_image])
|
| 1605 |
+
page_selector.change(load_image_with_size, [file_in, page_selector], [region_editor, workspace_base_size, workspace_base_image])
|
|
|
|
| 1606 |
region_editor.change(sync_workspace_state, [region_editor, workspace_base_image], [workspace_base_size, workspace_base_image])
|
| 1607 |
file_in.change(_reset_selected_regions, outputs=[selected_regions_state, selected_regions_gallery, selection_status])
|
| 1608 |
page_selector.change(_reset_selected_regions, outputs=[selected_regions_state, selected_regions_gallery, selection_status])
|
|
|
|
| 1609 |
file_in.change(_reset_drawn_mask, outputs=[drawn_mask_state])
|
| 1610 |
page_selector.change(_reset_drawn_mask, outputs=[drawn_mask_state])
|
|
|
|
| 1611 |
|
| 1612 |
add_region_btn.click(
|
| 1613 |
add_selected_region,
|