Spaces:
Running on Zero
Running on Zero
Refactor UI into workflow-first layout with larger workspace
Browse files
app.py
CHANGED
|
@@ -1113,6 +1113,31 @@ def select_boxes(task):
|
|
| 1113 |
return gr.update(selected="tab_boxes")
|
| 1114 |
return gr.update()
|
| 1115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1116 |
def get_pdf_page_count(file_path):
|
| 1117 |
if not file_path or not file_path.lower().endswith('.pdf'):
|
| 1118 |
return 1
|
|
@@ -1172,10 +1197,16 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
|
|
| 1172 |
workspace_base_size = gr.State(None)
|
| 1173 |
workspace_base_image = gr.State(None)
|
| 1174 |
selected_regions_state = gr.State([])
|
|
|
|
| 1175 |
with gr.Row():
|
| 1176 |
-
with gr.Column(scale=
|
| 1177 |
file_in = gr.File(label="Upload Image or PDF", file_types=["image", ".pdf"], type="filepath")
|
|
|
|
| 1178 |
page_selector = gr.Number(label="Select Page", value=1, minimum=1, step=1, visible=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1179 |
gr.Markdown("**Image Workspace (full page + region selection)**")
|
| 1180 |
if HAS_IMAGE_EDITOR:
|
| 1181 |
editor_kwargs = {}
|
|
@@ -1203,35 +1234,42 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
|
|
| 1203 |
region_editor = gr.ImageEditor(
|
| 1204 |
label="Main image workspace. Recommended: freehand/highlight the target area, then click Add Region. (Crop tool for rectangles is optional.)",
|
| 1205 |
type="pil",
|
| 1206 |
-
height=
|
| 1207 |
**editor_kwargs,
|
| 1208 |
)
|
| 1209 |
else:
|
| 1210 |
gr.Markdown("Region drawing requires a newer Gradio version with `ImageEditor` support.")
|
| 1211 |
region_editor = gr.State(None)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1212 |
input_scope = gr.Radio(["Entire Page", "Selected Region"], value="Entire Page", label="Input Scope")
|
| 1213 |
-
|
|
|
|
| 1214 |
with selection_controls:
|
| 1215 |
add_region_btn = gr.Button("Add Region", variant="secondary")
|
| 1216 |
clear_regions_btn = gr.Button("Clear Regions")
|
| 1217 |
-
selection_status = gr.Textbox(label="Region Selection Status", value="No saved regions.", interactive=False)
|
| 1218 |
-
selected_regions_gallery = gr.Gallery(label="Selected Regions", show_label=True, columns=
|
| 1219 |
-
|
| 1220 |
-
|
| 1221 |
-
|
|
|
|
| 1222 |
prompt = gr.Textbox(label="Prompt", lines=2, visible=False)
|
| 1223 |
btn = gr.Button("Extract", variant="primary", size="lg")
|
| 1224 |
-
|
| 1225 |
-
|
|
|
|
| 1226 |
with gr.Tabs() as tabs:
|
| 1227 |
with gr.Tab("Text", id="tab_text"):
|
| 1228 |
text_out = gr.Textbox(lines=20, buttons=["copy"], show_label=False)
|
| 1229 |
with gr.Tab("Markdown Preview", id="tab_markdown"):
|
| 1230 |
md_out = gr.HTML("")
|
| 1231 |
with gr.Tab("Boxes", id="tab_boxes"):
|
| 1232 |
-
img_out = gr.Image(type="pil", height=
|
| 1233 |
with gr.Tab("Cropped Images", id="tab_crops"):
|
| 1234 |
-
gallery = gr.Gallery(show_label=False, columns=3, height=
|
| 1235 |
with gr.Tab("Raw Text", id="tab_raw"):
|
| 1236 |
raw_out = gr.Textbox(lines=20, buttons=["copy"], show_label=False)
|
| 1237 |
download_btn = gr.DownloadButton("Download Markdown", visible=False, variant="secondary")
|
|
@@ -1272,7 +1310,7 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
|
|
| 1272 |
1024 base + 768 patches with dynamic cropping (2-6 patches). 144 tokens per patch + 256 base tokens.
|
| 1273 |
|
| 1274 |
### Faculty Quick Workflow
|
| 1275 |
-
1.
|
| 1276 |
2. Choose **Input Scope**:
|
| 1277 |
- `Entire Page` for the full page.
|
| 1278 |
- `Selected Region` for a specific area.
|
|
@@ -1284,6 +1322,7 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
|
|
| 1284 |
Then click **Extract**.
|
| 1285 |
4. Use **Clear Regions** to reset multi-select state.
|
| 1286 |
5. Review **Cropped Images** and **Boxes**: both are labeled `Region 1`, `Region 2`, etc.
|
|
|
|
| 1287 |
|
| 1288 |
### Tasks
|
| 1289 |
- **Markdown**: Convert document to structured markdown with layout detection (grounding ✅)
|
|
@@ -1312,6 +1351,7 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
|
|
| 1312 |
file_in.change(update_page_selector, [file_in], [page_selector])
|
| 1313 |
task.change(toggle_prompt, [task], [prompt])
|
| 1314 |
task.change(select_boxes, [task], [tabs])
|
|
|
|
| 1315 |
if HAS_IMAGE_EDITOR and region_editor is not None:
|
| 1316 |
file_in.change(load_image_with_size, [file_in, page_selector], [region_editor, workspace_base_size, workspace_base_image])
|
| 1317 |
page_selector.change(load_image_with_size, [file_in, page_selector], [region_editor, workspace_base_size, workspace_base_image])
|
|
@@ -1398,7 +1438,7 @@ with gr.Blocks(title="DeepSeek-OCR-2") as demo:
|
|
| 1398 |
[file_in, task, prompt, page_selector, equation_zoom, separate_eq_lines, input_scope, region_editor, workspace_base_size, workspace_base_image, selected_regions_state],
|
| 1399 |
[text_out, md_out, raw_out, img_out, gallery, download_btn]
|
| 1400 |
)
|
| 1401 |
-
submit_event.then(
|
| 1402 |
|
| 1403 |
if __name__ == "__main__":
|
| 1404 |
# server_name="0.0.0.0" is needed locally (WSL2 → Windows access)
|
|
|
|
| 1113 |
return gr.update(selected="tab_boxes")
|
| 1114 |
return gr.update()
|
| 1115 |
|
| 1116 |
+
def toggle_scope_ui(scope):
    """Build the UI updates that follow a change of the Input Scope radio.

    Returns a 4-tuple of ``gr.update`` objects: first the new hint text,
    then three visibility toggles. In this file the tuple is wired to
    ``workspace_hint``, ``selection_controls``, ``selection_status`` and
    ``selected_regions_gallery`` (in that order).

    The three toggles are visible only when ``scope`` equals
    "Selected Region"; any other value yields the "Entire Page" hint and
    hides them.
    """
    region_mode = scope == "Selected Region"

    if region_mode:
        message = (
            "**Selected Region mode:** Draw/highlight on the workspace, click **Add Region** "
            "for each target area, then click **Extract**."
        )
    else:
        message = "**Entire Page mode:** No drawing needed. Click **Extract** to process the full page."

    # One hint update followed by three identical visibility updates.
    hint_update = gr.update(value=message)
    visibility_updates = [gr.update(visible=region_mode) for _ in range(3)]
    return (hint_update, *visibility_updates)
|
| 1135 |
+
|
| 1136 |
+
def select_post_extract_tab(task, scope):
    """Pick the result tab to select once extraction has finished.

    Selects the tab with id "tab_boxes" when ``scope`` is
    "Selected Region" or ``task`` is "📍 Locate"; otherwise selects the
    tab with id "tab_text".
    """
    show_boxes = scope == "Selected Region" or task == "📍 Locate"
    target_tab = "tab_boxes" if show_boxes else "tab_text"
    return gr.update(selected=target_tab)
|
| 1140 |
+
|
| 1141 |
def get_pdf_page_count(file_path):
|
| 1142 |
if not file_path or not file_path.lower().endswith('.pdf'):
|
| 1143 |
return 1
|
|
|
|
| 1197 |
workspace_base_size = gr.State(None)
|
| 1198 |
workspace_base_image = gr.State(None)
|
| 1199 |
selected_regions_state = gr.State([])
|
| 1200 |
+
|
| 1201 |
with gr.Row():
|
| 1202 |
+
with gr.Column(scale=3):
|
| 1203 |
file_in = gr.File(label="Upload Image or PDF", file_types=["image", ".pdf"], type="filepath")
|
| 1204 |
+
with gr.Column(scale=1):
|
| 1205 |
page_selector = gr.Number(label="Select Page", value=1, minimum=1, step=1, visible=False)
|
| 1206 |
+
|
| 1207 |
+
with gr.Row():
|
| 1208 |
+
with gr.Column(scale=3):
|
| 1209 |
+
workspace_hint = gr.Markdown("**Entire Page mode:** No drawing needed. Click **Extract** to process the full page.")
|
| 1210 |
gr.Markdown("**Image Workspace (full page + region selection)**")
|
| 1211 |
if HAS_IMAGE_EDITOR:
|
| 1212 |
editor_kwargs = {}
|
|
|
|
| 1234 |
region_editor = gr.ImageEditor(
|
| 1235 |
label="Main image workspace. Recommended: freehand/highlight the target area, then click Add Region. (Crop tool for rectangles is optional.)",
|
| 1236 |
type="pil",
|
| 1237 |
+
height=640,
|
| 1238 |
**editor_kwargs,
|
| 1239 |
)
|
| 1240 |
else:
|
| 1241 |
gr.Markdown("Region drawing requires a newer Gradio version with `ImageEditor` support.")
|
| 1242 |
region_editor = gr.State(None)
|
| 1243 |
+
|
| 1244 |
+
with gr.Column(scale=1):
|
| 1245 |
+
gr.Markdown("### OCR Workflow")
|
| 1246 |
+
task = gr.Dropdown(list(TASK_PROMPTS.keys()), value="📋 Markdown", label="Task")
|
| 1247 |
input_scope = gr.Radio(["Entire Page", "Selected Region"], value="Entire Page", label="Input Scope")
|
| 1248 |
+
|
| 1249 |
+
selection_controls = gr.Row(visible=False)
|
| 1250 |
with selection_controls:
|
| 1251 |
add_region_btn = gr.Button("Add Region", variant="secondary")
|
| 1252 |
clear_regions_btn = gr.Button("Clear Regions")
|
| 1253 |
+
selection_status = gr.Textbox(label="Region Selection Status", value="No saved regions.", interactive=False, visible=False)
|
| 1254 |
+
selected_regions_gallery = gr.Gallery(label="Selected Regions", show_label=True, columns=2, height=190, visible=False)
|
| 1255 |
+
|
| 1256 |
+
with gr.Accordion("Advanced Options", open=False):
|
| 1257 |
+
equation_zoom = gr.Checkbox(label="Equation Zoom (multipass)", value=False)
|
| 1258 |
+
separate_eq_lines = gr.Checkbox(label="Detect Equation Lines Separately", value=False)
|
| 1259 |
prompt = gr.Textbox(label="Prompt", lines=2, visible=False)
|
| 1260 |
btn = gr.Button("Extract", variant="primary", size="lg")
|
| 1261 |
+
|
| 1262 |
+
with gr.Row():
|
| 1263 |
+
with gr.Column(scale=1):
|
| 1264 |
with gr.Tabs() as tabs:
|
| 1265 |
with gr.Tab("Text", id="tab_text"):
|
| 1266 |
text_out = gr.Textbox(lines=20, buttons=["copy"], show_label=False)
|
| 1267 |
with gr.Tab("Markdown Preview", id="tab_markdown"):
|
| 1268 |
md_out = gr.HTML("")
|
| 1269 |
with gr.Tab("Boxes", id="tab_boxes"):
|
| 1270 |
+
img_out = gr.Image(type="pil", height=560, show_label=False)
|
| 1271 |
with gr.Tab("Cropped Images", id="tab_crops"):
|
| 1272 |
+
gallery = gr.Gallery(show_label=False, columns=3, height=420)
|
| 1273 |
with gr.Tab("Raw Text", id="tab_raw"):
|
| 1274 |
raw_out = gr.Textbox(lines=20, buttons=["copy"], show_label=False)
|
| 1275 |
download_btn = gr.DownloadButton("Download Markdown", visible=False, variant="secondary")
|
|
|
|
| 1310 |
1024 base + 768 patches with dynamic cropping (2-6 patches). 144 tokens per patch + 256 base tokens.
|
| 1311 |
|
| 1312 |
### Faculty Quick Workflow
|
| 1313 |
+
1. Upload a page/image, then confirm **Task**.
|
| 1314 |
2. Choose **Input Scope**:
|
| 1315 |
- `Entire Page` for the full page.
|
| 1316 |
- `Selected Region` for a specific area.
|
|
|
|
| 1322 |
Then click **Extract**.
|
| 1323 |
4. Use **Clear Regions** to reset multi-select state.
|
| 1324 |
5. Review **Cropped Images** and **Boxes**: both are labeled `Region 1`, `Region 2`, etc.
|
| 1325 |
+
6. Use **Advanced Options** only when needed (Equation Zoom / line-by-line equation OCR).
|
| 1326 |
|
| 1327 |
### Tasks
|
| 1328 |
- **Markdown**: Convert document to structured markdown with layout detection (grounding ✅)
|
|
|
|
| 1351 |
file_in.change(update_page_selector, [file_in], [page_selector])
|
| 1352 |
task.change(toggle_prompt, [task], [prompt])
|
| 1353 |
task.change(select_boxes, [task], [tabs])
|
| 1354 |
+
input_scope.change(toggle_scope_ui, [input_scope], [workspace_hint, selection_controls, selection_status, selected_regions_gallery])
|
| 1355 |
if HAS_IMAGE_EDITOR and region_editor is not None:
|
| 1356 |
file_in.change(load_image_with_size, [file_in, page_selector], [region_editor, workspace_base_size, workspace_base_image])
|
| 1357 |
page_selector.change(load_image_with_size, [file_in, page_selector], [region_editor, workspace_base_size, workspace_base_image])
|
|
|
|
| 1438 |
[file_in, task, prompt, page_selector, equation_zoom, separate_eq_lines, input_scope, region_editor, workspace_base_size, workspace_base_image, selected_regions_state],
|
| 1439 |
[text_out, md_out, raw_out, img_out, gallery, download_btn]
|
| 1440 |
)
|
| 1441 |
+
submit_event.then(select_post_extract_tab, [task, input_scope], [tabs])
|
| 1442 |
|
| 1443 |
if __name__ == "__main__":
|
| 1444 |
# server_name="0.0.0.0" is needed locally (WSL2 → Windows access)
|