Spaces:
Running
Running
Luis J Camargo committed on
Commit Β·
b107ea6
1
Parent(s): 2147761
refactor: Migrate image inputs from `gr.File` to `gr.Image` for native preview support, removing custom preview logic, and add full progress display for inference tasks.
Browse files
app.py
CHANGED
|
@@ -167,22 +167,7 @@ def _escape_inequalities_in_math(md: str) -> str:
|
|
| 167 |
md = pat.sub(lambda m: m.group(0).replace(m.group(1), fix(m.group(1))), md)
|
| 168 |
return md
|
| 169 |
|
| 170 |
-
|
| 171 |
-
if not path_or_url:
|
| 172 |
-
return gr.update(value="", visible=False)
|
| 173 |
-
|
| 174 |
-
is_url = isinstance(path_or_url, str) and path_or_url.startswith(("http://", "https://"))
|
| 175 |
-
if is_url:
|
| 176 |
-
src = path_or_url
|
| 177 |
-
else:
|
| 178 |
-
src = image_to_base64_data_url(path_or_url)
|
| 179 |
-
|
| 180 |
-
html_content = f"""
|
| 181 |
-
<div class="uploaded-image" style="background: white; padding: 10px; border-radius: 8px;">
|
| 182 |
-
<img src="{src}" alt="Preview" style="width:100%; height:auto; max-height:800px; object-fit:contain;"/>
|
| 183 |
-
</div>
|
| 184 |
-
"""
|
| 185 |
-
return gr.update(value=html_content, visible=True)
|
| 186 |
|
| 187 |
# --- Inference Logic ---
|
| 188 |
|
|
@@ -211,20 +196,28 @@ def run_inference(img_path, task_type="ocr"):
|
|
| 211 |
os.makedirs(run_output_dir, exist_ok=True)
|
| 212 |
|
| 213 |
for i, res in enumerate(output):
|
|
|
|
|
|
|
| 214 |
res.save_to_json(save_path=run_output_dir)
|
| 215 |
res.save_to_markdown(save_path=run_output_dir)
|
| 216 |
res.print()
|
| 217 |
|
|
|
|
|
|
|
| 218 |
fnames = os.listdir(run_output_dir)
|
| 219 |
for fname in fnames:
|
| 220 |
fpath = os.path.join(run_output_dir, fname)
|
| 221 |
if fname.endswith(".md"):
|
| 222 |
with open(fpath, 'r', encoding='utf-8') as f:
|
| 223 |
-
|
|
|
|
|
|
|
| 224 |
elif fname.endswith(".json"):
|
| 225 |
with open(fpath, 'r', encoding='utf-8') as f:
|
| 226 |
-
|
| 227 |
-
|
|
|
|
|
|
|
| 228 |
vis_src = image_to_base64_data_url(fpath)
|
| 229 |
vis_html += f'<div style="margin-bottom:20px; border: 2px solid #10b981; border-radius: 12px; overflow: hidden; background:white;">'
|
| 230 |
vis_html += f'<img src="{vis_src}" alt="Vis {i+1}" style="width:100%;">'
|
|
@@ -275,12 +268,12 @@ with gr.Blocks() as demo:
|
|
| 275 |
gr.Markdown(f"**β‘ Status:** {status_text} | **Model:** `{REPO_ID}` | **Hardware:** CPU")
|
| 276 |
|
| 277 |
with gr.Tabs():
|
|
|
|
| 278 |
with gr.Tab("π Full Document Parsing"):
|
| 279 |
with gr.Row():
|
| 280 |
with gr.Column(scale=5):
|
| 281 |
-
file_doc = gr.
|
| 282 |
-
|
| 283 |
-
btn_parse = gr.Button("π Start Parsing", variant="primary")
|
| 284 |
with gr.Row():
|
| 285 |
chart_switch = gr.Checkbox(label="Chart OCR", value=True)
|
| 286 |
unwarp_switch = gr.Checkbox(label="Unwarping", value=False)
|
|
@@ -290,23 +283,27 @@ with gr.Blocks() as demo:
|
|
| 290 |
with gr.Tab("π Markdown View"):
|
| 291 |
md_preview_doc = gr.Markdown(latex_delimiters=LATEX_DELIMS, elem_classes="output-box")
|
| 292 |
with gr.Tab("πΌοΈ Visual Results"):
|
| 293 |
-
vis_image_doc = gr.HTML('<div style="text-align:center; color:#94a3b8; padding: 50px;">
|
| 294 |
with gr.Tab("π Raw Source"):
|
| 295 |
md_raw_doc = gr.Code(language="markdown")
|
| 296 |
|
| 297 |
-
file_doc.change(update_preview_visibility, file_doc, preview_doc_html)
|
| 298 |
-
|
| 299 |
def parse_doc_wrapper(fp, ch, uw):
|
|
|
|
| 300 |
res_preview, res_raw, res_vis, res_json = run_inference(fp, task_type="Document")
|
| 301 |
return res_preview, res_vis, res_raw
|
| 302 |
|
| 303 |
-
btn_parse.click(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 304 |
|
|
|
|
| 305 |
with gr.Tab("π§© Specific Recognition"):
|
| 306 |
with gr.Row():
|
| 307 |
with gr.Column(scale=5):
|
| 308 |
-
file_vl = gr.
|
| 309 |
-
preview_vl_html = gr.HTML(visible=False)
|
| 310 |
with gr.Row():
|
| 311 |
btn_ocr = gr.Button("Text", variant="secondary")
|
| 312 |
btn_formula = gr.Button("Formula", variant="secondary")
|
|
@@ -319,36 +316,44 @@ with gr.Blocks() as demo:
|
|
| 319 |
with gr.Tab("π Source"):
|
| 320 |
md_raw_vl = gr.Code(language="markdown")
|
| 321 |
|
| 322 |
-
file_vl.change(update_preview_visibility, file_vl, preview_vl_html)
|
| 323 |
-
|
| 324 |
def run_vl_wrapper(fp, prompt):
|
|
|
|
| 325 |
res_preview, res_raw, _, _ = run_inference(fp, task_type=prompt)
|
| 326 |
return res_preview, res_raw
|
| 327 |
|
| 328 |
for btn, prompt in [(btn_ocr, "Text"), (btn_formula, "Formula"), (btn_table, "Table")]:
|
| 329 |
-
btn.click(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 330 |
|
|
|
|
| 331 |
with gr.Tab("π Feature Spotting"):
|
| 332 |
with gr.Row():
|
| 333 |
with gr.Column(scale=5):
|
| 334 |
-
file_spot = gr.
|
| 335 |
-
preview_spot_html = gr.HTML(visible=False)
|
| 336 |
btn_run_spot = gr.Button("π― Run Spotting", variant="primary")
|
| 337 |
|
| 338 |
with gr.Column(scale=7):
|
| 339 |
with gr.Tabs():
|
| 340 |
with gr.Tab("πΌοΈ Detection"):
|
| 341 |
-
vis_image_spot = gr.HTML('<div style="text-align:center; color:#94a3b8; padding: 50px;">
|
| 342 |
with gr.Tab("πΎ JSON Feed"):
|
| 343 |
json_spot = gr.Code(label="JSON", language="json")
|
| 344 |
|
| 345 |
-
file_spot.change(update_preview_visibility, file_spot, preview_spot_html)
|
| 346 |
-
|
| 347 |
def run_spotting_wrapper(fp):
|
|
|
|
| 348 |
_, _, vis, js = run_inference(fp, task_type="Spotting")
|
| 349 |
return vis, js
|
| 350 |
|
| 351 |
-
btn_run_spot.click(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 352 |
|
| 353 |
gr.Markdown("--- \n *Tachiwin Project: Indigenous Languages of Mexico.*")
|
| 354 |
|
|
|
|
| 167 |
md = pat.sub(lambda m: m.group(0).replace(m.group(1), fix(m.group(1))), md)
|
| 168 |
return md
|
| 169 |
|
| 170 |
+
# Removed update_preview_visibility as gr.Image handles previews natively.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
|
| 172 |
# --- Inference Logic ---
|
| 173 |
|
|
|
|
| 196 |
os.makedirs(run_output_dir, exist_ok=True)
|
| 197 |
|
| 198 |
for i, res in enumerate(output):
|
| 199 |
+
logger.info(f"Processing segment {i+1}...")
|
| 200 |
+
# Save results
|
| 201 |
res.save_to_json(save_path=run_output_dir)
|
| 202 |
res.save_to_markdown(save_path=run_output_dir)
|
| 203 |
res.print()
|
| 204 |
|
| 205 |
+
# Read back generated files from this segment's save
|
| 206 |
+
# Paddle naming: res_{i}.md, res_{i}.json, etc.
|
| 207 |
fnames = os.listdir(run_output_dir)
|
| 208 |
for fname in fnames:
|
| 209 |
fpath = os.path.join(run_output_dir, fname)
|
| 210 |
if fname.endswith(".md"):
|
| 211 |
with open(fpath, 'r', encoding='utf-8') as f:
|
| 212 |
+
content = f.read()
|
| 213 |
+
if content not in md_content: # Avoid duplicates if listdir is messy
|
| 214 |
+
md_content += content + "\n\n"
|
| 215 |
elif fname.endswith(".json"):
|
| 216 |
with open(fpath, 'r', encoding='utf-8') as f:
|
| 217 |
+
content = f.read()
|
| 218 |
+
if content not in json_content:
|
| 219 |
+
json_content += content + "\n\n"
|
| 220 |
+
elif fname.endswith((".png", ".jpg", ".jpeg")) and ("res" in fname or "vis" in fname):
|
| 221 |
vis_src = image_to_base64_data_url(fpath)
|
| 222 |
vis_html += f'<div style="margin-bottom:20px; border: 2px solid #10b981; border-radius: 12px; overflow: hidden; background:white;">'
|
| 223 |
vis_html += f'<img src="{vis_src}" alt="Vis {i+1}" style="width:100%;">'
|
|
|
|
| 268 |
gr.Markdown(f"**β‘ Status:** {status_text} | **Model:** `{REPO_ID}` | **Hardware:** CPU")
|
| 269 |
|
| 270 |
with gr.Tabs():
|
| 271 |
+
# Document Parsing Tab
|
| 272 |
with gr.Tab("π Full Document Parsing"):
|
| 273 |
with gr.Row():
|
| 274 |
with gr.Column(scale=5):
|
| 275 |
+
file_doc = gr.Image(label="Upload Image", type="filepath")
|
| 276 |
+
btn_parse = gr.Button("π Start Parsing", variant="primary")
|
|
|
|
| 277 |
with gr.Row():
|
| 278 |
chart_switch = gr.Checkbox(label="Chart OCR", value=True)
|
| 279 |
unwarp_switch = gr.Checkbox(label="Unwarping", value=False)
|
|
|
|
| 283 |
with gr.Tab("π Markdown View"):
|
| 284 |
md_preview_doc = gr.Markdown(latex_delimiters=LATEX_DELIMS, elem_classes="output-box")
|
| 285 |
with gr.Tab("πΌοΈ Visual Results"):
|
| 286 |
+
vis_image_doc = gr.HTML('<div style="text-align:center; color:#94a3b8; padding: 50px;">Results will appear here.</div>')
|
| 287 |
with gr.Tab("π Raw Source"):
|
| 288 |
md_raw_doc = gr.Code(language="markdown")
|
| 289 |
|
|
|
|
|
|
|
| 290 |
def parse_doc_wrapper(fp, ch, uw):
|
| 291 |
+
if not fp: return "β οΈ Please upload an image.", "", ""
|
| 292 |
res_preview, res_raw, res_vis, res_json = run_inference(fp, task_type="Document")
|
| 293 |
return res_preview, res_vis, res_raw
|
| 294 |
|
| 295 |
+
btn_parse.click(
|
| 296 |
+
parse_doc_wrapper,
|
| 297 |
+
[file_doc, chart_switch, unwarp_switch],
|
| 298 |
+
[md_preview_doc, vis_image_doc, md_raw_doc],
|
| 299 |
+
show_progress="full"
|
| 300 |
+
)
|
| 301 |
|
| 302 |
+
# Element Recognition Tab
|
| 303 |
with gr.Tab("π§© Specific Recognition"):
|
| 304 |
with gr.Row():
|
| 305 |
with gr.Column(scale=5):
|
| 306 |
+
file_vl = gr.Image(label="Upload Element", type="filepath")
|
|
|
|
| 307 |
with gr.Row():
|
| 308 |
btn_ocr = gr.Button("Text", variant="secondary")
|
| 309 |
btn_formula = gr.Button("Formula", variant="secondary")
|
|
|
|
| 316 |
with gr.Tab("π Source"):
|
| 317 |
md_raw_vl = gr.Code(language="markdown")
|
| 318 |
|
|
|
|
|
|
|
| 319 |
def run_vl_wrapper(fp, prompt):
|
| 320 |
+
if not fp: return "β οΈ Please upload an image.", ""
|
| 321 |
res_preview, res_raw, _, _ = run_inference(fp, task_type=prompt)
|
| 322 |
return res_preview, res_raw
|
| 323 |
|
| 324 |
for btn, prompt in [(btn_ocr, "Text"), (btn_formula, "Formula"), (btn_table, "Table")]:
|
| 325 |
+
btn.click(
|
| 326 |
+
run_vl_wrapper,
|
| 327 |
+
[file_vl, gr.State(prompt)],
|
| 328 |
+
[md_preview_vl, md_raw_vl],
|
| 329 |
+
show_progress="full"
|
| 330 |
+
)
|
| 331 |
|
| 332 |
+
# Spotting Tab
|
| 333 |
with gr.Tab("π Feature Spotting"):
|
| 334 |
with gr.Row():
|
| 335 |
with gr.Column(scale=5):
|
| 336 |
+
file_spot = gr.Image(label="Target Image", type="filepath")
|
|
|
|
| 337 |
btn_run_spot = gr.Button("π― Run Spotting", variant="primary")
|
| 338 |
|
| 339 |
with gr.Column(scale=7):
|
| 340 |
with gr.Tabs():
|
| 341 |
with gr.Tab("πΌοΈ Detection"):
|
| 342 |
+
vis_image_spot = gr.HTML('<div style="text-align:center; color:#94a3b8; padding: 50px;">Bboxes view.</div>')
|
| 343 |
with gr.Tab("πΎ JSON Feed"):
|
| 344 |
json_spot = gr.Code(label="JSON", language="json")
|
| 345 |
|
|
|
|
|
|
|
| 346 |
def run_spotting_wrapper(fp):
|
| 347 |
+
if not fp: return "", ""
|
| 348 |
_, _, vis, js = run_inference(fp, task_type="Spotting")
|
| 349 |
return vis, js
|
| 350 |
|
| 351 |
+
btn_run_spot.click(
|
| 352 |
+
run_spotting_wrapper,
|
| 353 |
+
file_spot,
|
| 354 |
+
[vis_image_spot, json_spot],
|
| 355 |
+
show_progress="full"
|
| 356 |
+
)
|
| 357 |
|
| 358 |
gr.Markdown("--- \n *Tachiwin Project: Indigenous Languages of Mexico.*")
|
| 359 |
|