ZhouChuYue
committed on
Commit
·
453fb4d
1
Parent(s):
06dc834
Add Hugging Face Space demo for UltraData Math Parser
Browse files
app.py
CHANGED
|
@@ -85,20 +85,9 @@ def format_output(result: dict) -> tuple:
|
|
| 85 |
"",
|
| 86 |
"",
|
| 87 |
"",
|
| 88 |
-
"",
|
| 89 |
)
|
| 90 |
|
| 91 |
-
# Build metadata string
|
| 92 |
-
metadata = f"""π **Parsing Statistics**
|
| 93 |
-
- **Title**: {result['title'] or 'N/A'}
|
| 94 |
-
- **Text Length**: {result['text_length']} characters
|
| 95 |
-
- **XPath Match**: {result['xp_num']}
|
| 96 |
-
- **Fallback Strategy**: {result['fallback_strategy']}
|
| 97 |
-
- **Forum Assembled**: {'✅ Yes' if result['forum_assembled'] else '❌ No'}
|
| 98 |
-
"""
|
| 99 |
-
|
| 100 |
return (
|
| 101 |
-
metadata,
|
| 102 |
result.get("title", ""),
|
| 103 |
result.get("html", ""),
|
| 104 |
result.get("text", ""),
|
|
@@ -281,6 +270,32 @@ footer {
|
|
| 281 |
label {
|
| 282 |
color: #cbd5e1 !important;
|
| 283 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 284 |
"""
|
| 285 |
|
| 286 |
# Build Gradio interface
|
|
@@ -336,11 +351,6 @@ with gr.Blocks(css=custom_css, title="UltraData Math Parser") as demo:
|
|
| 336 |
with gr.Column(scale=1):
|
| 337 |
gr.HTML('<div class="section-header">📤 Output</div>')
|
| 338 |
|
| 339 |
-
metadata_output = gr.Markdown(
|
| 340 |
-
label="Parsing Statistics",
|
| 341 |
-
elem_classes=["output-box"],
|
| 342 |
-
)
|
| 343 |
-
|
| 344 |
title_output = gr.Textbox(
|
| 345 |
label="Extracted Title",
|
| 346 |
lines=1,
|
|
@@ -351,16 +361,20 @@ with gr.Blocks(css=custom_css, title="UltraData Math Parser") as demo:
|
|
| 351 |
with gr.TabItem("π Raw HTML"):
|
| 352 |
html_output = gr.Textbox(
|
| 353 |
label="Extracted HTML",
|
| 354 |
-
lines=
|
| 355 |
-
max_lines=
|
| 356 |
interactive=False,
|
|
|
|
|
|
|
| 357 |
)
|
| 358 |
with gr.TabItem("π Plain Text"):
|
| 359 |
text_output = gr.Textbox(
|
| 360 |
label="Plain Text (w3m rendered)",
|
| 361 |
-
lines=
|
| 362 |
-
max_lines=
|
| 363 |
interactive=False,
|
|
|
|
|
|
|
| 364 |
)
|
| 365 |
with gr.TabItem("👁️ Preview"):
|
| 366 |
preview_output = gr.HTML(
|
|
@@ -371,15 +385,15 @@ with gr.Blocks(css=custom_css, title="UltraData Math Parser") as demo:
|
|
| 371 |
parse_btn.click(
|
| 372 |
fn=process_input,
|
| 373 |
inputs=[html_input, base_url_input, process_math, include_tables, enable_forum, html_type],
|
| 374 |
-
outputs=[
|
| 375 |
)
|
| 376 |
|
| 377 |
def clear_all():
|
| 378 |
-
return "", "", "", "", "", ""
|
| 379 |
|
| 380 |
clear_btn.click(
|
| 381 |
fn=clear_all,
|
| 382 |
-
outputs=[html_input, base_url_input,
|
| 383 |
)
|
| 384 |
|
| 385 |
# Footer info
|
|
|
|
| 85 |
"",
|
| 86 |
"",
|
| 87 |
"",
|
|
|
|
| 88 |
)
|
| 89 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
return (
|
|
|
|
| 91 |
result.get("title", ""),
|
| 92 |
result.get("html", ""),
|
| 93 |
result.get("text", ""),
|
|
|
|
| 270 |
label {
|
| 271 |
color: #cbd5e1 !important;
|
| 272 |
}
|
| 273 |
+
|
| 274 |
+
/* Fixed height textbox with scrollbar */
|
| 275 |
+
.gr-textbox textarea {
|
| 276 |
+
max-height: 350px !important;
|
| 277 |
+
overflow-y: auto !important;
|
| 278 |
+
resize: none !important;
|
| 279 |
+
}
|
| 280 |
+
|
| 281 |
+
/* Custom scrollbar styling */
|
| 282 |
+
.gr-textbox textarea::-webkit-scrollbar {
|
| 283 |
+
width: 8px;
|
| 284 |
+
}
|
| 285 |
+
|
| 286 |
+
.gr-textbox textarea::-webkit-scrollbar-track {
|
| 287 |
+
background: rgba(30, 30, 60, 0.4);
|
| 288 |
+
border-radius: 4px;
|
| 289 |
+
}
|
| 290 |
+
|
| 291 |
+
.gr-textbox textarea::-webkit-scrollbar-thumb {
|
| 292 |
+
background: rgba(124, 58, 237, 0.6);
|
| 293 |
+
border-radius: 4px;
|
| 294 |
+
}
|
| 295 |
+
|
| 296 |
+
.gr-textbox textarea::-webkit-scrollbar-thumb:hover {
|
| 297 |
+
background: rgba(124, 58, 237, 0.8);
|
| 298 |
+
}
|
| 299 |
"""
|
| 300 |
|
| 301 |
# Build Gradio interface
|
|
|
|
| 351 |
with gr.Column(scale=1):
|
| 352 |
gr.HTML('<div class="section-header">📤 Output</div>')
|
| 353 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 354 |
title_output = gr.Textbox(
|
| 355 |
label="Extracted Title",
|
| 356 |
lines=1,
|
|
|
|
| 361 |
with gr.TabItem("π Raw HTML"):
|
| 362 |
html_output = gr.Textbox(
|
| 363 |
label="Extracted HTML",
|
| 364 |
+
lines=15,
|
| 365 |
+
max_lines=15,
|
| 366 |
interactive=False,
|
| 367 |
+
autoscroll=False,
|
| 368 |
+
show_copy_button=True,
|
| 369 |
)
|
| 370 |
with gr.TabItem("π Plain Text"):
|
| 371 |
text_output = gr.Textbox(
|
| 372 |
label="Plain Text (w3m rendered)",
|
| 373 |
+
lines=15,
|
| 374 |
+
max_lines=15,
|
| 375 |
interactive=False,
|
| 376 |
+
autoscroll=False,
|
| 377 |
+
show_copy_button=True,
|
| 378 |
)
|
| 379 |
with gr.TabItem("👁️ Preview"):
|
| 380 |
preview_output = gr.HTML(
|
|
|
|
| 385 |
parse_btn.click(
|
| 386 |
fn=process_input,
|
| 387 |
inputs=[html_input, base_url_input, process_math, include_tables, enable_forum, html_type],
|
| 388 |
+
outputs=[title_output, html_output, text_output, preview_output],
|
| 389 |
)
|
| 390 |
|
| 391 |
def clear_all():
|
| 392 |
+
return "", "", "", "", "", ""
|
| 393 |
|
| 394 |
clear_btn.click(
|
| 395 |
fn=clear_all,
|
| 396 |
+
outputs=[html_input, base_url_input, title_output, html_output, text_output, preview_output],
|
| 397 |
)
|
| 398 |
|
| 399 |
# Footer info
|