ZhouChuYue
committed on
Commit
·
5e6d33f
1
Parent(s):
8255ade
One-click fetch & parse with Math StackExchange example
Browse files
app.py
CHANGED
|
@@ -31,6 +31,29 @@ def fetch_url_content(url: str) -> tuple:
|
|
| 31 |
return "", f"Failed to fetch URL: {str(e)}"
|
| 32 |
|
| 33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
def parse_html(
|
| 35 |
html_content: str,
|
| 36 |
base_url: str = "",
|
|
@@ -370,8 +393,9 @@ with gr.Blocks(title="UltraData Math Parser") as demo:
|
|
| 370 |
label="URL",
|
| 371 |
placeholder="Enter URL to fetch (e.g., https://example.com/math-article)",
|
| 372 |
lines=1,
|
|
|
|
| 373 |
)
|
| 374 |
-
fetch_btn = gr.Button("📥 Fetch
|
| 375 |
|
| 376 |
with gr.TabItem("π HTML"):
|
| 377 |
pass # HTML input will be below, shared between tabs
|
|
@@ -457,9 +481,9 @@ with gr.Blocks(title="UltraData Math Parser") as demo:
|
|
| 457 |
|
| 458 |
# Event handlers
|
| 459 |
fetch_btn.click(
|
| 460 |
-
fn=
|
| 461 |
-
inputs=[url_input],
|
| 462 |
-
outputs=[html_input, base_url_input],
|
| 463 |
)
|
| 464 |
|
| 465 |
parse_btn.click(
|
|
|
|
| 31 |
return "", f"Failed to fetch URL: {str(e)}"
|
| 32 |
|
| 33 |
|
| 34 |
+
def fetch_and_parse(url: str, process_math: bool, include_tables: bool, enable_forum: bool, html_type: str) -> tuple:
|
| 35 |
+
"""Fetch URL content and parse it in one step."""
|
| 36 |
+
html_content, base_url = fetch_url_content(url)
|
| 37 |
+
|
| 38 |
+
if not html_content:
|
| 39 |
+
# base_url contains error message in this case
|
| 40 |
+
error_msg = base_url
|
| 41 |
+
return "", error_msg, f"❌ {error_msg}", "", "", f"**Error:** {error_msg}"
|
| 42 |
+
|
| 43 |
+
result = parse_html(
|
| 44 |
+
html_content=html_content,
|
| 45 |
+
base_url=base_url,
|
| 46 |
+
process_math=process_math,
|
| 47 |
+
include_tables=include_tables,
|
| 48 |
+
enable_forum_assembly=enable_forum,
|
| 49 |
+
html_type=html_type,
|
| 50 |
+
)
|
| 51 |
+
|
| 52 |
+
formatted = format_output(result)
|
| 53 |
+
# Return: html_content, base_url, title, html_output, text_output, markdown_output
|
| 54 |
+
return html_content, base_url, formatted[0], formatted[1], formatted[2], formatted[3]
|
| 55 |
+
|
| 56 |
+
|
| 57 |
def parse_html(
|
| 58 |
html_content: str,
|
| 59 |
base_url: str = "",
|
|
|
|
| 393 |
label="URL",
|
| 394 |
placeholder="Enter URL to fetch (e.g., https://example.com/math-article)",
|
| 395 |
lines=1,
|
| 396 |
+
value="https://math.stackexchange.com/questions/5120625/ode-problem-of-yt-sqrtyt-with-the-inital-value-y0-1-t-geq-0",
|
| 397 |
)
|
| 398 |
+
fetch_btn = gr.Button("📥 Fetch & Parse", variant="primary")
|
| 399 |
|
| 400 |
with gr.TabItem("π HTML"):
|
| 401 |
pass # HTML input will be below, shared between tabs
|
|
|
|
| 481 |
|
| 482 |
# Event handlers
|
| 483 |
fetch_btn.click(
|
| 484 |
+
fn=fetch_and_parse,
|
| 485 |
+
inputs=[url_input, process_math, include_tables, enable_forum, html_type],
|
| 486 |
+
outputs=[html_input, base_url_input, title_output, html_output, text_output, markdown_output],
|
| 487 |
)
|
| 488 |
|
| 489 |
parse_btn.click(
|