Spaces:

openbmb
/

UltraData-Math-L0-Parser

Running

App Files Files Community

chuyue committed on Jan 21

Commit

8255ade

1 Parent(s): 1238b33

URL fetch support and LaTeX rendering in Markdown

Browse files

Files changed (2) hide show

app.py +53 -6
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -5,9 +5,32 @@ A unified HTML parser optimized for extracting mathematical content.
 """
 import gradio as gr
 from ultradata_math_parser import GeneralParser
 def parse_html(
     html_content: str,
     base_url: str = "",
@@ -341,16 +364,28 @@ with gr.Blocks(title="UltraData Math Parser") as demo:
         with gr.Column(scale=1):
             gr.HTML('<div class="section-header">📥 Input</div>')
             html_input = gr.Textbox(
                 label="HTML Content",
-                placeholder="Paste your HTML content here...",
-                lines=15,
-                max_lines=30,
                 value=EXAMPLE_HTML,
             )
             base_url_input = gr.Textbox(
-                label="Base URL (Optional)",
                 placeholder="https://example.com/page",
                 lines=1,
             )
@@ -396,6 +431,12 @@ with gr.Blocks(title="UltraData Math Parser") as demo:
                     markdown_output = gr.Markdown(
                         label="Markdown Preview",
                         elem_classes=["markdown-box"],
                     )
                 with gr.TabItem("📄 Plain Text"):
                     text_output = gr.Textbox(
@@ -415,6 +456,12 @@ with gr.Blocks(title="UltraData Math Parser") as demo:
                     )
     # Event handlers
     parse_btn.click(
         fn=process_input,
         inputs=[html_input, base_url_input, process_math, include_tables, enable_forum, html_type],
@@ -422,11 +469,11 @@ with gr.Blocks(title="UltraData Math Parser") as demo:
     )
     def clear_all():
-        return "", "", "", "", "", ""
     clear_btn.click(
         fn=clear_all,
-        outputs=[html_input, base_url_input, title_output, html_output, text_output, markdown_output],
     )
     # Footer info

 """
 import gradio as gr
+import requests
 from ultradata_math_parser import GeneralParser
+def fetch_url_content(url: str) -> tuple:
+    """Fetch HTML content from a URL."""
+    if not url or not url.strip():
+        return "", "Please enter a URL"
+    url = url.strip()
+    if not url.startswith(("http://", "https://")):
+        url = "https://" + url
+    try:
+        headers = {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+        }
+        response = requests.get(url, headers=headers, timeout=15)
+        response.raise_for_status()
+        return response.text, url
+    except requests.exceptions.Timeout:
+        return "", f"Request timed out for {url}"
+    except requests.exceptions.RequestException as e:
+        return "", f"Failed to fetch URL: {str(e)}"
 def parse_html(
     html_content: str,
     base_url: str = "",
         with gr.Column(scale=1):
             gr.HTML('<div class="section-header">📥 Input</div>')
+            with gr.Tabs():
+                with gr.TabItem("🔗 URL"):
+                    url_input = gr.Textbox(
+                        label="URL",
+                        placeholder="Enter URL to fetch (e.g., https://example.com/math-article)",
+                        lines=1,
+                    )
+                    fetch_btn = gr.Button("📥 Fetch URL", variant="secondary")
+                with gr.TabItem("📝 HTML"):
+                    pass  # HTML input will be below, shared between tabs
             html_input = gr.Textbox(
                 label="HTML Content",
+                placeholder="Paste your HTML content here or fetch from URL above...",
+                lines=12,
+                max_lines=25,
                 value=EXAMPLE_HTML,
             )
             base_url_input = gr.Textbox(
+                label="Base URL (Auto-filled from URL fetch)",
                 placeholder="https://example.com/page",
                 lines=1,
             )
                     markdown_output = gr.Markdown(
                         label="Markdown Preview",
                         elem_classes=["markdown-box"],
+                        latex_delimiters=[
+                            {"left": "$$", "right": "$$", "display": True},
+                            {"left": "$", "right": "$", "display": False},
+                            {"left": "\\[", "right": "\\]", "display": True},
+                            {"left": "\\(", "right": "\\)", "display": False},
+                        ],
                     )
                 with gr.TabItem("📄 Plain Text"):
                     text_output = gr.Textbox(
                     )
     # Event handlers
+    fetch_btn.click(
+        fn=fetch_url_content,
+        inputs=[url_input],
+        outputs=[html_input, base_url_input],
+    )
     parse_btn.click(
         fn=process_input,
         inputs=[html_input, base_url_input, process_math, include_tables, enable_forum, html_type],
     )
     def clear_all():
+        return "", "", "", "", "", "", ""
     clear_btn.click(
         fn=clear_all,
+        outputs=[url_input, html_input, base_url_input, title_output, html_output, text_output, markdown_output],
     )
     # Footer info

requirements.txt CHANGED Viewed

@@ -7,3 +7,4 @@ numpy
 py_asciimath
 urllib3
 tldextract

 py_asciimath
 urllib3
 tldextract
+requests