Spaces:

MegaTronX
/

HTML_to_Gradio_Input

Sleeping

MegaTronX committed on Nov 23, 2024

Commit

0606278

verified ·

1 Parent(s): 0508087

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,41 +1,18 @@
-# app.py
 import gradio as gr
-from selectolax.parser import HTMLParser
 import requests
-def parse_url(url):
-    try:
-        response = requests.get(url)
-        response.raise_for_status()
-        parser = HTMLParser(response.text)
-        # Extract the desired information from the parsed HTML
-        text_content = parser.text()
-        # Extract specific elements if needed, for example, all paragraph texts
-        paragraphs = [node.text() for node in parser.css('p')]
-        # Combine the extracted information into a single string
-        output = r"""Text Content:
-{text_content}
-Paragraphs:
-{'\n'.join(paragraphs)}"""
-        return output
-    except Exception as e:
-        return f"Error: {str(e)}"
-# Create the Gradio interface
-iface = gr.Interface(
-    fn=parse_url,
-    inputs="text",  # Input is a text box for the URL
-    outputs="text",  # Output is a text box for the parsed content
-    title="URL Parser",
-    description="Enter a URL to parse the page using Selectolax and output the information."
 )
-# Launch the Gradio app
-if __name__ == "__main__":
-    iface.launch()

 import gradio as gr
 import requests
+from selectolax.parser import HTMLParser
+def get_web_page_data(url):
+    response = requests.get(url)
+    parser = HTMLParser(html=response.text)
+    return parser.html
+demo = gr.Interface(
+    fn=get_web_page_data,
+    inputs="text",
+    outputs="text",
+    title="Web Page Data Extractor",
+    description="Enter a URL to extract its web page data"
 )
+demo.launch()