Spaces:
Build error
Build error
Updated app to use Gradio instead of Streamlit
Browse files
- app.py +24 -25
- requirements.txt +3 -0
app.py
CHANGED
|
@@ -1,9 +1,8 @@
|
|
| 1 |
-
import streamlit as st
|
| 2 |
import requests
|
| 3 |
from bs4 import BeautifulSoup
|
| 4 |
import re
|
| 5 |
|
| 6 |
-
# Function to scrape only visible text from the given URL
|
| 7 |
def scrape_visible_text_from_url(url):
|
| 8 |
try:
|
| 9 |
response = requests.get(url)
|
|
@@ -29,28 +28,28 @@ def scrape_visible_text_from_url(url):
|
|
| 29 |
visible_text = re.sub(r'\s+', ' ', visible_text)
|
| 30 |
return visible_text.strip()
|
| 31 |
except Exception as e:
|
| 32 |
-
|
| 33 |
-
return None
|
| 34 |
-
|
| 35 |
-
# Streamlit UI
|
| 36 |
-
def main():
|
| 37 |
-
st.title("Web Data Scraper")
|
| 38 |
-
|
| 39 |
-
# Get the URL from the user
|
| 40 |
-
url_input = st.text_input("Enter the URL of the web page:", "")
|
| 41 |
-
|
| 42 |
-
if st.button("Scrape Visible Text"):
|
| 43 |
-
if url_input:
|
| 44 |
-
# Extract visible text from the URL
|
| 45 |
-
data = scrape_visible_text_from_url(url_input)
|
| 46 |
-
if data:
|
| 47 |
-
st.success("Visible text successfully scraped!")
|
| 48 |
-
st.subheader("Scraped Text:")
|
| 49 |
-
st.write(data)
|
| 50 |
-
else:
|
| 51 |
-
st.warning("Failed to scrape visible text from the URL.")
|
| 52 |
-
else:
|
| 53 |
-
st.warning("Please enter a valid URL.")
|
| 54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
if __name__ == "__main__":
|
| 56 |
-
main()
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
import requests
|
| 3 |
from bs4 import BeautifulSoup
|
| 4 |
import re
|
| 5 |
|
|
|
|
| 6 |
def scrape_visible_text_from_url(url):
|
| 7 |
try:
|
| 8 |
response = requests.get(url)
|
|
|
|
| 28 |
visible_text = re.sub(r'\s+', ' ', visible_text)
|
| 29 |
return visible_text.strip()
|
| 30 |
except Exception as e:
|
| 31 |
+
return f"Error occurred while scraping the data: {e}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
+
def scrape_and_display(url):
|
| 34 |
+
if url:
|
| 35 |
+
data = scrape_visible_text_from_url(url)
|
| 36 |
+
if data:
|
| 37 |
+
return data
|
| 38 |
+
else:
|
| 39 |
+
return "Failed to scrape visible text from the URL."
|
| 40 |
+
else:
|
| 41 |
+
return "Please enter a valid URL."
|
| 42 |
+
|
| 43 |
+
# Define the Gradio interface
|
| 44 |
+
iface = gr.Interface(
|
| 45 |
+
fn=scrape_and_display,
|
| 46 |
+
inputs=gr.Textbox(label="Enter the URL of the web page:"),
|
| 47 |
+
outputs=gr.Textbox(label="Scraped Text:"),
|
| 48 |
+
title="Web Data Scraper",
|
| 49 |
+
description="Enter a URL to scrape visible text from the web page.",
|
| 50 |
+
theme="huggingface"
|
| 51 |
+
)
|
| 52 |
+
|
| 53 |
+
# Launch the Gradio app
|
| 54 |
if __name__ == "__main__":
|
| 55 |
+
iface.launch()
|
requirements.txt
CHANGED
|
@@ -58,3 +58,6 @@ validators==0.20.0
|
|
| 58 |
watchdog==3.0.0
|
| 59 |
yarl==1.9.2
|
| 60 |
zipp==3.16.2
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
watchdog==3.0.0
|
| 59 |
yarl==1.9.2
|
| 60 |
zipp==3.16.2
|
| 61 |
+
gradio==4.41.0
|
| 62 |
+
beautifulsoup4==4.12.2
|
| 63 |
+
requests==2.31.0
|