guifav committed on
Commit
302b997
·
1 Parent(s): 90ff1e2

Updated app to use Gradio instead of Streamlit

Browse files
Files changed (2) hide show
  1. app.py +24 -25
  2. requirements.txt +3 -0
app.py CHANGED
@@ -1,9 +1,8 @@
1
- import streamlit as st
2
  import requests
3
  from bs4 import BeautifulSoup
4
  import re
5
 
6
- # Function to scrape only visible text from the given URL
7
  def scrape_visible_text_from_url(url):
8
  try:
9
  response = requests.get(url)
@@ -29,28 +28,28 @@ def scrape_visible_text_from_url(url):
29
  visible_text = re.sub(r'\s+', ' ', visible_text)
30
  return visible_text.strip()
31
  except Exception as e:
32
- st.error(f"Error occurred while scraping the data: {e}")
33
- return None
34
-
35
- # Streamlit UI
36
- def main():
37
- st.title("Web Data Scraper")
38
-
39
- # Get the URL from the user
40
- url_input = st.text_input("Enter the URL of the web page:", "")
41
-
42
- if st.button("Scrape Visible Text"):
43
- if url_input:
44
- # Extract visible text from the URL
45
- data = scrape_visible_text_from_url(url_input)
46
- if data:
47
- st.success("Visible text successfully scraped!")
48
- st.subheader("Scraped Text:")
49
- st.write(data)
50
- else:
51
- st.warning("Failed to scrape visible text from the URL.")
52
- else:
53
- st.warning("Please enter a valid URL.")
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  if __name__ == "__main__":
56
- main()
 
1
+ import gradio as gr
2
  import requests
3
  from bs4 import BeautifulSoup
4
  import re
5
 
 
6
  def scrape_visible_text_from_url(url):
7
  try:
8
  response = requests.get(url)
 
28
  visible_text = re.sub(r'\s+', ' ', visible_text)
29
  return visible_text.strip()
30
  except Exception as e:
31
+ return f"Error occurred while scraping the data: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
+ def scrape_and_display(url):
34
+ if url:
35
+ data = scrape_visible_text_from_url(url)
36
+ if data:
37
+ return data
38
+ else:
39
+ return "Failed to scrape visible text from the URL."
40
+ else:
41
+ return "Please enter a valid URL."
42
+
43
+ # Define the Gradio interface
44
+ iface = gr.Interface(
45
+ fn=scrape_and_display,
46
+ inputs=gr.Textbox(label="Enter the URL of the web page:"),
47
+ outputs=gr.Textbox(label="Scraped Text:"),
48
+ title="Web Data Scraper",
49
+ description="Enter a URL to scrape visible text from the web page.",
50
+ theme="huggingface"
51
+ )
52
+
53
+ # Launch the Gradio app
54
  if __name__ == "__main__":
55
+ iface.launch()
requirements.txt CHANGED
@@ -58,3 +58,6 @@ validators==0.20.0
58
  watchdog==3.0.0
59
  yarl==1.9.2
60
  zipp==3.16.2
 
 
 
 
58
  watchdog==3.0.0
59
  yarl==1.9.2
60
  zipp==3.16.2
61
+ gradio==4.41.0
62
+ beautifulsoup4==4.12.2
63
+ requests==2.31.0