Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import requests | |
| from bs4 import BeautifulSoup | |
| import re | |
| import html | |
# Define the web scraping function
def scrape_website(url):
    """Fetch *url* and return the page's visible text as a single line.

    Args:
        url: Address of the page to download. Surrounding whitespace
            (e.g. from a copy/paste) is ignored.

    Returns:
        The text content of the page with every run of whitespace
        collapsed to one space and no leading/trailing whitespace.

    Raises:
        requests.exceptions.RequestException: If the URL is malformed, the
            connection fails, or the server does not answer in time.
    """
    # Without a timeout, requests waits forever on an unresponsive server,
    # which would hang the UI request that called this function.
    response = requests.get(url.strip(), timeout=10)
    html_content = response.content

    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(html_content, "html.parser")

    # Extract all text from the HTML
    text = soup.get_text()

    # Collapse every run of whitespace (newlines, tabs, ...) to one space
    cleaned_text = re.sub(r"\s+", " ", text)
    # NOTE(review): BeautifulSoup presumably already decodes HTML entities in
    # get_text(); this second pass is kept to preserve the existing output
    # but may double-decode text such as "&amp;lt;" -- confirm it is wanted.
    cleaned_text = html.unescape(cleaned_text)
    # Collapsing leaves a single space at either end when the document
    # starts or ends with whitespace; drop it.
    return cleaned_text.strip()
# Create a Gradio interface: one text box in (the URL), one text box out
# (the scraped text).
iface = gr.Interface(
    fn=scrape_website,
    inputs="text",
    outputs="text",
    title="Web Scraping",
    description="Enter a website URL to scrape its text",
    # The keyword is `examples` (plural) and takes a list of sample inputs;
    # `example=` is not a gr.Interface parameter and raises a TypeError at
    # import time.
    examples=["https://www.example.com"],
    # Do not pre-compute example outputs: that would trigger a network
    # request at startup and serve stale page text afterwards.
    cache_examples=False,
)

# Start the web server for the interface.
iface.launch()