"""Streamlit app that fetches a URL and displays its visible text content."""
# NOTE(review): "Spaces: Sleeping" banner above was HuggingFace page residue
# from extraction, not source code; replaced with a module docstring.
import streamlit as st
import requests
from bs4 import BeautifulSoup
def main():
    """Streamlit entry point: prompt for a URL and show its extracted text.

    Renders a title, a URL text input, and a "Proceed" button; on click,
    validates the input and delegates to extract_text().
    """
    st.title("Website Content Extractor")
    # Get website URL from user input
    url = st.text_input("Enter a URL:", "").strip()
    if st.button("Proceed"):
        # Fix: strip() above keeps a whitespace-only entry from slipping
        # past this emptiness check and reaching requests as a bad URL.
        if not url:
            st.warning("URL is empty.")
        else:
            extract_text(url)
def extract_text(url):
    """Fetch *url*, extract its readable text, and render it in the app.

    Acts as the UI error boundary: any unexpected failure is shown via
    st.error instead of crashing the Streamlit script.
    """
    try:
        # Show a spinner while the page is downloaded and parsed.
        with st.spinner("Loading website data..."):
            content = get_website_text(url)
        st.subheader("Website Content:")
        if not content:
            # get_website_text returns None when the request failed.
            st.error("Error: Could not extract content.")
        else:
            st.write(content)
    except Exception as e:
        st.error(f"Error: {e}")
def get_website_text(url):
    """Download *url* and return its visible text, or None on request failure.

    Args:
        url: The address to fetch over HTTP(S).

    Returns:
        A newline-joined string of all whitespace-stripped text fragments
        in the page, or None if the HTTP request failed (the failure is
        reported to the user via st.error).
    """
    try:
        # Fix: requests has NO default timeout, so an unresponsive server
        # would hang the app forever; bound the wait explicitly.
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # Will raise an exception for bad responses (4xx, 5xx)
        # Parse the HTML content with BeautifulSoup
        soup = BeautifulSoup(response.text, 'html.parser')
        # stripped_strings yields every text node with surrounding
        # whitespace/newlines removed.
        texts = soup.stripped_strings
        return '\n'.join(texts)  # Join all text pieces into a single string
    except requests.exceptions.RequestException as e:
        st.error(f"Error fetching URL: {e}")
        return None
# Script entry point: only run the app when executed directly.
if __name__ == "__main__":
    main()