"""Streamlit app that scrapes the text lines or the links of a web page."""

import tempfile

import pandas as pd
import requests
import streamlit as st
import urllib3
from bs4 import BeautifulSoup

# Upper bound (seconds) for the HTTP request so an unresponsive host cannot
# block the Streamlit script run indefinitely.
REQUEST_TIMEOUT_SECONDS = 10.0


def _show_table_with_download(rows, label, file_name):
    """Render *rows* as a table and offer the same rows as a CSV download.

    Args:
        rows: List of single-key dicts, one per table row.
        label: Caption shown on the download button.
        file_name: File name suggested to the browser for the download.
    """
    st.table(rows)

    # Serialise in memory: no temporary file is left behind on disk and no
    # file handle has to be closed afterwards.
    csv_bytes = pd.DataFrame(rows).to_csv(index=False).encode("utf-8")

    st.download_button(
        label=label,
        data=csv_bytes,
        file_name=file_name,
        mime="text/csv",
    )


def simple_web_scraper(url, scrape_option):
    """Fetch *url* and display either its text lines or its links.

    The result is rendered as a Streamlit table together with a button that
    downloads the same rows as CSV. Failures are reported in the page via
    ``st.write`` instead of being raised.

    Args:
        url: Address of the page to fetch.
        scrape_option: ``'data'`` to list every non-empty line of the page
            text, ``'links'`` to list the ``href`` of every ``<a>`` tag.
    """
    # Guard against an empty / whitespace-only URL before touching the network.
    if not url or not url.strip():
        st.write('Please enter a URL to scrape.')
        return
    url = url.strip()

    # Broad catch on purpose: this is the UI boundary, and any network or
    # parsing failure should be shown to the user rather than crash the app.
    try:
        http = urllib3.PoolManager()
        response = http.request('GET', url, timeout=REQUEST_TIMEOUT_SECONDS)

        if response.status != 200:
            st.write(f'Error: {response.status}')
            return

        soup = BeautifulSoup(response.data, 'html.parser')

        if scrape_option == 'data':
            # One row per non-blank line of the page's visible text.
            all_text = soup.get_text()
            table_data = [
                {'Data': line.strip()}
                for line in all_text.split('\n')
                if line.strip()
            ]
            _show_table_with_download(
                table_data, "Download Data as CSV", "scraped_data.csv"
            )
        elif scrape_option == 'links':
            # One row per anchor that actually carries an href.
            table_data = [
                {'Links': link.get('href')}
                for link in soup.find_all('a')
                if link.get('href')
            ]
            _show_table_with_download(
                table_data, "Download Links as CSV", "scraped_links.csv"
            )
        else:
            st.write('Invalid scrape option. Please choose "data" or "links".')
    except Exception as e:
        st.write(f'An error occurred: {e}')


# Streamlit UI
st.title("Web Scraping Tool")
website_url = st.text_input("Enter the URL to scrape:")
scrape_option = st.selectbox("Select what to scrape:", ['data', 'links'])

if st.button("Scrape"):
    simple_web_scraper(website_url, scrape_option)