Engineer786 committed on
Commit
a5513a7
·
verified ·
1 Parent(s): d146642

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -80
app.py DELETED
@@ -1,80 +0,0 @@
1
- import streamlit as st
2
- import requests
3
- from bs4 import BeautifulSoup
4
- import urllib3
5
- import pandas as pd
6
- import tempfile
7
-
8
def simple_web_scraper(url, scrape_option):
    """Fetch *url* and render either its visible text or its links in Streamlit.

    Parameters
    ----------
    url : str
        The web page to scrape.
    scrape_option : str
        'data'  -> every non-blank line of visible text, one table row each.
        'links' -> every href found in <a> tags.
        Anything else produces an on-page error message.

    Side effects: renders a table and a CSV download button via Streamlit.
    Failures (bad status, network errors, parse errors) are reported with
    st.write rather than raised, so the app page never shows a traceback.
    """
    try:
        # PoolManager gives connection pooling and SSL handling for the request.
        http = urllib3.PoolManager()
        response = http.request('GET', url)

        # urllib3 exposes the HTTP status as `.status` (not `.status_code`).
        if response.status != 200:
            st.write(f'Error: {response.status}')
            return

        soup = BeautifulSoup(response.data, 'html.parser')

        if scrape_option == 'data':
            # One table row per non-blank line of the page's visible text.
            all_text = soup.get_text()
            table_data = [{'Data': line.strip()}
                          for line in all_text.split('\n') if line.strip()]
            _show_table_with_download(table_data,
                                      "Download Data as CSV",
                                      "scraped_data.csv")
        elif scrape_option == 'links':
            # One table row per anchor that actually carries an href.
            links = soup.find_all('a')
            table_data = [{'Links': link.get('href')}
                          for link in links if link.get('href')]
            _show_table_with_download(table_data,
                                      "Download Links as CSV",
                                      "scraped_links.csv")
        else:
            st.write('Invalid scrape option. Please choose "data" or "links".')

    except Exception as e:
        # Broad catch is deliberate: this is the UI boundary and an on-page
        # message is preferred over a crashed Streamlit page.
        st.write(f'An error occurred: {e}')


def _show_table_with_download(table_data, label, file_name):
    """Render *table_data* as a Streamlit table with a CSV download button.

    Shared by both scrape branches (they previously duplicated this logic).
    The original wrote a NamedTemporaryFile(delete=False) and re-opened it
    without ever closing or removing it, leaking a handle and a file per
    click; DataFrame.to_csv() with no path returns the CSV text directly,
    so no temp file is needed at all.
    """
    st.table(table_data)
    csv_text = pd.DataFrame(table_data).to_csv(index=False)
    st.download_button(
        label=label,
        data=csv_text,
        file_name=file_name,
        mime="text/csv",
    )
73
-
74
# --- Streamlit page layout ---------------------------------------------
st.title("Web Scraping Tool")

# Collect the target URL and what to extract from it.
target_url = st.text_input("Enter the URL to scrape:")
scrape_option = st.selectbox("Select what to scrape:", ['data', 'links'])

# Run the scrape only on an explicit button press (Streamlit reruns the
# whole script on every widget interaction).
if st.button("Scrape"):
    simple_web_scraper(target_url, scrape_option)