ositamiles committed on
Commit
d445fc3
·
verified ·
1 Parent(s): 08ceabd

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -0
app.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import scrapy
3
+ from scrapy.crawler import CrawlerProcess
4
+ import pandas as pd
5
+ import io
6
+
7
class MySpider(scrapy.Spider):
    """Spider that collects the text of every <p> tag on a single page.

    The target page is supplied at crawl time via the ``start_url``
    keyword argument (see ``run_spider``).
    """

    name = 'myspider'

    def __init__(self, start_url=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Guard against a missing URL: the original put [None] into
        # start_urls, which crashes the scheduler with a confusing
        # error. Start with no URLs instead.
        self.start_urls = [start_url] if start_url else []

    def parse(self, response):
        """Yield one ``{'text': ...}`` item per <p> text node found."""
        for p in response.css('p::text'):
            yield {'text': p.get()}
19
+
20
def run_spider(url):
    """Crawl *url* with ``MySpider`` and write items to ``output.csv``.

    Blocks until the crawl finishes.

    NOTE: Twisted's reactor cannot be restarted, so this can only be
    called once per Python process — a second button press in the same
    Streamlit session will raise ``ReactorNotRestartable``.
    """
    process = CrawlerProcess(settings={
        # FEED_FORMAT / FEED_URI are deprecated since Scrapy 2.1; the
        # FEEDS setting replaces them. 'overwrite' ensures rows from a
        # previous run are not appended to / left behind in output.csv.
        'FEEDS': {
            'output.csv': {'format': 'csv', 'overwrite': True},
        },
    })
    process.crawl(MySpider, start_url=url)
    process.start()  # blocks until crawling is finished
27
+
28
def main():
    """Streamlit UI: scrape a URL and offer the result as a CSV download."""
    st.title('Web Scraper and CSV Generator')

    # URL input
    url = st.text_input('Enter the URL to scrape:', 'https://example.com')

    if st.button('Start Scraping'):
        # Guard clause instead of the original if/else pyramid.
        if not url:
            st.error('Please enter a valid URL')
            return

        with st.spinner('Scraping in progress...'):
            run_spider(url)
        st.success('Scraping completed!')

        # The spider may have produced nothing (bad URL, no <p> tags),
        # in which case output.csv is missing or empty — handle both
        # instead of crashing the app with an unhandled exception.
        try:
            df = pd.read_csv('output.csv')
        except (FileNotFoundError, pd.errors.EmptyDataError):
            st.warning('No data was scraped from the given URL.')
            return

        # Display the data
        st.write(df)

        # Provide download link. download_button accepts str/bytes
        # directly, so the io.BytesIO wrapper was unnecessary.
        st.download_button(
            label="Download CSV",
            data=df.to_csv(index=False),
            file_name="scraped_data.csv",
            mime="text/csv"
        )
57
+
58
# Script entry point: launch the Streamlit app only when run directly,
# not when this module is imported.
if __name__ == '__main__':
    main()