ositamiles committed on
Commit
f525d2f
·
verified ·
1 Parent(s): 09db500

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -48
app.py CHANGED
@@ -1,53 +1,54 @@
1
  # app.py
 
2
  import streamlit as st
3
  import pandas as pd
4
- import io
5
- import subprocess
6
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
def run_spider(url, output_file):
    """Run the standalone spider script in a subprocess.

    Parameters:
        url: the page the spider should scrape.
        output_file: path where the spider is expected to write its CSV.

    Returns:
        (stdout, stderr): captured text output of the spider process.
    """
    import sys  # local import: keeps this fix self-contained in the block

    spider_script = 'spider.py'
    # Use the current interpreter rather than the bare name 'python':
    # on many systems only 'python3' exists, and inside a virtualenv
    # sys.executable is the only interpreter guaranteed to see the
    # project's installed packages.
    result = subprocess.run(
        [sys.executable, spider_script, url, output_file],
        capture_output=True,
        text=True,
    )
    return result.stdout, result.stderr
12
-
13
def main():
    """Streamlit UI: take a URL, run the spider, then show and offer the CSV."""
    st.title('Web Scraper and CSV Generator')

    # URL input
    url = st.text_input('Enter the URL to scrape:', 'https://example.com')
    output_file = 'output.csv'

    # Guard clauses: do nothing until the button is pressed, and require a URL.
    if not st.button('Start Scraping'):
        return
    if not url:
        st.error('Please enter a valid URL')
        return

    with st.spinner('Scraping in progress...'):
        stdout, stderr = run_spider(url, output_file)

    if stderr:
        st.error(f"An error occurred during scraping: {stderr}")
    elif os.path.exists(output_file) and os.path.getsize(output_file) > 0:
        st.success('Scraping completed!')

        # Read the CSV file and display the data
        df = pd.read_csv(output_file)
        st.write(df)

        # Provide download link
        csv = df.to_csv(index=False)
        b = io.BytesIO(csv.encode())
        st.download_button(
            label="Download CSV",
            data=b,
            file_name="scraped_data.csv",
            mime="text/csv"
        )
    else:
        st.warning('No data was scraped. The website might be empty or the scraping pattern might need adjustment.')
        if stdout:
            st.text("Spider output:")
            st.text(stdout)
51
-
52
# Script entry point: build the Streamlit UI when run directly
# (e.g. via `streamlit run app.py`); importing the module has no side effects.
if __name__ == '__main__':
    main()
 
1
  # app.py
2
+
3
  import streamlit as st
4
  import pandas as pd
 
 
5
  import os
6
+ from scraper import GamePriceSpider
7
+ from scrapy.crawler import CrawlerProcess
8
+
9
# Function to run the Scrapy spider
@st.cache_data
def run_scrapy_spider():
    """Run GamePriceSpider once and export its items to game_data.csv.

    NOTE(review): Scrapy's Twisted reactor cannot be started twice in one
    process, so an uncached second call would raise ReactorNotRestartable;
    st.cache_data appears to paper over that by replaying the first run —
    confirm this is the intended lifecycle for repeated button clicks.
    """
    process = CrawlerProcess(settings={
        # FEED_FORMAT / FEED_URI are deprecated since Scrapy 2.1; the FEEDS
        # dict is the supported replacement. 'overwrite': True prevents a
        # re-run from appending duplicate rows to the same CSV file.
        'FEEDS': {
            'game_data.csv': {'format': 'csv', 'overwrite': True},
        },
    })

    # Run the spider
    process.crawl(GamePriceSpider)
    process.start()  # The script will block here until the spider finishes
20
+
21
# Load the scraped CSV data
def load_data():
    """Return the scraped results as a DataFrame, or None if no CSV exists yet."""
    csv_path = 'game_data.csv'
    if not os.path.exists(csv_path):
        return None
    return pd.read_csv(csv_path)
27
+
28
# Streamlit app layout
st.title("B2B Game Marketplace - Real-Time Game Price Scraping")

st.write("""
This application scrapes real-time game prices from a target website and converts it into a CSV dataset for the B2B game marketplace.
""")

if st.button('Run Scraping'):
    with st.spinner('Scraping game prices...'):
        run_scrapy_spider()
    st.success('Scraping completed!')

# Display scraped game price data (guard-style: handle the empty case first)
data = load_data()
if data is None:
    st.info('No data available. Please run the scraping first.')
else:
    st.write("### Scraped Game Prices", data.head())

    # Convert to CSV for download
    csv = data.to_csv(index=False)
    st.download_button(
        label="Download Game Prices as CSV",
        data=csv,
        file_name='game_data.csv',
        mime='text/csv',
    )