Spaces:
Build error
Build error
| import streamlit as st | |
| import pandas as pd | |
| import os | |
| import tempfile | |
| from scrapy import IGDBSpider # Make sure to use the correct spider name | |
| from scrapy.crawler import CrawlerRunner | |
| from twisted.internet import reactor, defer | |
| from scrapy.utils.log import configure_logging | |
def run_scrapy_spider():
    """Run the IGDB spider once and export its items to a temporary CSV file.

    The crawl is driven by a ``CrawlerRunner`` on the Twisted reactor. This
    call blocks until the crawl has finished (or failed) and the reactor has
    been stopped.

    Returns:
        str: Path of the temporary ``.csv`` file receiving the feed export.
        The file is created with ``delete=False``, so the caller is
        responsible for removing it once the data has been read.

    Notes:
        * A Twisted reactor cannot be restarted. Calling this function a
          second time in the same process raises ``ReactorNotRestartable``.
        * NOTE(review): ``IGDBSpider`` is imported from the top-level
          ``scrapy`` package at the top of this file. Scrapy itself does not
          export a spider by that name, so the import presumably has to point
          at the project module that defines the spider -- confirm.
    """
    # CrawlerRunner (unlike CrawlerProcess) does not set up logging on its
    # own, so Scrapy's logging is configured explicitly here.
    configure_logging()

    # Only the path is needed: reserve a unique file name and close the
    # handle right away so the feed exporter can write to it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as temp_file:
        temp_file_path = temp_file.name

    runner = CrawlerRunner(settings={
        'FEED_FORMAT': 'csv',
        'FEED_URI': temp_file_path,
    })

    # Without inlineCallbacks, calling crawl() would merely create a generator
    # object: the crawl would never start and the reactor would never stop.
    @defer.inlineCallbacks
    def crawl():
        try:
            yield runner.crawl(IGDBSpider)
        finally:
            # Stop the reactor even when the crawl fails; otherwise
            # reactor.run() below would block forever.
            reactor.stop()

    reactor.callWhenRunning(crawl)
    # Streamlit executes the script outside the main thread, where signal
    # handlers cannot be installed, so the reactor must not try to.
    reactor.run(installSignalHandlers=False)  # Blocks until reactor.stop()
    return temp_file_path
def load_data(file_path):
    """Load the scraped CSV data into a DataFrame.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        pandas.DataFrame | None: The parsed data, or ``None`` when the file
        does not exist, is empty, or contains no parseable columns.
    """
    # Missing or zero-byte file: nothing was exported.
    if not os.path.exists(file_path) or os.path.getsize(file_path) == 0:
        return None
    try:
        return pd.read_csv(file_path)
    except pd.errors.EmptyDataError:
        # A non-empty file without any columns (e.g. only blank lines) is
        # reported the same way as an empty file instead of crashing.
        return None
# Streamlit app layout: a title, a short description and a single button that
# runs the scraper, previews the result and offers it as a CSV download.
st.title("B2B Game Marketplace - Recently Released Games Scraping")
st.write("""
This application scrapes recently released games from IGDB and converts the data into a CSV dataset for the B2B game marketplace.
""")

if st.button('Run Scraping'):
    with st.spinner('Scraping recently released games...'):
        file_path = run_scrapy_spider()
    st.success('Scraping completed!')

    # Read the scraped data, then remove the temporary file: it was created
    # with delete=False and would otherwise pile up with every button click.
    try:
        data = load_data(file_path)
    finally:
        try:
            os.remove(file_path)
        except OSError:
            # Best-effort cleanup; a leftover temp file must not break the app.
            pass

    if data is not None and not data.empty:
        # Show a preview only; the download below contains the full dataset.
        st.write("### Scraped Game Data", data.head())

        # Convert to CSV for download
        csv = data.to_csv(index=False)
        st.download_button(
            label="Download Game Data as CSV",
            data=csv,
            file_name='recent_games.csv',
            mime='text/csv',
        )
    else:
        st.info('No data available. Please run the scraping again.')
else:
    st.info('Please click the button to start scraping.')