Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,53 +1,54 @@
|
|
| 1 |
# app.py
|
|
|
|
| 2 |
import streamlit as st
|
| 3 |
import pandas as pd
|
| 4 |
-
import io
|
| 5 |
-
import subprocess
|
| 6 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
output_file = 'output.csv'
|
| 19 |
-
|
| 20 |
-
if st.button('Start Scraping'):
|
| 21 |
-
if url:
|
| 22 |
-
with st.spinner('Scraping in progress...'):
|
| 23 |
-
stdout, stderr = run_spider(url, output_file)
|
| 24 |
-
|
| 25 |
-
if stderr:
|
| 26 |
-
st.error(f"An error occurred during scraping: {stderr}")
|
| 27 |
-
elif os.path.exists(output_file) and os.path.getsize(output_file) > 0:
|
| 28 |
-
st.success('Scraping completed!')
|
| 29 |
-
# Read the CSV file
|
| 30 |
-
df = pd.read_csv(output_file)
|
| 31 |
-
|
| 32 |
-
# Display the data
|
| 33 |
-
st.write(df)
|
| 34 |
-
|
| 35 |
-
# Provide download link
|
| 36 |
-
csv = df.to_csv(index=False)
|
| 37 |
-
b = io.BytesIO(csv.encode())
|
| 38 |
-
st.download_button(
|
| 39 |
-
label="Download CSV",
|
| 40 |
-
data=b,
|
| 41 |
-
file_name="scraped_data.csv",
|
| 42 |
-
mime="text/csv"
|
| 43 |
-
)
|
| 44 |
-
else:
|
| 45 |
-
st.warning('No data was scraped. The website might be empty or the scraping pattern might need adjustment.')
|
| 46 |
-
if stdout:
|
| 47 |
-
st.text("Spider output:")
|
| 48 |
-
st.text(stdout)
|
| 49 |
-
else:
|
| 50 |
-
st.error('Please enter a valid URL')
|
| 51 |
-
|
| 52 |
-
if __name__ == '__main__':
|
| 53 |
-
main()
|
|
|
|
| 1 |
# app.py
|
| 2 |
+
|
| 3 |
import streamlit as st
|
| 4 |
import pandas as pd
|
|
|
|
|
|
|
| 5 |
import os
|
| 6 |
+
from scraper import GamePriceSpider
|
| 7 |
+
from scrapy.crawler import CrawlerProcess
|
| 8 |
+
|
| 9 |
+
# Function to run the Scrapy spider
@st.cache_data
def run_scrapy_spider():
    """Run GamePriceSpider once and export the results to game_data.csv.

    NOTE(review): Scrapy's Twisted reactor cannot be restarted inside the
    same process, so a second process.start() would raise
    ReactorNotRestartable. The @st.cache_data decorator short-circuits
    repeat calls (the cached return value is just None), which papers over
    that limitation — but it also means the data is scraped at most once
    per session; clear the Streamlit cache to re-scrape.
    """
    process = CrawlerProcess(settings={
        # Use the modern FEEDS setting — FEED_FORMAT/FEED_URI are
        # deprecated since Scrapy 2.1. 'overwrite' prevents the local-file
        # feed from appending a duplicate header and stale rows when
        # game_data.csv already exists from an earlier run.
        'FEEDS': {
            'game_data.csv': {'format': 'csv', 'overwrite': True},
        },
    })

    # Run the spider; start() blocks until the crawl finishes.
    process.crawl(GamePriceSpider)
    process.start()
|
| 20 |
+
|
| 21 |
+
# Load the scraped CSV data
def load_data():
    """Return the scraped prices as a DataFrame, or None if no CSV exists yet."""
    csv_path = 'game_data.csv'
    if not os.path.exists(csv_path):
        # Nothing scraped so far — the caller shows an info message instead.
        return None
    return pd.read_csv(csv_path)
|
| 27 |
+
|
| 28 |
+
# Streamlit app layout
st.title("B2B Game Marketplace - Real-Time Game Price Scraping")

st.write("""
This application scrapes real-time game prices from a target website and converts it into a CSV dataset for the B2B game marketplace.
""")

# Kick off the crawl on demand; the spinner covers the blocking crawl.
if st.button('Run Scraping'):
    with st.spinner('Scraping game prices...'):
        run_scrapy_spider()
    st.success('Scraping completed!')

# Display scraped game price data
if (data := load_data()) is not None:
    st.write("### Scraped Game Prices", data.head())

    # Convert to CSV for download
    csv = data.to_csv(index=False)
    st.download_button(
        label="Download Game Prices as CSV",
        data=csv,
        file_name='game_data.csv',
        mime='text/csv',
    )
else:
    st.info('No data available. Please run the scraping first.')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|