Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,53 +1,54 @@
|
|
| 1 |
# app.py
|
|
|
|
| 2 |
import streamlit as st
|
| 3 |
import pandas as pd
|
| 4 |
-
import io
|
| 5 |
-
import subprocess
|
| 6 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
output_file = 'output.csv'
|
| 19 |
-
|
| 20 |
-
if st.button('Start Scraping'):
|
| 21 |
-
if url:
|
| 22 |
-
with st.spinner('Scraping in progress...'):
|
| 23 |
-
stdout, stderr = run_spider(url, output_file)
|
| 24 |
-
|
| 25 |
-
if stderr:
|
| 26 |
-
st.error(f"An error occurred during scraping: {stderr}")
|
| 27 |
-
elif os.path.exists(output_file) and os.path.getsize(output_file) > 0:
|
| 28 |
-
st.success('Scraping completed!')
|
| 29 |
-
# Read the CSV file
|
| 30 |
-
df = pd.read_csv(output_file)
|
| 31 |
-
|
| 32 |
-
# Display the data
|
| 33 |
-
st.write(df)
|
| 34 |
-
|
| 35 |
-
# Provide download link
|
| 36 |
-
csv = df.to_csv(index=False)
|
| 37 |
-
b = io.BytesIO(csv.encode())
|
| 38 |
-
st.download_button(
|
| 39 |
-
label="Download CSV",
|
| 40 |
-
data=b,
|
| 41 |
-
file_name="scraped_data.csv",
|
| 42 |
-
mime="text/csv"
|
| 43 |
-
)
|
| 44 |
-
else:
|
| 45 |
-
st.warning('No data was scraped. The website might be empty or the scraping pattern might need adjustment.')
|
| 46 |
-
if stdout:
|
| 47 |
-
st.text("Spider output:")
|
| 48 |
-
st.text(stdout)
|
| 49 |
-
else:
|
| 50 |
-
st.error('Please enter a valid URL')
|
| 51 |
-
|
| 52 |
-
if __name__ == '__main__':
|
| 53 |
-
main()
|
|
|
|
| 1 |
# app.py
|
| 2 |
+
|
| 3 |
import streamlit as st
|
| 4 |
import pandas as pd
|
|
|
|
|
|
|
| 5 |
import os
|
| 6 |
+
from scraper import GamePriceSpider
|
| 7 |
+
from scrapy.crawler import CrawlerProcess
|
| 8 |
+
|
| 9 |
+
# Function to run the Scrapy spider
@st.cache_data
def run_scrapy_spider():
    """Run GamePriceSpider once and export the results to game_data.csv.

    NOTE(review): Scrapy's Twisted reactor cannot be restarted inside the
    same process, so a second process.start() would raise
    ReactorNotRestartable. The @st.cache_data decorator short-circuits
    repeat calls (the cached return value is just None), which papers over
    that limitation — but it also means the data is scraped at most once
    per session; clear the Streamlit cache to re-scrape.
    """
    process = CrawlerProcess(settings={
        # Use the modern FEEDS setting — FEED_FORMAT/FEED_URI are
        # deprecated since Scrapy 2.1. 'overwrite' prevents the local-file
        # feed from appending a duplicate header and stale rows when
        # game_data.csv already exists from an earlier run.
        'FEEDS': {
            'game_data.csv': {'format': 'csv', 'overwrite': True},
        },
    })

    # Run the spider; start() blocks until the crawl finishes.
    process.crawl(GamePriceSpider)
    process.start()
|
| 20 |
+
|
| 21 |
+
# Load the scraped CSV data
def load_data():
    """Return the scraped prices as a DataFrame, or None if no CSV exists yet."""
    csv_path = 'game_data.csv'
    if not os.path.exists(csv_path):
        # Nothing scraped so far — the caller shows an info message instead.
        return None
    return pd.read_csv(csv_path)
|
| 27 |
+
|
| 28 |
+
# Streamlit app layout
st.title("B2B Game Marketplace - Real-Time Game Price Scraping")

st.write("""
This application scrapes real-time game prices from a target website and converts it into a CSV dataset for the B2B game marketplace.
""")

# Kick off the crawl on demand; the spinner covers the blocking crawl.
if st.button('Run Scraping'):
    with st.spinner('Scraping game prices...'):
        run_scrapy_spider()
    st.success('Scraping completed!')

# Display scraped game price data
if (data := load_data()) is not None:
    st.write("### Scraped Game Prices", data.head())

    # Convert to CSV for download
    csv = data.to_csv(index=False)
    st.download_button(
        label="Download Game Prices as CSV",
        data=csv,
        file_name='game_data.csv',
        mime='text/csv',
    )
else:
    st.info('No data available. Please run the scraping first.')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|