Spaces:
Sleeping
Sleeping
| # -*- coding: utf-8 -*- | |
| import streamlit as st | |
| import requests | |
| import json | |
| import pandas as pd | |
| import time | |
| import matplotlib.pyplot as plt | |
| import matplotlib.font_manager as fm | |
| import matplotlib as mpl | |
| from io import BytesIO | |
# Standard-library helpers used below (kept next to the file's import block).
from pathlib import Path
from urllib.parse import quote

# Streamlit app: search PChome for a keyword, collect the products of the
# requested number of result pages, then show price statistics and a plot.
st.title("PCHOME 商品價格爬蟲分析")

# Prompt user for the search keyword and the number of result pages to fetch.
keyword = st.text_input("請輸入關鍵字:", "平板")
num_pages = st.number_input("請輸入要爬取的頁數:", min_value=1, max_value=100, value=1)

# Search endpoint; the (URL-encoded) keyword and paging options are appended.
base_url = 'https://ecshweb.pchome.com.tw/search/v3.3/all/results?q='

# Seconds to wait for any single HTTP response before giving up.
REQUEST_TIMEOUT_SECONDS = 30
# Pause between consecutive page requests so the site is not hammered.
PAGE_DELAY_SECONDS = 10
# Local cache of the CJK font used for the chart labels.
FONT_PATH = Path("TaipeiSansTCBeta-Regular.ttf")

# Data collection
if st.button("開始爬取"):
    start_time = time.time()
    page_frames = []
    progress = st.progress(0)

    # Escape reserved characters (&, #, +, /, spaces, ...) so the keyword
    # cannot break or extend the query string.
    encoded_keyword = quote(keyword, safe='')

    for i in range(1, num_pages + 1):
        url = f'{base_url}{encoded_keyword}&page={i}&sort=sale/dc'
        try:
            list_req = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
            list_req.raise_for_status()
            getdata = json.loads(list_req.content)
        except (requests.RequestException, ValueError) as err:
            # Network failure, HTTP error status, or a non-JSON body: report
            # it and continue with whatever pages were already collected.
            st.error(f'Failed to fetch page {i}: {err}')
            break

        prods = getdata.get('prods') if isinstance(getdata, dict) else None
        if not prods:
            # No product list in the response: nothing more to collect.
            progress.progress(1.0)
            break

        page_frames.append(pd.DataFrame(prods))
        progress.progress(i / num_pages)

        # Throttle only between requests; waiting after the last page would
        # just delay the results.
        if i < num_pages:
            time.sleep(PAGE_DELAY_SECONDS)

    if not page_frames:
        st.warning('No products were collected for this keyword.')
        st.stop()

    # Concatenate once, with a fresh 0..n-1 index: each page frame starts at
    # index 0, so keeping the per-page indices would stack every page on the
    # same x positions in the plot below.
    data = pd.concat(page_frames, ignore_index=True)

    # Check for null values
    st.write(f'Total null values: {data.isnull().sum().sum()}')

    # Data analysis
    df = data[["name", "price"]]
    mean_price = df["price"].mean()
    st.write(f'Mean price: {mean_price}')
    st.write(f'Max price: {df["price"].max()}')
    st.write(f'Min price: {df["price"].min()}')

    # Display the results
    st.write("爬取結果:", df)

    # Download (once) and register the custom font so the Chinese chart title
    # renders; fall back to matplotlib's default font if that fails.
    font_url = "https://drive.google.com/uc?id=1eGAsTN1HBpJAkeVM57_C7ccp7hbgSz3_&export=download"
    try:
        if not FONT_PATH.exists():
            font_response = requests.get(font_url, timeout=REQUEST_TIMEOUT_SECONDS)
            font_response.raise_for_status()
            FONT_PATH.write_bytes(font_response.content)
        fm.fontManager.addfont(str(FONT_PATH))
        mpl.rc('font', family='Taipei Sans TC Beta')
    except (requests.RequestException, OSError, RuntimeError) as err:
        # NOTE(review): RuntimeError is assumed to be what addfont raises for
        # an unreadable font file - confirm against the matplotlib version.
        st.warning(f'Could not load the custom font, using the default: {err}')

    # Visualization: first 70 products, with the mean price as a reference line.
    st.subheader("價格分布圖")
    fig, ax = plt.subplots(figsize=(15, 8))
    ax.plot(df.index[:70], df['price'][:70], 'o', color='skyblue', markersize=8)
    ax.set_title('PCHOME 電商網站上商品售價', fontsize=20, fontweight='bold')
    ax.axhline(y=mean_price, color='red', linestyle='--', linewidth=2, label=f'Mean Price: {mean_price:.2f}')
    ax.set_xlabel('Index', fontsize=14)
    ax.set_ylabel('Price', fontsize=14)
    ax.tick_params(axis='x', rotation=45, labelsize=12)
    ax.tick_params(axis='y', labelsize=12)
    ax.legend(fontsize=12, loc='upper left')
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    fig.tight_layout()
    st.pyplot(fig)
    # Release the figure; otherwise one figure per click stays registered
    # with pyplot for the lifetime of the server process.
    plt.close(fig)

    # Measure execution time (includes the throttling pauses between pages).
    end_time = time.time()
    execution_time = end_time - start_time
    st.write(f"Execution time: {execution_time:.2f} seconds")