|
|
import json
import time
from urllib.parse import quote

import pandas as pd
import plotly.graph_objs as go
import requests
import streamlit as st
from pytrends.exceptions import TooManyRequestsError
from pytrends.request import TrendReq
|
|
|
|
|
|
|
|
# Page heading rendered at the top of the Streamlit app.
st.title("PCHOME 和 MOMO 商品價格爬蟲分析與趨勢分析")

# Search endpoints. The PChome value is a URL prefix that ends in "?q=", so the
# search keyword is appended directly; the MOMO value is the URL that receives
# a JSON POST body.
pchome_base_url = "https://ecshweb.pchome.com.tw/search/v3.3/all/results?q="
momo_url = "https://apisearch.momoshop.com.tw/momoSearchCloud/moec/textSearch"
|
|
|
|
|
|
|
|
def crawl_pchome(keyword, num_pages):
    """Collect product names and prices from PChome search results.

    Requests pages 1..num_pages of the search endpoint at ``pchome_base_url``
    and gathers the entries found under the ``prods`` key of each JSON reply.

    Args:
        keyword: Search term; percent-encoded before being placed in the URL.
        num_pages: Number of result pages to request, starting at page 1.

    Returns:
        A DataFrame with the columns ``name`` and ``price``. When no page
        yielded any product the frame is empty but still has both columns,
        so callers can test ``.empty`` instead of handling a ``KeyError``.

    Raises:
        requests.RequestException: If a request fails, times out, or comes
            back with an HTTP error status.
        ValueError: If a response body is not valid JSON.
        KeyError: If products are returned without ``name``/``price`` fields.
    """
    columns = ["name", "price"]
    frames = []
    for page in range(1, num_pages + 1):
        # Encode the keyword so characters such as "&" or "#" cannot alter
        # the query string.
        encoded_keyword = quote(str(keyword), safe="")
        url = f"{pchome_base_url}{encoded_keyword}&page={page}&sort=sale/dc"
        # A timeout keeps an unresponsive server from hanging the app.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        products = json.loads(response.content).get("prods") or []
        if not products:
            # Assumes that once a page comes back empty, later pages are
            # empty too, so further requests are skipped.
            break
        frames.append(pd.DataFrame(products))
        if page < num_pages:
            time.sleep(1)  # pause between consecutive requests

    if not frames:
        return pd.DataFrame(
            {
                "name": pd.Series(dtype="object"),
                "price": pd.Series(dtype="float64"),
            }
        )
    # Concatenate once at the end instead of growing a frame page by page.
    return pd.concat(frames, ignore_index=True)[columns]
|
|
|
|
|
|
|
|
def _parse_momo_price(raw_price):
    """Turn a MOMO price field into a float, or 0.0 if it cannot be parsed.

    Everything from the first "(" onward is discarded, then "," and "$"
    characters are removed before the conversion.
    """
    cleaned = str(raw_price).split('(')[0].replace(',', '').replace('$', '')
    try:
        return float(cleaned)
    except ValueError:
        return 0.0


def crawl_momo(keyword, num_pages):
    """Collect product names and prices from the MOMO search API.

    POSTs one JSON search request per page to ``momo_url`` and reads the
    products from ``rtnSearchData.goodsInfoList`` in each reply. Pages that
    do not answer with HTTP 200, or whose body is not valid JSON, are
    skipped (best effort).

    Args:
        keyword: Search term sent as ``searchValue``.
        num_pages: Number of result pages to request, starting at page 1.

    Returns:
        A DataFrame with one row per product and the columns ``name`` and
        ``price`` (float; 0.0 when the price text could not be parsed). When
        no product was found the frame is empty and has no columns.

    Raises:
        requests.RequestException: If a request fails or times out.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36"
    }
    product_list = []
    for page in range(1, num_pages + 1):
        payload = {
            "host": "momoshop",
            "flag": "searchEngine",
            "data": {
                "searchValue": keyword,
                "curPage": str(page),
                "priceS": "0",
                "priceE": "9999999",
                "searchType": "1",
            },
        }
        # A timeout keeps an unresponsive server from hanging the app.
        response = requests.post(momo_url, headers=headers, json=payload, timeout=10)
        if response.status_code == 200:
            try:
                data_from_api = response.json()
            except ValueError:
                # A 200 reply whose body is not JSON is treated like a page
                # without products.
                data_from_api = {}
            # "or" guards against the API sending null for either field, which
            # dict.get's default would not cover.
            search_data = data_from_api.get('rtnSearchData') or {}
            products = search_data.get('goodsInfoList') or []
            product_list.extend(
                {
                    'name': product.get('goodsName', ''),
                    'price': _parse_momo_price(product.get('goodsPrice', '')),
                }
                for product in products
            )
        if page < num_pages:
            time.sleep(1)  # pause between consecutive requests
    return pd.DataFrame(product_list)
|
|
|
|
|
|
|
|
def get_trends_data(keyword, start_date, end_date):
    """Fetch Google Trends interest-over-time data for *keyword* in Taiwan.

    The query is attempted up to three times; only a
    ``TooManyRequestsError`` triggers a retry, after a 60-second wait.

    Args:
        keyword: Search term to query; also the name of the value column in
            the returned frame as used by the caller.
        start_date: Start of the time window; its ``str()`` form is placed
            in the timeframe string.
        end_date: End of the time window; formatted the same way.

    Returns:
        The interest-over-time DataFrame with its index reset to a regular
        column, or ``None`` when no data could be obtained (an error message
        is shown through Streamlit in that case).
    """
    max_attempts = 3
    retry_delay_seconds = 60

    # NOTE(review): pytrends documents tz as an offset in minutes and gives
    # 360 as the US CST example; confirm this is intended for geo='TW'.
    pytrends = TrendReq(hl='en-US', tz=360)
    kw_list = [keyword]
    timeframe = f'{start_date} {end_date}'

    for attempt in range(1, max_attempts + 1):
        try:
            pytrends.build_payload(kw_list, cat=0, timeframe=timeframe, geo='TW', gprop='')
            trends_data = pytrends.interest_over_time()
        except TooManyRequestsError:
            if attempt == max_attempts:
                # No retry follows the last attempt, so neither announce one
                # nor wait for it.
                break
            st.warning("Google 趨勢請求過多,正在等待重試...")
            time.sleep(retry_delay_seconds)
            continue

        if not trends_data.empty:
            return trends_data.reset_index()
        # An empty frame is treated as a final answer: repeating the same
        # query immediately would only add to the request count.
        break

    st.error("無法獲取 Google 趨勢數據,請稍後再試。")
    return None
|
|
|
|
|
|
|
|
# Search parameters entered by the user.
keyword = st.text_input("請輸入關鍵字:", "平板")
num_pages = st.number_input(
    "請輸入要爬取的頁數:",
    min_value=1,
    max_value=100,
    value=1,
)

# Date window passed to the Google Trends query.
start_date = st.date_input(
    "選擇開始日期:",
    value=pd.to_datetime("2023-01-01"),
)
end_date = st.date_input(
    "選擇結束日期:",
    value=pd.to_datetime("2023-12-31"),
)
|
|
|
|
|
|
|
|
# Main action: crawl both shops, show the merged table, the price-distribution
# charts, summary statistics and the Google Trends chart, then report the
# elapsed time.
if st.button("開始爬取"):
    start_time = time.time()

    # The progress bars only mark "started" and "finished"; the crawl
    # functions do not report intermediate progress.
    st.subheader("爬取 PCHOME 資料")
    pchome_progress = st.progress(0)
    pchome_data = crawl_pchome(keyword, num_pages)
    pchome_progress.progress(100)

    st.subheader("爬取 MOMO 資料")
    momo_progress = st.progress(0)
    momo_data = crawl_momo(keyword, num_pages)
    momo_progress.progress(100)

    if pchome_data.empty and momo_data.empty:
        st.error("查無商品")
    else:
        # assign() builds new frames, so the crawler results (which may be
        # column slices of a larger frame) are never written to in place.
        pchome_data = pchome_data.assign(source='PCHOME')
        momo_data = momo_data.assign(source='MOMO')

        # Leave out an empty side so it cannot influence the merged dtypes.
        non_empty_frames = [
            frame for frame in (pchome_data, momo_data) if not frame.empty
        ]
        combined_data = pd.concat(non_empty_frames, ignore_index=True)
        # Make sure prices are numeric before sorting and binning; values
        # that cannot be converted become NaN.
        combined_data['price'] = pd.to_numeric(combined_data['price'], errors='coerce')
        combined_data = combined_data.sort_values('price', ascending=False).reset_index(drop=True)

        st.subheader("爬取結果:")
        st.write(combined_data)

        # The CSV is produced before the price_range helper column is added,
        # so the download holds only name, price and source.
        csv = combined_data.to_csv(index=False)
        st.download_button(
            label="下載 CSV 檔案",
            data=csv,
            file_name=f"{keyword}_price_data.csv",
            mime="text/csv",
        )

        # Bucket prices into 1000-wide, left-closed ranges [0, 1000),
        # [1000, 2000), ... so that a price of exactly 0 is counted. The top
        # edge is the first multiple of 1000 strictly above the highest
        # price, which keeps that price inside the last bucket.
        max_price = combined_data['price'].max()
        if pd.isna(max_price):
            top_edge = 1000
        else:
            top_edge = (int(max_price) // 1000 + 1) * 1000
        price_bins = list(range(0, top_edge + 1, 1000))
        combined_data['price_range'] = pd.cut(
            combined_data['price'], bins=price_bins, right=False
        )
        price_range_counts = combined_data['price_range'].value_counts().sort_index()
        range_labels = price_range_counts.index.astype(str)
        range_counts = price_range_counts.values

        st.subheader("價格分布圖 - 圓餅圖")
        fig_pie = go.Figure(data=[go.Pie(labels=range_labels, values=range_counts)])
        fig_pie.update_layout(
            title='價格分布 - 圓餅圖',
            width=800,
            height=800,
        )
        st.plotly_chart(fig_pie)

        st.subheader("價格分布圖 - 折線圖")
        fig_line = go.Figure(
            data=[go.Scatter(x=range_labels, y=range_counts, mode='lines+markers')]
        )
        fig_line.update_layout(
            title='價格分布 - 折線圖',
            xaxis_title='價格區間',
            yaxis_title='商品數量',
            width=800,
            height=800,
        )
        st.plotly_chart(fig_line)

        st.subheader("價格分布圖 - 南丁格爾玫瑰圖")
        fig_rose = go.Figure(
            data=[
                go.Barpolar(
                    r=range_counts,
                    theta=range_labels,
                    marker=dict(color=range_counts, colorscale='Viridis'),
                )
            ]
        )
        fig_rose.update_layout(
            title='價格分布 - 南丁格爾玫瑰圖',
            width=800,
            height=800,
        )
        st.plotly_chart(fig_rose)

        st.subheader("價格分布圖 - 環形圖")
        fig_donut = go.Figure(
            data=[go.Pie(labels=range_labels, values=range_counts, hole=0.4)]
        )
        fig_donut.update_layout(
            title='價格分布 - 環形圖',
            width=800,
            height=800,
        )
        st.plotly_chart(fig_donut)

        st.subheader("統計數據")
        st.write(f"平均價格: {combined_data['price'].mean():.2f}")
        st.write(f"最高價格: {combined_data['price'].max():.2f}")
        st.write(f"最低價格: {combined_data['price'].min():.2f}")

        if start_date > end_date:
            # A reversed window is not sent to Google Trends at all.
            st.warning("開始日期晚於結束日期,已略過 Google 趨勢分析")
        else:
            trends_data = get_trends_data(keyword, start_date, end_date)
            if trends_data is not None:
                st.subheader("Google 搜尋趨勢")
                fig = go.Figure()
                fig.add_trace(
                    go.Scatter(
                        x=trends_data['date'],
                        y=trends_data[keyword],
                        mode='lines',
                        name='趨勢指數',
                    )
                )
                fig.update_layout(
                    title=f'Google 搜尋趨勢: {keyword}',
                    xaxis_title='日期',
                    yaxis_title='趨勢指數',
                )
                st.plotly_chart(fig)
            else:
                st.warning("無法獲取 Google 趨勢數據")

    end_time = time.time()
    execution_time = end_time - start_time
    st.write(f"執行時間: {execution_time:.2f} 秒")
|
|
|