# test01 / app.py
# Uploaded by KKingzor ("Update app.py", revision 4049afa verified).
import streamlit as st
import plotly.graph_objs as go
import requests
import json
import pandas as pd
import time
from pytrends.request import TrendReq
from pytrends.exceptions import TooManyRequestsError
# Streamlit app: scrape product prices from PCHOME and MOMO for a keyword,
# chart the price distribution, and plot Google search trends.
st.title("PCHOME 和 MOMO 商品價格爬蟲分析與趨勢分析")
# Base endpoints of the two shops' search APIs.
pchome_base_url = 'https://ecshweb.pchome.com.tw/search/v3.3/all/results?q='
momo_url = "https://apisearch.momoshop.com.tw/momoSearchCloud/moec/textSearch"
# Scraper for PCHOME search results.
def crawl_pchome(keyword, num_pages):
    """Scrape PCHOME search results for *keyword*.

    Args:
        keyword: Search term to query.
        num_pages: Number of result pages to fetch (1-based, inclusive).

    Returns:
        pandas.DataFrame with columns ["name", "price"]. When the search
        yields no products, an empty frame with those columns is returned
        so callers can safely do `.empty` checks and column selection.
    """
    frames = []
    for page in range(1, num_pages + 1):
        url = f'{pchome_base_url}{keyword}&page={page}&sort=sale/dc'
        response = requests.get(url)
        payload = response.json()
        # 'prods' is missing/None when a page has no results; the original
        # code indexed it directly and crashed with KeyError.
        products = payload.get('prods') or []
        if products:
            frames.append(pd.DataFrame(products))
        time.sleep(1)  # throttle so we don't hammer the API
    if not frames:
        # No products at all: return an empty frame with the expected
        # columns (selecting columns from a bare DataFrame would raise).
        return pd.DataFrame(columns=["name", "price"])
    # Concatenate once at the end instead of re-concatenating every page.
    alldata = pd.concat(frames)
    return alldata[["name", "price"]]
# Scraper for MOMO search results.
def crawl_momo(keyword, num_pages):
    """Query MOMO's search API page by page and return name/price rows.

    Pages that respond with a non-200 status are skipped silently.
    Unparseable prices fall back to 0, matching the original behavior.
    """
    request_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36"
    }
    rows = []
    for current_page in range(1, num_pages + 1):
        body = {
            "host": "momoshop",
            "flag": "searchEngine",
            "data": {
                "searchValue": keyword,
                "curPage": str(current_page),
                "priceS": "0",
                "priceE": "9999999",
                "searchType": "1"
            }
        }
        resp = requests.post(momo_url, headers=request_headers, json=body)
        if resp.status_code == 200:
            api_result = resp.json()
            goods = api_result.get('rtnSearchData', {}).get('goodsInfoList', [])
            for item in goods:
                # Prices arrive as display strings, e.g. "$1,299(折扣)";
                # keep only the leading numeric part.
                raw = str(item.get('goodsPrice', '')).split('(')[0]
                cleaned = raw.replace(',', '').replace('$', '')
                try:
                    numeric_price = float(cleaned)
                except ValueError:
                    numeric_price = 0
                rows.append({'name': item.get('goodsName', ''), 'price': numeric_price})
        time.sleep(1)  # throttle between pages
    return pd.DataFrame(rows)
# Fetch Google Trends interest-over-time data for a keyword.
def get_trends_data(keyword, start_date, end_date):
    """Fetch Google Trends 'interest over time' for *keyword* (geo=TW).

    Retries up to three times, sleeping 60 s after a throttling error.
    Returns a DataFrame with the index reset (so 'date' is a column) on
    success, or None when all attempts fail or return no data.
    """
    client = TrendReq(hl='en-US', tz=360)
    window = f'{start_date} {end_date}'
    attempts_left = 3
    while attempts_left > 0:
        attempts_left -= 1
        try:
            client.build_payload([keyword], cat=0, timeframe=window, geo='TW', gprop='')
            result = client.interest_over_time()
        except TooManyRequestsError:
            st.warning("Google 趨勢請求過多,正在等待重試...")
            time.sleep(60)  # back off before the next attempt
            continue
        if not result.empty:
            return result.reset_index()
        # Empty result: retry immediately, same as the original flow.
    st.error("無法獲取 Google 趨勢數據,請稍後再試。")
    return None
# --- User inputs: search keyword and pages to crawl per site ---
keyword = st.text_input("請輸入關鍵字:", "平板")
num_pages = st.number_input("請輸入要爬取的頁數:", min_value=1, max_value=100, value=1)
# Date range for the Google Trends (pytrends) query.
start_date = st.date_input("選擇開始日期:", value=pd.to_datetime("2023-01-01"))
end_date = st.date_input("選擇結束日期:", value=pd.to_datetime("2023-12-31"))
# Main flow: runs when the user clicks the crawl button.
if st.button("開始爬取"):
    start_time = time.time()
    # Crawl PCHOME.
    st.subheader("爬取 PCHOME 資料")
    pchome_progress = st.progress(0)
    pchome_data = crawl_pchome(keyword, num_pages)
    pchome_progress.progress(100)
    # Crawl MOMO.
    st.subheader("爬取 MOMO 資料")
    momo_progress = st.progress(0)
    momo_data = crawl_momo(keyword, num_pages)
    momo_progress.progress(100)
    # Bail out when neither site returned any products.
    if pchome_data.empty and momo_data.empty:
        st.error("查無商品")
    else:
        # Tag each row with its source site.
        pchome_data['source'] = 'PCHOME'
        momo_data['source'] = 'MOMO'
        # Merge both result sets and sort by price, highest first.
        combined_data = pd.concat([pchome_data, momo_data])
        combined_data = combined_data.sort_values('price', ascending=False).reset_index(drop=True)
        # Show the combined table.
        st.subheader("爬取結果:")
        st.write(combined_data)
        # Offer the merged data as a CSV download.
        csv = combined_data.to_csv(index=False)
        st.download_button(
            label="下載 CSV 檔案",
            data=csv,
            file_name=f"{keyword}_price_data.csv",
            mime="text/csv",
        )
        # Price distribution - pie chart, bucketed into NT$1000-wide bins.
        # NOTE(review): pd.cut raises if the max price is 0 (only one bin
        # edge) -- confirm results always contain a priced item.
        st.subheader("價格分布圖 - 圓餅圖")
        combined_data['price_range'] = pd.cut(combined_data['price'], bins=range(0, int(combined_data['price'].max()) + 1000, 1000))
        price_range_counts = combined_data['price_range'].value_counts().sort_index()
        fig_pie = go.Figure(data=[go.Pie(labels=price_range_counts.index.astype(str), values=price_range_counts.values)])
        fig_pie.update_layout(
            title='價格分布 - 圓餅圖',
            width=800,  # chart width
            height=800  # chart height
        )
        st.plotly_chart(fig_pie)
        # Price distribution - line chart.
        st.subheader("價格分布圖 - 折線圖")
        fig_line = go.Figure(data=[go.Scatter(x=price_range_counts.index.astype(str), y=price_range_counts.values, mode='lines+markers')])
        fig_line.update_layout(title='價格分布 - 折線圖', xaxis_title='價格區間', yaxis_title='商品數量',
                               width=800,  # chart width
                               height=800  # chart height
                               )
        st.plotly_chart(fig_line)
        # Price distribution - Nightingale rose chart.
        st.subheader("價格分布圖 - 南丁格爾玫瑰圖")
        fig_rose = go.Figure(data=[go.Barpolar(r=price_range_counts.values, theta=price_range_counts.index.astype(str), marker=dict(color=price_range_counts.values, colorscale='Viridis'))])
        fig_rose.update_layout(title='價格分布 - 南丁格爾玫瑰圖',
                               width=800,  # chart width
                               height=800  # chart height
                               )
        st.plotly_chart(fig_rose)
        # Price distribution - donut chart.
        st.subheader("價格分布圖 - 環形圖")
        fig_donut = go.Figure(data=[go.Pie(labels=price_range_counts.index.astype(str), values=price_range_counts.values, hole=0.4)])
        fig_donut.update_layout(title='價格分布 - 環形圖',
                                width=800,  # chart width
                                height=800  # chart height
                                )
        st.plotly_chart(fig_donut)
        # Summary statistics over the merged price column.
        st.subheader("統計數據")
        st.write(f"平均價格: {combined_data['price'].mean():.2f}")
        st.write(f"最高價格: {combined_data['price'].max():.2f}")
        st.write(f"最低價格: {combined_data['price'].min():.2f}")
        # Fetch and plot Google Trends data for the same keyword.
        trends_data = get_trends_data(keyword, start_date, end_date)
        if trends_data is not None:
            st.subheader("Google 搜尋趨勢")
            fig = go.Figure()
            fig.add_trace(go.Scatter(x=trends_data['date'], y=trends_data[keyword], mode='lines', name='趨勢指數'))
            fig.update_layout(title=f'Google 搜尋趨勢: {keyword}', xaxis_title='日期', yaxis_title='趨勢指數')
            st.plotly_chart(fig)
        else:
            st.warning("無法獲取 Google 趨勢數據")
    # Report total execution time.
    end_time = time.time()
    execution_time = end_time - start_time
    st.write(f"執行時間: {execution_time:.2f} 秒")