import streamlit as st
import plotly.graph_objs as go
import requests
import json
import pandas as pd
import time
from pytrends.request import TrendReq
from pytrends.exceptions import TooManyRequestsError

# Streamlit app setup
st.title("PCHOME 和 MOMO 商品價格爬蟲分析與趨勢分析")

# Base URLs for the two shopping-site search APIs
pchome_base_url = 'https://ecshweb.pchome.com.tw/search/v3.3/all/results?q='
momo_url = "https://apisearch.momoshop.com.tw/momoSearchCloud/moec/textSearch"


def crawl_pchome(keyword, num_pages):
    """Scrape PCHOME search results for *keyword*.

    Args:
        keyword: search term typed by the user.
        num_pages: number of result pages to fetch (1-based).

    Returns:
        DataFrame with 'name' and 'price' columns; empty (but with those
        columns) when no products were found.
    """
    alldata = pd.DataFrame()
    for i in range(1, num_pages + 1):
        url = f'{pchome_base_url}{keyword}&page={i}&sort=sale/dc'
        # timeout so a stalled request cannot hang the Streamlit app forever
        list_req = requests.get(url, timeout=10)
        if list_req.status_code == 200:
            getdata = list_req.json()
            # 'prods' is absent when the page has no results — guard KeyError
            prods = getdata.get('prods') or []
            if prods:
                alldata = pd.concat([alldata, pd.DataFrame(prods)])
        time.sleep(1)  # be polite to the API between page fetches
    if alldata.empty:
        # Return a well-formed empty frame; selecting the columns on an
        # empty concat result would otherwise raise KeyError.
        return pd.DataFrame(columns=["name", "price"])
    return alldata[["name", "price"]]


def crawl_momo(keyword, num_pages):
    """Scrape MOMO search results for *keyword* via its search API.

    Args:
        keyword: search term typed by the user.
        num_pages: number of result pages to fetch (1-based).

    Returns:
        DataFrame with 'name' and 'price' columns; may be empty.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36"
    }
    product_list = []
    for page in range(1, num_pages + 1):
        payload = {
            "host": "momoshop",
            "flag": "searchEngine",
            "data": {
                "searchValue": keyword,
                "curPage": str(page),
                "priceS": "0",
                "priceE": "9999999",
                "searchType": "1"
            }
        }
        # timeout so a stalled request cannot hang the Streamlit app forever
        response = requests.post(momo_url, headers=headers, json=payload, timeout=10)
        if response.status_code == 200:
            data_from_api = response.json()
            products = data_from_api.get('rtnSearchData', {}).get('goodsInfoList', [])
            for product in products:
                name = product.get('goodsName', '')
                price = product.get('goodsPrice', '')
                # Prices may arrive like "1,234(促銷)" or "$999" — strip the
                # parenthesised annotation and separators before parsing.
                price_str = str(price).split('(')[0].replace(',', '').replace('$', '')
                try:
                    product_price = float(price_str)
                except ValueError:
                    product_price = 0  # unparseable price → fall back to 0
                product_list.append({'name': name, 'price': product_price})
        time.sleep(1)  # be polite to the API between page fetches
    # Fix the columns so an empty result still has 'name'/'price',
    # consistent with crawl_pchome.
    return pd.DataFrame(product_list, columns=['name', 'price'])


def get_trends_data(keyword, start_date, end_date):
    """Fetch Google Trends interest-over-time for *keyword* (geo=TW).

    Retries up to 3 times when rate-limited, waiting 60 s between attempts.

    Returns:
        DataFrame with a 'date' column plus one column named after the
        keyword, or None when every attempt failed.
    """
    pytrends = TrendReq(hl='en-US', tz=360)
    kw_list = [keyword]
    timeframe = f'{start_date} {end_date}'
    for _ in range(3):  # retry at most 3 times
        try:
            pytrends.build_payload(kw_list, cat=0, timeframe=timeframe, geo='TW', gprop='')
            trends_data = pytrends.interest_over_time()
            if not trends_data.empty:
                trends_data = trends_data.reset_index()
                return trends_data
        except TooManyRequestsError:
            st.warning("Google 趨勢請求過多,正在等待重試...")
            time.sleep(60)  # back off before retrying
    st.error("無法獲取 Google 趨勢數據,請稍後再試。")
    return None


# --- User inputs -----------------------------------------------------------
keyword = st.text_input("請輸入關鍵字:", "平板")
num_pages = st.number_input("請輸入要爬取的頁數:", min_value=1, max_value=100, value=1)

# Date range for the Google Trends query
start_date = st.date_input("選擇開始日期:", value=pd.to_datetime("2023-01-01"))
end_date = st.date_input("選擇結束日期:", value=pd.to_datetime("2023-12-31"))

# --- Main flow --------------------------------------------------------------
if st.button("開始爬取"):
    start_time = time.time()

    # Scrape PCHOME
    st.subheader("爬取 PCHOME 資料")
    pchome_progress = st.progress(0)
    pchome_data = crawl_pchome(keyword, num_pages)
    pchome_progress.progress(100)

    # Scrape MOMO
    st.subheader("爬取 MOMO 資料")
    momo_progress = st.progress(0)
    momo_data = crawl_momo(keyword, num_pages)
    momo_progress.progress(100)

    if pchome_data.empty and momo_data.empty:
        st.error("查無商品")
    else:
        # Tag each row with its source site, then merge and sort by price.
        pchome_data['source'] = 'PCHOME'
        momo_data['source'] = 'MOMO'
        combined_data = pd.concat([pchome_data, momo_data])
        combined_data = combined_data.sort_values('price', ascending=False).reset_index(drop=True)

        st.subheader("爬取結果:")
        st.write(combined_data)

        # CSV export of the merged table
        csv = combined_data.to_csv(index=False)
        st.download_button(
            label="下載 CSV 檔案",
            data=csv,
            file_name=f"{keyword}_price_data.csv",
            mime="text/csv",
        )

        # Bucket prices into 1000-wide ranges shared by all four charts.
        # max(..., 1) guards the degenerate case where every price is 0,
        # which would otherwise give pd.cut a single bin edge and raise.
        max_price = max(int(combined_data['price'].max()), 1)
        combined_data['price_range'] = pd.cut(combined_data['price'],
                                              bins=range(0, max_price + 1000, 1000))
        price_range_counts = combined_data['price_range'].value_counts().sort_index()

        # Price distribution — pie chart
        st.subheader("價格分布圖 - 圓餅圖")
        fig_pie = go.Figure(data=[go.Pie(labels=price_range_counts.index.astype(str),
                                         values=price_range_counts.values)])
        fig_pie.update_layout(
            title='價格分布 - 圓餅圖',
            width=800,
            height=800
        )
        st.plotly_chart(fig_pie)

        # Price distribution — line chart
        st.subheader("價格分布圖 - 折線圖")
        fig_line = go.Figure(data=[go.Scatter(x=price_range_counts.index.astype(str),
                                              y=price_range_counts.values,
                                              mode='lines+markers')])
        fig_line.update_layout(
            title='價格分布 - 折線圖',
            xaxis_title='價格區間',
            yaxis_title='商品數量',
            width=800,
            height=800
        )
        st.plotly_chart(fig_line)

        # Price distribution — Nightingale rose chart
        st.subheader("價格分布圖 - 南丁格爾玫瑰圖")
        fig_rose = go.Figure(data=[go.Barpolar(r=price_range_counts.values,
                                               theta=price_range_counts.index.astype(str),
                                               marker=dict(color=price_range_counts.values,
                                                           colorscale='Viridis'))])
        fig_rose.update_layout(
            title='價格分布 - 南丁格爾玫瑰圖',
            width=800,
            height=800
        )
        st.plotly_chart(fig_rose)

        # Price distribution — donut chart
        st.subheader("價格分布圖 - 環形圖")
        fig_donut = go.Figure(data=[go.Pie(labels=price_range_counts.index.astype(str),
                                           values=price_range_counts.values,
                                           hole=0.4)])
        fig_donut.update_layout(
            title='價格分布 - 環形圖',
            width=800,
            height=800
        )
        st.plotly_chart(fig_donut)

        # Summary statistics over the merged data
        st.subheader("統計數據")
        st.write(f"平均價格: {combined_data['price'].mean():.2f}")
        st.write(f"最高價格: {combined_data['price'].max():.2f}")
        st.write(f"最低價格: {combined_data['price'].min():.2f}")

        # Google Trends overlay for the same keyword
        trends_data = get_trends_data(keyword, start_date, end_date)
        if trends_data is not None:
            st.subheader("Google 搜尋趨勢")
            fig = go.Figure()
            fig.add_trace(go.Scatter(x=trends_data['date'], y=trends_data[keyword],
                                     mode='lines', name='趨勢指數'))
            fig.update_layout(title=f'Google 搜尋趨勢: {keyword}',
                              xaxis_title='日期', yaxis_title='趨勢指數')
            st.plotly_chart(fig)
        else:
            st.warning("無法獲取 Google 趨勢數據")

    # Total elapsed time for the whole run
    end_time = time.time()
    execution_time = end_time - start_time
    st.write(f"執行時間: {execution_time:.2f} 秒")