import streamlit as st
from bs4 import BeautifulSoup
from datetime import datetime
import requests
import json
import emoji
import re
import pandas as pd
from io import BytesIO


class GoogleMapSpider:
    """Scrape store ids, store names and user reviews from Google Maps.

    Relies on undocumented Google Maps web endpoints, so the index-based
    parsing in :meth:`get_comment` depends on the current response layout
    and may break if Google changes it.
    """

    def __init__(self):
        # NOTE: fixed a malformed User-Agent — the original implicit string
        # concatenation was missing the space before "AppleWebKit".
        self.headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/55.0.2883.87 Safari/537.36"
            )
        }
        self.store_id_url = "https://www.google.com.tw/maps/search/{store_name}"
        self.store_name_url = "https://www.google.com.tw/maps/place/data=!4m5!3m4!1s{store_id}!8m2!3d25.0564743!4d121.5204167?authuser=0&hl=zh-TW&rclk=1"
        self.comment_url = "https://www.google.com.tw/maps/rpc/listugcposts"

    def get_store_id(self, store_name):
        """Resolve a store name to its Google Maps feature id.

        Args:
            store_name: Search term, e.g. a shop name.

        Returns:
            The feature id in ``0x<16 hex>:0x<16 hex>`` form.

        Raises:
            ValueError: If no feature id appears in the search page.
        """
        url = self.store_id_url.format(store_name=store_name)
        response = requests.get(url, headers=self.headers, timeout=30)
        soup = BeautifulSoup(response.text, "lxml")
        # The first "0x…:0x…" token embedded in the page is the store's id.
        match = re.search(r'0x.{16}:0x.{16}', str(soup))
        if match is None:
            # Original code crashed with AttributeError here; fail clearly instead.
            raise ValueError(f"No store id found for search term: {store_name!r}")
        return match.group()

    def get_store_name(self, store_id):
        """Look up the display name of a store from its feature id.

        Args:
            store_id: Feature id as returned by :meth:`get_store_id`.

        Returns:
            The store name parsed from the page's ``itemprop="name"`` meta tag.

        Raises:
            ValueError: If no matching meta tag is present.
        """
        url = self.store_name_url.format(store_id=store_id)
        response = requests.get(url, headers=self.headers, timeout=30)
        soup = BeautifulSoup(response.text, "lxml")
        for meta in soup.find_all('meta'):
            tag_text = str(meta)
            if 'itemprop="name"' in tag_text:
                match = re.search('".*·', tag_text)
                if match:
                    # Strip the opening quote and the trailing " ·" separator.
                    return match.group()[1:-2]
        raise ValueError(f"No store name found for store id: {store_id!r}")

    def get_comment(self, store_id, page_count=1, sorted_by=2, progress_callback=None):
        """Fetch review pages for a store and parse them into dicts.

        Args:
            store_id: Feature id as returned by :meth:`get_store_id`.
            page_count: Maximum number of pages (10 reviews each) to fetch.
            sorted_by: Sort order code (1=relevance, 2=newest, 3=highest, 4=lowest).
            progress_callback: Optional ``fn(current_page, total_pages)`` hook.

        Returns:
            A list of dicts, one per review, with Chinese keys used by the UI.
        """
        next_token = ""
        comment_list = []
        for page in range(1, page_count + 1):
            if progress_callback:
                progress_callback(page, page_count)
            params = {
                "authuser": "0",
                "hl": "zh-TW",
                "gl": "tw",
                "pb": (
                    f"!1m6!1s{store_id}!6m4!4m1!1e1!4m1!1e3!2m2!1i10!2s"
                    f"{next_token}"
                    f"!5m2!1s0OBwZ4OnGsrM1e8PxIjW6AI!7e81!8m5!1b1!2b1!3b1!5b1!7b1!11m0!13m1!1e{sorted_by}"
                )
            }
            response = requests.get(self.comment_url, params=params,
                                    headers=self.headers, timeout=30)
            # Response is ")]}'"-prefixed JSON; demojize keeps emoji as text tokens.
            data = json.loads(emoji.demojize(response.text[4:]))
            next_token = data[1]
            # data[2] can be None/missing when a page has no reviews.
            if data[2]:
                comment_list.extend(data[2])
            if not next_token:
                break

        parsed_comments = []
        for comment_data in comment_list:
            try:
                comment_date = comment_data[0][2][2][0][1][21][6][-1]
                comment_date = datetime(
                    comment_date[0], comment_date[1],
                    comment_date[2], comment_date[3]
                ).strftime('%Y/%m/%d %H:%M:%S')
            except (IndexError, TypeError, KeyError, ValueError):
                # Some reviews carry no structured timestamp.
                comment_date = None
            try:
                comment_text = comment_data[0][2][-1][0][0]
            except (IndexError, TypeError, KeyError):
                # Rating-only reviews have no text body.
                comment_text = None
            comment_info = {
                "評論者": comment_data[0][1][4][5][0],
                "評論者id": comment_data[0][0],
                "評論者狀態": comment_data[0][1][4][5][10][0],
                "評論者等級": comment_data[0][1][4][5][9],
                "留言時間": comment_data[0][1][6],
                "留言日期": comment_date,
                "評論": comment_text,
                "評論分數": comment_data[0][2][0][0]
            }
            parsed_comments.append(comment_info)
        return parsed_comments


def main():
    """Streamlit entry point: scrape Google Maps reviews and offer downloads."""
    st.set_page_config(page_title="Google Maps 評論爬蟲", page_icon="🗺️", layout="wide")
    st.title("🗺️ Google Maps 評論爬蟲")
    st.markdown("---")

    # Sidebar for input
    with st.sidebar:
        st.header("⚙️ 設定")
        store_name = st.text_input("店家名稱", placeholder="例如:台北101")
        page_count = st.number_input("爬取頁數", min_value=1, max_value=50, value=1)
        sort_options = {
            "最相關": 1,
            "最新": 2,
            "評分最高": 3,
            "評分最低": 4
        }
        sorted_by_name = st.selectbox("排序方式", list(sort_options.keys()))
        sorted_by = sort_options[sorted_by_name]
        start_button = st.button("🚀 開始爬取", type="primary", use_container_width=True)

    # Initialize session state so results survive Streamlit reruns.
    if 'comments_data' not in st.session_state:
        st.session_state.comments_data = None
    if 'store_name_used' not in st.session_state:
        st.session_state.store_name_used = None

    # Main content
    if start_button:
        if not store_name:
            st.error("❌ 請輸入店家名稱!")
        else:
            try:
                spider = GoogleMapSpider()

                # Progress indicators
                progress_bar = st.progress(0)
                status_text = st.empty()

                # Get store ID
                status_text.text("🔍 正在獲取店家ID...")
                store_id = spider.get_store_id(store_name)
                st.success(f"✅ 店家ID: {store_id}")

                # Get comments
                status_text.text("📝 開始爬取評論...")

                def update_progress(current, total):
                    progress = current / total
                    progress_bar.progress(progress)
                    status_text.text(f"📝 正在爬取第 {current} 頁,共 {total} 頁")

                comments_data = spider.get_comment(
                    store_id=store_id,
                    page_count=page_count,
                    sorted_by=sorted_by,
                    progress_callback=update_progress
                )

                progress_bar.progress(1.0)
                status_text.text("✅ 爬取完成!")

                # Save to session state
                st.session_state.comments_data = comments_data
                st.session_state.store_name_used = store_name

                st.success(f"🎉 完成!共爬取 {len(comments_data)} 則評論")
            except Exception as e:
                # Top-level UI boundary: surface any scraping failure to the user.
                st.error(f"❌ 發生錯誤: {str(e)}")

    # Display results
    if st.session_state.comments_data:
        st.markdown("---")
        st.header("📊 評論結果")

        df = pd.DataFrame(st.session_state.comments_data)

        # Statistics
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.metric("總評論數", len(df))
        with col2:
            avg_score = df['評論分數'].mean()
            st.metric("平均評分", f"{avg_score:.2f}")
        with col3:
            max_score = df['評論分數'].max()
            st.metric("最高評分", int(max_score))
        with col4:
            min_score = df['評論分數'].min()
            st.metric("最低評分", int(min_score))

        # Data table
        st.subheader("📝 評論詳細內容")
        st.dataframe(df, use_container_width=True, height=400)

        # Download options
        st.subheader("💾 下載資料")
        col1, col2 = st.columns(2)

        with col1:
            # CSV download; utf-8-sig adds a BOM so Excel opens Chinese text correctly.
            csv = df.to_csv(index=False, encoding='utf-8-sig').encode('utf-8-sig')
            st.download_button(
                label="📥 下載 CSV",
                data=csv,
                file_name=f"{st.session_state.store_name_used}_評論.csv",
                mime="text/csv",
                use_container_width=True
            )

        with col2:
            # JSON download
            json_str = json.dumps(st.session_state.comments_data,
                                  ensure_ascii=False, indent=2)
            st.download_button(
                label="📥 下載 JSON",
                data=json_str,
                file_name=f"{st.session_state.store_name_used}_評論.json",
                mime="application/json",
                use_container_width=True
            )


if __name__ == "__main__":
    main()