# Spaces: Sleeping
| import gradio as gr | |
| import requests | |
| import pandas as pd | |
| import time | |
| import json | |
| from urllib.parse import urlencode | |
| import resend | |
| from datetime import datetime | |
| import io | |
| from linebot.v3.messaging import Configuration, MessagingApi | |
| from linebot.v3.messaging.models import TextMessage | |
| from linebot import LineBotApi | |
| from linebot.models import TextSendMessage | |
class MeetTaiwanAPIScraper:
    """Client for the MeetTaiwan events JSON API.

    Fetches paginated event listings and flattens every raw record into a
    dict with the keys ``name``, ``link``, ``form``, ``event_date``,
    ``upload_date`` and ``page_num`` (in that order).
    """

    # Keys under which a dict-shaped response may carry the event list,
    # checked in this order.
    _EVENT_LIST_KEYS = ('data', 'items', 'results', 'events')

    def __init__(self):
        self.base_url = "https://service.meettaiwan.com"
        self.api_base = "https://service.meettaiwan.com/gpa/api/v2/events"
        # One shared session carrying browser-like headers for every request.
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'application/json, text/plain, */*',
            'Accept-Language': 'zh-TW,zh;q=0.9,en;q=0.8',
            'Accept-Encoding': 'gzip, deflate, br',
            'Connection': 'keep-alive',
            'Referer': 'https://service.meettaiwan.com/gpa/zh/events/list;type=all',
            'Origin': 'https://service.meettaiwan.com'
        })

    @classmethod
    def _extract_event_list(cls, data):
        """Return the non-empty event list contained in *data*, or ``None``."""
        if isinstance(data, dict):
            for key in cls._EVENT_LIST_KEYS:
                if key in data:
                    data = data[key]
                    break
        if isinstance(data, list) and data:
            return data
        return None

    @staticmethod
    def _first_value(event, keys, default=''):
        """Return the first value under *keys* that is neither ``None`` nor ``''``."""
        for key in keys:
            value = event.get(key)
            if value is not None and value != '':
                return value
        return default

    def get_events_by_page(self, page=1, page_size=10, event_type=None, progress=None):
        """Fetch one page of events, trying each known endpoint in turn.

        Args:
            page: 1-based page number sent as ``page``.
            page_size: Number of records requested, sent as ``pageSize``.
            event_type: Optional value sent as ``type``; omitted when falsy.
            progress: Unused; kept so existing callers keep working.

        Returns:
            ``(events, raw)`` where ``events`` is a non-empty list and ``raw``
            is the decoded JSON payload it came from, or ``(None, None)`` when
            no endpoint produced a non-empty list.
        """
        params = {
            'page': page,
            'pageSize': page_size
        }
        if event_type:
            params['type'] = event_type

        api_endpoints = (
            f"{self.api_base}/",          # main API
            f"{self.api_base}/tt-events"  # TT events API
        )
        for api_url in api_endpoints:
            try:
                response = self.session.get(api_url, params=params, timeout=30)
                if response.status_code != 200:
                    continue
                data = response.json()
            except (requests.RequestException, ValueError):
                # Best effort: a failed request or an undecodable body on one
                # endpoint just means the next endpoint is tried.
                continue
            events = self._extract_event_list(data)
            if events:
                return events, data
        return None, None

    def get_all_events(self, progress_callback=None):
        """Collect events across pages, de-duplicated on (name, event_date).

        Page sizes 50, 30 and 20 are tried in order; a smaller size is only
        attempted when the previous one returned nothing on its first page.
        At most 20 pages are requested per page size.

        Args:
            progress_callback: Optional ``callable(percent, message)`` invoked
                before each page request with ``(page - 1) * 5`` as percent.

        Returns:
            A list of processed event dicts (possibly empty).
        """
        all_events = []
        seen_keys = set()  # (name, event_date) pairs already collected
        page_size_options = [50, 30, 20]
        max_pages = 20

        for page_size in page_size_options:
            for page in range(1, max_pages + 1):
                if progress_callback:
                    progress_callback((page-1) * 5, f"正在獲取第 {page} 頁資料 (頁面大小: {page_size})")

                events, _raw = self.get_events_by_page(page=page, page_size=page_size, event_type=None)
                if not events:
                    if page == 1:
                        # Nothing at all for this page size: try the next one.
                        break
                    return all_events

                for event in events:
                    processed_event = self.process_event_data(event, page, 'All')
                    if not processed_event:
                        continue
                    key = (processed_event['name'], processed_event['event_date'])
                    if key not in seen_keys:
                        seen_keys.add(key)
                        all_events.append(processed_event)

                # A short page is taken to be the last one.
                if len(events) < page_size:
                    return all_events
                time.sleep(0.5)  # small delay between page requests

            # Data was collected with this page size; no need to try another.
            if all_events:
                break
        return all_events

    def process_event_data(self, event, page_num, event_type):
        """Flatten one raw event record.

        Args:
            event: Raw record; anything that is not a dict is rejected.
            page_num: Page the record was found on, stored as ``page_num``.
            event_type: Fallback for ``form`` when the record carries no
                ``type`` / ``category`` / ``eventType`` value.

        Returns:
            The flattened dict, or ``None`` when *event* is not a dict.
        """
        if not isinstance(event, dict):
            return None

        name = self._first_value(event, ('name', 'title', 'eventName'))
        form = self._first_value(event, ('type', 'category', 'eventType'), event_type or '')
        event_date = self._first_value(event, ('eventDate', 'startDate', 'date'))
        upload_date = self._first_value(event, ('createdAt', 'uploadDate', 'publishDate'))

        # A detail link can only be built when the record has an id.
        event_id = self._first_value(event, ('id', 'eventId'))
        if event_id:
            link = f"{self.base_url}/gpa/zh/events/{form}/{event_id}"
        else:
            link = ""

        return {
            'name': str(name),
            'link': link,
            'form': str(form),
            'event_date': str(event_date),
            'upload_date': str(upload_date),
            'page_num': page_num
        }
def create_html_table(df, max_display=10):
    """Render the first rows of an events DataFrame as a standalone HTML page.

    Args:
        df: DataFrame with the columns ``名稱``, ``超連結網址``, ``形式``,
            ``活動日期`` and ``上載日期``; may be ``None``.
        max_display: Maximum number of rows placed in the table.

    Returns:
        An HTML document as a string, or a short ``<p>`` placeholder when
        *df* is ``None`` or empty.
    """
    import html  # local import: the file-level import block does not provide it

    if df is None or df.empty:
        return "<p>沒有找到活動資料</p>"

    max_display = int(max_display)  # UI sliders may hand over a float
    total = len(df)
    display_count = min(total, max_display)

    def cell(value):
        # Cell values come from a remote API, so never trust them as markup.
        return html.escape(str(value))

    parts = ["""
<html>
<head>
<meta charset="UTF-8">
<style>
body { font-family: Arial, sans-serif; margin: 20px; line-height: 1.6; }
.greeting {
font-size: 18px;
color: #2c3e50;
margin-bottom: 20px;
font-weight: bold;
}
h2 { color: #2c3e50; margin-top: 20px; }
.data-source {
background-color: #e8f4f8;
padding: 15px;
border-left: 4px solid #3498db;
margin: 20px 0;
font-weight: bold;
color: #2c3e50;
}
table {
border-collapse: collapse;
width: 100%;
margin-top: 20px;
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
}
th, td {
border: 1px solid #ddd;
padding: 12px;
text-align: left;
}
th {
background-color: #3498db;
color: white;
font-weight: bold;
}
tr:nth-child(even) { background-color: #f2f2f2; }
tr:hover { background-color: #e8f4f8; }
a { color: #3498db; text-decoration: none; }
a:hover { text-decoration: underline; }
.summary {
background-color: #ecf0f1;
padding: 15px;
border-radius: 5px;
margin-bottom: 20px;
}
.copyright {
text-align: center;
margin-top: 30px;
padding: 20px;
background-color: #34495e;
color: white;
border-radius: 5px;
font-size: 14px;
}
</style>
</head>
<body>
<div class="greeting">
親愛的會員您好:
</div>
<div class="data-source">
📋 資料來源:全球政府採購商機網
</div>
<h2>🎯 最新活動資訊</h2>
"""]

    parts.append(f"""
<div class="summary">
<strong>📊 資料統計:</strong>顯示前 {display_count} 筆,共 {total} 筆活動
</div>
<table>
<thead>
<tr>
<th>序號</th>
<th>名稱</th>
<th>形式</th>
<th>活動日期</th>
<th>上載日期</th>
<th>網址</th>
</tr>
</thead>
<tbody>
""")

    # Number rows by position: index labels have gaps once duplicates are dropped.
    for serial, (_, row) in enumerate(df.head(display_count).iterrows(), start=1):
        link = row["超連結網址"]
        if link:
            link_html = f'<a href="{html.escape(str(link), quote=True)}" target="_blank">查看詳情</a>'
        else:
            link_html = "無連結"
        parts.append(f"""
<tr>
<td>{serial}</td>
<td><strong>{cell(row['名稱'])}</strong></td>
<td>{cell(row['形式'])}</td>
<td>{cell(row['活動日期'])}</td>
<td>{cell(row['上載日期'])}</td>
<td>{link_html}</td>
</tr>
""")

    parts.append("""
</tbody>
</table>
""")

    if total > max_display:
        parts.append(f"""
<div class="summary" style="margin-top: 20px;">
<strong>📝 提醒:</strong>還有 {total - max_display} 筆資料未顯示,
請查看附加的 CSV 檔案獲取完整資料。
</div>
""")

    parts.append("""
<div class="summary" style="margin-top: 20px;">
<strong>🤖 自動爬蟲系統</strong><br>
此郵件由 MeetTaiwan API 爬蟲系統自動產生並發送
</div>
<div class="copyright">
2025 © Copyright robert_studio
</div>
</body>
</html>
""")
    return "".join(parts)
def send_events_email(df_events, recipient_email, api_key, max_display=5):
    """Send the events report by e-mail through Resend.

    Args:
        df_events: DataFrame of events; may be ``None``.
        recipient_email: Address placed in the ``to`` field.
        api_key: Resend API key.
        max_display: Number of rows shown in the HTML table.

    Returns:
        ``(ok, message)``: ``ok`` is ``False`` when there is nothing to send
        or when sending raised; ``message`` is the user-facing status text.
    """
    if df_events is None or df_events.empty:
        return False, "沒有資料可發送"
    try:
        resend.api_key = api_key

        # Hand over the full frame: create_html_table truncates to
        # max_display itself and needs the real total for its summary line
        # and its "more rows not shown" reminder.
        html_content = create_html_table(df_events, max_display=max_display)

        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        subject = f"📊 MeetTaiwan 最新活動資訊 - {len(df_events)}筆活動 ({current_time})"

        r = resend.Emails.send({
            "from": "onboarding@resend.dev",
            "to": recipient_email,
            "subject": subject,
            "html": html_content
        })
        return True, f"郵件發送成功!郵件 ID: {r.get('id', 'N/A')}"
    except Exception as e:
        # Boundary with a third-party service: report the failure to the UI.
        return False, f"郵件發送失敗: {str(e)}"
def send_line_message(message, channel_access_token):
    """Broadcast a text message through the LINE Messaging API.

    Builds a ``LineBotApi`` client from *channel_access_token* and calls its
    ``broadcast`` with *message* wrapped in a ``TextSendMessage``.

    Returns:
        ``(True, status_text)`` when the broadcast call returned, otherwise
        ``(False, error_text)`` carrying the exception text.
    """
    try:
        LineBotApi(channel_access_token).broadcast(TextSendMessage(text=message))
    except Exception as e:
        return False, f"發送訊息時發生錯誤: {e}"
    return True, "訊息成功發送到所有用戶!"
def scrape_events(progress=gr.Progress()):
    """Scrape all events and prepare them for the UI.

    Args:
        progress: Gradio progress tracker; called with fractions in [0, 1].

    Returns:
        A 4-tuple ``(status_message, dataframe, csv_path, csv_filename)``.
        ``csv_path`` is the path of a CSV file written to a fresh temporary
        directory (a file output component needs a path, not file content).
        On failure the last three items are ``None``.
    """
    import os
    import tempfile

    # Explicit mapping instead of positional renaming, so a change in the
    # scraper's key order cannot silently mislabel columns.
    column_names = {
        'name': "名稱",
        'link': "超連結網址",
        'form': "形式",
        'event_date': "活動日期",
        'upload_date': "上載日期",
        'page_num': "頁數",
    }

    try:
        progress(0, desc="初始化爬蟲...")
        scraper = MeetTaiwanAPIScraper()

        def progress_callback(percent, message):
            # The scraper reports percentages; the tracker expects a fraction.
            progress(percent / 100, desc=message)

        progress(0.2, desc="開始爬取活動資料...")
        events_data = scraper.get_all_events(progress_callback=progress_callback)
        if not events_data:
            return "❌ 無法獲取活動資料,可能原因:API端點變更、需要認證或網路問題", None, None, None

        progress(0.8, desc="處理資料...")
        df_events = pd.DataFrame(events_data).rename(columns=column_names)

        original_count = len(df_events)
        # Reset the index so row positions stay contiguous after dropping rows.
        df_events = df_events.drop_duplicates(subset=['名稱', '活動日期']).reset_index(drop=True)
        deduplicated_count = len(df_events)

        progress(1.0, desc="完成!")
        success_msg = f"✅ 成功獲取 {deduplicated_count} 筆活動資料!"
        if original_count != deduplicated_count:
            success_msg += f"\n📝 去除了 {original_count - deduplicated_count} 筆重複資料"

        # Write the CSV to disk and hand back its path for download.
        csv_name = f"meettaiwan_events_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
        csv_path = os.path.join(tempfile.mkdtemp(), csv_name)
        df_events.to_csv(csv_path, index=False, encoding='utf-8-sig')

        return success_msg, df_events, csv_path, csv_name
    except Exception as e:
        # UI boundary: surface any failure as a status message.
        return f"❌ 爬取過程發生錯誤: {str(e)}", None, None, None
def send_email_report(df_events, recipient_email, api_key, max_display):
    """Validate the e-mail form inputs, send the report, return a status line.

    Checks, in order: that there is data, that the API key is not blank,
    that the recipient is not blank. Only then is ``send_events_email``
    called; its result is prefixed with a success or failure mark.
    """
    if df_events is None or df_events.empty:
        return "❌ 沒有資料可發送,請先爬取資料"

    # (field value, complaint shown when the field is blank), checked in order
    required_fields = (
        (api_key, "❌ 請輸入 Resend API Key"),
        (recipient_email, "❌ 請輸入收件人郵箱"),
    )
    for field_value, complaint in required_fields:
        if field_value.strip() == "":
            return complaint

    ok, detail = send_events_email(df_events, recipient_email, api_key, max_display)
    mark = "✅" if ok else "❌"
    return f"{mark} {detail}"
def send_line_notification(df_events, channel_access_token, message_template):
    """Send a LINE broadcast summarising the scraped events.

    Args:
        df_events: DataFrame of events; may be ``None``.
        channel_access_token: LINE channel access token.
        message_template: Custom message text. When ``None`` or blank, a
            default message listing the first five events is built.

    Returns:
        A single status line prefixed with a success or failure mark.
    """
    if df_events is None or df_events.empty:
        return "❌ 沒有資料可發送,請先爬取資料"
    if not channel_access_token.strip():
        return "❌ 請輸入 LINE Channel Access Token"
    try:
        if message_template and message_template.strip():
            message = message_template
        else:
            current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            # Number by position: index labels are not contiguous once
            # duplicate rows have been dropped.
            events_list = "".join(
                f"{position}. {row['名稱']} ({row['活動日期']})\n"
                for position, (_, row) in enumerate(df_events.head(5).iterrows(), start=1)
            )
            message = "\n".join([
                "🎯 MeetTaiwan 最新活動通知",
                f"📊 共找到 {len(df_events)} 筆活動資料",
                f"⏰ 更新時間:{current_time}",
                "📋 最新 5 筆活動:",
                events_list,
                "🤖 此訊息由自動爬蟲系統發送",
                "資料來源:全球政府採購商機網",
            ])

        success, result_message = send_line_message(message, channel_access_token)
        if success:
            return f"✅ {result_message}"
        return f"❌ {result_message}"
    except Exception as e:
        # UI boundary: surface any failure as a status message.
        return f"❌ 發送 LINE 通知時發生錯誤: {str(e)}"
| # 創建 Gradio 界面 | |
def create_interface():
    """Build the Gradio Blocks app (scraping, e-mail and LINE tabs).

    Credentials are never embedded in the source or sent to the browser:
    the Resend API key and the LINE channel access token are read from the
    environment variables ``RESEND_API_KEY`` and ``LINE_CHANNEL_ACCESS_TOKEN``
    and are only used server-side, as a fallback when the matching text box
    is left blank. The recipient box is pre-filled from
    ``DEFAULT_RECIPIENT_EMAIL`` when that variable is set.

    Returns:
        The configured ``gr.Blocks`` application (not yet launched).
    """
    import os

    env_resend_key = os.environ.get("RESEND_API_KEY", "")
    env_line_token = os.environ.get("LINE_CHANNEL_ACCESS_TOKEN", "")
    default_recipient = os.environ.get("DEFAULT_RECIPIENT_EMAIL", "")

    def _email_with_env_fallback(df_events, recipient, api_key, max_display):
        # A blank box means "use the server-side key".
        effective_key = (api_key or "").strip() or env_resend_key
        return send_email_report(df_events, recipient, effective_key, max_display)

    def _line_with_env_fallback(df_events, token, template):
        # A blank box means "use the server-side token".
        effective_token = (token or "").strip() or env_line_token
        return send_line_notification(df_events, effective_token, template)

    with gr.Blocks(
        title="🎯 MeetTaiwan 活動爬蟲系統",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container {
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif !important;
        }
        .gr-button-primary {
            background: linear-gradient(45deg, #3498db, #2ecc71) !important;
            border: none !important;
        }
        .gr-button-secondary {
            background: linear-gradient(45deg, #e74c3c, #f39c12) !important;
            border: none !important;
        }
        """
    ) as app:
        # Header
        gr.Markdown("""
        # 🎯 MeetTaiwan 活動爬蟲系統
        **全球政府採購商機網活動資訊自動抓取與通知系統**
        本系統整合了資料爬取、郵件發送和 LINE 通知功能,讓您輕鬆獲取最新的活動資訊。
        """)

        # Per-session state: the scraped DataFrame and the CSV file name.
        scraped_data = gr.State(None)
        csv_filename = gr.State(None)

        with gr.Tabs():
            # Tab 1: scraping
            with gr.Tab("📊 資料爬取", id="scraping"):
                with gr.Row():
                    with gr.Column(scale=2):
                        gr.Markdown("### 🚀 開始爬取活動資料")
                        scrape_btn = gr.Button(
                            "開始爬取",
                            variant="primary",
                            size="lg"
                        )
                        scrape_status = gr.Textbox(
                            label="爬取狀態",
                            interactive=False,
                            placeholder="等待開始爬取..."
                        )
                    with gr.Column(scale=1):
                        gr.Markdown("### 📥 資料下載")
                        download_file = gr.File(
                            label="下載 CSV 檔案",
                            interactive=False
                        )
                # Data preview
                gr.Markdown("### 📋 資料預覽")
                data_preview = gr.Dataframe(
                    label="活動資料",
                    wrap=True,
                    interactive=False
                )

            # Tab 2: e-mail
            with gr.Tab("📧 郵件通知", id="email"):
                with gr.Row():
                    with gr.Column():
                        gr.Markdown("### ⚙️ 郵件設定")
                        email_api_key = gr.Textbox(
                            label="Resend API Key",
                            type="password",
                            value="",
                            placeholder="請輸入您的 Resend API Key"
                        )
                        recipient_email = gr.Textbox(
                            label="收件人郵箱",
                            value=default_recipient,
                            placeholder="請輸入收件人郵箱地址"
                        )
                        max_display_email = gr.Slider(
                            label="郵件顯示筆數",
                            minimum=5,
                            maximum=20,
                            value=10,
                            step=1
                        )
                    with gr.Column():
                        gr.Markdown("### 📨 發送郵件")
                        send_email_btn = gr.Button(
                            "發送郵件報告",
                            variant="secondary",
                            size="lg"
                        )
                        email_status = gr.Textbox(
                            label="郵件發送狀態",
                            interactive=False,
                            placeholder="等待發送..."
                        )

            # Tab 3: LINE
            with gr.Tab("📱 LINE 通知", id="line"):
                with gr.Row():
                    with gr.Column():
                        gr.Markdown("### ⚙️ LINE 設定")
                        line_token = gr.Textbox(
                            label="LINE Channel Access Token",
                            type="password",
                            value="",
                            placeholder="請輸入您的 LINE Channel Access Token"
                        )
                        message_template = gr.Textbox(
                            label="自訂訊息內容 (選填)",
                            placeholder="留空將使用默認模板...",
                            lines=5
                        )
                    with gr.Column():
                        gr.Markdown("### 📲 發送通知")
                        send_line_btn = gr.Button(
                            "發送 LINE 通知",
                            variant="secondary",
                            size="lg"
                        )
                        line_status = gr.Textbox(
                            label="LINE 發送狀態",
                            interactive=False,
                            placeholder="等待發送..."
                        )

        # Footer
        gr.Markdown("""
        ---
        <div style='text-align: center; color: #666; margin-top: 20px;'>
        <p>🤖 MeetTaiwan API 爬蟲系統 | 2025 © Copyright robert_studio</p>
        <p>資料來源:全球政府採購商機網</p>
        </div>
        """)

        # Event wiring
        scrape_btn.click(
            fn=scrape_events,
            outputs=[scrape_status, scraped_data, download_file, csv_filename]
        ).then(
            fn=lambda data: data if data is not None else gr.DataFrame(),
            inputs=[scraped_data],
            outputs=[data_preview]
        )
        send_email_btn.click(
            fn=_email_with_env_fallback,
            inputs=[scraped_data, recipient_email, email_api_key, max_display_email],
            outputs=[email_status]
        )
        send_line_btn.click(
            fn=_line_with_env_fallback,
            inputs=[scraped_data, line_token, message_template],
            outputs=[line_status]
        )
    return app
if __name__ == "__main__":
    # Script entry point: build the UI, then serve it on all interfaces at
    # port 7860 with a share link requested and errors shown.
    app = create_interface()
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
        "show_error": True,
    }
    app.launch(**launch_options)