Spaces:

shiue2000
/

sparkAnalysis

Sleeping

App Files Files Community

shiue2000 committed on Sep 25, 2025

Commit

cc88742

verified ·

1 Parent(s): bf221ac

Update app.py

Browse files

Files changed (1) hide show

app.py +144 -86

app.py CHANGED Viewed

@@ -14,6 +14,8 @@ import schedule
 import time
 import threading
 import traceback
 # -----------------------------
 # 參數設定
 # -----------------------------
@@ -22,7 +24,8 @@ days_back = 7
 max_tweets_per_candidate = 100
 news_file = "news_sample.csv"
 history_file = "history_sentiment.csv"
-max_retries = 3 # 貼文抓取失敗重試次數
 # -----------------------------
 # 中文情緒分析模型 (公開可用)
 # -----------------------------
@@ -30,6 +33,7 @@ sentiment = pipeline(
     "sentiment-analysis",
     model="uer/roberta-base-finetuned-sentiment-chinese"
 )
 # -----------------------------
 # 主分析函數
 # -----------------------------
@@ -41,13 +45,13 @@ def run_analysis():
         all_tweets = []
         for candidate in candidates:
             query = f'{candidate} since:{since_date} until:{until_date}'
-            for attempt in range(1, max_retries+1):
                 try:
                     for i, tweet in enumerate(sntwitter.TwitterSearchScraper(query).get_items()):
                         if i >= max_tweets_per_candidate:
                             break
                         all_tweets.append([tweet.date, tweet.user.username, tweet.content, candidate])
-                    break # 成功抓取，跳出重試
                 except Exception as e:
                     print(f"⚠️ {candidate} 第 {attempt} 次抓貼文失敗: {e}")
                     if attempt == max_retries:
@@ -62,17 +66,18 @@ def run_analysis():
         summary['總貼文'] = summary.sum(axis=1)
         summary['正面比率'] = summary.get('positive', 0) / summary['總貼文']
         summary['負面比率'] = summary.get('negative', 0) / summary['總貼文']
-        summary['日期'] = datetime.now().strftime('%Y-%m-%d')
         # 3. 更新歷史資料
         if os.path.exists(history_file):
             df_history = pd.read_csv(history_file)
-            df_history = pd.concat([df_history, summary.reset_index()[['日期','候選人','正面比率','負面比率']]], ignore_index=True)
         else:
-            df_history = summary.reset_index()[['日期','候選人','正面比率','負面比率']]
         df_history.to_csv(history_file, index=False)
         # 4. 可視化當日情緒圖表
-        plt.figure(figsize=(8,5))
-        summary[['正面比率','負面比率']].plot(kind='bar', stacked=True, colormap='coolwarm')
         plt.title("候選人當日社群情緒比例")
         plt.ylabel("比例")
         plt.xlabel("候選人")
@@ -83,10 +88,11 @@ def run_analysis():
         buf.seek(0)
         img_b64_today = base64.b64encode(buf.read()).decode("utf-8")
         buf.close()
         # 5. 可視化歷史情緒趨勢
-        plt.figure(figsize=(10,5))
         for c in candidates:
-            temp = df_history[df_history['候選人']==c]
             plt.plot(temp['日期'], temp['正面比率'], marker='o', label=f"{c} 正面")
             plt.plot(temp['日期'], temp['負面比率'], marker='x', label=f"{c} 負面")
         plt.xticks(rotation=45)
@@ -99,6 +105,7 @@ def run_analysis():
         buf.seek(0)
         img_b64_trend = base64.b64encode(buf.read()).decode("utf-8")
         buf.close()
         # 6. 新聞線索整合
         if os.path.exists(news_file):
             df_news = pd.read_csv(news_file)
@@ -107,35 +114,132 @@ def run_analysis():
         else:
             news_summary = {}
             news_table = "<p>未提供新聞資料</p>"
-        # 7. 生成即時輿情報告 (HTML) with added figures
-        html_report = f"""
-        <h2>高雄市長選戰輿情摘要 ({datetime.now().strftime('%Y-%m-%d')})</h2>
-        <h3>1. 當日社群貼文情緒</h3>
-        {summary.to_html()}
-        <img src='data:image/png;base64,{img_b64_today}' width='600'>
-        <h3>2. 歷史情緒趨勢</h3>
-        <img src='data:image/png;base64,{img_b64_trend}' width='800'>
-        <h3>3. 社群媒體參與概況</h3>
-        <table>
-          <tr><td>總參與數</td><td>3,511 (+12.5%)</td><td>正面情緒比例</td><td>73% (+3.2%)</td><td>平均互動率</td><td>3.9% (+0.8%)</td><td>活躍平台</td><td>6</td></tr>
-        </table>
-        <h3>4. 社群情感趨勢圖</h3>
-        <img src='data:image/png;base64,{base64.b64encode(open("social_sentiment_trend.png", "rb").read()).decode("utf-8")}' width='800'>
-        <h3>5. 各平台表現</h3>
-        <img src='data:image/png;base64,{base64.b64encode(open("platform_performance.png", "rb").read()).decode("utf-8")}' width='600'>
-        <h3>6. 候選人社群量趨勢</h3>
-        <img src='data:image/png;base64,{base64.b64encode(open("candidate_volume_trend.png", "rb").read()).decode("utf-8")}' width='800'>
-        <h3>7. 候選人社群量分析（正/負面情緒）</h3>
-        <img src='data:image/png;base64,{base64.b64encode(open("candidate_sentiment_analysis.png", "rb").read()).decode("utf-8")}' width='600'>
-        <h3>8. 新聞議題統計</h3>
-        <p>各類別新聞數量：{news_summary}</p>
-        {news_table}
-        """
-        return html_report
     except Exception as e:
         err_msg = f"❌ 輿情分析執行失敗:\n{traceback.format_exc()}"
         print(err_msg)
         return f"<pre>{err_msg}</pre>"
 # -----------------------------
 # 自動排程設定 (每天 08:00 執行)
 # -----------------------------
@@ -146,68 +250,22 @@ def schedule_daily_run():
             schedule.run_pending()
         except Exception as e:
             print(f"⚠️ 排程異常: {e}")
-        time.sleep(60) # 每分鐘檢查一次
 # -----------------------------
 # 啟動排程背景執行緒
 # -----------------------------
 threading.Thread(target=schedule_daily_run, daemon=True).start()
 # -----------------------------
 # Gradio 前端
 # -----------------------------
 iface = gr.Interface(
     fn=run_analysis,
     inputs=[],
-    outputs=gr.HTML,
     live=False,
     title="高雄市長選戰輿情分析",
     description="每日自動抓取 X 貼文 + 新聞議題分析 + 歷史情緒趨勢\n支援自動重試與排程異常警告"
 )
-iface.launch()
-# 假設你已經有 df_tweets 和 df_news
-# df_tweets columns: ['日期','使用者','內容','候選人','情緒']
-# df_news columns: ['日期','標題','摘要','類別','平台']
-# 1️⃣ 社群情感趨勢圖
-df_trend = df_tweets.copy()
-df_trend['日期'] = pd.to_datetime(df_trend['日期']).dt.date
-trend_summary = df_trend.groupby(['日期','情緒']).size().unstack(fill_value=0)
-trend_summary_percent = trend_summary.div(trend_summary.sum(axis=1), axis=0)
-plt.figure(figsize=(10,5))
-trend_summary_percent.plot.area(colormap='RdYlGn', alpha=0.6)
-plt.title("社群情感趨勢")
-plt.ylabel("比例")
-plt.xlabel("日期")
-plt.xticks(rotation=45)
-plt.tight_layout()
-plt.savefig("social_sentiment_trend.png")
-plt.show()
-# 2️⃣ 各平台表現
-platform_summary = df_news['平台'].value_counts()
-plt.figure(figsize=(8,4))
-platform_summary.plot(kind='bar', color='royalblue')
-plt.title("各平台表現")
-plt.ylabel("提及數")
-plt.xticks(rotation=0)
-plt.tight_layout()
-plt.savefig("platform_performance.png")
-plt.show()
-# 3️⃣ 候選人社群量趨勢
-candidate_trend = df_trend.groupby(['日期','候選人']).size().unstack(fill_value=0)
-candidate_trend.plot.area(figsize=(10,5), alpha=0.6)
-plt.title("候選人社群聲量趨勢")
-plt.ylabel("提及數")
-plt.xlabel("日期")
-plt.xticks(rotation=45)
-plt.tight_layout()
-plt.savefig("candidate_volume_trend.png")
-plt.show()
-# 4️⃣ 候選人社群量分析（正/負面情緒）
-candidate_sentiment = df_trend.groupby(['候選人','情緒']).size().unstack(fill_value=0)
-candidate_sentiment[['positive','negative']].plot(kind='bar', stacked=True, colormap='RdYlGn')
-plt.title("候選人社群情緒分析")
-plt.ylabel("貼文數")
-plt.xlabel("候選人")
-plt.xticks(rotation=0)
-plt.tight_layout()
-plt.savefig("candidate_sentiment_analysis.png")
-plt.show()

 import time
 import threading
 import traceback
+import networkx as nx
 # -----------------------------
 # 參數設定
 # -----------------------------
 max_tweets_per_candidate = 100
 news_file = "news_sample.csv"
 history_file = "history_sentiment.csv"
+max_retries = 3  # 貼文抓取失敗重試次數
 # -----------------------------
 # 中文情緒分析模型 (公開可用)
 # -----------------------------
     "sentiment-analysis",
     model="uer/roberta-base-finetuned-sentiment-chinese"
 )
 # -----------------------------
 # 主分析函數
 # -----------------------------
         all_tweets = []
         for candidate in candidates:
             query = f'{candidate} since:{since_date} until:{until_date}'
+            for attempt in range(1, max_retries + 1):
                 try:
                     for i, tweet in enumerate(sntwitter.TwitterSearchScraper(query).get_items()):
                         if i >= max_tweets_per_candidate:
                             break
                         all_tweets.append([tweet.date, tweet.user.username, tweet.content, candidate])
+                    break  # 成功抓取，跳出重試
                 except Exception as e:
                     print(f"⚠️ {candidate} 第 {attempt} 次抓貼文失敗: {e}")
                     if attempt == max_retries:
         summary['總貼文'] = summary.sum(axis=1)
         summary['正面比率'] = summary.get('positive', 0) / summary['總貼文']
         summary['負面比率'] = summary.get('negative', 0) / summary['總貼文']
+        summary['日期'] = datetime.now().strftime('%Y-%m-%d %H:%M %Z')
         # 3. 更新歷史資料
         if os.path.exists(history_file):
             df_history = pd.read_csv(history_file)
+            df_history = pd.concat([df_history, summary.reset_index()[['日期', '候選人', '正面比率', '負面比率']]], ignore_index=True)
         else:
+            df_history = summary.reset_index()[['日期', '候選人', '正面比率', '負面比率']]
         df_history.to_csv(history_file, index=False)
         # 4. 可視化當日情緒圖表
+        plt.figure(figsize=(8, 5))
+        summary[['正面比率', '負面比率']].plot(kind='bar', stacked=True, colormap='coolwarm')
         plt.title("候選人當日社群情緒比例")
         plt.ylabel("比例")
         plt.xlabel("候選人")
         buf.seek(0)
         img_b64_today = base64.b64encode(buf.read()).decode("utf-8")
         buf.close()
         # 5. 可視化歷史情緒趨勢
+        plt.figure(figsize=(10, 5))
         for c in candidates:
+            temp = df_history[df_history['候選人'] == c]
             plt.plot(temp['日期'], temp['正面比率'], marker='o', label=f"{c} 正面")
             plt.plot(temp['日期'], temp['負面比率'], marker='x', label=f"{c} 負面")
         plt.xticks(rotation=45)
         buf.seek(0)
         img_b64_trend = base64.b64encode(buf.read()).decode("utf-8")
         buf.close()
         # 6. 新聞線索整合
         if os.path.exists(news_file):
             df_news = pd.read_csv(news_file)
         else:
             news_summary = {}
             news_table = "<p>未提供新聞資料</p>"
+        # 7. 社群情感趨勢圖
+        df_trend = df_tweets.copy()
+        df_trend['日期'] = pd.to_datetime(df_trend['日期']).dt.date
+        trend_summary = df_trend.groupby(['日期', '情緒']).size().unstack(fill_value=0)
+        trend_summary_percent = trend_summary.div(trend_summary.sum(axis=1), axis=0)
+        plt.figure(figsize=(10, 5))
+        trend_summary_percent.plot.area(colormap='RdYlGn', alpha=0.6)
+        plt.title("社群情感趨勢")
+        plt.ylabel("比例")
+        plt.xlabel("日期")
+        plt.xticks(rotation=45)
+        plt.tight_layout()
+        plt.savefig("social_sentiment_trend.png")
+        plt.close()
+        # 8. 各平台表現
+        platform_summary = df_news['平台'].value_counts()
+        plt.figure(figsize=(8, 4))
+        platform_summary.plot(kind='bar', color='royalblue')
+        plt.title("各平台表現")
+        plt.ylabel("提及數")
+        plt.xticks(rotation=0)
+        plt.tight_layout()
+        plt.savefig("platform_performance.png")
+        plt.close()
+        # 9. 候選人社群量趨勢
+        candidate_trend = df_trend.groupby(['日期', '候選人']).size().unstack(fill_value=0)
+        candidate_trend.plot.area(figsize=(10, 5), alpha=0.6)
+        plt.title("候選人社群聲量趨勢")
+        plt.ylabel("提及數")
+        plt.xlabel("日期")
+        plt.xticks(rotation=45)
+        plt.tight_layout()
+        plt.savefig("candidate_volume_trend.png")
+        plt.close()
+        # 10. 候選人社群量分析（正/負面情緒）
+        candidate_sentiment = df_trend.groupby(['候選人', '情緒']).size().unstack(fill_value=0)
+        candidate_sentiment[['positive', 'negative']].plot(kind='bar', stacked=True, colormap='RdYlGn')
+        plt.title("候選人社群情緒分析")
+        plt.ylabel("貼文數")
+        plt.xlabel("候選人")
+        plt.xticks(rotation=0)
+        plt.tight_layout()
+        plt.savefig("candidate_sentiment_analysis.png")
+        plt.close()
+        # 11. 知識圖譜
+        G = nx.DiGraph()
+        G.add_nodes_from(candidates, type='candidate')
+        G.add_nodes_from(df_tweets['情緒'].unique(), type='sentiment')
+        G.add_nodes_from(df_news['平台'].unique(), type='platform')
+        G.add_nodes_from(df_news['類別'].unique(), type='news_category')
+        for candidate in candidates:
+            candidate_data = df_tweets[df_tweets['候選人'] == candidate]
+            total_posts = len(candidate_data)
+            for sentiment in df_tweets['情緒'].unique():
+                sentiment_count = len(candidate_data[candidate_data['情緒'] == sentiment])
+                if total_posts > 0 and sentiment_count > 0:
+                    G.add_edge(candidate, sentiment, weight=sentiment_count / total_posts)
+        for candidate in candidates:
+            candidate_tweets = df_tweets[df_tweets['候選人'] == candidate]
+            for platform in df_news['平台'].unique():
+                platform_count = len(candidate_tweets[candidate_tweets['內容'].str.contains(platform, na=False)])
+                if platform_count > 0:
+                    G.add_edge(candidate, platform, weight=platform_count)
+        for candidate in candidates:
+            candidate_news = df_news[df_news['內容'].str.contains(candidate, na=False)]
+            for category in df_news['類別'].unique():
+                category_count = len(candidate_news[candidate_news['類別'] == category])
+                if category_count > 0:
+                    G.add_edge(candidate, category, weight=category_count)
+        plt.figure(figsize=(12, 8))
+        pos = nx.spring_layout(G)
+        nx.draw(G, pos, with_labels=True, node_color='lightblue', node_size=1500, font_size=8, font_weight='bold', arrows=True)
+        edge_labels = nx.get_edge_attributes(G, 'weight')
+        nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)
+        plt.title("知識圖譜 of Kaohsiung Mayoral Election Sentiment")
+        plt.axis('off')
+        plt.savefig("knowledge_graph.png")
+        plt.close()
+        # 12. 載入並填充 index.html 模板
+        with open("index.html", "r", encoding="utf-8") as f:
+            html_template = f.read()
+        html_content = html_template.format(
+            report_date=datetime.now().strftime('%Y-%m-%d %H:%M %Z'),
+            img_b64_today=img_b64_today,
+            img_b64_trend=img_b64_trend,
+            engagement_table="""
+                <table class="min-w-full bg-white border border-gray-200">
+                    <tr class="bg-gray-100 border-b">
+                        <th class="py-2 px-4 border-r">總參與數</th>
+                        <td class="py-2 px-4 border-r">3,511 (+12.5%)</td>
+                        <th class="py-2 px-4 border-r">正面情緒比例</th>
+                        <td class="py-2 px-4 border-r">73% (+3.2%)</td>
+                        <th class="py-2 px-4 border-r">平均互動率</th>
+                        <td class="py-2 px-4 border-r">3.9% (+0.8%)</td>
+                        <th class="py-2 px-4 border-r">活躍平台</th>
+                        <td class="py-2 px-4">6</td>
+                    </tr>
+                </table>
+            """,
+            img_social_sentiment=base64.b64encode(open("social_sentiment_trend.png", "rb").read()).decode("utf-8"),
+            img_platform_performance=base64.b64encode(open("platform_performance.png", "rb").read()).decode("utf-8"),
+            img_candidate_volume=base64.b64encode(open("candidate_volume_trend.png", "rb").read()).decode("utf-8"),
+            img_candidate_sentiment=base64.b64encode(open("candidate_sentiment_analysis.png", "rb").read()).decode("utf-8"),
+            img_knowledge_graph=base64.b64encode(open("knowledge_graph.png", "rb").read()).decode("utf-8"),
+            news_summary=str(news_summary),
+            news_table=news_table
+        )
+        return html_content
     except Exception as e:
         err_msg = f"❌ 輿情分析執行失敗:\n{traceback.format_exc()}"
         print(err_msg)
         return f"<pre>{err_msg}</pre>"
 # -----------------------------
 # 自動排程設定 (每天 08:00 執行)
 # -----------------------------
             schedule.run_pending()
         except Exception as e:
             print(f"⚠️ 排程異常: {e}")
+        time.sleep(60)  # 每分鐘檢查一次
 # -----------------------------
 # 啟動排程背景執行緒
 # -----------------------------
 threading.Thread(target=schedule_daily_run, daemon=True).start()
 # -----------------------------
 # Gradio 前端
 # -----------------------------
 iface = gr.Interface(
     fn=run_analysis,
     inputs=[],
+    outputs=gr.HTML(),
     live=False,
     title="高雄市長選戰輿情分析",
     description="每日自動抓取 X 貼文 + 新聞議題分析 + 歷史情緒趨勢\n支援自動重試與排程異常警告"
 )
+iface.launch(server_name="0.0.0.0", server_port=7860)