Spaces:

shiue2000
/

sparkAnalysis

Sleeping

App Files Files Community

shiue2000 committed on Sep 25, 2025

Commit

28d89bb

verified ·

1 Parent(s): 9925690

Update app.py

Browse files

Files changed (1) hide show

app.py +86 -78

app.py CHANGED Viewed

@@ -1,13 +1,20 @@
-# 1. 匯入套件與參數設定
-import pandas as pd, matplotlib.pyplot as plt, io, base64, os, traceback, random, networkx as nx
 from datetime import datetime, timedelta
-import gradio as gr, schedule, time, threading
-# 中文顯示
-plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei','Arial Unicode MS','SimHei','DejaVu Sans']
 plt.rcParams['axes.unicode_minus'] = False
-candidates = ["許智傑","邱議瑩","賴瑞隆","林岱樺","柯志恩"]
 days_back = 7
 max_tweets_per_candidate = 20
 news_file = "news_sample.csv"
@@ -18,19 +25,23 @@ try:
     from transformers import pipeline
     sentiment_pipeline = pipeline("sentiment-analysis", model="lxyuan/distilbert-base-multilingual-cased-sentiments-student")
     def sentiment(text): return sentiment_pipeline(text)[0]
-except:
-    def sentiment(text): return {"label": random.choice(["positive","negative"]), "score":0.5}
 # 模擬抓貼文
 def fetch_tweets(candidate):
-    return pd.DataFrame([{"日期": datetime.now()-timedelta(days=random.randint(0,6)),
-                          "使用者": f"user{random.randint(1,100)}",
-                          "內容": f"{candidate} 的貼文 {i}",
-                          "候選人": candidate} for i in range(random.randint(5,max_tweets_per_candidate))])
-# base64 圖片轉換
 def plot_to_base64(fig):
-    buf=io.BytesIO()
     fig.savefig(buf, format='png', bbox_inches='tight')
     buf.seek(0)
     img_b64 = base64.b64encode(buf.read()).decode('utf-8')
@@ -47,36 +58,32 @@ def run_analysis():
         all_df['信心度'] = all_df['內容'].apply(lambda x: sentiment(x)['score'])
         # 統計每日情緒
-        summary = all_df.groupby(['候選人','情緒']).size().unstack(fill_value=0)
         summary['總貼文'] = summary.sum(axis=1)
-        summary['正面比率'] = summary.get('positive',0)/summary['總貼文']
-        summary['負面比率'] = summary.get('negative',0)/summary['總貼文']
         # 更新歷史資料
         today_str = datetime.now().strftime('%Y-%m-%d')
-        hist_row = summary[['正面比率','負面比率']].copy()
         hist_row['日期'] = today_str
         hist_row['候選人'] = summary.index
         df_hist = pd.concat([pd.read_csv(history_file), hist_row], ignore_index=True) if os.path.exists(history_file) else hist_row
-        df_hist.to_csv(history_file,index=False)
-        # ----------------- 圖表 -----------------
         # 1. 當日情緒比例
-        plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei', 'Arial Unicode MS', 'SimHei', 'DejaVu Sans']
-        plt.rcParams['axes.unicode_minus'] = False
-        fig1 = plt.figure(figsize=(8,5))
-        summary[['正面比率','負面比率']].plot(kind='bar', stacked=True, colormap='coolwarm', ax=fig1.gca())
         fig1.gca().set_title("候選人當日社群情緒比例")
         img_b64_today = plot_to_base64(fig1)
         # 2. 歷史情緒趨勢
-        plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei', 'Arial Unicode MS', 'SimHei', 'DejaVu Sans']
-        plt.rcParams['axes.unicode_minus'] = False
-        fig2 = plt.figure(figsize=(10,5))
         for c in candidates:
-            temp = df_hist[df_hist['候選人']==c]
             plt.plot(temp['日期'], temp['正面比率'], marker='o', label=f"{c} 正面")
             plt.plot(temp['日期'], temp['負面比率'], marker='x', label=f"{c} 負面")
         plt.title("候選人歷史情緒趨勢")
@@ -85,55 +92,56 @@ def run_analysis():
         plt.legend()
         img_b64_trend = plot_to_base64(fig2)
-        # 3~8 其他圖表生成
-        # 社群情感趨勢
-        plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei', 'Arial Unicode MS', 'SimHei', 'DejaVu Sans']
-        plt.rcParams['axes.unicode_minus'] = False
-        fig3 = plt.figure(figsize=(8,5))
-        plt.plot(range(7), [random.random() for _ in range(7)], marker='o', label="正面")
-        plt.plot(range(7), [random.random() for _ in range(7)], marker='x', label="負面")
         plt.title("社群情感趨勢")
         plt.legend()
         img_social_sentiment = plot_to_base64(fig3)
-        # 各平台表現
-        plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei', 'Arial Unicode MS', 'SimHei', 'DejaVu Sans']
-        plt.rcParams['axes.unicode_minus'] = False
-        fig4 = plt.figure(figsize=(8,5))
-        platforms=["X","Facebook","Instagram","PTT","Line"]
-        plt.bar(platforms, [random.randint(10,100) for _ in platforms], color='skyblue')
         plt.title("各平台貼文量")
         img_platform_performance = plot_to_base64(fig4)
-        # 候選人社群量趨勢
-        plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei', 'Arial Unicode MS', 'SimHei', 'DejaVu Sans']
-        plt.rcParams['axes.unicode_minus'] = False
-        fig5 = plt.figure(figsize=(8,5))
-        for c in candidates: plt.plot(range(7), [random.randint(5,20) for _ in range(7)], marker='o', label=c)
         plt.title("候選人社群量趨勢")
         plt.legend()
         img_candidate_volume = plot_to_base64(fig5)
-        # 候選人社群量分析
-        plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei', 'Arial Unicode MS', 'SimHei', 'DejaVu Sans']
-        plt.rcParams['axes.unicode_minus'] = False
-        fig6 = plt.figure(figsize=(8,5))
-        summary[['正面比率','負面比率']].plot(kind='bar', stacked=True, colormap='coolwarm', ax=fig6.gca())
         fig6.gca().set_title("候選人社群量分析（正/負面情緒）")
         img_candidate_sentiment = plot_to_base64(fig6)
-        # 知識圖譜
-        plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei', 'Arial Unicode MS', 'SimHei', 'DejaVu Sans']
-        plt.rcParams['axes.unicode_minus'] = False
-        fig7, ax7 = plt.subplots(figsize=(8,6))
-        G=nx.Graph()
-        for c in candidates: G.add_node(c)
-        for i in range(len(candidates)-1): G.add_edge(candidates[i], candidates[i+1])
         nx.draw(G, nx.spring_layout(G), with_labels=True, node_color='lightgreen', font_size=12, ax=ax7)
         img_knowledge_graph = plot_to_base64(fig7)
@@ -141,31 +149,29 @@ def run_analysis():
         if os.path.exists(news_file):
             df_news = pd.read_csv(news_file)
             news_summary = df_news.groupby('類別').size().to_dict()
-            news_table = df_news.to_html(index=False)
         else:
             news_summary = {}
             news_table = "<p>未提供新聞資料</p>"
         # 社群參與表格
-        engagement_table=f"""
         <table class="min-w-full bg-white border border-gray-200">
         <tr class="bg-gray-100 border-b">
             <th class="py-2 px-4 border-r">總參與數</th>
             <td class="py-2 px-4 border-r">{len(all_df)}</td>
             <th class="py-2 px-4 border-r">正面情緒比例</th>
-            <td class="py-2 px-4 border-r">{all_df['情緒'].value_counts(normalize=True).get('positive',0):.1%}</td>
             <th class="py-2 px-4 border-r">平均互動率</th>
             <td class="py-2 px-4 border-r">3.9%</td>
             <th class="py-2 px-4 border-r">活躍平台</th>
-            <td class="py-2 px-4">6</td>
         </tr></table>
         """
         # HTML template
-        plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei', 'Arial Unicode MS', 'SimHei', 'DejaVu Sans']
-        plt.rcParams['axes.unicode_minus'] = False
-        html_template = open("templates/index.html").read()
         html_content = html_template.format(
             report_date=datetime.now().strftime('%Y-%m-%d %H:%M'),
             img_b64_today=img_b64_today,
@@ -180,8 +186,10 @@ def run_analysis():
             news_table=news_table
         )
         return html_content
-    except Exception: return f"<pre>❌ 輿情分析執行失敗:\n{traceback.format_exc()}</pre>"
 # Gradio 前端
-iface = gr.Interface(fn=run_analysis, inputs=[], outputs=gr.HTML(), title="高雄市長選戰輿情分析")
-iface.launch(server_name="0.0.0.0", server_port=7860)

+import pandas as pd
+import matplotlib.pyplot as plt
+import io
+import base64
+import os
+import traceback
+import random
+import networkx as nx
 from datetime import datetime, timedelta
+import gradio as gr
+# 中文顯示設置 (只需設置一次)
+plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei', 'Arial Unicode MS', 'SimHei', 'DejaVu Sans']
 plt.rcParams['axes.unicode_minus'] = False
+# 參數設定
+candidates = ["許智傑", "邱議瑩", "賴瑞隆", "林岱樺", "柯志恩"]
 days_back = 7
 max_tweets_per_candidate = 20
 news_file = "news_sample.csv"
     from transformers import pipeline
     sentiment_pipeline = pipeline("sentiment-analysis", model="lxyuan/distilbert-base-multilingual-cased-sentiments-student")
     def sentiment(text): return sentiment_pipeline(text)[0]
+except:
+    def sentiment(text): return {"label": random.choice(["positive", "negative"]), "score": 0.5}
 # 模擬抓貼文
 def fetch_tweets(candidate):
+    return pd.DataFrame([
+        {
+            "日期": datetime.now() - timedelta(days=random.randint(0, days_back - 1)),
+            "使用者": f"user{random.randint(1, 100)}",
+            "內容": f"{candidate} 的貼文 {i}",
+            "候選人": candidate
+        } for i in range(random.randint(5, max_tweets_per_candidate))
+    ])
+# Base64 圖片轉換
 def plot_to_base64(fig):
+    buf = io.BytesIO()
     fig.savefig(buf, format='png', bbox_inches='tight')
     buf.seek(0)
     img_b64 = base64.b64encode(buf.read()).decode('utf-8')
         all_df['信心度'] = all_df['內容'].apply(lambda x: sentiment(x)['score'])
         # 統計每日情緒
+        summary = all_df.groupby(['候選人', '情緒']).size().unstack(fill_value=0)
         summary['總貼文'] = summary.sum(axis=1)
+        summary['正面比率'] = summary.get('positive', 0) / summary['總貼文'].replace(0, 1)  # Avoid division by zero
+        summary['負面比率'] = summary.get('negative', 0) / summary['總貼文'].replace(0, 1)
         # 更新歷史資料
         today_str = datetime.now().strftime('%Y-%m-%d')
+        hist_row = summary[['正面比率', '負面比率']].copy()
         hist_row['日期'] = today_str
         hist_row['候選人'] = summary.index
         df_hist = pd.concat([pd.read_csv(history_file), hist_row], ignore_index=True) if os.path.exists(history_file) else hist_row
+        df_hist.to_csv(history_file, index=False)
+        # 圖表生成
         # 1. 當日情緒比例
+        fig1 = plt.figure(figsize=(8, 5))
+        summary[['正面比率', '負面比率']].plot(kind='bar', stacked=True, colormap='coolwarm', ax=fig1.gca())
         fig1.gca().set_title("候選人當日社群情緒比例")
+        fig1.gca().set_ylabel("比例")
+        fig1.gca().set_xlabel("候選人")
         img_b64_today = plot_to_base64(fig1)
         # 2. 歷史情緒趨勢
+        fig2 = plt.figure(figsize=(10, 5))
         for c in candidates:
+            temp = df_hist[df_hist['候選人'] == c]
             plt.plot(temp['日期'], temp['正面比率'], marker='o', label=f"{c} 正面")
             plt.plot(temp['日期'], temp['負面比率'], marker='x', label=f"{c} 負面")
         plt.title("候選人歷史情緒趨勢")
         plt.legend()
         img_b64_trend = plot_to_base64(fig2)
+        # 3. 社群情感趨勢 (使用實際資料而非隨機)
+        sentiment_trend = all_df.groupby([pd.Grouper(key='日期', freq='D'), '情緒']).size().unstack(fill_value=0)
+        sentiment_trend = sentiment_trend.div(sentiment_trend.sum(axis=1), axis=0).fillna(0)
+        fig3 = plt.figure(figsize=(8, 5))
+        for s in ['positive', 'negative']:
+            if s in sentiment_trend.columns:
+                plt.plot(sentiment_trend.index, sentiment_trend[s], marker='o', label=s)
         plt.title("社群情感趨勢")
+        plt.xlabel("日期")
+        plt.ylabel("比例")
         plt.legend()
         img_social_sentiment = plot_to_base64(fig3)
+        # 4. 各平台表現 (模擬平台數據)
+        platforms = ["X", "Facebook", "Instagram", "PTT", "Line"]
+        platform_counts = pd.Series({p: random.randint(10, 100) for p in platforms})
+        fig4 = plt.figure(figsize=(8, 5))
+        plt.bar(platforms, platform_counts, color='skyblue')
         plt.title("各平台貼文量")
+        plt.xlabel("平台")
+        plt.ylabel("貼文數量")
         img_platform_performance = plot_to_base64(fig4)
+        # 5. 候選人社群量趨勢
+        candidate_trend = all_df.groupby([pd.Grouper(key='日期', freq='D'), '候選人']).size().unstack(fill_value=0)
+        fig5 = plt.figure(figsize=(8, 5))
+        for c in candidates:
+            if c in candidate_trend.columns:
+                plt.plot(candidate_trend.index, candidate_trend[c], marker='o', label=c)
         plt.title("候選人社群量趨勢")
+        plt.xlabel("日期")
+        plt.ylabel("貼文數量")
         plt.legend()
         img_candidate_volume = plot_to_base64(fig5)
+        # 6. 候選人社群量分析
+        fig6 = plt.figure(figsize=(8, 5))
+        summary[['正面比率', '負面比率']].plot(kind='bar', stacked=True, colormap='coolwarm', ax=fig6.gca())
         fig6.gca().set_title("候選人社群量分析（正/負面情緒）")
+        fig6.gca().set_ylabel("比例")
+        fig6.gca().set_xlabel("候選人")
         img_candidate_sentiment = plot_to_base64(fig6)
+        # 7. 知識圖譜
+        fig7, ax7 = plt.subplots(figsize=(8, 6))
+        G = nx.Graph()
+        for c in candidates:
+            G.add_node(c)
+        for i in range(len(candidates) - 1):
+            G.add_edge(candidates[i], candidates[i + 1])
         nx.draw(G, nx.spring_layout(G), with_labels=True, node_color='lightgreen', font_size=12, ax=ax7)
         img_knowledge_graph = plot_to_base64(fig7)
         if os.path.exists(news_file):
             df_news = pd.read_csv(news_file)
             news_summary = df_news.groupby('類別').size().to_dict()
+            news_table = df_news.to_html(index=False, classes="min-w-full border border-gray-200")
         else:
             news_summary = {}
             news_table = "<p>未提供新聞資料</p>"
         # 社群參與表格
+        engagement_table = f"""
         <table class="min-w-full bg-white border border-gray-200">
         <tr class="bg-gray-100 border-b">
             <th class="py-2 px-4 border-r">總參與數</th>
             <td class="py-2 px-4 border-r">{len(all_df)}</td>
             <th class="py-2 px-4 border-r">正面情緒比例</th>
+            <td class="py-2 px-4 border-r">{all_df['情緒'].value_counts(normalize=True).get('positive', 0):.1%}</td>
             <th class="py-2 px-4 border-r">平均互動率</th>
             <td class="py-2 px-4 border-r">3.9%</td>
             <th class="py-2 px-4 border-r">活躍平台</th>
+            <td class="py-2 px-4">{len(platforms)}</td>
         </tr></table>
         """
         # HTML template
+        with open("templates/index.html", encoding='utf-8') as f:
+            html_template = f.read()
         html_content = html_template.format(
             report_date=datetime.now().strftime('%Y-%m-%d %H:%M'),
             img_b64_today=img_b64_today,
             news_table=news_table
         )
         return html_content
+    except Exception as e:
+        return f"<pre>❌ 輿情分析執行失敗:\n{traceback.format_exc()}</pre>"
 # Gradio 前端
+if __name__ == "__main__":
+    iface = gr.Interface(fn=run_analysis, inputs=[], outputs=gr.HTML(), title="高雄市長選戰輿情分析")
+    iface.launch(server_name="0.0.0.0", server_port=7860)