Spaces:

shiue2000
/

sparkAnalysis

Sleeping

App Files Community

shiue2000 committed on Sep 25, 2025

Commit

c03fc1f

verified ·

1 Parent(s): a761827

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -71

app.py CHANGED Viewed

@@ -9,29 +9,16 @@ import networkx as nx
 from datetime import datetime, timedelta
 import gradio as gr
 import logging
-import matplotlib.font_manager as fm
 # 設置日誌
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-# 中文顯示設置
-font_list = ['Microsoft JhengHei', 'SimHei', 'Arial Unicode MS', 'DejaVu Sans']
-available_fonts = [f.name for f in fm.fontManager.ttflist]
-selected_font = None
-for font in font_list:
-    if font in available_fonts:
-        selected_font = font
-        break
-if selected_font:
-    logging.info(f"Using font: {selected_font}")
-    plt.rcParams['font.sans-serif'] = [selected_font]
-else:
-    logging.warning("No specified Chinese fonts found. Falling back to default. Chinese text may not render correctly.")
 plt.rcParams['axes.unicode_minus'] = False
 # 參數設定
-candidates = ["許智傑", "邱議瑩", "賴瑞隆", "林岱樺", "柯志恩"]
 days_back = 7
 max_tweets_per_candidate = 20
 news_file = "news_sample.csv"
@@ -54,10 +41,10 @@ def fetch_tweets(candidate):
     logging.info(f"Fetching tweets for candidate: {candidate}")
     return pd.DataFrame([
         {
-            "日期": datetime.now() - timedelta(days=random.randint(0, days_back - 1)),
-            "使用者": f"user{random.randint(1, 100)}",
-            "內容": f"{candidate} 的貼文 {i}",
-            "候選人": candidate
         } for i in range(random.randint(5, max_tweets_per_candidate))
     ])
@@ -78,93 +65,93 @@ def run_analysis():
         template_path = "templates/index.html"
         if not os.path.exists(template_path):
             logging.error(f"Template file {template_path} not found.")
-            return f"<pre>❌ 模板檔案 {template_path} 不存在</pre>"
         # 抓貼文與情緒分析
         logging.info("Fetching and analyzing tweets...")
         all_df = pd.concat([fetch_tweets(c) for c in candidates], ignore_index=True)
-        all_df['情緒'] = all_df['內容'].apply(lambda x: sentiment(x)['label'])
-        all_df['信心度'] = all_df['內容'].apply(lambda x: sentiment(x)['score'])
         # 統計每日情緒
-        summary = all_df.groupby(['候選人', '情緒']).size().unstack(fill_value=0)
-        summary['總貼文'] = summary.sum(axis=1)
-        summary['正面比率'] = summary.get('positive', 0) / summary['總貼文'].replace(0, 1)
-        summary['負面比率'] = summary.get('negative', 0) / summary['總貼文'].replace(0, 1)
         # 更新歷史資料
         today_str = datetime.now().strftime('%Y-%m-%d')
-        hist_row = summary[['正面比率', '負面比率']].copy()
-        hist_row['日期'] = today_str
-        hist_row['候選人'] = summary.index
         df_hist = pd.concat([pd.read_csv(history_file), hist_row], ignore_index=True) if os.path.exists(history_file) else hist_row
         df_hist.to_csv(history_file, index=False)
         # 圖表生成
-        # 1. 當日情緒比例
         fig1 = plt.figure(figsize=(8, 5))
-        summary[['正面比率', '負面比率']].plot(kind='bar', stacked=True, colormap='coolwarm', ax=fig1.gca())
-        fig1.gca().set_title("候選人當日社群情緒比例")
-        fig1.gca().set_ylabel("比例")
-        fig1.gca().set_xlabel("候選人")
         img_b64_today = plot_to_base64(fig1)
-        # 2. 歷史情緒趨勢
         fig2 = plt.figure(figsize=(10, 5))
         for c in candidates:
-            temp = df_hist[df_hist['候選人'] == c]
-            plt.plot(temp['日期'], temp['正面比率'], marker='o', label=f"{c} 正面")
-            plt.plot(temp['日期'], temp['負面比率'], marker='x', label=f"{c} 負面")
-        plt.title("候選人歷史情緒趨勢")
         plt.xticks(rotation=45)
-        plt.ylabel("比例")
         plt.legend()
         img_b64_trend = plot_to_base64(fig2)
-        # 3. 社群情感趨勢
-        sentiment_trend = all_df.groupby([pd.Grouper(key='日期', freq='D'), '情緒']).size().unstack(fill_value=0)
         sentiment_trend = sentiment_trend.div(sentiment_trend.sum(axis=1), axis=0).fillna(0)
         fig3 = plt.figure(figsize=(8, 5))
         for s in ['positive', 'negative']:
             if s in sentiment_trend.columns:
-                plt.plot(sentiment_trend.index, sentiment_trend[s], marker='o', label=s)
-        plt.title("社群情感趨勢")
-        plt.xlabel("日期")
-        plt.ylabel("比例")
         plt.legend()
         img_social_sentiment = plot_to_base64(fig3)
-        # 4. 各平台表現
         platforms = ["X", "Facebook", "Instagram", "PTT", "Line"]
         platform_counts = pd.Series({p: random.randint(10, 100) for p in platforms})
         fig4 = plt.figure(figsize=(8, 5))
         plt.bar(platforms, platform_counts, color='skyblue')
-        plt.title("各平台貼文量")
-        plt.xlabel("平台")
-        plt.ylabel("貼文數量")
         img_platform_performance = plot_to_base64(fig4)
-        # 5. 候選人社群量趨勢
-        candidate_trend = all_df.groupby([pd.Grouper(key='日期', freq='D'), '候選人']).size().unstack(fill_value=0)
         fig5 = plt.figure(figsize=(8, 5))
         for c in candidates:
             if c in candidate_trend.columns:
                 plt.plot(candidate_trend.index, candidate_trend[c], marker='o', label=c)
-        plt.title("候選人社群量趨勢")
-        plt.xlabel("日期")
-        plt.ylabel("貼文數量")
         plt.legend()
         img_candidate_volume = plot_to_base64(fig5)
-        # 6. 候選人社群量分析
         fig6 = plt.figure(figsize=(8, 5))
-        summary[['正面比率', '負面比率']].plot(kind='bar', stacked=True, colormap='coolwarm', ax=fig6.gca())
-        fig6.gca().set_title("候選人社群量分析（正/負面情緒）")
-        fig6.gca().set_ylabel("比例")
-        fig6.gca().set_xlabel("候選人")
         img_candidate_sentiment = plot_to_base64(fig6)
-        # 7. 知識圖譜
         fig7, ax7 = plt.subplots(figsize=(8, 6))
         G = nx.Graph()
         for c in candidates:
@@ -177,23 +164,23 @@ def run_analysis():
         # 新聞資料
         if os.path.exists(news_file):
             df_news = pd.read_csv(news_file)
-            news_summary = df_news.groupby('類別').size().to_dict()
             news_table = df_news.to_html(index=False, classes="min-w-full border border-gray-200")
         else:
             news_summary = {}
-            news_table = "<p>未提供新聞資料</p>"
         # 社群參與表格
         engagement_table = f"""
         <table class="min-w-full bg-white border border-gray-200">
         <tr class="bg-gray-100 border-b">
-            <th class="py-2 px-4 border-r">總參與數</th>
             <td class="py-2 px-4 border-r">{len(all_df)}</td>
-            <th class="py-2 px-4 border-r">正面情緒比例</th>
-            <td class="py-2 px-4 border-r">{all_df['情緒'].value_counts(normalize=True).get('positive', 0):.1%}</td>
-            <th class="py-2 px-4 border-r">平均互動率</th>
             <td class="py-2 px-4 border-r">3.9%</td>
-            <th class="py-2 px-4 border-r">活躍平台</th>
             <td class="py-2 px-4">{len(platforms)}</td>
         </tr></table>
         """
@@ -220,10 +207,10 @@ def run_analysis():
         return html_content
     except Exception as e:
         logging.error(f"Analysis failed: {str(e)}")
-        return f"<pre>❌ 輿情分析執行失敗:\n{traceback.format_exc()}</pre>"
 # Gradio 前端
 if __name__ == "__main__":
     logging.info("Starting Gradio interface...")
-    iface = gr.Interface(fn=run_analysis, inputs=[], outputs=gr.HTML(), title="高雄市長選戰輿情分析")
     iface.launch(server_name="0.0.0.0", server_port=7860)

 from datetime import datetime, timedelta
 import gradio as gr
 import logging
 # 設置日誌
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+# 字體設置 (使用標準字體以確保英文顯示)
+plt.rcParams['font.sans-serif'] = ['Arial', 'DejaVu Sans']
 plt.rcParams['axes.unicode_minus'] = False
 # 參數設定
+candidates = ["Xu Zhijie", "Qiu Yiying", "Lai Ruilong", "Lin Daihua", "Ke Zhien"]  # English transliteration
 days_back = 7
 max_tweets_per_candidate = 20
 news_file = "news_sample.csv"
     logging.info(f"Fetching tweets for candidate: {candidate}")
     return pd.DataFrame([
         {
+            "Date": datetime.now() - timedelta(days=random.randint(0, days_back - 1)),
+            "User": f"user{random.randint(1, 100)}",
+            "Content": f"{candidate}'s post {i}",
+            "Candidate": candidate
         } for i in range(random.randint(5, max_tweets_per_candidate))
     ])
         template_path = "templates/index.html"
         if not os.path.exists(template_path):
             logging.error(f"Template file {template_path} not found.")
+            return f"<pre>❌ Template file {template_path} not found</pre>"
         # 抓貼文與情緒分析
         logging.info("Fetching and analyzing tweets...")
         all_df = pd.concat([fetch_tweets(c) for c in candidates], ignore_index=True)
+        all_df['Sentiment'] = all_df['Content'].apply(lambda x: sentiment(x)['label'])
+        all_df['Confidence'] = all_df['Content'].apply(lambda x: sentiment(x)['score'])
         # 統計每日情緒
+        summary = all_df.groupby(['Candidate', 'Sentiment']).size().unstack(fill_value=0)
+        summary['Total Posts'] = summary.sum(axis=1)
+        summary['Positive Ratio'] = summary.get('positive', 0) / summary['Total Posts'].replace(0, 1)
+        summary['Negative Ratio'] = summary.get('negative', 0) / summary['Total Posts'].replace(0, 1)
         # 更新歷史資料
         today_str = datetime.now().strftime('%Y-%m-%d')
+        hist_row = summary[['Positive Ratio', 'Negative Ratio']].copy()
+        hist_row['Date'] = today_str
+        hist_row['Candidate'] = summary.index
         df_hist = pd.concat([pd.read_csv(history_file), hist_row], ignore_index=True) if os.path.exists(history_file) else hist_row
         df_hist.to_csv(history_file, index=False)
         # 圖表生成
+        # 1. Daily Sentiment Proportion
         fig1 = plt.figure(figsize=(8, 5))
+        summary[['Positive Ratio', 'Negative Ratio']].plot(kind='bar', stacked=True, colormap='coolwarm', ax=fig1.gca())
+        fig1.gca().set_title("Candidate Daily Social Sentiment Proportion")
+        fig1.gca().set_ylabel("Proportion")
+        fig1.gca().set_xlabel("Candidate")
         img_b64_today = plot_to_base64(fig1)
+        # 2. Historical Sentiment Trend
         fig2 = plt.figure(figsize=(10, 5))
         for c in candidates:
+            temp = df_hist[df_hist['Candidate'] == c]
+            plt.plot(temp['Date'], temp['Positive Ratio'], marker='o', label=f"{c} Positive")
+            plt.plot(temp['Date'], temp['Negative Ratio'], marker='x', label=f"{c} Negative")
+        plt.title("Candidate Historical Sentiment Trend")
         plt.xticks(rotation=45)
+        plt.ylabel("Proportion")
         plt.legend()
         img_b64_trend = plot_to_base64(fig2)
+        # 3. Social Sentiment Trend
+        sentiment_trend = all_df.groupby([pd.Grouper(key='Date', freq='D'), 'Sentiment']).size().unstack(fill_value=0)
         sentiment_trend = sentiment_trend.div(sentiment_trend.sum(axis=1), axis=0).fillna(0)
         fig3 = plt.figure(figsize=(8, 5))
         for s in ['positive', 'negative']:
             if s in sentiment_trend.columns:
+                plt.plot(sentiment_trend.index, sentiment_trend[s], marker='o', label=s.capitalize())
+        plt.title("Social Sentiment Trend")
+        plt.xlabel("Date")
+        plt.ylabel("Proportion")
         plt.legend()
         img_social_sentiment = plot_to_base64(fig3)
+        # 4. Platform Performance
         platforms = ["X", "Facebook", "Instagram", "PTT", "Line"]
         platform_counts = pd.Series({p: random.randint(10, 100) for p in platforms})
         fig4 = plt.figure(figsize=(8, 5))
         plt.bar(platforms, platform_counts, color='skyblue')
+        plt.title("Platform Post Volume")
+        plt.xlabel("Platform")
+        plt.ylabel("Post Count")
         img_platform_performance = plot_to_base64(fig4)
+        # 5. Candidate Post Volume Trend
+        candidate_trend = all_df.groupby([pd.Grouper(key='Date', freq='D'), 'Candidate']).size().unstack(fill_value=0)
         fig5 = plt.figure(figsize=(8, 5))
         for c in candidates:
             if c in candidate_trend.columns:
                 plt.plot(candidate_trend.index, candidate_trend[c], marker='o', label=c)
+        plt.title("Candidate Post Volume Trend")
+        plt.xlabel("Date")
+        plt.ylabel("Post Count")
         plt.legend()
         img_candidate_volume = plot_to_base64(fig5)
+        # 6. Candidate Sentiment Analysis
         fig6 = plt.figure(figsize=(8, 5))
+        summary[['Positive Ratio', 'Negative Ratio']].plot(kind='bar', stacked=True, colormap='coolwarm', ax=fig6.gca())
+        fig6.gca().set_title("Candidate Post Sentiment Analysis (Positive/Negative)")
+        fig6.gca().set_ylabel("Proportion")
+        fig6.gca().set_xlabel("Candidate")
         img_candidate_sentiment = plot_to_base64(fig6)
+        # 7. Knowledge Graph
         fig7, ax7 = plt.subplots(figsize=(8, 6))
         G = nx.Graph()
         for c in candidates:
         # 新聞資料
         if os.path.exists(news_file):
             df_news = pd.read_csv(news_file)
+            news_summary = df_news.groupby('Category').size().to_dict()  # Assuming 'Category' is English
             news_table = df_news.to_html(index=False, classes="min-w-full border border-gray-200")
         else:
             news_summary = {}
+            news_table = "<p>No news data provided</p>"
         # 社群參與表格
         engagement_table = f"""
         <table class="min-w-full bg-white border border-gray-200">
         <tr class="bg-gray-100 border-b">
+            <th class="py-2 px-4 border-r">Total Engagement</th>
             <td class="py-2 px-4 border-r">{len(all_df)}</td>
+            <th class="py-2 px-4 border-r">Positive Sentiment Proportion</th>
+            <td class="py-2 px-4 border-r">{all_df['Sentiment'].value_counts(normalize=True).get('positive', 0):.1%}</td>
+            <th class="py-2 px-4 border-r">Average Interaction Rate</th>
             <td class="py-2 px-4 border-r">3.9%</td>
+            <th class="py-2 px-4 border-r">Active Platforms</th>
             <td class="py-2 px-4">{len(platforms)}</td>
         </tr></table>
         """
         return html_content
     except Exception as e:
         logging.error(f"Analysis failed: {str(e)}")
+        return f"<pre>❌ Analysis failed:\n{traceback.format_exc()}</pre>"
 # Gradio 前端
 if __name__ == "__main__":
     logging.info("Starting Gradio interface...")
+    iface = gr.Interface(fn=run_analysis, inputs=[], outputs=gr.HTML(), title="Kaohsiung Mayoral Election Sentiment Analysis")
     iface.launch(server_name="0.0.0.0", server_port=7860)