Spaces:

shiue2000
/

sparkAnalysis

Sleeping

App Files Files Community

shiue2000 committed on Sep 26, 2025

Commit

86cc189

verified ·

1 Parent(s): c03fc1f

Update app.py

Browse files

Files changed (1) hide show

app.py +85 -65

app.py CHANGED Viewed

@@ -13,12 +13,12 @@ import logging
 # 設置日誌
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-# 字體設置 (使用標準字體以確保英文顯示)
-plt.rcParams['font.sans-serif'] = ['Arial', 'DejaVu Sans']
 plt.rcParams['axes.unicode_minus'] = False
 # 參數設定
-candidates = ["Xu Zhijie", "Qiu Yiying", "Lai Ruilong", "Lin Daihua", "Ke Zhien"]  # English transliteration
 days_back = 7
 max_tweets_per_candidate = 20
 news_file = "news_sample.csv"
@@ -28,22 +28,29 @@ history_file = "history_sentiment.csv"
 try:
     from transformers import pipeline
     sentiment_pipeline = pipeline("sentiment-analysis", model="lxyuan/distilbert-base-multilingual-cased-sentiments-student")
-    def sentiment(text):
-        logging.info(f"Performing sentiment analysis on text: {text[:50]}...")
         return sentiment_pipeline(text)[0]
 except:
     def sentiment(text):
-        logging.warning("Sentiment model failed to load, using random fallback.")
-        return {"label": random.choice(["positive", "negative"]), "score": 0.5}
-# 模擬抓貼文
 def fetch_tweets(candidate):
-    logging.info(f"Fetching tweets for candidate: {candidate}")
     return pd.DataFrame([
         {
             "Date": datetime.now() - timedelta(days=random.randint(0, days_back - 1)),
             "User": f"user{random.randint(1, 100)}",
-            "Content": f"{candidate}'s post {i}",
             "Candidate": candidate
         } for i in range(random.randint(5, max_tweets_per_candidate))
     ])
@@ -64,11 +71,11 @@ def run_analysis():
         # 檢查模板檔案
         template_path = "templates/index.html"
         if not os.path.exists(template_path):
-            logging.error(f"Template file {template_path} not found.")
-            return f"<pre>❌ Template file {template_path} not found</pre>"
-        # 抓貼文與情緒分析
-        logging.info("Fetching and analyzing tweets...")
         all_df = pd.concat([fetch_tweets(c) for c in candidates], ignore_index=True)
         all_df['Sentiment'] = all_df['Content'].apply(lambda x: sentiment(x)['label'])
         all_df['Confidence'] = all_df['Content'].apply(lambda x: sentiment(x)['score'])
@@ -78,80 +85,82 @@ def run_analysis():
         summary['Total Posts'] = summary.sum(axis=1)
         summary['Positive Ratio'] = summary.get('positive', 0) / summary['Total Posts'].replace(0, 1)
         summary['Negative Ratio'] = summary.get('negative', 0) / summary['Total Posts'].replace(0, 1)
         # 更新歷史資料
         today_str = datetime.now().strftime('%Y-%m-%d')
-        hist_row = summary[['Positive Ratio', 'Negative Ratio']].copy()
         hist_row['Date'] = today_str
         hist_row['Candidate'] = summary.index
         df_hist = pd.concat([pd.read_csv(history_file), hist_row], ignore_index=True) if os.path.exists(history_file) else hist_row
         df_hist.to_csv(history_file, index=False)
         # 圖表生成
-        # 1. Daily Sentiment Proportion
         fig1 = plt.figure(figsize=(8, 5))
-        summary[['Positive Ratio', 'Negative Ratio']].plot(kind='bar', stacked=True, colormap='coolwarm', ax=fig1.gca())
-        fig1.gca().set_title("Candidate Daily Social Sentiment Proportion")
-        fig1.gca().set_ylabel("Proportion")
-        fig1.gca().set_xlabel("Candidate")
         img_b64_today = plot_to_base64(fig1)
-        # 2. Historical Sentiment Trend
         fig2 = plt.figure(figsize=(10, 5))
         for c in candidates:
             temp = df_hist[df_hist['Candidate'] == c]
-            plt.plot(temp['Date'], temp['Positive Ratio'], marker='o', label=f"{c} Positive")
-            plt.plot(temp['Date'], temp['Negative Ratio'], marker='x', label=f"{c} Negative")
-        plt.title("Candidate Historical Sentiment Trend")
         plt.xticks(rotation=45)
-        plt.ylabel("Proportion")
         plt.legend()
         img_b64_trend = plot_to_base64(fig2)
-        # 3. Social Sentiment Trend
         sentiment_trend = all_df.groupby([pd.Grouper(key='Date', freq='D'), 'Sentiment']).size().unstack(fill_value=0)
         sentiment_trend = sentiment_trend.div(sentiment_trend.sum(axis=1), axis=0).fillna(0)
         fig3 = plt.figure(figsize=(8, 5))
-        for s in ['positive', 'negative']:
             if s in sentiment_trend.columns:
                 plt.plot(sentiment_trend.index, sentiment_trend[s], marker='o', label=s.capitalize())
-        plt.title("Social Sentiment Trend")
-        plt.xlabel("Date")
-        plt.ylabel("Proportion")
         plt.legend()
         img_social_sentiment = plot_to_base64(fig3)
-        # 4. Platform Performance
         platforms = ["X", "Facebook", "Instagram", "PTT", "Line"]
         platform_counts = pd.Series({p: random.randint(10, 100) for p in platforms})
         fig4 = plt.figure(figsize=(8, 5))
         plt.bar(platforms, platform_counts, color='skyblue')
-        plt.title("Platform Post Volume")
-        plt.xlabel("Platform")
-        plt.ylabel("Post Count")
         img_platform_performance = plot_to_base64(fig4)
-        # 5. Candidate Post Volume Trend
         candidate_trend = all_df.groupby([pd.Grouper(key='Date', freq='D'), 'Candidate']).size().unstack(fill_value=0)
         fig5 = plt.figure(figsize=(8, 5))
         for c in candidates:
             if c in candidate_trend.columns:
                 plt.plot(candidate_trend.index, candidate_trend[c], marker='o', label=c)
-        plt.title("Candidate Post Volume Trend")
-        plt.xlabel("Date")
-        plt.ylabel("Post Count")
         plt.legend()
         img_candidate_volume = plot_to_base64(fig5)
-        # 6. Candidate Sentiment Analysis
         fig6 = plt.figure(figsize=(8, 5))
-        summary[['Positive Ratio', 'Negative Ratio']].plot(kind='bar', stacked=True, colormap='coolwarm', ax=fig6.gca())
-        fig6.gca().set_title("Candidate Post Sentiment Analysis (Positive/Negative)")
-        fig6.gca().set_ylabel("Proportion")
-        fig6.gca().set_xlabel("Candidate")
         img_candidate_sentiment = plot_to_base64(fig6)
-        # 7. Knowledge Graph
         fig7, ax7 = plt.subplots(figsize=(8, 6))
         G = nx.Graph()
         for c in candidates:
@@ -164,32 +173,37 @@ def run_analysis():
         # 新聞資料
         if os.path.exists(news_file):
             df_news = pd.read_csv(news_file)
-            news_summary = df_news.groupby('Category').size().to_dict()  # Assuming 'Category' is English
             news_table = df_news.to_html(index=False, classes="min-w-full border border-gray-200")
         else:
-            news_summary = {}
-            news_table = "<p>No news data provided</p>"
         # 社群參與表格
         engagement_table = f"""
         <table class="min-w-full bg-white border border-gray-200">
-        <tr class="bg-gray-100 border-b">
-            <th class="py-2 px-4 border-r">Total Engagement</th>
-            <td class="py-2 px-4 border-r">{len(all_df)}</td>
-            <th class="py-2 px-4 border-r">Positive Sentiment Proportion</th>
-            <td class="py-2 px-4 border-r">{all_df['Sentiment'].value_counts(normalize=True).get('positive', 0):.1%}</td>
-            <th class="py-2 px-4 border-r">Average Interaction Rate</th>
-            <td class="py-2 px-4 border-r">3.9%</td>
-            <th class="py-2 px-4 border-r">Active Platforms</th>
-            <td class="py-2 px-4">{len(platforms)}</td>
-        </tr></table>
         """
-        # HTML template
-        logging.info(f"Loading template from {template_path}")
         with open(template_path, encoding='utf-8') as f:
             html_template = f.read()
-        logging.info("Formatting HTML template...")
         html_content = html_template.format(
             report_date=datetime.now().strftime('%Y-%m-%d %H:%M'),
             img_b64_today=img_b64_today,
@@ -203,14 +217,20 @@ def run_analysis():
             news_summary=news_summary,
             news_table=news_table
         )
-        logging.info("HTML content generated successfully.")
         return html_content
     except Exception as e:
-        logging.error(f"Analysis failed: {str(e)}")
-        return f"<pre>❌ Analysis failed:\n{traceback.format_exc()}</pre>"
 # Gradio 前端
 if __name__ == "__main__":
-    logging.info("Starting Gradio interface...")
-    iface = gr.Interface(fn=run_analysis, inputs=[], outputs=gr.HTML(), title="Kaohsiung Mayoral Election Sentiment Analysis")
     iface.launch(server_name="0.0.0.0", server_port=7860)

 # Logging setup: INFO level with timestamp, level name and message.
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 # Font setup. Chart titles and labels in this file are Traditional Chinese, so
 # the sans-serif search list must contain a font with CJK glyphs.
 # 'Microsoft JhengHei' is bundled with Windows; the Noto / WenQuanYi entries
 # are tried next so that hosts without it can still resolve a CJK font before
 # reaching the Latin-only fallbacks ('Arial', 'DejaVu Sans').
 # NOTE(review): confirm that at least one of these CJK fonts is installed in
 # the deployment image. 'Noto Sans CJK JP' is listed because matplotlib
 # presumably registers the Noto CJK collection file under its first face name
 # -- verify against the installed font files.
 plt.rcParams['font.sans-serif'] = [
     'Microsoft JhengHei',
     'Noto Sans CJK TC',
     'Noto Sans CJK JP',
     'WenQuanYi Zen Hei',
     'Arial',
     'DejaVu Sans',
 ]
 # Render the minus sign as ASCII '-' so it does not depend on the font
 # providing the Unicode minus glyph.
 plt.rcParams['axes.unicode_minus'] = False
 # Parameters
 candidates = ["許智傑", "邱議瑩", "賴瑞隆", "林岱樺", "柯志恩"]
 days_back = 7  # simulated posts are dated within the last `days_back` days
 max_tweets_per_candidate = 20  # upper bound on simulated posts per candidate
 news_file = "news_sample.csv"  # optional CSV of news items read by the report
 # Load the sentiment model once at import time. If the import or the model
 # load fails, `sentiment` is bound to a random stand-in so the rest of the
 # report can still be produced.
 try:
     from transformers import pipeline
     sentiment_pipeline = pipeline("sentiment-analysis", model="lxyuan/distilbert-base-multilingual-cased-sentiments-student")
 except Exception:
     # `Exception` rather than a bare `except:` so KeyboardInterrupt and
     # SystemExit still propagate. Record the traceback once here; otherwise
     # the reason the model could not be loaded is lost.
     logging.exception("情緒分析模型載入失敗，使用隨機備用方案。")

     def sentiment(text):
         """Return a random sentiment result (fallback when the model is unavailable).

         Args:
             text: The text to classify; ignored by this fallback.

         Returns:
             A dict with a "label" drawn from positive/negative/neutral and a
             "score" drawn uniformly from [0.3, 0.9].
         """
         logging.warning("情緒分析模型載入失敗，使用隨機備用方案。")
         return {"label": random.choice(["positive", "negative", "neutral"]), "score": random.uniform(0.3, 0.9)}
 else:
     def sentiment(text):
         """Classify `text` with the loaded pipeline and return its first result.

         Args:
             text: The text to classify; its first 50 characters are logged.

         Returns:
             The first element of the pipeline output for `text`; callers in
             this file read its "label" and "score" entries.
         """
         logging.info(f"正在對文字進行情緒分析: {text[:50]}...")
         return sentiment_pipeline(text)[0]
+# 模擬抓取 X 貼文
 def fetch_tweets(candidate):
     """Build a DataFrame of simulated posts for one candidate.

     No network access happens here: each row is generated locally from
     canned sample sentences and random values.

     Args:
         candidate: Candidate name used to pick sample sentences and stored
             in the "Candidate" column.

     Returns:
         A pandas DataFrame with columns "Date", "User", "Content" and
         "Candidate", holding between 5 and `max_tweets_per_candidate` rows.
     """
     logging.info(f"正在為候選人抓取貼文: {candidate}")
     sample_texts = {
         "許智傑": ["許智傑積極參與地方活動，親民形象受好評！", "許智傑被指政策空洞，民眾不滿", "支持許智傑，打造高雄新未來！"],
         "邱議瑩": ["邱議瑩強勢表態選市長，展現領導力", "邱議瑩批林岱樺，黨內競爭加劇", "邱議瑩推客家文化，獲基層支持"],
         "賴瑞隆": ["賴瑞隆推海洋經濟，展現專業", "賴瑞隆民調領先，陳菊子弟兵受矚目", "賴瑞隆被質疑行政經驗不足"],
         "林岱樺": ["林岱樺積極跑基層，民調領先！", "林岱樺涉助理費爭議，形象受損", "林岱樺獲正國會支持，選情看好"],
         "柯志恩": ["柯志恩民調大幅領先綠營，藍營看好", "柯志恩被批勘災缺席，引發爭議", "柯志恩推青年政策，吸引年輕選民"]
     }
     # The row count is drawn first, then each row draws its day offset, user
     # number and sentence in that order.
     post_count = random.randint(5, max_tweets_per_candidate)
     rows = []
     for i in range(post_count):
         posted_at = datetime.now() - timedelta(days=random.randint(0, days_back - 1))
         author = f"user{random.randint(1, 100)}"
         # Candidates without canned sentences get a numbered placeholder post.
         placeholder = [f"{candidate} 的貼文 {i}"]
         body = random.choice(sample_texts.get(candidate, placeholder))
         rows.append({
             "Date": posted_at,
             "User": author,
             "Content": body,
             "Candidate": candidate,
         })
     return pd.DataFrame(rows)
         # 檢查模板檔案
         template_path = "templates/index.html"
         if not os.path.exists(template_path):
+            logging.error(f"模板檔案 {template_path} 未找到。")
+            return f"<pre>❌ 模板檔案 {template_path} 未找到</pre>"
+        # 抓取貼文與情緒分析
+        logging.info("正在抓取並分析貼文...")
         all_df = pd.concat([fetch_tweets(c) for c in candidates], ignore_index=True)
         all_df['Sentiment'] = all_df['Content'].apply(lambda x: sentiment(x)['label'])
         all_df['Confidence'] = all_df['Content'].apply(lambda x: sentiment(x)['score'])
         summary['Total Posts'] = summary.sum(axis=1)
         summary['Positive Ratio'] = summary.get('positive', 0) / summary['Total Posts'].replace(0, 1)
         summary['Negative Ratio'] = summary.get('negative', 0) / summary['Total Posts'].replace(0, 1)
+        summary['Neutral Ratio'] = summary.get('neutral', 0) / summary['Total Posts'].replace(0, 1)
         # 更新歷史資料
         today_str = datetime.now().strftime('%Y-%m-%d')
+        hist_row = summary[['Positive Ratio', 'Negative Ratio', 'Neutral Ratio']].copy()
         hist_row['Date'] = today_str
         hist_row['Candidate'] = summary.index
         df_hist = pd.concat([pd.read_csv(history_file), hist_row], ignore_index=True) if os.path.exists(history_file) else hist_row
         df_hist.to_csv(history_file, index=False)
         # 圖表生成
+        # 1. 每日情緒比例
         fig1 = plt.figure(figsize=(8, 5))
+        summary[['Positive Ratio', 'Negative Ratio', 'Neutral Ratio']].plot(kind='bar', stacked=True, colormap='coolwarm', ax=fig1.gca())
+        fig1.gca().set_title("候選人每日社群情緒比例")
+        fig1.gca().set_ylabel("比例")
+        fig1.gca().set_xlabel("候選人")
         img_b64_today = plot_to_base64(fig1)
+        # 2. 歷史情緒趨勢
         fig2 = plt.figure(figsize=(10, 5))
         for c in candidates:
             temp = df_hist[df_hist['Candidate'] == c]
+            plt.plot(temp['Date'], temp['Positive Ratio'], marker='o', label=f"{c} 正面")
+            plt.plot(temp['Date'], temp['Negative Ratio'], marker='x', label=f"{c} 負面")
+            plt.plot(temp['Date'], temp['Neutral Ratio'], marker='s', label=f"{c} 中性")
+        plt.title("候選人歷史情緒趨勢")
         plt.xticks(rotation=45)
+        plt.ylabel("比例")
         plt.legend()
         img_b64_trend = plot_to_base64(fig2)
+        # 3. 社群情緒趨勢
         sentiment_trend = all_df.groupby([pd.Grouper(key='Date', freq='D'), 'Sentiment']).size().unstack(fill_value=0)
         sentiment_trend = sentiment_trend.div(sentiment_trend.sum(axis=1), axis=0).fillna(0)
         fig3 = plt.figure(figsize=(8, 5))
+        for s in ['positive', 'negative', 'neutral']:
             if s in sentiment_trend.columns:
                 plt.plot(sentiment_trend.index, sentiment_trend[s], marker='o', label=s.capitalize())
+        plt.title("社群情緒趨勢")
+        plt.xlabel("日期")
+        plt.ylabel("比例")
         plt.legend()
         img_social_sentiment = plot_to_base64(fig3)
+        # 4. 平台聲量表現
         platforms = ["X", "Facebook", "Instagram", "PTT", "Line"]
         platform_counts = pd.Series({p: random.randint(10, 100) for p in platforms})
         fig4 = plt.figure(figsize=(8, 5))
         plt.bar(platforms, platform_counts, color='skyblue')
+        plt.title("平台貼文聲量")
+        plt.xlabel("平台")
+        plt.ylabel("貼文數量")
         img_platform_performance = plot_to_base64(fig4)
+        # 5. 候選人貼文聲量趨勢
         candidate_trend = all_df.groupby([pd.Grouper(key='Date', freq='D'), 'Candidate']).size().unstack(fill_value=0)
         fig5 = plt.figure(figsize=(8, 5))
         for c in candidates:
             if c in candidate_trend.columns:
                 plt.plot(candidate_trend.index, candidate_trend[c], marker='o', label=c)
+        plt.title("候選人貼文聲量趨勢")
+        plt.xlabel("日期")
+        plt.ylabel("貼文數量")
         plt.legend()
         img_candidate_volume = plot_to_base64(fig5)
+        # 6. 候選人情緒分析
         fig6 = plt.figure(figsize=(8, 5))
+        summary[['Positive Ratio', 'Negative Ratio', 'Neutral Ratio']].plot(kind='bar', stacked=True, colormap='coolwarm', ax=fig6.gca())
+        fig6.gca().set_title("候選人貼文情緒分析（正面/負面/中性）")
+        fig6.gca().set_ylabel("比例")
+        fig6.gca().set_xlabel("候選人")
         img_candidate_sentiment = plot_to_base64(fig6)
+        # 7. 知識圖譜
         fig7, ax7 = plt.subplots(figsize=(8, 6))
         G = nx.Graph()
         for c in candidates:
         # 新聞資料
         if os.path.exists(news_file):
             df_news = pd.read_csv(news_file)
+            news_summary = df_news.groupby('Category').size().to_dict()
             news_table = df_news.to_html(index=False, classes="min-w-full border border-gray-200")
         else:
+            news_summary = {
+                "民調": "柯志恩在多份民調中領先綠營候選人，差距5-23%。",
+                "黨內競爭": "民進黨初選競爭激烈，邱議瑩、林岱樺、賴瑞隆、許智傑四人角逐。",
+                "爭議": "林岱樺涉助理費爭議，許銘春因職場霸凌案轉低調。"
+            }
+            news_table = "<p>無新聞資料，僅提供模擬摘要</p>"
         # 社群參與表格
         engagement_table = f"""
         <table class="min-w-full bg-white border border-gray-200">
+            <tr class="bg-gray-100 border-b">
+                <th class="py-2 px-4 border-r">總參與度</th>
+                <td class="py-2 px-4 border-r">{len(all_df)}</td>
+                <th class="py-2 px-4 border-r">正面情緒比例</th>
+                <td class="py-2 px-4 border-r">{all_df['Sentiment'].value_counts(normalize=True).get('positive', 0):.1%}</td>
+                <th class="py-2 px-4 border-r">平均互動率</th>
+                <td class="py-2 px-4 border-r">3.9%</td>
+                <th class="py-2 px-4 border-r">活躍平台數</th>
+                <td class="py-2 px-4">{len(platforms)}</td>
+            </tr>
+        </table>
         """
+        # HTML 模板
+        logging.info(f"正在從 {template_path} 載入模板...")
         with open(template_path, encoding='utf-8') as f:
             html_template = f.read()
+        logging.info("正在格式化 HTML 模板...")
         html_content = html_template.format(
             report_date=datetime.now().strftime('%Y-%m-%d %H:%M'),
             img_b64_today=img_b64_today,
             news_summary=news_summary,
             news_table=news_table
         )
+        logging.info("HTML 內容生成成功。")
         return html_content
     except Exception as e:
+        logging.error(f"分析失敗: {str(e)}")
+        return f"<pre>❌ 分析失敗:\n{traceback.format_exc()}</pre>"
 # Gradio 前端
 if __name__ == "__main__":
+    logging.info("正在啟動 Gradio 介面...")
+    iface = gr.Interface(
+        fn=run_analysis,
+        inputs=[],
+        outputs=gr.HTML(),
+        title="2026 高雄市長選舉輿情分析"
+    )
     iface.launch(server_name="0.0.0.0", server_port=7860)