Spaces:

shiue2000
/

sparkAnalysis

Sleeping

App Files Community

shiue2000 committed on Sep 25, 2025

Commit

0ea5a01

verified ·

1 Parent(s): 07dbcee

Update app.py

Browse files

Files changed (1) hide show

app.py +116 -207

app.py CHANGED Viewed

@@ -1,252 +1,161 @@
-# ==========================================
-# 高雄市長選戰輕量化輿情分析 + 自動排程 + 重試/異常警告
-# ==========================================
-import pandas as pd
 from datetime import datetime, timedelta
-import matplotlib.pyplot as plt
-import io
-import base64
-import gradio as gr
-import os
-import schedule
-import time
-import threading
-import traceback
-import networkx as nx
-import random
-# -----------------------------
-# 參數設定
-# -----------------------------
-candidates = ["許智傑", "邱議瑩", "賴瑞隆", "林岱樺", "柯志恩"]
 days_back = 7
 max_tweets_per_candidate = 20
 news_file = "news_sample.csv"
 history_file = "history_sentiment.csv"
-# -----------------------------
-# 情緒分析模型
-# -----------------------------
 try:
     from transformers import pipeline
-    sentiment_pipeline = pipeline(
-        "sentiment-analysis",
-        model="lxyuan/distilbert-base-multilingual-cased-sentiments-student"
-    )
-    print("✅ 情緒分析模型加載成功")
-    def sentiment(text):
-        return sentiment_pipeline(text)[0]  # 保證回傳單 dict
-except Exception as e:
-    print(f"⚠️ 警告: {e}. 將使用隨機情緒")
-    def sentiment(text):
-        return {"label": random.choice(["positive", "negative"]), "score": 0.5}
-# -----------------------------
 # 模擬抓貼文
-# -----------------------------
-def fetch_tweets_via_x_tools(candidate, since_date, until_date):
-    print(f"🔍 Fetching tweets for {candidate}...")
-    mock_tweets = [
-        {'id': random.randint(1000000000000000000,1999999999999999999),
-         'date': datetime.now() - timedelta(days=random.randint(0, days_back)),
-         'user': f'user_{random.randint(1,1000)}',
-         'content': f'Sample tweet about {candidate} {random.choice(["supportive","critical","neutral"])}'}
-        for _ in range(random.randint(1,max_tweets_per_candidate))
-    ]
-    print(f"✅ Fetched {len(mock_tweets)} tweets for {candidate}")
-    return [[t['date'], t['user'], t['content'], candidate] for t in mock_tweets]
-# -----------------------------
 # 主分析函數
-# -----------------------------
 def run_analysis():
     try:
-        # 1. 抓貼文
-        all_tweets = []
-        since_date = (datetime.now() - timedelta(days=days_back)).strftime('%Y-%m-%d')
-        until_date = datetime.now().strftime('%Y-%m-%d')
-        for c in candidates:
-            all_tweets.extend(fetch_tweets_via_x_tools(c, since_date, until_date))
-        df_tweets = pd.DataFrame(all_tweets, columns=["日期","使用者","內容","候選人"])
-        # 2. 情緒分析
-        df_tweets['情緒'] = df_tweets['內容'].apply(lambda x: sentiment(x)['label'])
-        df_tweets['信心度'] = df_tweets['內容'].apply(lambda x: sentiment(x)['score'])
-        # 3. 統計每日情緒
-        summary = df_tweets.groupby(['候選人','情緒']).size().unstack(fill_value=0)
         summary['總貼文'] = summary.sum(axis=1)
         summary['正面比率'] = summary.get('positive',0)/summary['總貼文']
         summary['負面比率'] = summary.get('negative',0)/summary['總貼文']
-        # 4. 更新歷史資料
-        summary['日期'] = datetime.now().strftime('%Y-%m-%d %H:%M %Z')
-        if os.path.exists(history_file):
-            df_history = pd.read_csv(history_file)
-            df_history = pd.concat([df_history, summary.reset_index()[['日期','候選人','正面比率','負面比率']]], ignore_index=True)
-        else:
-            df_history = summary.reset_index()[['日期','候選人','正面比率','負面比率']]
-        df_history.to_csv(history_file,index=False)
-        # ----------------- 圖表生成 -----------------
-        # 當日情緒比例
-        plt.figure(figsize=(8,5))
-        summary[['正面比率','負面比率']].plot(kind='bar', stacked=True, colormap='coolwarm')
-        plt.title("候選人當日社群情緒比例")
-        plt.ylabel("比例")
-        plt.xlabel("候選人")
-        plt.xticks(rotation=0)
-        plt.tight_layout()
-        buf = io.BytesIO()
-        plt.savefig(buf,format='png')
-        buf.seek(0)
-        img_b64_today = base64.b64encode(buf.read()).decode('utf-8')
-        buf.close()
-        # 歷史情緒趨勢
-        plt.figure(figsize=(10,5))
         for c in candidates:
-            temp = df_history[df_history['候選人']==c]
             plt.plot(temp['日期'], temp['正面比率'], marker='o', label=f"{c} 正面")
             plt.plot(temp['日期'], temp['負面比率'], marker='x', label=f"{c} 負面")
         plt.xticks(rotation=45)
         plt.ylabel("比例")
-        plt.title("候選人歷史情緒趨勢")
         plt.legend()
-        plt.tight_layout()
-        buf = io.BytesIO()
-        plt.savefig(buf,format='png')
-        buf.seek(0)
-        img_b64_trend = base64.b64encode(buf.read()).decode('utf-8')
-        buf.close()
-        # ----------------- 其他圖表 placeholder -----------------
-        img_social_sentiment = ""
-        img_platform_performance = ""
-        img_candidate_volume = ""
-        img_candidate_sentiment = ""
-        img_knowledge_graph = ""
-        # ----------------- 社群參與表格 -----------------
-        engagement_table = f"""
         <table class="min-w-full bg-white border border-gray-200">
         <tr class="bg-gray-100 border-b">
             <th class="py-2 px-4 border-r">總參與數</th>
-            <td class="py-2 px-4 border-r">{len(df_tweets)}</td>
             <th class="py-2 px-4 border-r">正面情緒比例</th>
-            <td class="py-2 px-4 border-r">{df_tweets['情緒'].value_counts(normalize=True).get('positive',0):.1%}</td>
             <th class="py-2 px-4 border-r">平均互動率</th>
             <td class="py-2 px-4 border-r">3.9%</td>
             <th class="py-2 px-4 border-r">活躍平台</th>
             <td class="py-2 px-4">6</td>
-        </tr>
-        </table>
         """
-        # ----------------- 新聞資料 -----------------
-        if os.path.exists(news_file):
-            df_news = pd.read_csv(news_file)
-            news_summary = df_news.groupby('類別').size().to_dict()
-            news_table = df_news.to_html(index=False)
-        else:
-            news_summary = {}
-            news_table = "<p>未提供新聞資料</p>"
-        # ----------------- 內嵌 HTML -----------------
-        html_template = """<!DOCTYPE html>
-<html lang="zh-TW">
-<head>
-<meta charset="UTF-8">
-<meta name="viewport" content="width=device-width, initial-scale=1.0">
-<title>高雄市長選戰輿情分析</title>
-<script src="https://cdn.tailwindcss.com"></script>
-<style>
-body {{
-    background-color: #f3f4f6;
-    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
-}}
-.card {{
-    background-color: white;
-    border-radius: 0.5rem;
-    box-shadow: 0 4px 6px rgba(0,0,0,0.1);
-    padding: 1.5rem;
-    margin-bottom: 1.5rem;
-}}
-.chart-container {{
-    max-width: 100%;
-    overflow-x: auto;
-}}
-</style>
-</head>
-<body class="p-6">
-<header class="bg-blue-600 text-white p-4 rounded-lg mb-6">
-<h1 class="text-3xl font-bold">高雄市長選戰輿情分析</h1>
-<p class="text-sm">更新時間: {report_date}</p>
-</header>
-<main class="grid grid-cols-1 md:grid-cols-2 gap-6">
-<div class="card">
-<h2 class="text-xl font-semibold mb-4">1. 當日社群貼文情緒</h2>
-<div class="chart-container"><img src="data:image/png;base64,{img_b64_today}" class="w-full"></div>
-</div>
-<div class="card">
-<h2 class="text-xl font-semibold mb-4">2. 歷史情緒趨勢</h2>
-<div class="chart-container"><img src="data:image/png;base64,{img_b64_trend}" class="w-full"></div>
-</div>
-<div class="card md:col-span-2">
-<h2 class="text-xl font-semibold mb-4">3. 社群媒體參與概況</h2>
-{engagement_table}
-</div>
-<div class="card md:col-span-2">
-<h2 class="text-xl font-semibold mb-4">9. 新聞議題統計</h2>
-<p>各類別新聞數量: {news_summary}</p>
-{news_table}
-</div>
-</main>
-<footer class="mt-6 text-center text-gray-500">
-<p>© 2025 高雄市長選戰輿情分析系統 | 由 xAI 技術支持</p>
-</footer>
-</body>
-</html>"""
         html_content = html_template.format(
             report_date=datetime.now().strftime('%Y-%m-%d %H:%M'),
             img_b64_today=img_b64_today,
             img_b64_trend=img_b64_trend,
             engagement_table=engagement_table,
             news_summary=news_summary,
             news_table=news_table
         )
         return html_content
-    except Exception:
-        return f"<pre>❌ 輿情分析執行失敗:\n{traceback.format_exc()}</pre>"
-# -----------------------------
-# 自動排程設定
-# -----------------------------
-def schedule_daily_run():
-    schedule.every().day.at("08:00").do(run_analysis)
-    while True:
-        try:
-            schedule.run_pending()
-        except Exception as e:
-            print(f"⚠️ 排程異常: {e}")
-        time.sleep(60)
-threading.Thread(target=schedule_daily_run, daemon=True).start()
-# -----------------------------
 # Gradio 前端
-# -----------------------------
-iface = gr.Interface(
-    fn=run_analysis,
-    inputs=[],
-    outputs=gr.HTML(),
-    live=False,
-    title="高雄市長選戰輿情分析",
-    description="每日自動抓取 X 貼文 + 新聞議題分析 + 歷史情緒趨勢"
-)
 iface.launch(server_name="0.0.0.0", server_port=7860)

+# 1. 匯入套件與參數設定
+import pandas as pd, matplotlib.pyplot as plt, io, base64, os, traceback, random, networkx as nx
 from datetime import datetime, timedelta
+import gradio as gr, schedule, time, threading
+# 中文顯示
+plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei','Arial Unicode MS','SimHei','DejaVu Sans']
+plt.rcParams['axes.unicode_minus'] = False
+candidates = ["許智傑","邱議瑩","賴瑞隆","林岱樺","柯志恩"]
 days_back = 7
 max_tweets_per_candidate = 20
 news_file = "news_sample.csv"
 history_file = "history_sentiment.csv"
+# 情緒分析
 try:
     from transformers import pipeline
+    sentiment_pipeline = pipeline("sentiment-analysis", model="lxyuan/distilbert-base-multilingual-cased-sentiments-student")
+    def sentiment(text): return sentiment_pipeline(text)[0]
+except:
+    def sentiment(text): return {"label": random.choice(["positive","negative"]), "score":0.5}
 # 模擬抓貼文
+def fetch_tweets(candidate):
+    return pd.DataFrame([{"日期": datetime.now()-timedelta(days=random.randint(0,6)),
+                          "使用者": f"user{random.randint(1,100)}",
+                          "內容": f"{candidate} 的貼文 {i}",
+                          "候選人": candidate} for i in range(random.randint(5,max_tweets_per_candidate))])
+# base64 圖片轉換
+def plot_to_base64(fig):
+    buf=io.BytesIO()
+    fig.savefig(buf, format='png', bbox_inches='tight')
+    buf.seek(0)
+    img_b64 = base64.b64encode(buf.read()).decode('utf-8')
+    buf.close()
+    plt.close(fig)
+    return img_b64
 # 主分析函數
 def run_analysis():
     try:
+        # 抓貼文與情緒分析
+        all_df = pd.concat([fetch_tweets(c) for c in candidates], ignore_index=True)
+        all_df['情緒'] = all_df['內容'].apply(lambda x: sentiment(x)['label'])
+        all_df['信心度'] = all_df['內容'].apply(lambda x: sentiment(x)['score'])
+        # 統計每日情緒
+        summary = all_df.groupby(['候選人','情緒']).size().unstack(fill_value=0)
         summary['總貼文'] = summary.sum(axis=1)
         summary['正面比率'] = summary.get('positive',0)/summary['總貼文']
         summary['負面比率'] = summary.get('negative',0)/summary['總貼文']
+        # 更新歷史資料
+        today_str = datetime.now().strftime('%Y-%m-%d')
+        hist_row = summary[['正面比率','負面比率']].copy()
+        hist_row['日期'] = today_str
+        hist_row['候選人'] = summary.index
+        df_hist = pd.concat([pd.read_csv(history_file), hist_row], ignore_index=True) if os.path.exists(history_file) else hist_row
+        df_hist.to_csv(history_file,index=False)
+        # ----------------- 圖表 -----------------
+        # 1. 當日情緒比例
+        fig1 = plt.figure(figsize=(8,5))
+        summary[['正面比率','負面比率']].plot(kind='bar', stacked=True, colormap='coolwarm', ax=fig1.gca())
+        fig1.gca().set_title("候選人當日社群情緒比例")
+        img_b64_today = plot_to_base64(fig1)
+        # 2. 歷史情緒趨勢
+        fig2 = plt.figure(figsize=(10,5))
         for c in candidates:
+            temp = df_hist[df_hist['候選人']==c]
             plt.plot(temp['日期'], temp['正面比率'], marker='o', label=f"{c} 正面")
             plt.plot(temp['日期'], temp['負面比率'], marker='x', label=f"{c} 負面")
+        plt.title("候選人歷史情緒趨勢")
         plt.xticks(rotation=45)
         plt.ylabel("比例")
         plt.legend()
+        img_b64_trend = plot_to_base64(fig2)
+        # 3~8 其他圖表生成
+        # 社群情感趨勢
+        fig3 = plt.figure(figsize=(8,5))
+        plt.plot(range(7), [random.random() for _ in range(7)], marker='o', label="正面")
+        plt.plot(range(7), [random.random() for _ in range(7)], marker='x', label="負面")
+        plt.title("社群情感趨勢")
+        plt.legend()
+        img_social_sentiment = plot_to_base64(fig3)
+        # 各平台表現
+        fig4 = plt.figure(figsize=(8,5))
+        platforms=["X","Facebook","Instagram","PTT","Line"]
+        plt.bar(platforms, [random.randint(10,100) for _ in platforms], color='skyblue')
+        plt.title("各平台貼文量")
+        img_platform_performance = plot_to_base64(fig4)
+        # 候選人社群量趨勢
+        fig5 = plt.figure(figsize=(8,5))
+        for c in candidates: plt.plot(range(7), [random.randint(5,20) for _ in range(7)], marker='o', label=c)
+        plt.title("候選人社群量趨勢")
+        plt.legend()
+        img_candidate_volume = plot_to_base64(fig5)
+        # 候選人社群量分析
+        fig6 = plt.figure(figsize=(8,5))
+        summary[['正面比率','負面比率']].plot(kind='bar', stacked=True, colormap='coolwarm', ax=fig6.gca())
+        fig6.gca().set_title("候選人社群量分析（正/負面情緒）")
+        img_candidate_sentiment = plot_to_base64(fig6)
+        # 知識圖譜
+        fig7, ax7 = plt.subplots(figsize=(8,6))
+        G=nx.Graph()
+        for c in candidates: G.add_node(c)
+        for i in range(len(candidates)-1): G.add_edge(candidates[i], candidates[i+1])
+        nx.draw(G, nx.spring_layout(G), with_labels=True, node_color='lightgreen', font_size=12, ax=ax7)
+        img_knowledge_graph = plot_to_base64(fig7)
+        # 新聞資料
+        if os.path.exists(news_file):
+            df_news = pd.read_csv(news_file)
+            news_summary = df_news.groupby('類別').size().to_dict()
+            news_table = df_news.to_html(index=False)
+        else: news_summary={}, news_table="<p>未提供新聞資料</p>"
+        # 社群參與表格
+        engagement_table=f"""
         <table class="min-w-full bg-white border border-gray-200">
         <tr class="bg-gray-100 border-b">
             <th class="py-2 px-4 border-r">總參與數</th>
+            <td class="py-2 px-4 border-r">{len(all_df)}</td>
             <th class="py-2 px-4 border-r">正面情緒比例</th>
+            <td class="py-2 px-4 border-r">{all_df['情緒'].value_counts(normalize=True).get('positive',0):.1%}</td>
             <th class="py-2 px-4 border-r">平均互動率</th>
             <td class="py-2 px-4 border-r">3.9%</td>
             <th class="py-2 px-4 border-r">活躍平台</th>
             <td class="py-2 px-4">6</td>
+        </tr></table>
         """
+        # HTML template
+        html_template = open("index.html").read()
         html_content = html_template.format(
             report_date=datetime.now().strftime('%Y-%m-%d %H:%M'),
             img_b64_today=img_b64_today,
             img_b64_trend=img_b64_trend,
+            img_social_sentiment=img_social_sentiment,
+            img_platform_performance=img_platform_performance,
+            img_candidate_volume=img_candidate_volume,
+            img_candidate_sentiment=img_candidate_sentiment,
+            img_knowledge_graph=img_knowledge_graph,
             engagement_table=engagement_table,
             news_summary=news_summary,
             news_table=news_table
         )
         return html_content
+    except Exception: return f"<pre>❌ 輿情分析執行失敗:\n{traceback.format_exc()}</pre>"
 # Gradio 前端
+iface = gr.Interface(fn=run_analysis, inputs=[], outputs=gr.HTML(), title="高雄市長選戰輿情分析")
 iface.launch(server_name="0.0.0.0", server_port=7860)