Spaces:

shiue2000
/

sparkAnalysis

Sleeping

App Files Community

shiue2000 committed on Sep 25, 2025

Commit

07dbcee

verified ·

1 Parent(s): af46a64

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -79

app.py CHANGED Viewed

@@ -13,19 +13,19 @@ import time
 import threading
 import traceback
 import networkx as nx
-import random  # For fallback dummy data
 # -----------------------------
 # 參數設定
 # -----------------------------
 candidates = ["許智傑", "邱議瑩", "賴瑞隆", "林岱樺", "柯志恩"]
 days_back = 7
-max_tweets_per_candidate = 20  # Reduced for tool limits
 news_file = "news_sample.csv"
 history_file = "history_sentiment.csv"
 # -----------------------------
-# 情緒分析模型 (統一接口)
 # -----------------------------
 try:
     from transformers import pipeline
@@ -36,82 +36,59 @@ try:
     print("✅ 情緒分析模型加載成功")
     def sentiment(text):
-        # 保證回傳 list of dict
-        return sentiment_pipeline(text)
 except Exception as e:
-    print(f"⚠️ 警告: {e}. 情緒分析將使用預設值 (positive/negative 隨機分配)。")
     def sentiment(text):
-        # 回傳 list of dict，和 pipeline 統一接口
-        return [{"label": random.choice(["positive", "negative"]), "score": 0.5}]
 # -----------------------------
-# X Tool Simulation (Replace with actual function calls in production)
 # -----------------------------
 def fetch_tweets_via_x_tools(candidate, since_date, until_date):
-    print(f"🔍 Fetching tweets for {candidate} via X tools...")
     mock_tweets = [
-        {
-            'id': 1970100275227869230,
-            'date': datetime(2025, 9, 22, 12, 18, 2),
-            'user': 'mai_ka_nng',
-            'content': '邱志偉（提案）、許智傑、陳其邁、劉建國、管碧玲、高志鵬、林淑芬、楊曜、許添財、葉宜津、陳節如、尤美女、姚文智、陳歐珀、陳唐山、蘇震清、蔡煌瑯'
-        }
-    ] if candidate == "許智傑" else []
-    if not mock_tweets:
-        mock_tweets = [
-            {
-                'id': random.randint(1000000000000000000, 1999999999999999999),
-                'date': datetime.now() - timedelta(days=random.randint(1, days_back)),
-                'user': f'user_{random.randint(1, 1000)}',
-                'content': f'Sample tweet about {candidate} {random.choice(["supportive", "critical", "neutral"])} opinion.'
-            }
-            for _ in range(random.randint(1, max_tweets_per_candidate))
-        ]
-        print(f"ℹ️ Using fallback dummy data for {candidate}")
     print(f"✅ Fetched {len(mock_tweets)} tweets for {candidate}")
-    return [[tweet['date'], tweet['user'], tweet['content'], candidate] for tweet in mock_tweets]
 # -----------------------------
 # 主分析函數
 # -----------------------------
 def run_analysis():
-    print("🔄 開始執行輿情分析...")
     try:
-        since_date = (datetime.now() - timedelta(days=days_back)).strftime('%Y-%m-%d')
-        until_date = datetime.now().strftime('%Y-%m-%d')
         # 1. 抓貼文
         all_tweets = []
-        for candidate in candidates:
-            tweets = fetch_tweets_via_x_tools(candidate, since_date, until_date)
-            all_tweets.extend(tweets)
-        if not all_tweets:
-            raise ValueError("No tweets fetched. Using full dummy data.")
-        df_tweets = pd.DataFrame(all_tweets, columns=["日期", "使用者", "內容", "候選人"])
         # 2. 情緒分析
         df_tweets['情緒'] = df_tweets['內容'].apply(lambda x: sentiment(x)['label'])
         df_tweets['信心度'] = df_tweets['內容'].apply(lambda x: sentiment(x)['score'])
-        # 統計每位候選人情緒比例
-        summary = df_tweets.groupby(['候選人', '情緒']).size().unstack(fill_value=0)
         summary['總貼文'] = summary.sum(axis=1)
-        summary['正面比率'] = summary.get('positive', 0) / summary['總貼文']
-        summary['負面比率'] = summary.get('negative', 0) / summary['總貼文']
-        # 3. 更新歷史資料
         summary['日期'] = datetime.now().strftime('%Y-%m-%d %H:%M %Z')
         if os.path.exists(history_file):
             df_history = pd.read_csv(history_file)
             df_history = pd.concat([df_history, summary.reset_index()[['日期','候選人','正面比率','負面比率']]], ignore_index=True)
         else:
             df_history = summary.reset_index()[['日期','候選人','正面比率','負面比率']]
-        df_history.to_csv(history_file, index=False)
         # ----------------- 圖表生成 -----------------
         # 當日情緒比例
@@ -123,7 +100,7 @@ def run_analysis():
         plt.xticks(rotation=0)
         plt.tight_layout()
         buf = io.BytesIO()
-        plt.savefig(buf, format='png')
         buf.seek(0)
         img_b64_today = base64.b64encode(buf.read()).decode('utf-8')
         buf.close()
@@ -140,35 +117,35 @@ def run_analysis():
         plt.legend()
         plt.tight_layout()
         buf = io.BytesIO()
-        plt.savefig(buf, format='png')
         buf.seek(0)
         img_b64_trend = base64.b64encode(buf.read()).decode('utf-8')
         buf.close()
-        # 其他圖表 placeholder（可自行生成圖表後轉 base64）
         img_social_sentiment = ""
         img_platform_performance = ""
         img_candidate_volume = ""
         img_candidate_sentiment = ""
         img_knowledge_graph = ""
-        # 社群參與表格
         engagement_table = f"""
         <table class="min-w-full bg-white border border-gray-200">
-            <tr class="bg-gray-100 border-b">
-                <th class="py-2 px-4 border-r">總參與數</th>
-                <td class="py-2 px-4 border-r">{len(df_tweets)}</td>
-                <th class="py-2 px-4 border-r">正面情緒比例</th>
-                <td class="py-2 px-4 border-r">{df_tweets['情緒'].value_counts(normalize=True).get('positive',0):.1%}</td>
-                <th class="py-2 px-4 border-r">平均互動率</th>
-                <td class="py-2 px-4 border-r">3.9%</td>
-                <th class="py-2 px-4 border-r">活躍平台</th>
-                <td class="py-2 px-4">6</td>
-            </tr>
         </table>
         """
-        # 新聞資料
         if os.path.exists(news_file):
             df_news = pd.read_csv(news_file)
             news_summary = df_news.groupby('類別').size().to_dict()
@@ -177,7 +154,7 @@ def run_analysis():
             news_summary = {}
             news_table = "<p>未提供新聞資料</p>"
-        # ----------------- 內嵌 HTML 模板 -----------------
         html_template = """<!DOCTYPE html>
 <html lang="zh-TW">
 <head>
@@ -211,14 +188,12 @@ body {{
 <main class="grid grid-cols-1 md:grid-cols-2 gap-6">
 <div class="card">
 <h2 class="text-xl font-semibold mb-4">1. 當日社群貼文情緒</h2>
-<div class="chart-container">
-<img src="data:image/png;base64,{img_b64_today}" class="w-full">
-</div></div>
 <div class="card">
 <h2 class="text-xl font-semibold mb-4">2. 歷史情緒趨勢</h2>
-<div class="chart-container">
-<img src="data:image/png;base64,{img_b64_trend}" class="w-full">
-</div></div>
 <div class="card md:col-span-2">
 <h2 class="text-xl font-semibold mb-4">3. 社群媒體參與概況</h2>
 {engagement_table}
@@ -244,16 +219,13 @@ body {{
             news_table=news_table
         )
-        print("✅ 輿情分析完成")
         return html_content
-    except Exception as e:
-        err_msg = f"❌ 輿情分析執行失敗:\n{traceback.format_exc()}"
-        print(err_msg)
-        return f"<pre>{err_msg}</pre>"
 # -----------------------------
-# 自動排程設定 (每天 08:00 執行)
 # -----------------------------
 def schedule_daily_run():
     schedule.every().day.at("08:00").do(run_analysis)
@@ -269,7 +241,6 @@ threading.Thread(target=schedule_daily_run, daemon=True).start()
 # -----------------------------
 # Gradio 前端
 # -----------------------------
-print("🔄 啟動 Gradio 介面...")
 iface = gr.Interface(
     fn=run_analysis,
     inputs=[],
@@ -278,5 +249,4 @@ iface = gr.Interface(
     title="高雄市長選戰輿情分析",
     description="每日自動抓取 X 貼文 + 新聞議題分析 + 歷史情緒趨勢"
 )
-print("✅ Gradio 介面啟動，監聽 0.0.0.0:7860")
 iface.launch(server_name="0.0.0.0", server_port=7860)

 import threading
 import traceback
 import networkx as nx
+import random
 # -----------------------------
 # 參數設定
 # -----------------------------
 candidates = ["許智傑", "邱議瑩", "賴瑞隆", "林岱樺", "柯志恩"]
 days_back = 7
+max_tweets_per_candidate = 20
 news_file = "news_sample.csv"
 history_file = "history_sentiment.csv"
 # -----------------------------
+# 情緒分析模型
 # -----------------------------
 try:
     from transformers import pipeline
     print("✅ 情緒分析模型加載成功")
     def sentiment(text):
+        return sentiment_pipeline(text)[0]  # 保證回傳單 dict
 except Exception as e:
+    print(f"⚠️ 警告: {e}. 將使用隨機情緒")
     def sentiment(text):
+        return {"label": random.choice(["positive", "negative"]), "score": 0.5}
 # -----------------------------
+# 模擬抓貼文
 # -----------------------------
 def fetch_tweets_via_x_tools(candidate, since_date, until_date):
+    print(f"🔍 Fetching tweets for {candidate}...")
     mock_tweets = [
+        {'id': random.randint(1000000000000000000,1999999999999999999),
+         'date': datetime.now() - timedelta(days=random.randint(0, days_back)),
+         'user': f'user_{random.randint(1,1000)}',
+         'content': f'Sample tweet about {candidate} {random.choice(["supportive","critical","neutral"])}'}
+        for _ in range(random.randint(1,max_tweets_per_candidate))
+    ]
     print(f"✅ Fetched {len(mock_tweets)} tweets for {candidate}")
+    return [[t['date'], t['user'], t['content'], candidate] for t in mock_tweets]
 # -----------------------------
 # 主分析函數
 # -----------------------------
 def run_analysis():
     try:
         # 1. 抓貼文
         all_tweets = []
+        since_date = (datetime.now() - timedelta(days=days_back)).strftime('%Y-%m-%d')
+        until_date = datetime.now().strftime('%Y-%m-%d')
+        for c in candidates:
+            all_tweets.extend(fetch_tweets_via_x_tools(c, since_date, until_date))
+        df_tweets = pd.DataFrame(all_tweets, columns=["日期","使用者","內容","候選人"])
         # 2. 情緒分析
         df_tweets['情緒'] = df_tweets['內容'].apply(lambda x: sentiment(x)['label'])
         df_tweets['信心度'] = df_tweets['內容'].apply(lambda x: sentiment(x)['score'])
+        # 3. 統計每日情緒
+        summary = df_tweets.groupby(['候選人','情緒']).size().unstack(fill_value=0)
         summary['總貼文'] = summary.sum(axis=1)
+        summary['正面比率'] = summary.get('positive',0)/summary['總貼文']
+        summary['負面比率'] = summary.get('negative',0)/summary['總貼文']
+        # 4. 更新歷史資料
         summary['日期'] = datetime.now().strftime('%Y-%m-%d %H:%M %Z')
         if os.path.exists(history_file):
             df_history = pd.read_csv(history_file)
             df_history = pd.concat([df_history, summary.reset_index()[['日期','候選人','正面比率','負面比率']]], ignore_index=True)
         else:
             df_history = summary.reset_index()[['日期','候選人','正面比率','負面比率']]
+        df_history.to_csv(history_file,index=False)
         # ----------------- 圖表生成 -----------------
         # 當日情緒比例
         plt.xticks(rotation=0)
         plt.tight_layout()
         buf = io.BytesIO()
+        plt.savefig(buf,format='png')
         buf.seek(0)
         img_b64_today = base64.b64encode(buf.read()).decode('utf-8')
         buf.close()
         plt.legend()
         plt.tight_layout()
         buf = io.BytesIO()
+        plt.savefig(buf,format='png')
         buf.seek(0)
         img_b64_trend = base64.b64encode(buf.read()).decode('utf-8')
         buf.close()
+        # ----------------- 其他圖表 placeholder -----------------
         img_social_sentiment = ""
         img_platform_performance = ""
         img_candidate_volume = ""
         img_candidate_sentiment = ""
         img_knowledge_graph = ""
+        # ----------------- 社群參與表格 -----------------
         engagement_table = f"""
         <table class="min-w-full bg-white border border-gray-200">
+        <tr class="bg-gray-100 border-b">
+            <th class="py-2 px-4 border-r">總參與數</th>
+            <td class="py-2 px-4 border-r">{len(df_tweets)}</td>
+            <th class="py-2 px-4 border-r">正面情緒比例</th>
+            <td class="py-2 px-4 border-r">{df_tweets['情緒'].value_counts(normalize=True).get('positive',0):.1%}</td>
+            <th class="py-2 px-4 border-r">平均互動率</th>
+            <td class="py-2 px-4 border-r">3.9%</td>
+            <th class="py-2 px-4 border-r">活躍平台</th>
+            <td class="py-2 px-4">6</td>
+        </tr>
         </table>
         """
+        # ----------------- 新聞資料 -----------------
         if os.path.exists(news_file):
             df_news = pd.read_csv(news_file)
             news_summary = df_news.groupby('類別').size().to_dict()
             news_summary = {}
             news_table = "<p>未提供新聞資料</p>"
+        # ----------------- 內嵌 HTML -----------------
         html_template = """<!DOCTYPE html>
 <html lang="zh-TW">
 <head>
 <main class="grid grid-cols-1 md:grid-cols-2 gap-6">
 <div class="card">
 <h2 class="text-xl font-semibold mb-4">1. 當日社群貼文情緒</h2>
+<div class="chart-container"><img src="data:image/png;base64,{img_b64_today}" class="w-full"></div>
+</div>
 <div class="card">
 <h2 class="text-xl font-semibold mb-4">2. 歷史情緒趨勢</h2>
+<div class="chart-container"><img src="data:image/png;base64,{img_b64_trend}" class="w-full"></div>
+</div>
 <div class="card md:col-span-2">
 <h2 class="text-xl font-semibold mb-4">3. 社群媒體參與概況</h2>
 {engagement_table}
             news_table=news_table
         )
         return html_content
+    except Exception:
+        return f"<pre>❌ 輿情分析執行失敗:\n{traceback.format_exc()}</pre>"
 # -----------------------------
+# 自動排程設定
 # -----------------------------
 def schedule_daily_run():
     schedule.every().day.at("08:00").do(run_analysis)
 # -----------------------------
 # Gradio 前端
 # -----------------------------
 iface = gr.Interface(
     fn=run_analysis,
     inputs=[],
     title="高雄市長選戰輿情分析",
     description="每日自動抓取 X 貼文 + 新聞議題分析 + 歷史情緒趨勢"
 )
 iface.launch(server_name="0.0.0.0", server_port=7860)