shiue2000 committed on
Commit
0ea5a01
·
verified ·
1 Parent(s): 07dbcee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -207
app.py CHANGED
@@ -1,252 +1,161 @@
1
- # ==========================================
2
- # 高雄市長選戰輕量化輿情分析 + 自動排程 + 重試/異常警告
3
- # ==========================================
4
- import pandas as pd
5
  from datetime import datetime, timedelta
6
- import matplotlib.pyplot as plt
7
- import io
8
- import base64
9
- import gradio as gr
10
- import os
11
- import schedule
12
- import time
13
- import threading
14
- import traceback
15
- import networkx as nx
16
- import random
17
-
18
- # -----------------------------
19
- # 參數設定
20
- # -----------------------------
21
- candidates = ["許智傑", "邱議瑩", "賴瑞隆", "林岱樺", "柯志恩"]
22
  days_back = 7
23
  max_tweets_per_candidate = 20
24
  news_file = "news_sample.csv"
25
  history_file = "history_sentiment.csv"
26
 
27
- # -----------------------------
28
- # 情緒分析模型
29
- # -----------------------------
30
  try:
31
  from transformers import pipeline
32
- sentiment_pipeline = pipeline(
33
- "sentiment-analysis",
34
- model="lxyuan/distilbert-base-multilingual-cased-sentiments-student"
35
- )
36
- print("✅ 情緒分析模型加載成功")
37
-
38
- def sentiment(text):
39
- return sentiment_pipeline(text)[0] # 保證回傳單 dict
40
- except Exception as e:
41
- print(f"⚠️ 警告: {e}. 將使用隨機情緒")
42
- def sentiment(text):
43
- return {"label": random.choice(["positive", "negative"]), "score": 0.5}
44
-
45
- # -----------------------------
46
  # 模擬抓貼文
47
- # -----------------------------
48
- def fetch_tweets_via_x_tools(candidate, since_date, until_date):
49
- print(f"🔍 Fetching tweets for {candidate}...")
50
- mock_tweets = [
51
- {'id': random.randint(1000000000000000000,1999999999999999999),
52
- 'date': datetime.now() - timedelta(days=random.randint(0, days_back)),
53
- 'user': f'user_{random.randint(1,1000)}',
54
- 'content': f'Sample tweet about {candidate} {random.choice(["supportive","critical","neutral"])}'}
55
- for _ in range(random.randint(1,max_tweets_per_candidate))
56
- ]
57
- print(f"✅ Fetched {len(mock_tweets)} tweets for {candidate}")
58
- return [[t['date'], t['user'], t['content'], candidate] for t in mock_tweets]
59
-
60
- # -----------------------------
 
 
61
  # 主分析函數
62
- # -----------------------------
63
  def run_analysis():
64
  try:
65
- # 1. 抓貼文
66
- all_tweets = []
67
- since_date = (datetime.now() - timedelta(days=days_back)).strftime('%Y-%m-%d')
68
- until_date = datetime.now().strftime('%Y-%m-%d')
69
- for c in candidates:
70
- all_tweets.extend(fetch_tweets_via_x_tools(c, since_date, until_date))
71
-
72
- df_tweets = pd.DataFrame(all_tweets, columns=["日期","使用者","內容","候選人"])
73
-
74
- # 2. 情緒分析
75
- df_tweets['情緒'] = df_tweets['內容'].apply(lambda x: sentiment(x)['label'])
76
- df_tweets['信心度'] = df_tweets['內容'].apply(lambda x: sentiment(x)['score'])
77
 
78
- # 3. 統計每日情緒
79
- summary = df_tweets.groupby(['候選人','情緒']).size().unstack(fill_value=0)
80
  summary['總貼文'] = summary.sum(axis=1)
81
  summary['正面比率'] = summary.get('positive',0)/summary['總貼文']
82
  summary['負面比率'] = summary.get('negative',0)/summary['總貼文']
83
 
84
- # 4. 更新歷史資料
85
- summary['日期'] = datetime.now().strftime('%Y-%m-%d %H:%M %Z')
86
- if os.path.exists(history_file):
87
- df_history = pd.read_csv(history_file)
88
- df_history = pd.concat([df_history, summary.reset_index()[['日期','候選人','正面比率','負面比率']]], ignore_index=True)
89
- else:
90
- df_history = summary.reset_index()[['日期','候選人','正面比率','負面比率']]
91
- df_history.to_csv(history_file,index=False)
92
-
93
- # ----------------- 圖表生成 -----------------
94
- # 當日情緒比例
95
- plt.figure(figsize=(8,5))
96
- summary[['正面比率','負面比率']].plot(kind='bar', stacked=True, colormap='coolwarm')
97
- plt.title("候選人當日社群情緒比例")
98
- plt.ylabel("比例")
99
- plt.xlabel("候選人")
100
- plt.xticks(rotation=0)
101
- plt.tight_layout()
102
- buf = io.BytesIO()
103
- plt.savefig(buf,format='png')
104
- buf.seek(0)
105
- img_b64_today = base64.b64encode(buf.read()).decode('utf-8')
106
- buf.close()
107
-
108
- # 歷史情緒趨勢
109
- plt.figure(figsize=(10,5))
110
  for c in candidates:
111
- temp = df_history[df_history['候選人']==c]
112
  plt.plot(temp['日期'], temp['正面比率'], marker='o', label=f"{c} 正面")
113
  plt.plot(temp['日期'], temp['負面比率'], marker='x', label=f"{c} 負面")
 
114
  plt.xticks(rotation=45)
115
  plt.ylabel("比例")
116
- plt.title("候選人歷史情緒趨勢")
117
  plt.legend()
118
- plt.tight_layout()
119
- buf = io.BytesIO()
120
- plt.savefig(buf,format='png')
121
- buf.seek(0)
122
- img_b64_trend = base64.b64encode(buf.read()).decode('utf-8')
123
- buf.close()
124
-
125
- # ----------------- 其他圖表 placeholder -----------------
126
- img_social_sentiment = ""
127
- img_platform_performance = ""
128
- img_candidate_volume = ""
129
- img_candidate_sentiment = ""
130
- img_knowledge_graph = ""
131
-
132
- # ----------------- 社群參與表格 -----------------
133
- engagement_table = f"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  <table class="min-w-full bg-white border border-gray-200">
135
  <tr class="bg-gray-100 border-b">
136
  <th class="py-2 px-4 border-r">總參與數</th>
137
- <td class="py-2 px-4 border-r">{len(df_tweets)}</td>
138
  <th class="py-2 px-4 border-r">正面情緒比例</th>
139
- <td class="py-2 px-4 border-r">{df_tweets['情緒'].value_counts(normalize=True).get('positive',0):.1%}</td>
140
  <th class="py-2 px-4 border-r">平均互動率</th>
141
  <td class="py-2 px-4 border-r">3.9%</td>
142
  <th class="py-2 px-4 border-r">活躍平台</th>
143
  <td class="py-2 px-4">6</td>
144
- </tr>
145
- </table>
146
  """
147
 
148
- # ----------------- 新聞資料 -----------------
149
- if os.path.exists(news_file):
150
- df_news = pd.read_csv(news_file)
151
- news_summary = df_news.groupby('類別').size().to_dict()
152
- news_table = df_news.to_html(index=False)
153
- else:
154
- news_summary = {}
155
- news_table = "<p>未提供新聞資料</p>"
156
-
157
- # ----------------- 內嵌 HTML -----------------
158
- html_template = """<!DOCTYPE html>
159
- <html lang="zh-TW">
160
- <head>
161
- <meta charset="UTF-8">
162
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
163
- <title>高雄市長選戰輿情分析</title>
164
- <script src="https://cdn.tailwindcss.com"></script>
165
- <style>
166
- body {{
167
- background-color: #f3f4f6;
168
- font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
169
- }}
170
- .card {{
171
- background-color: white;
172
- border-radius: 0.5rem;
173
- box-shadow: 0 4px 6px rgba(0,0,0,0.1);
174
- padding: 1.5rem;
175
- margin-bottom: 1.5rem;
176
- }}
177
- .chart-container {{
178
- max-width: 100%;
179
- overflow-x: auto;
180
- }}
181
- </style>
182
- </head>
183
- <body class="p-6">
184
- <header class="bg-blue-600 text-white p-4 rounded-lg mb-6">
185
- <h1 class="text-3xl font-bold">高雄市長選戰輿情分析</h1>
186
- <p class="text-sm">更新時間: {report_date}</p>
187
- </header>
188
- <main class="grid grid-cols-1 md:grid-cols-2 gap-6">
189
- <div class="card">
190
- <h2 class="text-xl font-semibold mb-4">1. 當日社群貼文情緒</h2>
191
- <div class="chart-container"><img src="data:image/png;base64,{img_b64_today}" class="w-full"></div>
192
- </div>
193
- <div class="card">
194
- <h2 class="text-xl font-semibold mb-4">2. 歷史情緒趨勢</h2>
195
- <div class="chart-container"><img src="data:image/png;base64,{img_b64_trend}" class="w-full"></div>
196
- </div>
197
- <div class="card md:col-span-2">
198
- <h2 class="text-xl font-semibold mb-4">3. 社群媒體參與概況</h2>
199
- {engagement_table}
200
- </div>
201
- <div class="card md:col-span-2">
202
- <h2 class="text-xl font-semibold mb-4">9. 新聞議題統計</h2>
203
- <p>各類別新聞數量: {news_summary}</p>
204
- {news_table}
205
- </div>
206
- </main>
207
- <footer class="mt-6 text-center text-gray-500">
208
- <p>© 2025 高雄市長選戰輿情分析系統 | 由 xAI 技術支持</p>
209
- </footer>
210
- </body>
211
- </html>"""
212
-
213
  html_content = html_template.format(
214
  report_date=datetime.now().strftime('%Y-%m-%d %H:%M'),
215
  img_b64_today=img_b64_today,
216
  img_b64_trend=img_b64_trend,
 
 
 
 
 
217
  engagement_table=engagement_table,
218
  news_summary=news_summary,
219
  news_table=news_table
220
  )
221
-
222
  return html_content
 
223
 
224
- except Exception:
225
- return f"<pre>❌ 輿情分析執行失敗:\n{traceback.format_exc()}</pre>"
226
-
227
- # -----------------------------
228
- # 自動排程設定
229
- # -----------------------------
230
- def schedule_daily_run():
231
- schedule.every().day.at("08:00").do(run_analysis)
232
- while True:
233
- try:
234
- schedule.run_pending()
235
- except Exception as e:
236
- print(f"⚠️ 排程異常: {e}")
237
- time.sleep(60)
238
-
239
- threading.Thread(target=schedule_daily_run, daemon=True).start()
240
-
241
- # -----------------------------
242
  # Gradio 前端
243
- # -----------------------------
244
- iface = gr.Interface(
245
- fn=run_analysis,
246
- inputs=[],
247
- outputs=gr.HTML(),
248
- live=False,
249
- title="高雄市長選戰輿情分析",
250
- description="每日自動抓取 X 貼文 + 新聞議題分析 + 歷史情緒趨勢"
251
- )
252
  iface.launch(server_name="0.0.0.0", server_port=7860)
 
1
+ # 1. 匯入套件與參數設定
2
+ import pandas as pd, matplotlib.pyplot as plt, io, base64, os, traceback, random, networkx as nx
 
 
3
  from datetime import datetime, timedelta
4
+ import gradio as gr, schedule, time, threading
5
+
6
# CJK text rendering: list sans-serif families in preference order so chart
# titles and labels containing Chinese can be drawn, and keep the minus sign
# as an ASCII hyphen so it renders with these fonts.
plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei', 'Arial Unicode MS', 'SimHei', 'DejaVu Sans']
plt.rcParams['axes.unicode_minus'] = False
9
+
10
# Candidates tracked by the dashboard (used as the 候選人 column values).
candidates = ["許智傑", "邱議瑩", "賴瑞隆", "林岱樺", "柯志恩"]
# Look-back window, in days, for fetched posts.
days_back = 7
# Upper bound on the number of posts fetched per candidate.
max_tweets_per_candidate = 20
# Optional CSV of news items; the analysis groups it by its 類別 column.
news_file = "news_sample.csv"
# CSV that accumulates the per-run positive/negative ratios.
history_file = "history_sentiment.csv"
15
 
16
# Sentiment analysis: use the multilingual transformers model when it can be
# loaded; otherwise fall back to a random stub so the app still starts.
try:
    from transformers import pipeline
    sentiment_pipeline = pipeline(
        "sentiment-analysis",
        model="lxyuan/distilbert-base-multilingual-cased-sentiments-student",
    )

    def sentiment(text):
        """Return the first result of the sentiment pipeline for *text*.

        Callers read the 'label' and 'score' keys of the returned dict.
        """
        return sentiment_pipeline(text)[0]
except Exception as exc:
    # Catch Exception (not a bare except) so Ctrl-C / SystemExit still
    # propagate, and report why the real model is not being used.
    print(f"⚠️ 警告: {exc}. 將使用隨機情緒")

    def sentiment(text):
        """Fallback stub: random positive/negative label with a fixed 0.5 score."""
        return {"label": random.choice(["positive", "negative"]), "score": 0.5}
23
+
 
 
 
 
 
 
 
 
 
24
# Mock post fetcher
def fetch_tweets(candidate, *, days=None, max_tweets=None):
    """Generate a DataFrame of mock posts for *candidate*.

    Args:
        candidate: Name written into the 候選人 column and the post text.
        days: Look-back window in days; post dates are drawn from
            ``0 .. days - 1`` days ago. Defaults to the module-level
            ``days_back``.
        max_tweets: Upper bound on the number of posts. Defaults to the
            module-level ``max_tweets_per_candidate``.

    Returns:
        DataFrame with columns 日期, 使用者, 內容, 候選人. The row count is
        random between ``min(5, max_tweets)`` and ``max_tweets``; an empty
        result still carries the four columns.
    """
    if days is None:
        days = days_back
    if max_tweets is None:
        max_tweets = max_tweets_per_candidate

    upper = max(max_tweets, 0)
    lower = min(5, upper)  # keeps randint valid when the cap is below 5
    max_age = max(days - 1, 0)

    rows = [
        {
            "日期": datetime.now() - timedelta(days=random.randint(0, max_age)),
            "使用者": f"user{random.randint(1, 100)}",
            "內容": f"{candidate} 的貼文 {i}",
            "候選人": candidate,
        }
        for i in range(random.randint(lower, upper))
    ]
    # Explicit columns so downstream column access works even with no rows.
    return pd.DataFrame(rows, columns=["日期", "使用者", "內容", "候選人"])
30
+
31
# Figure -> base64 PNG conversion
def plot_to_base64(fig):
    """Render *fig* as a PNG and return it base64-encoded as a UTF-8 string.

    The figure is closed via ``plt.close`` whether or not rendering
    succeeds, so a failed ``savefig`` does not leave the figure open.
    """
    try:
        with io.BytesIO() as buf:
            fig.savefig(buf, format='png', bbox_inches='tight')
            return base64.b64encode(buf.getvalue()).decode('utf-8')
    finally:
        plt.close(fig)
40
+
41
def _sentiment_ratio_chart(summary, title):
    """Return a base64 PNG of the stacked positive/negative ratio bars per candidate."""
    fig, ax = plt.subplots(figsize=(8, 5))
    summary[['正面比率', '負面比率']].plot(kind='bar', stacked=True, colormap='coolwarm', ax=ax)
    ax.set_title(title)
    return plot_to_base64(fig)


# Main analysis function
def run_analysis():
    """Run one analysis pass and return the dashboard as an HTML string.

    Steps: fetch posts for every candidate, label them with ``sentiment``,
    compute per-candidate positive/negative ratios, append those ratios to
    ``history_file``, render the charts as base64 PNGs, and fill the
    ``index.html`` template with the results.

    Returns:
        The rendered HTML. If any step raises, a ``<pre>`` block containing
        the (HTML-escaped) traceback is returned instead of propagating.
    """
    import html  # local import: only needed to escape the traceback below

    try:
        # Fetch posts and run sentiment analysis. Each post is scored once so
        # its label and confidence come from the same prediction.
        all_df = pd.concat([fetch_tweets(c) for c in candidates], ignore_index=True)
        results = [sentiment(text) for text in all_df['內容']]
        all_df['情緒'] = [r['label'] for r in results]
        all_df['信心度'] = [r['score'] for r in results]

        # Per-candidate counts of each sentiment label, then ratios.
        summary = all_df.groupby(['候選人', '情緒']).size().unstack(fill_value=0)
        summary['總貼文'] = summary.sum(axis=1)
        # .get(..., 0) covers the case where no post received that label.
        summary['正面比率'] = summary.get('positive', 0) / summary['總貼文']
        summary['負面比率'] = summary.get('negative', 0) / summary['總貼文']

        # Append this run's ratios to the history file.
        hist_row = summary[['正面比率', '負面比率']].copy()
        hist_row['日期'] = datetime.now().strftime('%Y-%m-%d')
        hist_row['候選人'] = summary.index
        if os.path.exists(history_file):
            df_hist = pd.concat([pd.read_csv(history_file), hist_row], ignore_index=True)
        else:
            df_hist = hist_row
        df_hist.to_csv(history_file, index=False)

        # ----------------- Charts -----------------
        # 1. Sentiment ratio for this run
        img_b64_today = _sentiment_ratio_chart(summary, "候選人當日社群情緒比例")

        # 2. Historical sentiment trend
        fig2, ax2 = plt.subplots(figsize=(10, 5))
        for c in candidates:
            temp = df_hist[df_hist['候選人'] == c]
            ax2.plot(temp['日期'], temp['正面比率'], marker='o', label=f"{c} 正面")
            ax2.plot(temp['日期'], temp['負面比率'], marker='x', label=f"{c} 負面")
        ax2.set_title("候選人歷史情緒趨勢")
        ax2.tick_params(axis='x', labelrotation=45)
        ax2.set_ylabel("比例")
        ax2.legend()
        img_b64_trend = plot_to_base64(fig2)

        # 3-5 are drawn from random numbers, not from the fetched posts.
        # 3. Social sentiment trend
        fig3, ax3 = plt.subplots(figsize=(8, 5))
        ax3.plot(range(7), [random.random() for _ in range(7)], marker='o', label="正面")
        ax3.plot(range(7), [random.random() for _ in range(7)], marker='x', label="負面")
        ax3.set_title("社群情感趨勢")
        ax3.legend()
        img_social_sentiment = plot_to_base64(fig3)

        # 4. Post volume per platform
        fig4, ax4 = plt.subplots(figsize=(8, 5))
        platforms = ["X", "Facebook", "Instagram", "PTT", "Line"]
        ax4.bar(platforms, [random.randint(10, 100) for _ in platforms], color='skyblue')
        ax4.set_title("各平台貼文量")
        img_platform_performance = plot_to_base64(fig4)

        # 5. Per-candidate volume trend
        fig5, ax5 = plt.subplots(figsize=(8, 5))
        for c in candidates:
            ax5.plot(range(7), [random.randint(5, 20) for _ in range(7)], marker='o', label=c)
        ax5.set_title("候選人社群量趨勢")
        ax5.legend()
        img_candidate_volume = plot_to_base64(fig5)

        # 6. Per-candidate positive/negative breakdown
        img_candidate_sentiment = _sentiment_ratio_chart(summary, "候選人社群量分析(正/負面情緒)")

        # 7. Knowledge graph: candidates as nodes, chained in list order
        fig7, ax7 = plt.subplots(figsize=(8, 6))
        G = nx.Graph()
        G.add_nodes_from(candidates)
        G.add_edges_from(zip(candidates, candidates[1:]))
        nx.draw(G, nx.spring_layout(G), with_labels=True, node_color='lightgreen', font_size=12, ax=ax7)
        img_knowledge_graph = plot_to_base64(fig7)

        # News data (optional file)
        if os.path.exists(news_file):
            df_news = pd.read_csv(news_file)
            news_summary = df_news.groupby('類別').size().to_dict()
            news_table = df_news.to_html(index=False)
        else:
            news_summary = {}
            news_table = "<p>未提供新聞資料</p>"

        # Social engagement table. The interaction rate and platform count
        # cells are fixed literals, not computed from the data.
        engagement_table = f"""
 <table class="min-w-full bg-white border border-gray-200">
 <tr class="bg-gray-100 border-b">
 <th class="py-2 px-4 border-r">總參與數</th>
 <td class="py-2 px-4 border-r">{len(all_df)}</td>
 <th class="py-2 px-4 border-r">正面情緒比例</th>
 <td class="py-2 px-4 border-r">{all_df['情緒'].value_counts(normalize=True).get('positive',0):.1%}</td>
 <th class="py-2 px-4 border-r">平均互動率</th>
 <td class="py-2 px-4 border-r">3.9%</td>
 <th class="py-2 px-4 border-r">活躍平台</th>
 <td class="py-2 px-4">6</td>
 </tr></table>
 """

        # HTML template: read with an explicit encoding and close the handle.
        with open("index.html", encoding="utf-8") as template_file:
            html_template = template_file.read()

        html_content = html_template.format(
            report_date=datetime.now().strftime('%Y-%m-%d %H:%M'),
            img_b64_today=img_b64_today,
            img_b64_trend=img_b64_trend,
            img_social_sentiment=img_social_sentiment,
            img_platform_performance=img_platform_performance,
            img_candidate_volume=img_candidate_volume,
            img_candidate_sentiment=img_candidate_sentiment,
            img_knowledge_graph=img_knowledge_graph,
            engagement_table=engagement_table,
            news_summary=news_summary,
            news_table=news_table
        )
        return html_content
    except Exception:
        # Escape the traceback so frames such as "<module>" are shown as text
        # rather than being parsed as HTML tags.
        return f"<pre>❌ 輿情分析執行失敗:\n{html.escape(traceback.format_exc())}</pre>"
158
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
# Gradio front end: no inputs; each call to run_analysis returns the HTML report.
iface = gr.Interface(fn=run_analysis, inputs=[], outputs=gr.HTML(), title="高雄市長選戰輿情分析")
# Listen on all interfaces, port 7860.
iface.launch(server_name="0.0.0.0", server_port=7860)