shiue2000 committed on
Commit
cc88742
·
verified ·
1 Parent(s): bf221ac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +144 -86
app.py CHANGED
@@ -14,6 +14,8 @@ import schedule
14
  import time
15
  import threading
16
  import traceback
 
 
17
  # -----------------------------
18
  # 參數設定
19
  # -----------------------------
@@ -22,7 +24,8 @@ days_back = 7
22
  max_tweets_per_candidate = 100
23
  news_file = "news_sample.csv"
24
  history_file = "history_sentiment.csv"
25
- max_retries = 3 # 貼文抓取失敗重試次數
 
26
  # -----------------------------
27
  # 中文情緒分析模型 (公開可用)
28
  # -----------------------------
@@ -30,6 +33,7 @@ sentiment = pipeline(
30
  "sentiment-analysis",
31
  model="uer/roberta-base-finetuned-sentiment-chinese"
32
  )
 
33
  # -----------------------------
34
  # 主分析函數
35
  # -----------------------------
@@ -41,13 +45,13 @@ def run_analysis():
41
  all_tweets = []
42
  for candidate in candidates:
43
  query = f'{candidate} since:{since_date} until:{until_date}'
44
- for attempt in range(1, max_retries+1):
45
  try:
46
  for i, tweet in enumerate(sntwitter.TwitterSearchScraper(query).get_items()):
47
  if i >= max_tweets_per_candidate:
48
  break
49
  all_tweets.append([tweet.date, tweet.user.username, tweet.content, candidate])
50
- break # 成功抓取,跳出重試
51
  except Exception as e:
52
  print(f"⚠️ {candidate} 第 {attempt} 次抓貼文失敗: {e}")
53
  if attempt == max_retries:
@@ -62,17 +66,18 @@ def run_analysis():
62
  summary['總貼文'] = summary.sum(axis=1)
63
  summary['正面比率'] = summary.get('positive', 0) / summary['總貼文']
64
  summary['負面比率'] = summary.get('negative', 0) / summary['總貼文']
65
- summary['日期'] = datetime.now().strftime('%Y-%m-%d')
66
  # 3. 更新歷史資料
67
  if os.path.exists(history_file):
68
  df_history = pd.read_csv(history_file)
69
- df_history = pd.concat([df_history, summary.reset_index()[['日期','候選人','正面比率','負面比率']]], ignore_index=True)
70
  else:
71
- df_history = summary.reset_index()[['日期','候選人','正面比率','負面比率']]
72
  df_history.to_csv(history_file, index=False)
 
73
  # 4. 可視化當日情緒圖表
74
- plt.figure(figsize=(8,5))
75
- summary[['正面比率','負面比率']].plot(kind='bar', stacked=True, colormap='coolwarm')
76
  plt.title("候選人當日社群情緒比例")
77
  plt.ylabel("比例")
78
  plt.xlabel("候選人")
@@ -83,10 +88,11 @@ def run_analysis():
83
  buf.seek(0)
84
  img_b64_today = base64.b64encode(buf.read()).decode("utf-8")
85
  buf.close()
 
86
  # 5. 可視化歷史情緒趨勢
87
- plt.figure(figsize=(10,5))
88
  for c in candidates:
89
- temp = df_history[df_history['候選人']==c]
90
  plt.plot(temp['日期'], temp['正面比率'], marker='o', label=f"{c} 正面")
91
  plt.plot(temp['日期'], temp['負面比率'], marker='x', label=f"{c} 負面")
92
  plt.xticks(rotation=45)
@@ -99,6 +105,7 @@ def run_analysis():
99
  buf.seek(0)
100
  img_b64_trend = base64.b64encode(buf.read()).decode("utf-8")
101
  buf.close()
 
102
  # 6. 新聞線索整合
103
  if os.path.exists(news_file):
104
  df_news = pd.read_csv(news_file)
@@ -107,35 +114,132 @@ def run_analysis():
107
  else:
108
  news_summary = {}
109
  news_table = "<p>未提供新聞資料</p>"
110
- # 7. 生成即時輿情報告 (HTML) with added figures
111
- html_report = f"""
112
- <h2>高雄市長選戰輿情摘要 ({datetime.now().strftime('%Y-%m-%d')})</h2>
113
- <h3>1. 社群貼文情緒</h3>
114
- {summary.to_html()}
115
- <img src='data:image/png;base64,{img_b64_today}' width='600'>
116
- <h3>2. 歷史情緒趨勢</h3>
117
- <img src='data:image/png;base64,{img_b64_trend}' width='800'>
118
- <h3>3. 社群媒體參與概況</h3>
119
- <table>
120
- <tr><td>總參與數</td><td>3,511 (+12.5%)</td><td>正面情緒比例</td><td>73% (+3.2%)</td><td>平均互動率</td><td>3.9% (+0.8%)</td><td>活躍平台</td><td>6</td></tr>
121
- </table>
122
- <h3>4. 社群情感趨勢圖</h3>
123
- <img src='data:image/png;base64,{base64.b64encode(open("social_sentiment_trend.png", "rb").read()).decode("utf-8")}' width='800'>
124
- <h3>5. 各平台表現</h3>
125
- <img src='data:image/png;base64,{base64.b64encode(open("platform_performance.png", "rb").read()).decode("utf-8")}' width='600'>
126
- <h3>6. 候選人社群量趨勢</h3>
127
- <img src='data:image/png;base64,{base64.b64encode(open("candidate_volume_trend.png", "rb").read()).decode("utf-8")}' width='800'>
128
- <h3>7. 候選人社群量分析(正/負面情緒)</h3>
129
- <img src='data:image/png;base64,{base64.b64encode(open("candidate_sentiment_analysis.png", "rb").read()).decode("utf-8")}' width='600'>
130
- <h3>8. 新聞議題統計</h3>
131
- <p>各類別新聞量:{news_summary}</p>
132
- {news_table}
133
- """
134
- return html_report
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  except Exception as e:
136
  err_msg = f"❌ 輿情分析執行失敗:\n{traceback.format_exc()}"
137
  print(err_msg)
138
  return f"<pre>{err_msg}</pre>"
 
139
  # -----------------------------
140
  # 自動排程設定 (每天 08:00 執行)
141
  # -----------------------------
@@ -146,68 +250,22 @@ def schedule_daily_run():
146
  schedule.run_pending()
147
  except Exception as e:
148
  print(f"⚠️ 排程異常: {e}")
149
- time.sleep(60) # 每分鐘檢查一次
 
150
  # -----------------------------
151
  # 啟動排程背景執行緒
152
  # -----------------------------
153
  threading.Thread(target=schedule_daily_run, daemon=True).start()
 
154
  # -----------------------------
155
  # Gradio 前端
156
  # -----------------------------
157
  iface = gr.Interface(
158
  fn=run_analysis,
159
  inputs=[],
160
- outputs=gr.HTML,
161
  live=False,
162
  title="高雄市長選戰輿情分析",
163
  description="每日自動抓取 X 貼文 + 新聞議題分析 + 歷史情緒趨勢\n支援自動重試與排程異常警告"
164
  )
165
- iface.launch()
166
-
167
- # 假設你已經有 df_tweets 和 df_news
168
- # df_tweets columns: ['日期','使用者','內容','候選人','情緒']
169
- # df_news columns: ['日期','標題','摘要','類別','平台']
170
- # 1️⃣ 社群情感趨勢圖
171
- df_trend = df_tweets.copy()
172
- df_trend['日期'] = pd.to_datetime(df_trend['日期']).dt.date
173
- trend_summary = df_trend.groupby(['日期','情緒']).size().unstack(fill_value=0)
174
- trend_summary_percent = trend_summary.div(trend_summary.sum(axis=1), axis=0)
175
- plt.figure(figsize=(10,5))
176
- trend_summary_percent.plot.area(colormap='RdYlGn', alpha=0.6)
177
- plt.title("社群情感趨勢")
178
- plt.ylabel("比例")
179
- plt.xlabel("日期")
180
- plt.xticks(rotation=45)
181
- plt.tight_layout()
182
- plt.savefig("social_sentiment_trend.png")
183
- plt.show()
184
- # 2️⃣ 各平台表現
185
- platform_summary = df_news['平台'].value_counts()
186
- plt.figure(figsize=(8,4))
187
- platform_summary.plot(kind='bar', color='royalblue')
188
- plt.title("各平台表現")
189
- plt.ylabel("提及數")
190
- plt.xticks(rotation=0)
191
- plt.tight_layout()
192
- plt.savefig("platform_performance.png")
193
- plt.show()
194
- # 3️⃣ 候選人社群量趨勢
195
- candidate_trend = df_trend.groupby(['日期','候選人']).size().unstack(fill_value=0)
196
- candidate_trend.plot.area(figsize=(10,5), alpha=0.6)
197
- plt.title("候選人社群聲量趨勢")
198
- plt.ylabel("提及數")
199
- plt.xlabel("日期")
200
- plt.xticks(rotation=45)
201
- plt.tight_layout()
202
- plt.savefig("candidate_volume_trend.png")
203
- plt.show()
204
- # 4️⃣ 候選人社群量分析(正/負面情緒)
205
- candidate_sentiment = df_trend.groupby(['候選人','情緒']).size().unstack(fill_value=0)
206
- candidate_sentiment[['positive','negative']].plot(kind='bar', stacked=True, colormap='RdYlGn')
207
- plt.title("候選人社群情緒分析")
208
- plt.ylabel("貼文數")
209
- plt.xlabel("候選人")
210
- plt.xticks(rotation=0)
211
- plt.tight_layout()
212
- plt.savefig("candidate_sentiment_analysis.png")
213
- plt.show()
 
14
  import time
15
  import threading
16
  import traceback
17
+ import networkx as nx
18
+
19
  # -----------------------------
20
  # 參數設定
21
  # -----------------------------
 
24
  max_tweets_per_candidate = 100
25
  news_file = "news_sample.csv"
26
  history_file = "history_sentiment.csv"
27
+ max_retries = 3 # 貼文抓取失敗重試次數
28
+
29
  # -----------------------------
30
  # 中文情緒分析模型 (公開可用)
31
  # -----------------------------
 
33
  "sentiment-analysis",
34
  model="uer/roberta-base-finetuned-sentiment-chinese"
35
  )
36
+
37
  # -----------------------------
38
  # 主分析函數
39
  # -----------------------------
 
45
  all_tweets = []
46
  for candidate in candidates:
47
  query = f'{candidate} since:{since_date} until:{until_date}'
48
+ for attempt in range(1, max_retries + 1):
49
  try:
50
  for i, tweet in enumerate(sntwitter.TwitterSearchScraper(query).get_items()):
51
  if i >= max_tweets_per_candidate:
52
  break
53
  all_tweets.append([tweet.date, tweet.user.username, tweet.content, candidate])
54
+ break # 成功抓取,跳出重試
55
  except Exception as e:
56
  print(f"⚠️ {candidate} 第 {attempt} 次抓貼文失敗: {e}")
57
  if attempt == max_retries:
 
66
  summary['總貼文'] = summary.sum(axis=1)
67
  summary['正面比率'] = summary.get('positive', 0) / summary['總貼文']
68
  summary['負面比率'] = summary.get('negative', 0) / summary['總貼文']
69
+ summary['日期'] = datetime.now().strftime('%Y-%m-%d %H:%M %Z')
70
  # 3. 更新歷史資料
71
  if os.path.exists(history_file):
72
  df_history = pd.read_csv(history_file)
73
+ df_history = pd.concat([df_history, summary.reset_index()[['日期', '候選人', '正面比率', '負面比率']]], ignore_index=True)
74
  else:
75
+ df_history = summary.reset_index()[['日期', '候選人', '正面比率', '負面比率']]
76
  df_history.to_csv(history_file, index=False)
77
+
78
  # 4. 可視化當日情緒圖表
79
+ plt.figure(figsize=(8, 5))
80
+ summary[['正面比率', '負面比率']].plot(kind='bar', stacked=True, colormap='coolwarm')
81
  plt.title("候選人當日社群情緒比例")
82
  plt.ylabel("比例")
83
  plt.xlabel("候選人")
 
88
  buf.seek(0)
89
  img_b64_today = base64.b64encode(buf.read()).decode("utf-8")
90
  buf.close()
91
+
92
  # 5. 可視化歷史情緒趨勢
93
+ plt.figure(figsize=(10, 5))
94
  for c in candidates:
95
+ temp = df_history[df_history['候選人'] == c]
96
  plt.plot(temp['日期'], temp['正面比率'], marker='o', label=f"{c} 正面")
97
  plt.plot(temp['日期'], temp['負面比率'], marker='x', label=f"{c} 負面")
98
  plt.xticks(rotation=45)
 
105
  buf.seek(0)
106
  img_b64_trend = base64.b64encode(buf.read()).decode("utf-8")
107
  buf.close()
108
+
109
  # 6. 新聞線索整合
110
  if os.path.exists(news_file):
111
  df_news = pd.read_csv(news_file)
 
114
  else:
115
  news_summary = {}
116
  news_table = "<p>未提供新聞資料</p>"
117
+
118
+ # 7. 社群情感趨勢圖
119
+ df_trend = df_tweets.copy()
120
+ df_trend['日期'] = pd.to_datetime(df_trend['日期']).dt.date
121
+ trend_summary = df_trend.groupby(['日期', '情緒']).size().unstack(fill_value=0)
122
+ trend_summary_percent = trend_summary.div(trend_summary.sum(axis=1), axis=0)
123
+ plt.figure(figsize=(10, 5))
124
+ trend_summary_percent.plot.area(colormap='RdYlGn', alpha=0.6)
125
+ plt.title("社群情感趨勢")
126
+ plt.ylabel("比例")
127
+ plt.xlabel("日期")
128
+ plt.xticks(rotation=45)
129
+ plt.tight_layout()
130
+ plt.savefig("social_sentiment_trend.png")
131
+ plt.close()
132
+
133
+ # 8. 各平台表現
134
+ platform_summary = df_news['平台'].value_counts()
135
+ plt.figure(figsize=(8, 4))
136
+ platform_summary.plot(kind='bar', color='royalblue')
137
+ plt.title("各平台表現")
138
+ plt.ylabel("提及數")
139
+ plt.xticks(rotation=0)
140
+ plt.tight_layout()
141
+ plt.savefig("platform_performance.png")
142
+ plt.close()
143
+
144
+ # 9. 候選人社群量趨勢
145
+ candidate_trend = df_trend.groupby(['日期', '候選人']).size().unstack(fill_value=0)
146
+ candidate_trend.plot.area(figsize=(10, 5), alpha=0.6)
147
+ plt.title("候選人社群聲量趨勢")
148
+ plt.ylabel("提及數")
149
+ plt.xlabel("日期")
150
+ plt.xticks(rotation=45)
151
+ plt.tight_layout()
152
+ plt.savefig("candidate_volume_trend.png")
153
+ plt.close()
154
+
155
+ # 10. 候選人社群量分析(正/負面情緒)
156
+ candidate_sentiment = df_trend.groupby(['候選人', '情緒']).size().unstack(fill_value=0)
157
+ candidate_sentiment[['positive', 'negative']].plot(kind='bar', stacked=True, colormap='RdYlGn')
158
+ plt.title("候選人社群情緒分析")
159
+ plt.ylabel("貼文數")
160
+ plt.xlabel("候選人")
161
+ plt.xticks(rotation=0)
162
+ plt.tight_layout()
163
+ plt.savefig("candidate_sentiment_analysis.png")
164
+ plt.close()
165
+
166
+ # 11. 知識圖譜
167
+ G = nx.DiGraph()
168
+ G.add_nodes_from(candidates, type='candidate')
169
+ G.add_nodes_from(df_tweets['情緒'].unique(), type='sentiment')
170
+ G.add_nodes_from(df_news['平台'].unique(), type='platform')
171
+ G.add_nodes_from(df_news['類別'].unique(), type='news_category')
172
+
173
+ for candidate in candidates:
174
+ candidate_data = df_tweets[df_tweets['候選人'] == candidate]
175
+ total_posts = len(candidate_data)
176
+ for sentiment in df_tweets['情緒'].unique():
177
+ sentiment_count = len(candidate_data[candidate_data['情緒'] == sentiment])
178
+ if total_posts > 0 and sentiment_count > 0:
179
+ G.add_edge(candidate, sentiment, weight=sentiment_count / total_posts)
180
+
181
+ for candidate in candidates:
182
+ candidate_tweets = df_tweets[df_tweets['候選人'] == candidate]
183
+ for platform in df_news['平台'].unique():
184
+ platform_count = len(candidate_tweets[candidate_tweets['內容'].str.contains(platform, na=False)])
185
+ if platform_count > 0:
186
+ G.add_edge(candidate, platform, weight=platform_count)
187
+
188
+ for candidate in candidates:
189
+ candidate_news = df_news[df_news['內容'].str.contains(candidate, na=False)]
190
+ for category in df_news['類別'].unique():
191
+ category_count = len(candidate_news[candidate_news['類別'] == category])
192
+ if category_count > 0:
193
+ G.add_edge(candidate, category, weight=category_count)
194
+
195
+ plt.figure(figsize=(12, 8))
196
+ pos = nx.spring_layout(G)
197
+ nx.draw(G, pos, with_labels=True, node_color='lightblue', node_size=1500, font_size=8, font_weight='bold', arrows=True)
198
+ edge_labels = nx.get_edge_attributes(G, 'weight')
199
+ nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)
200
+ plt.title("知識圖譜 of Kaohsiung Mayoral Election Sentiment")
201
+ plt.axis('off')
202
+ plt.savefig("knowledge_graph.png")
203
+ plt.close()
204
+
205
+ # 12. 載入並填充 index.html 模板
206
+ with open("index.html", "r", encoding="utf-8") as f:
207
+ html_template = f.read()
208
+
209
+ html_content = html_template.format(
210
+ report_date=datetime.now().strftime('%Y-%m-%d %H:%M %Z'),
211
+ img_b64_today=img_b64_today,
212
+ img_b64_trend=img_b64_trend,
213
+ engagement_table="""
214
+ <table class="min-w-full bg-white border border-gray-200">
215
+ <tr class="bg-gray-100 border-b">
216
+ <th class="py-2 px-4 border-r">總參與數</th>
217
+ <td class="py-2 px-4 border-r">3,511 (+12.5%)</td>
218
+ <th class="py-2 px-4 border-r">正面情緒比例</th>
219
+ <td class="py-2 px-4 border-r">73% (+3.2%)</td>
220
+ <th class="py-2 px-4 border-r">平均互動率</th>
221
+ <td class="py-2 px-4 border-r">3.9% (+0.8%)</td>
222
+ <th class="py-2 px-4 border-r">活躍平台</th>
223
+ <td class="py-2 px-4">6</td>
224
+ </tr>
225
+ </table>
226
+ """,
227
+ img_social_sentiment=base64.b64encode(open("social_sentiment_trend.png", "rb").read()).decode("utf-8"),
228
+ img_platform_performance=base64.b64encode(open("platform_performance.png", "rb").read()).decode("utf-8"),
229
+ img_candidate_volume=base64.b64encode(open("candidate_volume_trend.png", "rb").read()).decode("utf-8"),
230
+ img_candidate_sentiment=base64.b64encode(open("candidate_sentiment_analysis.png", "rb").read()).decode("utf-8"),
231
+ img_knowledge_graph=base64.b64encode(open("knowledge_graph.png", "rb").read()).decode("utf-8"),
232
+ news_summary=str(news_summary),
233
+ news_table=news_table
234
+ )
235
+
236
+ return html_content
237
+
238
  except Exception as e:
239
  err_msg = f"❌ 輿情分析執行失敗:\n{traceback.format_exc()}"
240
  print(err_msg)
241
  return f"<pre>{err_msg}</pre>"
242
+
243
  # -----------------------------
244
  # 自動排程設定 (每天 08:00 執行)
245
  # -----------------------------
 
250
  schedule.run_pending()
251
  except Exception as e:
252
  print(f"⚠️ 排程異常: {e}")
253
+ time.sleep(60) # 每分鐘檢查一次
254
+
255
  # -----------------------------
256
  # 啟動排程背景執行緒
257
  # -----------------------------
258
  threading.Thread(target=schedule_daily_run, daemon=True).start()
259
+
260
  # -----------------------------
261
  # Gradio 前端
262
  # -----------------------------
263
  iface = gr.Interface(
264
  fn=run_analysis,
265
  inputs=[],
266
+ outputs=gr.HTML(),
267
  live=False,
268
  title="高雄市長選戰輿情分析",
269
  description="每日自動抓取 X 貼文 + 新聞議題分析 + 歷史情緒趨勢\n支援自動重試與排程異常警告"
270
  )
271
+ iface.launch(server_name="0.0.0.0", server_port=7860)