shiue2000 committed on
Commit
f2a5a12
·
verified ·
1 Parent(s): 6d4b42f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +379 -88
app.py CHANGED
@@ -10,19 +10,21 @@ from datetime import datetime, timedelta
10
  import gradio as gr
11
  import logging
12
  from jinja2 import Template
13
- # ===== Font and Style Settings =====
14
- plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei', 'Noto Sans TC', 'SimHei', 'Arial Unicode MS']
 
 
15
  plt.rcParams['axes.unicode_minus'] = False
16
  plt.style.use("seaborn-v0_8")
17
- # ===== Logging =====
18
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
19
- # ===== Parameters =====
20
  candidates = ["許智傑", "邱議瑩", "賴瑞隆", "林岱樺", "柯志恩"]
21
  days_back = 7
22
  max_tweets_per_candidate = 20
23
  news_file = "news_sample.csv"
24
  history_file = "history_sentiment.csv"
25
- # ===== Sentiment Analysis =====
26
  try:
27
  from transformers import pipeline
28
  sentiment_pipeline = pipeline(
@@ -37,25 +39,25 @@ except:
37
  "label": random.choice(["positive", "negative", "neutral"]),
38
  "score": random.uniform(0.3, 0.9)
39
  }
40
- # ===== Simulate Tweet Fetching =====
41
  def fetch_tweets(candidate):
42
  sample_texts = {
43
- "許智傑": ["許智傑 actively participates in local events", "許智傑 criticized for vague policies", "Support 許智傑 for Kaohsiung's future!"],
44
- "邱議瑩": ["邱議瑩 strongly announces mayoral candidacy", "邱議瑩 criticizes 林岱樺", "邱議瑩 promotes Hakka culture"],
45
- "賴瑞隆": ["賴瑞隆 pushes marine economy", "賴瑞隆 leads in polls", "賴瑞隆 questioned for lack of experience"],
46
- "林岱樺": ["林岱樺 actively engages grassroots", "林岱樺 involved in assistant fee controversy", "林岱樺 backed by New Tide faction"],
47
- "柯志恩": ["柯志恩 leads significantly in polls", "柯志恩 criticized for absence during disaster inspection", "柯志恩 promotes youth policies"]
48
  }
49
  return pd.DataFrame([
50
  {
51
  "Date": datetime.now() - timedelta(days=random.randint(0, days_back - 1)),
52
  "User": f"user{random.randint(1, 100)}",
53
- "Content": random.choice(sample_texts.get(candidate, [f"{candidate}'s post {i}"])),
54
  "Candidate": candidate
55
  }
56
  for i in range(random.randint(5, max_tweets_per_candidate))
57
  ])
58
- # ===== Utility: Matplotlib to Base64 =====
59
  def fig_to_base64():
60
  buf = io.BytesIO()
61
  plt.savefig(buf, format="png", dpi=120, bbox_inches="tight")
@@ -64,76 +66,365 @@ def fig_to_base64():
64
  buf.close()
65
  plt.close()
66
  return img_b64
67
- # ===== Chart Generator =====
68
  def generate_charts(all_df, summary, df_hist):
69
  results = {}
70
- # 1. Daily Sentiment Ratio
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  fig = plt.figure(figsize=(8, 5))
72
  summary[['Positive Ratio', 'Negative Ratio', 'Neutral Ratio']].plot(
73
  kind='bar', stacked=True, colormap='coolwarm', ax=fig.gca()
74
  )
75
- plt.title("Daily Sentiment Ratio by Candidate")
76
- plt.ylabel("Ratio")
77
- plt.xlabel("Candidate")
78
- plt.legend(["Positive", "Negative", "Neutral"])
79
  results["img_b64_today"] = fig_to_base64()
80
- # 2. Historical Sentiment Trend
81
  fig = plt.figure(figsize=(10, 5))
82
  for c in candidates:
83
  temp = df_hist[df_hist['Candidate'] == c]
84
  if not temp.empty:
85
- plt.plot(temp['Date'], temp['Positive Ratio'], marker='o', label=f"{c} Positive")
86
- plt.plot(temp['Date'], temp['Negative Ratio'], marker='x', label=f"{c} Negative")
87
- plt.plot(temp['Date'], temp['Neutral Ratio'], marker='s', label=f"{c} Neutral")
88
- plt.title("Historical Sentiment Trend by Candidate")
89
  plt.xticks(rotation=45)
90
- plt.ylabel("Ratio")
91
- plt.xlabel("Date")
92
  plt.legend()
93
  results["img_b64_trend"] = fig_to_base64()
94
- # 3. Social Sentiment Trend
95
  sentiment_trend = all_df.groupby([pd.Grouper(key='Date', freq='D'), 'Sentiment']).size().unstack(fill_value=0)
96
  sentiment_trend = sentiment_trend.div(sentiment_trend.sum(axis=1), axis=0).fillna(0)
97
  fig = plt.figure(figsize=(8, 5))
98
  for s in ['positive', 'negative', 'neutral']:
99
  if s in sentiment_trend.columns:
100
- plt.plot(sentiment_trend.index, sentiment_trend[s], marker='o', label=s.capitalize())
101
- plt.title("Social Sentiment Trend")
102
- plt.xlabel("Date")
103
- plt.ylabel("Ratio")
104
  plt.legend()
105
  results["img_social_sentiment"] = fig_to_base64()
106
- # 4. Platform Performance
107
  platforms = ["X", "Facebook", "Instagram", "PTT", "Line"]
108
  platform_counts = pd.Series({p: random.randint(10, 100) for p in platforms})
109
  fig = plt.figure(figsize=(8, 5))
110
  plt.bar(platforms, platform_counts, color='skyblue')
111
- plt.title("Post Volume by Platform")
112
- plt.xlabel("Platform")
113
- plt.ylabel("Number of Posts")
114
  results["img_platform_performance"] = fig_to_base64()
115
- # 5. Candidate Volume Trend
116
  candidate_trend = all_df.groupby([pd.Grouper(key='Date', freq='D'), 'Candidate']).size().unstack(fill_value=0)
117
  fig = plt.figure(figsize=(8, 5))
118
  for c in candidates:
119
  if c in candidate_trend.columns:
120
  plt.plot(candidate_trend.index, candidate_trend[c], marker='o', label=c)
121
- plt.title("Candidate Post Volume Trend")
122
- plt.xlabel("Date")
123
- plt.ylabel("Number of Posts")
124
  plt.legend()
125
  results["img_candidate_volume"] = fig_to_base64()
126
- # 6. Candidate Sentiment Analysis
127
  fig = plt.figure(figsize=(8, 5))
128
  summary[['Positive Ratio', 'Negative Ratio', 'Neutral Ratio']].plot(
129
  kind='bar', stacked=True, colormap='coolwarm', ax=fig.gca()
130
  )
131
- plt.title("Candidate Sentiment Analysis (Positive/Negative/Neutral)")
132
- plt.ylabel("Ratio")
133
- plt.xlabel("Candidate")
134
- plt.legend(["Positive", "Negative", "Neutral"])
135
  results["img_candidate_sentiment"] = fig_to_base64()
136
- # 7. Knowledge Graph
137
  fig, ax = plt.subplots(figsize=(8, 6))
138
  G = nx.Graph()
139
  for c in candidates:
@@ -141,10 +432,10 @@ def generate_charts(all_df, summary, df_hist):
141
  for i in range(len(candidates) - 1):
142
  G.add_edge(candidates[i], candidates[i + 1])
143
  nx.draw(G, nx.spring_layout(G), with_labels=True, node_color='lightgreen', font_size=12, ax=ax)
144
- plt.title("Candidate Knowledge Graph")
145
  results["img_knowledge_graph"] = fig_to_base64()
146
  return results
147
- # ===== Main Analysis Function =====
148
  def run_analysis():
149
  try:
150
  # Embed the template as a string to avoid file dependency and ensure syntax is correct
@@ -154,62 +445,62 @@ def run_analysis():
154
  <head>
155
  <meta charset="UTF-8">
156
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
157
- <title>2026 Kaohsiung Mayoral Election Sentiment Analysis Report</title>
158
  <script src="https://cdn.tailwindcss.com"></script>
159
  </head>
160
  <body class="bg-gray-100 font-sans leading-normal tracking-normal">
161
  <div class="container mx-auto p-4">
162
- <h1 class="text-3xl font-bold mb-4">2026 Kaohsiung Mayoral Election Sentiment Analysis Report</h1>
163
- <p class="mb-4">Report Date: {{ report_date }}</p>
164
 
165
- <h2 class="text-2xl font-bold mb-2">Engagement Summary</h2>
166
  {{ engagement_table | safe }}
167
 
168
- <h2 class="text-2xl font-bold mb-2">News Summary</h2>
169
  <ul class="list-disc pl-5 mb-4">
170
  {% for key, value in news_summary %}
171
  <li><strong>{{ key }}</strong>: {{ value }}</li>
172
  {% endfor %}
173
  </ul>
174
 
175
- <h2 class="text-2xl font-bold mb-2">News Details</h2>
176
  {{ news_table | safe }}
177
 
178
- <h2 class="text-2xl font-bold mb-2">Daily Sentiment Ratio</h2>
179
- <img src="data:image/png;base64,{{ img_b64_today }}" alt="Daily Sentiment Ratio" class="mb-4">
180
 
181
- <h2 class="text-2xl font-bold mb-2">Historical Sentiment Trend</h2>
182
- <img src="data:image/png;base64,{{ img_b64_trend }}" alt="Historical Sentiment Trend" class="mb-4">
183
 
184
- <h2 class="text-2xl font-bold mb-2">Social Sentiment Trend</h2>
185
- <img src="data:image/png;base64,{{ img_social_sentiment }}" alt="Social Sentiment Trend" class="mb-4">
186
 
187
- <h2 class="text-2xl font-bold mb-2">Platform Performance</h2>
188
- <img src="data:image/png;base64,{{ img_platform_performance }}" alt="Platform Performance" class="mb-4">
189
 
190
- <h2 class="text-2xl font-bold mb-2">Candidate Volume Trend</h2>
191
- <img src="data:image/png;base64,{{ img_candidate_volume }}" alt="Candidate Volume Trend" class="mb-4">
192
 
193
- <h2 class="text-2xl font-bold mb-2">Candidate Sentiment Analysis</h2>
194
- <img src="data:image/png;base64,{{ img_candidate_sentiment }}" alt="Candidate Sentiment Analysis" class="mb-4">
195
 
196
- <h2 class="text-2xl font-bold mb-2">Knowledge Graph</h2>
197
- <img src="data:image/png;base64,{{ img_knowledge_graph }}" alt="Knowledge Graph" class="mb-4">
198
  </div>
199
  </body>
200
  </html>
201
  """
202
- # --- Tweet & Sentiment Analysis ---
203
  all_df = pd.concat([fetch_tweets(c) for c in candidates], ignore_index=True)
204
  all_df['Sentiment'] = all_df['Content'].apply(lambda x: sentiment(x)['label'])
205
  all_df['Confidence'] = all_df['Content'].apply(lambda x: sentiment(x)['score'])
206
- # --- Statistics ---
207
  summary = all_df.groupby(['Candidate', 'Sentiment']).size().unstack(fill_value=0)
208
  summary['Total Posts'] = summary.sum(axis=1)
209
  summary['Positive Ratio'] = summary.get('positive', 0) / summary['Total Posts'].replace(0, 1)
210
  summary['Negative Ratio'] = summary.get('negative', 0) / summary['Total Posts'].replace(0, 1)
211
  summary['Neutral Ratio'] = summary.get('neutral', 0) / summary['Total Posts'].replace(0, 1)
212
- # --- Historical Data ---
213
  today_str = datetime.now().strftime('%Y-%m-%d')
214
  hist_row = summary[['Positive Ratio', 'Negative Ratio', 'Neutral Ratio']].copy()
215
  hist_row['Date'] = today_str
@@ -219,56 +510,56 @@ def run_analysis():
219
  ignore_index=True
220
  ) if os.path.exists(history_file) else hist_row
221
  df_hist.to_csv(history_file, index=False)
222
- # --- Charts ---
223
  charts = generate_charts(all_df, summary, df_hist)
224
- # --- News ---
225
  if os.path.exists(news_file):
226
  df_news = pd.read_csv(news_file)
227
  news_summary = df_news.groupby('Category').size().to_dict()
228
  news_table = df_news.to_html(index=False, classes="min-w-full border border-gray-200")
229
  else:
230
  news_summary = {
231
- "Polls": "柯志恩 leads green camp candidates in multiple polls.",
232
- "Party Competition": "DPP primary competition is intense.",
233
- "Controversy": "林岱樺 involved in assistant fee controversy."
234
  }
235
- news_table = "<p>No news data available</p>"
236
  # Convert news_summary to list of tuples to support iteration in template
237
  news_summary = list(news_summary.items())
238
- # --- Engagement Table ---
239
  engagement_table = f"""
240
  <table class="min-w-full bg-white border border-gray-200">
241
  <tr class="bg-gray-100 border-b">
242
- <th class="py-2 px-4 border-r">Total Engagement</th>
243
  <td class="py-2 px-4 border-r">{len(all_df)}</td>
244
- <th class="py-2 px-4 border-r">Positive Sentiment Ratio</th>
245
  <td class="py-2 px-4 border-r">{all_df['Sentiment'].value_counts(normalize=True).get('positive', 0):.1%}</td>
246
- <th class="py-2 px-4 border-r">Average Interaction Rate</th>
247
  <td class="py-2 px-4 border-r">3.9%</td>
248
- <th class="py-2 px-4 border-r">Active Platforms</th>
249
  <td class="py-2 px-4">{5}</td>
250
  </tr>
251
  </table>
252
  """
253
- # --- HTML Rendering ---
254
  template = Template(html_template)
255
  html_content = template.render(
256
  report_date=datetime.now().strftime('%Y-%m-%d %H:%M'),
257
- engagement_table=engagement_table if engagement_table else "<p>No engagement data provided</p>",
258
- news_summary=news_summary if news_summary else "<p>No news summary provided</p>",
259
- news_table=news_table if news_table else "<p>No news data provided</p>",
260
  **charts
261
  )
262
 
263
  return html_content
264
  except Exception:
265
- return f"<pre>❌ Analysis Failed:\n{traceback.format_exc()}</pre>"
266
- # ===== Gradio Frontend =====
267
  if __name__ == "__main__":
268
  iface = gr.Interface(
269
  fn=run_analysis,
270
  inputs=[],
271
  outputs=gr.HTML(),
272
- title="2026 Kaohsiung Mayoral Election Sentiment Analysis"
273
  )
274
  iface.launch(server_name="0.0.0.0", server_port=7860)
 
10
  import gradio as gr
11
  import logging
12
  from jinja2 import Template
13
+ from matplotlib import font_manager
14
+ # ===== 字型與樣式 =====
15
+ font_manager.fontManager.addfont('SimHei.ttf')
16
+ plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft JhengHei', 'Noto Sans TC', 'Arial Unicode MS']
17
  plt.rcParams['axes.unicode_minus'] = False
18
  plt.style.use("seaborn-v0_8")
19
+ # ===== 日誌 =====
20
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
21
+ # ===== 參數 =====
22
  candidates = ["許智傑", "邱議瑩", "賴瑞隆", "林岱樺", "柯志恩"]
23
  days_back = 7
24
  max_tweets_per_candidate = 20
25
  news_file = "news_sample.csv"
26
  history_file = "history_sentiment.csv"
27
+ # ===== 情緒分析 =====
28
  try:
29
  from transformers import pipeline
30
  sentiment_pipeline = pipeline(
 
39
  "label": random.choice(["positive", "negative", "neutral"]),
40
  "score": random.uniform(0.3, 0.9)
41
  }
42
+ # ===== 模擬貼文抓取 =====
43
  def fetch_tweets(candidate):
44
  sample_texts = {
45
+ "許智傑": ["許智傑積極參與地方活動", "許智傑被指政策空洞", "支持許智傑,打造高雄新未來!"],
46
+ "邱議瑩": ["邱議瑩強勢表態選市長", "邱議瑩批林岱樺", "邱議瑩推客家文化"],
47
+ "賴瑞隆": ["賴瑞隆推海洋經濟", "賴瑞隆民調領先", "賴瑞隆被質疑經驗不足"],
48
+ "林岱樺": ["林岱樺積極跑基層", "林岱樺涉助理費爭議", "林岱樺獲正國會支持"],
49
+ "柯志恩": ["柯志恩民調大幅領先", "柯志恩被批勘災缺席", "柯志恩推青年政策"]
50
  }
51
  return pd.DataFrame([
52
  {
53
  "Date": datetime.now() - timedelta(days=random.randint(0, days_back - 1)),
54
  "User": f"user{random.randint(1, 100)}",
55
+ "Content": random.choice(sample_texts.get(candidate, [f"{candidate} 的貼文 {i}"])),
56
  "Candidate": candidate
57
  }
58
  for i in range(random.randint(5, max_tweets_per_candidate))
59
  ])
60
+ # ===== 工具: Matplotlib base64 =====
61
  def fig_to_base64():
62
  buf = io.BytesIO()
63
  plt.savefig(buf, format="png", dpi=120, bbox_inches="tight")
 
66
  buf.close()
67
  plt.close()
68
  return img_b64
69
+ # ===== 多圖產生器 =====
70
  def generate_charts(all_df, summary, df_hist):
71
  results = {}
72
+ # 1. 每日情緒比例
73
+ fig = plt.figure(figsize=(8, 5))
74
+ summary_plot = summary[['Positive Ratio', 'Negative Ratio', 'Neutral Ratio']].rename(columns={
75
+ 'Positive Ratio': '正面比例',
76
+ 'Negative Ratio': '負面比例',
77
+ 'Neutral Ratio': '中性比例'
78
+ })
79
+ summary_plot.plot(
80
+ kind='bar', stacked=True, colormap='coolwarm', ax=fig.gca()
81
+ )
82
+ plt.title("候選人每日社群情緒比例")
83
+ plt.ylabel("比例")
84
+ plt.xlabel("候選人")
85
+ plt.legend(["正面比例", "負面比例", "中性比例"])
86
+ results["img_b64_today"] = fig_to_base64()
87
+ # 2. 歷史情緒趨勢
88
+ fig = plt.figure(figsize=(10, 5))
89
+ for c in candidates:
90
+ temp = df_hist[df_hist['Candidate'] == c]
91
+ if not temp.empty:
92
+ plt.plot(temp['Date'], temp['Positive Ratio'], marker='o', label=f"{c} 正面")
93
+ plt.plot(temp['Date'], temp['Negative Ratio'], marker='x', label=f"{c} 負面")
94
+ plt.plot(temp['Date'], temp['Neutral Ratio'], marker='s', label=f"{c} 中性")
95
+ plt.title("候選人歷史情緒趨勢")
96
+ plt.xticks(rotation=45)
97
+ plt.ylabel("比例")
98
+ plt.xlabel("日期")
99
+ plt.legend()
100
+ results["img_b64_trend"] = fig_to_base64()
101
+ # 3. 社群情緒趨勢
102
+ sentiment_trend = all_df.groupby([pd.Grouper(key='Date', freq='D'), 'Sentiment']).size().unstack(fill_value=0)
103
+ sentiment_trend = sentiment_trend.div(sentiment_trend.sum(axis=1), axis=0).fillna(0)
104
+ sentiment_label_map = {'positive': '正面', 'negative': '負面', 'neutral': '中性'}
105
+ fig = plt.figure(figsize=(8, 5))
106
+ for s in ['positive', 'negative', 'neutral']:
107
+ if s in sentiment_trend.columns:
108
+ plt.plot(sentiment_trend.index, sentiment_trend[s], marker='o', label=sentiment_label_map.get(s, s.capitalize()))
109
+ plt.title("社群情緒趨勢")
110
+ plt.xlabel("日期")
111
+ plt.ylabel("比例")
112
+ plt.legend()
113
+ results["img_social_sentiment"] = fig_to_base64()
114
+ # 4. 平台表現
115
+ platforms = ["X", "Facebook", "Instagram", "PTT", "Line"]
116
+ platform_counts = pd.Series({p: random.randint(10, 100) for p in platforms})
117
+ fig = plt.figure(figsize=(8, 5))
118
+ plt.bar(platforms, platform_counts, color='skyblue')
119
+ plt.title("平台貼文聲量")
120
+ plt.xlabel("平台")
121
+ plt.ylabel("貼文數量")
122
+ results["img_platform_performance"] = fig_to_base64()
123
+ # 5. 候選人聲量趨勢
124
+ candidate_trend = all_df.groupby([pd.Grouper(key='Date', freq='D'), 'Candidate']).size().unstack(fill_value=0)
125
+ fig = plt.figure(figsize=(8, 5))
126
+ for c in candidates:
127
+ if c in candidate_trend.columns:
128
+ plt.plot(candidate_trend.index, candidate_trend[c], marker='o', label=c)
129
+ plt.title("候選人貼文聲量趨勢")
130
+ plt.xlabel("日期")
131
+ plt.ylabel("貼文數量")
132
+ plt.legend()
133
+ results["img_candidate_volume"] = fig_to_base64()
134
+ # 6. 候選人情緒分析
135
+ fig = plt.figure(figsize=(8, 5))
136
+ summary_plot.plot(
137
+ kind='bar', stacked=True, colormap='coolwarm', ax=fig.gca()
138
+ )
139
+ plt.title("候選人貼文情緒分析(正/負/中性)")
140
+ plt.ylabel("比例")
141
+ plt.xlabel("候選人")
142
+ plt.legend(["正面比例", "負面比例", "中性比例"])
143
+ results["img_candidate_sentiment"] = fig_to_base64()
144
+ # 7. 知識圖譜
145
+ fig, ax = plt.subplots(figsize=(8, 6))
146
+ G = nx.Graph()
147
+ for c in candidates:
148
+ G.add_node(c)
149
+ for i in range(len(candidates) - 1):
150
+ G.add_edge(candidates[i], candidates[i + 1])
151
+ nx.draw(G, nx.spring_layout(G), with_labels=True, node_color='lightgreen', font_size=12, ax=ax)
152
+ plt.title("知識圖譜")
153
+ results["img_knowledge_graph"] = fig_to_base64()
154
+ return results
155
+ # ===== 主分析函數 =====
156
+ def run_analysis():
157
+ try:
158
+ # Embed the template as a string to avoid file dependency and ensure syntax is correct
159
+ html_template = """
160
+ <!DOCTYPE html>
161
+ <html lang="zh-TW">
162
+ <head>
163
+ <meta charset="UTF-8">
164
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
165
+ <title>2026 高雄市長選舉輿情分析報告</title>
166
+ <script src="https://cdn.tailwindcss.com"></script>
167
+ </head>
168
+ <body class="bg-gray-100 font-sans leading-normal tracking-normal">
169
+ <div class="container mx-auto p-4">
170
+ <h1 class="text-3xl font-bold mb-4">2026 高雄市長選舉輿情分析報告</h1>
171
+ <p class="mb-4">報告日期: {{ report_date }}</p>
172
+
173
+ <h2 class="text-2xl font-bold mb-2">參與度摘要</h2>
174
+ {{ engagement_table | safe }}
175
+
176
+ <h2 class="text-2xl font-bold mb-2">新聞摘要</h2>
177
+ <ul class="list-disc pl-5 mb-4">
178
+ {% for key, value in news_summary %}
179
+ <li><strong>{{ key }}</strong>: {{ value }}</li>
180
+ {% endfor %}
181
+ </ul>
182
+
183
+ <h2 class="text-2xl font-bold mb-2">新聞詳情</h2>
184
+ {{ news_table | safe }}
185
+
186
+ <h2 class="text-2xl font-bold mb-2">今日情緒比例</h2>
187
+ <img src="data:image/png;base64,{{ img_b64_today }}" alt="今日情緒比例" class="mb-4">
188
+
189
+ <h2 class="text-2xl font-bold mb-2">歷史情緒趨勢</h2>
190
+ <img src="data:image/png;base64,{{ img_b64_trend }}" alt="歷史情緒趨勢" class="mb-4">
191
+
192
+ <h2 class="text-2xl font-bold mb-2">社群情緒趨勢</h2>
193
+ <img src="data:image/png;base64,{{ img_social_sentiment }}" alt="社群情緒趨勢" class="mb-4">
194
+
195
+ <h2 class="text-2xl font-bold mb-2">平台表現</h2>
196
+ <img src="data:image/png;base64,{{ img_platform_performance }}" alt="平台表現" class="mb-4">
197
+
198
+ <h2 class="text-2xl font-bold mb-2">候選人聲量趨勢</h2>
199
+ <img src="data:image/png;base64,{{ img_candidate_volume }}" alt="候選人聲量趨勢" class="mb-4">
200
+
201
+ <h2 class="text-2xl font-bold mb-2">候選人情緒分析</h2>
202
+ <img src="data:image/png;base64,{{ img_candidate_sentiment }}" alt="候選人情緒分析" class="mb-4">
203
+
204
+ <h2 class="text-2xl font-bold mb-2">知識圖譜</h2>
205
+ <img src="data:image/png;base64,{{ img_knowledge_graph }}" alt="知識圖譜" class="mb-4">
206
+ </div>
207
+ </body>
208
+ </html>
209
+ """
210
+ # --- 貼文 & 情緒分析 ---
211
+ all_df = pd.concat([fetch_tweets(c) for c in candidates], ignore_index=True)
212
+ all_df['Sentiment'] = all_df['Content'].apply(lambda x: sentiment(x)['label'])
213
+ all_df['Confidence'] = all_df['Content'].apply(lambda x: sentiment(x)['score'])
214
+ # --- 統計 ---
215
+ summary = all_df.groupby(['Candidate', 'Sentiment']).size().unstack(fill_value=0)
216
+ summary['Total Posts'] = summary.sum(axis=1)
217
+ summary['Positive Ratio'] = summary.get('positive', 0) / summary['Total Posts'].replace(0, 1)
218
+ summary['Negative Ratio'] = summary.get('negative', 0) / summary['Total Posts'].replace(0, 1)
219
+ summary['Neutral Ratio'] = summary.get('neutral', 0) / summary['Total Posts'].replace(0, 1)
220
+ # --- 歷史資料 ---
221
+ today_str = datetime.now().strftime('%Y-%m-%d')
222
+ hist_row = summary[['Positive Ratio', 'Negative Ratio', 'Neutral Ratio']].copy()
223
+ hist_row['Date'] = today_str
224
+ hist_row['Candidate'] = summary.index
225
+ df_hist = pd.concat(
226
+ [pd.read_csv(history_file), hist_row],
227
+ ignore_index=True
228
+ ) if os.path.exists(history_file) else hist_row
229
+ df_hist.to_csv(history_file, index=False)
230
+ # --- 圖表 ---
231
+ charts = generate_charts(all_df, summary, df_hist)
232
+ # --- 新聞 ---
233
+ if os.path.exists(news_file):
234
+ df_news = pd.read_csv(news_file)
235
+ news_summary = df_news.groupby('Category').size().to_dict()
236
+ news_table = df_news.to_html(index=False, classes="min-w-full border border-gray-200")
237
+ else:
238
+ news_summary = {
239
+ "民調": "柯志恩在多份民調中領先綠營候選人。",
240
+ "黨內競爭": "民進黨初選競爭激烈。",
241
+ "爭議": "林岱樺涉助理費爭議。"
242
+ }
243
+ news_table = "<p>無新聞資料</p>"
244
+ # Convert news_summary to list of tuples to support iteration in template
245
+ news_summary = list(news_summary.items())
246
+ # --- 參與表 ---
247
+ engagement_table = f"""
248
+ <table class="min-w-full bg-white border border-gray-200">
249
+ <tr class="bg-gray-100 border-b">
250
+ <th class="py-2 px-4 border-r">總參與度</th>
251
+ <td class="py-2 px-4 border-r">{len(all_df)}</td>
252
+ <th class="py-2 px-4 border-r">正面情緒比例</th>
253
+ <td class="py-2 px-4 border-r">{all_df['Sentiment'].value_counts(normalize=True).get('positive', 0):.1%}</td>
254
+ <th class="py-2 px-4 border-r">平均互動率</th>
255
+ <td class="py-2 px-4 border-r">3.9%</td>
256
+ <th class="py-2 px-4 border-r">活躍平台數</th>
257
+ <td class="py-2 px-4">{5}</td>
258
+ </tr>
259
+ </table>
260
+ """
261
+ # --- HTML 渲染 ---
262
+ template = Template(html_template)
263
+ html_content = template.render(
264
+ report_date=datetime.now().strftime('%Y-%m-%d %H:%M'),
265
+ engagement_table=engagement_table if engagement_table else "<p>未提供互動數據</p>",
266
+ news_summary=news_summary if news_summary else "<p>未提供新聞摘要</p>",
267
+ news_table=news_table if news_table else "<p>未提供新聞資料</p>",
268
+ **charts
269
+ )
270
+
271
+ return html_content
272
+ except Exception:
273
+ return f"<pre>❌ 分析失敗:\n{traceback.format_exc()}</pre>"
274
+ # ===== Gradio 前端 =====
275
+ if __name__ == "__main__":
276
+ iface = gr.Interface(
277
+ fn=run_analysis,
278
+ inputs=[],
279
+ outputs=gr.HTML(),
280
+ title="2026 高雄市長選舉輿情分析"
281
+ )
282
+ iface.launch(server_name="0.0.0.0", server_port=7860)
283
+ ``````python
284
+ import pandas as pd
285
+ import matplotlib.pyplot as plt
286
+ import io
287
+ import base64
288
+ import os
289
+ import traceback
290
+ import random
291
+ import networkx as nx
292
+ from datetime import datetime, timedelta
293
+ import gradio as gr
294
+ import logging
295
+ from jinja2 import Template
296
+ from matplotlib import font_manager
297
+ # ===== 字型與樣式 =====
298
+ # Load local SimHei font if available
299
+ simhei_path = 'SimHei.ttf' # Assuming it's .ttf; change to .tiff if needed (though .ttf is standard)
300
+ if os.path.exists(simhei_path):
301
+ font_prop = font_manager.FontProperties(fname=simhei_path)
302
+ plt.rcParams['font.family'] = 'sans-serif'
303
+ plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft JhengHei', 'Noto Sans TC', 'Arial Unicode MS']
304
+ else:
305
+ plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei', 'Noto Sans TC', 'SimHei', 'Arial Unicode MS']
306
+ plt.rcParams['axes.unicode_minus'] = False
307
+ plt.style.use("seaborn-v0_8")
308
+ # ===== 日誌 =====
309
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
310
+ # ===== 參數 =====
311
+ candidates = ["許智傑", "邱議瑩", "賴瑞隆", "林岱樺", "柯志恩"]
312
+ days_back = 7
313
+ max_tweets_per_candidate = 20
314
+ news_file = "news_sample.csv"
315
+ history_file = "history_sentiment.csv"
316
+ # ===== 情緒分析 =====
317
+ try:
318
+ from transformers import pipeline
319
+ sentiment_pipeline = pipeline(
320
+ "sentiment-analysis",
321
+ model="lxyuan/distilbert-base-multilingual-cased-sentiments-student"
322
+ )
323
+ def sentiment(text):
324
+ return sentiment_pipeline(text)[0]
325
+ except:
326
+ def sentiment(text):
327
+ return {
328
+ "label": random.choice(["positive", "negative", "neutral"]),
329
+ "score": random.uniform(0.3, 0.9)
330
+ }
331
+ # ===== 模擬貼文抓取 =====
332
+ def fetch_tweets(candidate):
333
+ sample_texts = {
334
+ "許智傑": ["許智傑積極參與地方活動", "許智傑被指政策空洞", "支持許智傑,打造高雄新未來!"],
335
+ "邱議瑩": ["邱議瑩強勢表態選市長", "邱議瑩批林岱樺", "邱議瑩推客家文化"],
336
+ "賴瑞隆": ["賴瑞隆推海洋經濟", "賴瑞隆民調領先", "賴瑞隆被質疑經驗不足"],
337
+ "林岱樺": ["林岱樺積極跑基層", "林岱樺涉助理費爭議", "林岱樺獲正國會支持"],
338
+ "柯志恩": ["柯志恩民調大幅領先", "柯志恩被批勘災缺席", "柯志恩推青年政策"]
339
+ }
340
+ return pd.DataFrame([
341
+ {
342
+ "Date": datetime.now() - timedelta(days=random.randint(0, days_back - 1)),
343
+ "User": f"user{random.randint(1, 100)}",
344
+ "Content": random.choice(sample_texts.get(candidate, [f"{candidate} 的貼文 {i}"])),
345
+ "Candidate": candidate
346
+ }
347
+ for i in range(random.randint(5, max_tweets_per_candidate))
348
+ ])
349
+ # ===== 工具: Matplotlib → base64 =====
350
+ def fig_to_base64():
351
+ buf = io.BytesIO()
352
+ plt.savefig(buf, format="png", dpi=120, bbox_inches="tight")
353
+ buf.seek(0)
354
+ img_b64 = base64.b64encode(buf.read()).decode("utf-8")
355
+ buf.close()
356
+ plt.close()
357
+ return img_b64
358
+ # ===== 多圖產生器 =====
359
+ def generate_charts(all_df, summary, df_hist):
360
+ results = {}
361
+ # 1. 每日情緒比例
362
  fig = plt.figure(figsize=(8, 5))
363
  summary[['Positive Ratio', 'Negative Ratio', 'Neutral Ratio']].plot(
364
  kind='bar', stacked=True, colormap='coolwarm', ax=fig.gca()
365
  )
366
+ plt.title("候選人每日社群情緒比例")
367
+ plt.ylabel("比例")
368
+ plt.xlabel("候選人")
369
+ plt.legend(["正面", "負面", "中性"])
370
  results["img_b64_today"] = fig_to_base64()
371
+ # 2. 歷史情緒趨勢
372
  fig = plt.figure(figsize=(10, 5))
373
  for c in candidates:
374
  temp = df_hist[df_hist['Candidate'] == c]
375
  if not temp.empty:
376
+ plt.plot(temp['Date'], temp['Positive Ratio'], marker='o', label=f"{c} 正面")
377
+ plt.plot(temp['Date'], temp['Negative Ratio'], marker='x', label=f"{c} 負面")
378
+ plt.plot(temp['Date'], temp['Neutral Ratio'], marker='s', label=f"{c} 中性")
379
+ plt.title("候選人歷史情緒趨勢")
380
  plt.xticks(rotation=45)
381
+ plt.ylabel("比例")
382
+ plt.xlabel("日期")
383
  plt.legend()
384
  results["img_b64_trend"] = fig_to_base64()
385
+ # 3. 社群情緒趨勢
386
  sentiment_trend = all_df.groupby([pd.Grouper(key='Date', freq='D'), 'Sentiment']).size().unstack(fill_value=0)
387
  sentiment_trend = sentiment_trend.div(sentiment_trend.sum(axis=1), axis=0).fillna(0)
388
  fig = plt.figure(figsize=(8, 5))
389
  for s in ['positive', 'negative', 'neutral']:
390
  if s in sentiment_trend.columns:
391
+ plt.plot(sentiment_trend.index, sentiment_trend[s], marker='o', label={'positive':'正面', 'negative':'負面', 'neutral':'中性'}[s])
392
+ plt.title("社群情緒趨勢")
393
+ plt.xlabel("日期")
394
+ plt.ylabel("比例")
395
  plt.legend()
396
  results["img_social_sentiment"] = fig_to_base64()
397
+ # 4. 平台表現
398
  platforms = ["X", "Facebook", "Instagram", "PTT", "Line"]
399
  platform_counts = pd.Series({p: random.randint(10, 100) for p in platforms})
400
  fig = plt.figure(figsize=(8, 5))
401
  plt.bar(platforms, platform_counts, color='skyblue')
402
+ plt.title("平台貼文聲量")
403
+ plt.xlabel("平台")
404
+ plt.ylabel("貼文數量")
405
  results["img_platform_performance"] = fig_to_base64()
406
+ # 5. 候選人聲量趨勢
407
  candidate_trend = all_df.groupby([pd.Grouper(key='Date', freq='D'), 'Candidate']).size().unstack(fill_value=0)
408
  fig = plt.figure(figsize=(8, 5))
409
  for c in candidates:
410
  if c in candidate_trend.columns:
411
  plt.plot(candidate_trend.index, candidate_trend[c], marker='o', label=c)
412
+ plt.title("候選人貼文聲量趨勢")
413
+ plt.xlabel("日期")
414
+ plt.ylabel("貼文數量")
415
  plt.legend()
416
  results["img_candidate_volume"] = fig_to_base64()
417
+ # 6. 候選人情緒分析
418
  fig = plt.figure(figsize=(8, 5))
419
  summary[['Positive Ratio', 'Negative Ratio', 'Neutral Ratio']].plot(
420
  kind='bar', stacked=True, colormap='coolwarm', ax=fig.gca()
421
  )
422
+ plt.title("候選人貼文情緒分析(正/負/中性)")
423
+ plt.ylabel("比例")
424
+ plt.xlabel("候選人")
425
+ plt.legend(["正面", "負面", "中性"])
426
  results["img_candidate_sentiment"] = fig_to_base64()
427
+ # 7. 知識圖譜
428
  fig, ax = plt.subplots(figsize=(8, 6))
429
  G = nx.Graph()
430
  for c in candidates:
 
432
  for i in range(len(candidates) - 1):
433
  G.add_edge(candidates[i], candidates[i + 1])
434
  nx.draw(G, nx.spring_layout(G), with_labels=True, node_color='lightgreen', font_size=12, ax=ax)
435
+ plt.title("候選人知識圖譜")
436
  results["img_knowledge_graph"] = fig_to_base64()
437
  return results
438
+ # ===== 主分析函數 =====
439
  def run_analysis():
440
  try:
441
  # Embed the template as a string to avoid file dependency and ensure syntax is correct
 
445
  <head>
446
  <meta charset="UTF-8">
447
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
448
+ <title>2026 高雄市長選舉輿情分析報告</title>
449
  <script src="https://cdn.tailwindcss.com"></script>
450
  </head>
451
  <body class="bg-gray-100 font-sans leading-normal tracking-normal">
452
  <div class="container mx-auto p-4">
453
+ <h1 class="text-3xl font-bold mb-4">2026 高雄市長選舉輿情分析報告</h1>
454
+ <p class="mb-4">報告日期: {{ report_date }}</p>
455
 
456
+ <h2 class="text-2xl font-bold mb-2">參與度摘要</h2>
457
  {{ engagement_table | safe }}
458
 
459
+ <h2 class="text-2xl font-bold mb-2">新聞摘要</h2>
460
  <ul class="list-disc pl-5 mb-4">
461
  {% for key, value in news_summary %}
462
  <li><strong>{{ key }}</strong>: {{ value }}</li>
463
  {% endfor %}
464
  </ul>
465
 
466
+ <h2 class="text-2xl font-bold mb-2">新聞詳情</h2>
467
  {{ news_table | safe }}
468
 
469
+ <h2 class="text-2xl font-bold mb-2">今日情緒比例</h2>
470
+ <img src="data:image/png;base64,{{ img_b64_today }}" alt="今日情緒比例" class="mb-4">
471
 
472
+ <h2 class="text-2xl font-bold mb-2">歷史情緒趨勢</h2>
473
+ <img src="data:image/png;base64,{{ img_b64_trend }}" alt="歷史情緒趨勢" class="mb-4">
474
 
475
+ <h2 class="text-2xl font-bold mb-2">社群情緒趨勢</h2>
476
+ <img src="data:image/png;base64,{{ img_social_sentiment }}" alt="社群情緒趨勢" class="mb-4">
477
 
478
+ <h2 class="text-2xl font-bold mb-2">平台表現</h2>
479
+ <img src="data:image/png;base64,{{ img_platform_performance }}" alt="平台表現" class="mb-4">
480
 
481
+ <h2 class="text-2xl font-bold mb-2">候選人聲量趨勢</h2>
482
+ <img src="data:image/png;base64,{{ img_candidate_volume }}" alt="候選人聲量趨勢" class="mb-4">
483
 
484
+ <h2 class="text-2xl font-bold mb-2">候選人情緒分析</h2>
485
+ <img src="data:image/png;base64,{{ img_candidate_sentiment }}" alt="候選人情緒分析" class="mb-4">
486
 
487
+ <h2 class="text-2xl font-bold mb-2">知識圖譜</h2>
488
+ <img src="data:image/png;base64,{{ img_knowledge_graph }}" alt="知識圖譜" class="mb-4">
489
  </div>
490
  </body>
491
  </html>
492
  """
493
+ # --- 貼文 & 情緒分析 ---
494
  all_df = pd.concat([fetch_tweets(c) for c in candidates], ignore_index=True)
495
  all_df['Sentiment'] = all_df['Content'].apply(lambda x: sentiment(x)['label'])
496
  all_df['Confidence'] = all_df['Content'].apply(lambda x: sentiment(x)['score'])
497
+ # --- 統計 ---
498
  summary = all_df.groupby(['Candidate', 'Sentiment']).size().unstack(fill_value=0)
499
  summary['Total Posts'] = summary.sum(axis=1)
500
  summary['Positive Ratio'] = summary.get('positive', 0) / summary['Total Posts'].replace(0, 1)
501
  summary['Negative Ratio'] = summary.get('negative', 0) / summary['Total Posts'].replace(0, 1)
502
  summary['Neutral Ratio'] = summary.get('neutral', 0) / summary['Total Posts'].replace(0, 1)
503
+ # --- 歷史資料 ---
504
  today_str = datetime.now().strftime('%Y-%m-%d')
505
  hist_row = summary[['Positive Ratio', 'Negative Ratio', 'Neutral Ratio']].copy()
506
  hist_row['Date'] = today_str
 
510
  ignore_index=True
511
  ) if os.path.exists(history_file) else hist_row
512
  df_hist.to_csv(history_file, index=False)
513
+ # --- 圖表 ---
514
  charts = generate_charts(all_df, summary, df_hist)
515
+ # --- 新聞 ---
516
  if os.path.exists(news_file):
517
  df_news = pd.read_csv(news_file)
518
  news_summary = df_news.groupby('Category').size().to_dict()
519
  news_table = df_news.to_html(index=False, classes="min-w-full border border-gray-200")
520
  else:
521
  news_summary = {
522
+ "民調": "柯志恩在多份民調中領先綠營候選人。",
523
+ "黨內競爭": "民進黨初選競爭激烈。",
524
+ "爭議": "林岱樺涉助理費爭議。"
525
  }
526
+ news_table = "<p>無新聞資料</p>"
527
  # Convert news_summary to list of tuples to support iteration in template
528
  news_summary = list(news_summary.items())
529
+ # --- 參與表 ---
530
  engagement_table = f"""
531
  <table class="min-w-full bg-white border border-gray-200">
532
  <tr class="bg-gray-100 border-b">
533
+ <th class="py-2 px-4 border-r">總參與度</th>
534
  <td class="py-2 px-4 border-r">{len(all_df)}</td>
535
+ <th class="py-2 px-4 border-r">正面情緒比例</th>
536
  <td class="py-2 px-4 border-r">{all_df['Sentiment'].value_counts(normalize=True).get('positive', 0):.1%}</td>
537
+ <th class="py-2 px-4 border-r">平均互動率</th>
538
  <td class="py-2 px-4 border-r">3.9%</td>
539
+ <th class="py-2 px-4 border-r">活躍平台數</th>
540
  <td class="py-2 px-4">{5}</td>
541
  </tr>
542
  </table>
543
  """
544
+ # --- HTML 渲染 ---
545
  template = Template(html_template)
546
  html_content = template.render(
547
  report_date=datetime.now().strftime('%Y-%m-%d %H:%M'),
548
+ engagement_table=engagement_table if engagement_table else "<p>未提供互動數據</p>",
549
+ news_summary=news_summary if news_summary else "<p>未提供新聞摘要</p>",
550
+ news_table=news_table if news_table else "<p>未提供新聞資料</p>",
551
  **charts
552
  )
553
 
554
  return html_content
555
  except Exception:
556
+ return f"<pre>❌ 分析失敗:\n{traceback.format_exc()}</pre>"
557
+ # ===== Gradio 前端 =====
558
  if __name__ == "__main__":
559
  iface = gr.Interface(
560
  fn=run_analysis,
561
  inputs=[],
562
  outputs=gr.HTML(),
563
+ title="2026 高雄市長選舉輿情分析"
564
  )
565
  iface.launch(server_name="0.0.0.0", server_port=7860)