shiue2000 committed on
Commit
86cc189
·
verified ·
1 Parent(s): c03fc1f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -65
app.py CHANGED
@@ -13,12 +13,12 @@ import logging
13
  # 設置日誌
14
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
15
 
16
- # 字體設置 (使用標準字體以確保英文顯示)
17
- plt.rcParams['font.sans-serif'] = ['Arial', 'DejaVu Sans']
18
  plt.rcParams['axes.unicode_minus'] = False
19
 
20
  # 參數設定
21
- candidates = ["Xu Zhijie", "Qiu Yiying", "Lai Ruilong", "Lin Daihua", "Ke Zhien"] # English transliteration
22
  days_back = 7
23
  max_tweets_per_candidate = 20
24
  news_file = "news_sample.csv"
@@ -28,22 +28,29 @@ history_file = "history_sentiment.csv"
28
  try:
29
  from transformers import pipeline
30
  sentiment_pipeline = pipeline("sentiment-analysis", model="lxyuan/distilbert-base-multilingual-cased-sentiments-student")
31
- def sentiment(text):
32
- logging.info(f"Performing sentiment analysis on text: {text[:50]}...")
33
  return sentiment_pipeline(text)[0]
34
  except:
35
  def sentiment(text):
36
- logging.warning("Sentiment model failed to load, using random fallback.")
37
- return {"label": random.choice(["positive", "negative"]), "score": 0.5}
38
 
39
- # 模擬抓貼文
40
  def fetch_tweets(candidate):
41
- logging.info(f"Fetching tweets for candidate: {candidate}")
 
 
 
 
 
 
 
42
  return pd.DataFrame([
43
  {
44
  "Date": datetime.now() - timedelta(days=random.randint(0, days_back - 1)),
45
  "User": f"user{random.randint(1, 100)}",
46
- "Content": f"{candidate}'s post {i}",
47
  "Candidate": candidate
48
  } for i in range(random.randint(5, max_tweets_per_candidate))
49
  ])
@@ -64,11 +71,11 @@ def run_analysis():
64
  # 檢查模板檔案
65
  template_path = "templates/index.html"
66
  if not os.path.exists(template_path):
67
- logging.error(f"Template file {template_path} not found.")
68
- return f"<pre>❌ Template file {template_path} not found</pre>"
69
 
70
- # 抓貼文與情緒分析
71
- logging.info("Fetching and analyzing tweets...")
72
  all_df = pd.concat([fetch_tweets(c) for c in candidates], ignore_index=True)
73
  all_df['Sentiment'] = all_df['Content'].apply(lambda x: sentiment(x)['label'])
74
  all_df['Confidence'] = all_df['Content'].apply(lambda x: sentiment(x)['score'])
@@ -78,80 +85,82 @@ def run_analysis():
78
  summary['Total Posts'] = summary.sum(axis=1)
79
  summary['Positive Ratio'] = summary.get('positive', 0) / summary['Total Posts'].replace(0, 1)
80
  summary['Negative Ratio'] = summary.get('negative', 0) / summary['Total Posts'].replace(0, 1)
 
81
 
82
  # 更新歷史資料
83
  today_str = datetime.now().strftime('%Y-%m-%d')
84
- hist_row = summary[['Positive Ratio', 'Negative Ratio']].copy()
85
  hist_row['Date'] = today_str
86
  hist_row['Candidate'] = summary.index
87
  df_hist = pd.concat([pd.read_csv(history_file), hist_row], ignore_index=True) if os.path.exists(history_file) else hist_row
88
  df_hist.to_csv(history_file, index=False)
89
 
90
  # 圖表生成
91
- # 1. Daily Sentiment Proportion
92
  fig1 = plt.figure(figsize=(8, 5))
93
- summary[['Positive Ratio', 'Negative Ratio']].plot(kind='bar', stacked=True, colormap='coolwarm', ax=fig1.gca())
94
- fig1.gca().set_title("Candidate Daily Social Sentiment Proportion")
95
- fig1.gca().set_ylabel("Proportion")
96
- fig1.gca().set_xlabel("Candidate")
97
  img_b64_today = plot_to_base64(fig1)
98
 
99
- # 2. Historical Sentiment Trend
100
  fig2 = plt.figure(figsize=(10, 5))
101
  for c in candidates:
102
  temp = df_hist[df_hist['Candidate'] == c]
103
- plt.plot(temp['Date'], temp['Positive Ratio'], marker='o', label=f"{c} Positive")
104
- plt.plot(temp['Date'], temp['Negative Ratio'], marker='x', label=f"{c} Negative")
105
- plt.title("Candidate Historical Sentiment Trend")
 
106
  plt.xticks(rotation=45)
107
- plt.ylabel("Proportion")
108
  plt.legend()
109
  img_b64_trend = plot_to_base64(fig2)
110
 
111
- # 3. Social Sentiment Trend
112
  sentiment_trend = all_df.groupby([pd.Grouper(key='Date', freq='D'), 'Sentiment']).size().unstack(fill_value=0)
113
  sentiment_trend = sentiment_trend.div(sentiment_trend.sum(axis=1), axis=0).fillna(0)
114
  fig3 = plt.figure(figsize=(8, 5))
115
- for s in ['positive', 'negative']:
116
  if s in sentiment_trend.columns:
117
  plt.plot(sentiment_trend.index, sentiment_trend[s], marker='o', label=s.capitalize())
118
- plt.title("Social Sentiment Trend")
119
- plt.xlabel("Date")
120
- plt.ylabel("Proportion")
121
  plt.legend()
122
  img_social_sentiment = plot_to_base64(fig3)
123
 
124
- # 4. Platform Performance
125
  platforms = ["X", "Facebook", "Instagram", "PTT", "Line"]
126
  platform_counts = pd.Series({p: random.randint(10, 100) for p in platforms})
127
  fig4 = plt.figure(figsize=(8, 5))
128
  plt.bar(platforms, platform_counts, color='skyblue')
129
- plt.title("Platform Post Volume")
130
- plt.xlabel("Platform")
131
- plt.ylabel("Post Count")
132
  img_platform_performance = plot_to_base64(fig4)
133
 
134
- # 5. Candidate Post Volume Trend
135
  candidate_trend = all_df.groupby([pd.Grouper(key='Date', freq='D'), 'Candidate']).size().unstack(fill_value=0)
136
  fig5 = plt.figure(figsize=(8, 5))
137
  for c in candidates:
138
  if c in candidate_trend.columns:
139
  plt.plot(candidate_trend.index, candidate_trend[c], marker='o', label=c)
140
- plt.title("Candidate Post Volume Trend")
141
- plt.xlabel("Date")
142
- plt.ylabel("Post Count")
143
  plt.legend()
144
  img_candidate_volume = plot_to_base64(fig5)
145
 
146
- # 6. Candidate Sentiment Analysis
147
  fig6 = plt.figure(figsize=(8, 5))
148
- summary[['Positive Ratio', 'Negative Ratio']].plot(kind='bar', stacked=True, colormap='coolwarm', ax=fig6.gca())
149
- fig6.gca().set_title("Candidate Post Sentiment Analysis (Positive/Negative)")
150
- fig6.gca().set_ylabel("Proportion")
151
- fig6.gca().set_xlabel("Candidate")
152
  img_candidate_sentiment = plot_to_base64(fig6)
153
 
154
- # 7. Knowledge Graph
155
  fig7, ax7 = plt.subplots(figsize=(8, 6))
156
  G = nx.Graph()
157
  for c in candidates:
@@ -164,32 +173,37 @@ def run_analysis():
164
  # 新聞資料
165
  if os.path.exists(news_file):
166
  df_news = pd.read_csv(news_file)
167
- news_summary = df_news.groupby('Category').size().to_dict() # Assuming 'Category' is English
168
  news_table = df_news.to_html(index=False, classes="min-w-full border border-gray-200")
169
  else:
170
- news_summary = {}
171
- news_table = "<p>No news data provided</p>"
 
 
 
 
172
 
173
  # 社群參與表格
174
  engagement_table = f"""
175
  <table class="min-w-full bg-white border border-gray-200">
176
- <tr class="bg-gray-100 border-b">
177
- <th class="py-2 px-4 border-r">Total Engagement</th>
178
- <td class="py-2 px-4 border-r">{len(all_df)}</td>
179
- <th class="py-2 px-4 border-r">Positive Sentiment Proportion</th>
180
- <td class="py-2 px-4 border-r">{all_df['Sentiment'].value_counts(normalize=True).get('positive', 0):.1%}</td>
181
- <th class="py-2 px-4 border-r">Average Interaction Rate</th>
182
- <td class="py-2 px-4 border-r">3.9%</td>
183
- <th class="py-2 px-4 border-r">Active Platforms</th>
184
- <td class="py-2 px-4">{len(platforms)}</td>
185
- </tr></table>
 
186
  """
187
 
188
- # HTML template
189
- logging.info(f"Loading template from {template_path}")
190
  with open(template_path, encoding='utf-8') as f:
191
  html_template = f.read()
192
- logging.info("Formatting HTML template...")
193
  html_content = html_template.format(
194
  report_date=datetime.now().strftime('%Y-%m-%d %H:%M'),
195
  img_b64_today=img_b64_today,
@@ -203,14 +217,20 @@ def run_analysis():
203
  news_summary=news_summary,
204
  news_table=news_table
205
  )
206
- logging.info("HTML content generated successfully.")
207
  return html_content
 
208
  except Exception as e:
209
- logging.error(f"Analysis failed: {str(e)}")
210
- return f"<pre>❌ Analysis failed:\n{traceback.format_exc()}</pre>"
211
 
212
  # Gradio 前端
213
  if __name__ == "__main__":
214
- logging.info("Starting Gradio interface...")
215
- iface = gr.Interface(fn=run_analysis, inputs=[], outputs=gr.HTML(), title="Kaohsiung Mayoral Election Sentiment Analysis")
 
 
 
 
 
216
  iface.launch(server_name="0.0.0.0", server_port=7860)
 
13
  # 設置日誌
14
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
15
 
16
+ # 字體設置 (使用繁體中文支援字體)
17
+ plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei', 'Arial', 'DejaVu Sans']
18
  plt.rcParams['axes.unicode_minus'] = False
19
 
20
  # 參數設定
21
+ candidates = ["許智傑", "邱議瑩", "賴瑞隆", "林岱樺", "柯志恩"]
22
  days_back = 7
23
  max_tweets_per_candidate = 20
24
  news_file = "news_sample.csv"
 
28
  try:
29
  from transformers import pipeline
30
  sentiment_pipeline = pipeline("sentiment-analysis", model="lxyuan/distilbert-base-multilingual-cased-sentiments-student")
31
+ def sentiment(text):
32
+ logging.info(f"正在對文字進行情緒分析: {text[:50]}...")
33
  return sentiment_pipeline(text)[0]
34
  except:
35
  def sentiment(text):
36
+ logging.warning("情緒分析模型載入失敗,使用隨機備用方案。")
37
+ return {"label": random.choice(["positive", "negative", "neutral"]), "score": random.uniform(0.3, 0.9)}
38
 
39
+ # 模擬抓取 X 貼文
40
  def fetch_tweets(candidate):
41
+ logging.info(f"正在為候選人抓取貼文: {candidate}")
42
+ sample_texts = {
43
+ "許智傑": ["許智傑積極參與地方活動,親民形象受好評!", "許智傑被指政策空洞,民眾不滿", "支持許智傑,打造高雄新未來!"],
44
+ "邱議瑩": ["邱議瑩強勢表態選市長,展現領導力", "邱議瑩批林岱樺,黨內競爭加劇", "邱議瑩推客家文化,獲基層支持"],
45
+ "賴瑞隆": ["賴瑞隆推海洋經濟,展現專業", "賴瑞隆民調領先,陳菊子弟兵受矚目", "賴瑞隆被質疑行政經驗不足"],
46
+ "林岱樺": ["林岱樺積極跑基層,民調領先!", "林岱樺涉助理費爭議,形象受損", "林岱樺獲正國會支持,選情看好"],
47
+ "柯志恩": ["柯志恩民調大幅領先綠營,藍營看好", "柯志恩被批勘災缺席,引發爭議", "柯志恩推青年政策,吸引年輕選民"]
48
+ }
49
  return pd.DataFrame([
50
  {
51
  "Date": datetime.now() - timedelta(days=random.randint(0, days_back - 1)),
52
  "User": f"user{random.randint(1, 100)}",
53
+ "Content": random.choice(sample_texts.get(candidate, [f"{candidate} 的貼文 {i}"])),
54
  "Candidate": candidate
55
  } for i in range(random.randint(5, max_tweets_per_candidate))
56
  ])
 
71
  # 檢查模板檔案
72
  template_path = "templates/index.html"
73
  if not os.path.exists(template_path):
74
+ logging.error(f"模板檔案 {template_path} 未找到。")
75
+ return f"<pre>❌ 模板檔案 {template_path} 未找到</pre>"
76
 
77
+ # 抓取貼文與情緒分析
78
+ logging.info("正在抓取並分析貼文...")
79
  all_df = pd.concat([fetch_tweets(c) for c in candidates], ignore_index=True)
80
  all_df['Sentiment'] = all_df['Content'].apply(lambda x: sentiment(x)['label'])
81
  all_df['Confidence'] = all_df['Content'].apply(lambda x: sentiment(x)['score'])
 
85
  summary['Total Posts'] = summary.sum(axis=1)
86
  summary['Positive Ratio'] = summary.get('positive', 0) / summary['Total Posts'].replace(0, 1)
87
  summary['Negative Ratio'] = summary.get('negative', 0) / summary['Total Posts'].replace(0, 1)
88
+ summary['Neutral Ratio'] = summary.get('neutral', 0) / summary['Total Posts'].replace(0, 1)
89
 
90
  # 更新歷史資料
91
  today_str = datetime.now().strftime('%Y-%m-%d')
92
+ hist_row = summary[['Positive Ratio', 'Negative Ratio', 'Neutral Ratio']].copy()
93
  hist_row['Date'] = today_str
94
  hist_row['Candidate'] = summary.index
95
  df_hist = pd.concat([pd.read_csv(history_file), hist_row], ignore_index=True) if os.path.exists(history_file) else hist_row
96
  df_hist.to_csv(history_file, index=False)
97
 
98
  # 圖表生成
99
+ # 1. 每日情緒比例
100
  fig1 = plt.figure(figsize=(8, 5))
101
+ summary[['Positive Ratio', 'Negative Ratio', 'Neutral Ratio']].plot(kind='bar', stacked=True, colormap='coolwarm', ax=fig1.gca())
102
+ fig1.gca().set_title("候選人每日社群情緒比例")
103
+ fig1.gca().set_ylabel("比例")
104
+ fig1.gca().set_xlabel("候選人")
105
  img_b64_today = plot_to_base64(fig1)
106
 
107
+ # 2. 歷史情緒趨勢
108
  fig2 = plt.figure(figsize=(10, 5))
109
  for c in candidates:
110
  temp = df_hist[df_hist['Candidate'] == c]
111
+ plt.plot(temp['Date'], temp['Positive Ratio'], marker='o', label=f"{c} 正面")
112
+ plt.plot(temp['Date'], temp['Negative Ratio'], marker='x', label=f"{c} 負面")
113
+ plt.plot(temp['Date'], temp['Neutral Ratio'], marker='s', label=f"{c} 中性")
114
+ plt.title("候選人歷史情緒趨勢")
115
  plt.xticks(rotation=45)
116
+ plt.ylabel("比例")
117
  plt.legend()
118
  img_b64_trend = plot_to_base64(fig2)
119
 
120
+ # 3. 社群情緒趨勢
121
  sentiment_trend = all_df.groupby([pd.Grouper(key='Date', freq='D'), 'Sentiment']).size().unstack(fill_value=0)
122
  sentiment_trend = sentiment_trend.div(sentiment_trend.sum(axis=1), axis=0).fillna(0)
123
  fig3 = plt.figure(figsize=(8, 5))
124
+ for s in ['positive', 'negative', 'neutral']:
125
  if s in sentiment_trend.columns:
126
  plt.plot(sentiment_trend.index, sentiment_trend[s], marker='o', label=s.capitalize())
127
+ plt.title("社群情緒趨勢")
128
+ plt.xlabel("日期")
129
+ plt.ylabel("比例")
130
  plt.legend()
131
  img_social_sentiment = plot_to_base64(fig3)
132
 
133
+ # 4. 平台聲量表現
134
  platforms = ["X", "Facebook", "Instagram", "PTT", "Line"]
135
  platform_counts = pd.Series({p: random.randint(10, 100) for p in platforms})
136
  fig4 = plt.figure(figsize=(8, 5))
137
  plt.bar(platforms, platform_counts, color='skyblue')
138
+ plt.title("平台貼文聲量")
139
+ plt.xlabel("平台")
140
+ plt.ylabel("貼文數量")
141
  img_platform_performance = plot_to_base64(fig4)
142
 
143
+ # 5. 候選人貼文聲量趨勢
144
  candidate_trend = all_df.groupby([pd.Grouper(key='Date', freq='D'), 'Candidate']).size().unstack(fill_value=0)
145
  fig5 = plt.figure(figsize=(8, 5))
146
  for c in candidates:
147
  if c in candidate_trend.columns:
148
  plt.plot(candidate_trend.index, candidate_trend[c], marker='o', label=c)
149
+ plt.title("候選人貼文聲量趨勢")
150
+ plt.xlabel("日期")
151
+ plt.ylabel("貼文數量")
152
  plt.legend()
153
  img_candidate_volume = plot_to_base64(fig5)
154
 
155
+ # 6. 候選人情緒分析
156
  fig6 = plt.figure(figsize=(8, 5))
157
+ summary[['Positive Ratio', 'Negative Ratio', 'Neutral Ratio']].plot(kind='bar', stacked=True, colormap='coolwarm', ax=fig6.gca())
158
+ fig6.gca().set_title("候選人貼文情緒分析(正面/負面/中性)")
159
+ fig6.gca().set_ylabel("比例")
160
+ fig6.gca().set_xlabel("候選人")
161
  img_candidate_sentiment = plot_to_base64(fig6)
162
 
163
+ # 7. 知識圖譜
164
  fig7, ax7 = plt.subplots(figsize=(8, 6))
165
  G = nx.Graph()
166
  for c in candidates:
 
173
  # 新聞資料
174
  if os.path.exists(news_file):
175
  df_news = pd.read_csv(news_file)
176
+ news_summary = df_news.groupby('Category').size().to_dict()
177
  news_table = df_news.to_html(index=False, classes="min-w-full border border-gray-200")
178
  else:
179
+ news_summary = {
180
+ "民調": "柯志恩在多份民調中領先綠營候選人,差距5-23%。",
181
+ "黨內競爭": "民進黨初選競爭激烈,邱議瑩、林岱樺、賴瑞隆、許智傑四人角逐。",
182
+ "爭議": "林岱樺涉助理費爭議,許銘春因職場霸凌案轉低調。"
183
+ }
184
+ news_table = "<p>無新聞資料,僅提供模擬摘要</p>"
185
 
186
  # 社群參與表格
187
  engagement_table = f"""
188
  <table class="min-w-full bg-white border border-gray-200">
189
+ <tr class="bg-gray-100 border-b">
190
+ <th class="py-2 px-4 border-r">總參與度</th>
191
+ <td class="py-2 px-4 border-r">{len(all_df)}</td>
192
+ <th class="py-2 px-4 border-r">正面情緒比例</th>
193
+ <td class="py-2 px-4 border-r">{all_df['Sentiment'].value_counts(normalize=True).get('positive', 0):.1%}</td>
194
+ <th class="py-2 px-4 border-r">平均互動率</th>
195
+ <td class="py-2 px-4 border-r">3.9%</td>
196
+ <th class="py-2 px-4 border-r">活躍平台數</th>
197
+ <td class="py-2 px-4">{len(platforms)}</td>
198
+ </tr>
199
+ </table>
200
  """
201
 
202
+ # HTML 模板
203
+ logging.info(f"正在從 {template_path} 載入模板...")
204
  with open(template_path, encoding='utf-8') as f:
205
  html_template = f.read()
206
+ logging.info("正在格式化 HTML 模板...")
207
  html_content = html_template.format(
208
  report_date=datetime.now().strftime('%Y-%m-%d %H:%M'),
209
  img_b64_today=img_b64_today,
 
217
  news_summary=news_summary,
218
  news_table=news_table
219
  )
220
+ logging.info("HTML 內容生成成功。")
221
  return html_content
222
+
223
  except Exception as e:
224
+ logging.error(f"分析失敗: {str(e)}")
225
+ return f"<pre>❌ 分析失敗:\n{traceback.format_exc()}</pre>"
226
 
227
  # Gradio 前端
228
  if __name__ == "__main__":
229
+ logging.info("正在啟動 Gradio 介面...")
230
+ iface = gr.Interface(
231
+ fn=run_analysis,
232
+ inputs=[],
233
+ outputs=gr.HTML(),
234
+ title="2026 高雄市長選舉輿情分析"
235
+ )
236
  iface.launch(server_name="0.0.0.0", server_port=7860)