shiue2000 committed on
Commit
c03fc1f
·
verified ·
1 Parent(s): a761827

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -71
app.py CHANGED
@@ -9,29 +9,16 @@ import networkx as nx
9
  from datetime import datetime, timedelta
10
  import gradio as gr
11
  import logging
12
- import matplotlib.font_manager as fm
13
 
14
  # 設置日誌
15
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
16
 
17
- # 中文顯示設置
18
- font_list = ['Microsoft JhengHei', 'SimHei', 'Arial Unicode MS', 'DejaVu Sans']
19
- available_fonts = [f.name for f in fm.fontManager.ttflist]
20
- selected_font = None
21
- for font in font_list:
22
- if font in available_fonts:
23
- selected_font = font
24
- break
25
-
26
- if selected_font:
27
- logging.info(f"Using font: {selected_font}")
28
- plt.rcParams['font.sans-serif'] = [selected_font]
29
- else:
30
- logging.warning("No specified Chinese fonts found. Falling back to default. Chinese text may not render correctly.")
31
  plt.rcParams['axes.unicode_minus'] = False
32
 
33
  # 參數設定
34
- candidates = ["許智傑", "邱議瑩", "賴瑞隆", "林岱樺", "柯志恩"]
35
  days_back = 7
36
  max_tweets_per_candidate = 20
37
  news_file = "news_sample.csv"
@@ -54,10 +41,10 @@ def fetch_tweets(candidate):
54
  logging.info(f"Fetching tweets for candidate: {candidate}")
55
  return pd.DataFrame([
56
  {
57
- "日期": datetime.now() - timedelta(days=random.randint(0, days_back - 1)),
58
- "使用者": f"user{random.randint(1, 100)}",
59
- "內容": f"{candidate} 的貼文 {i}",
60
- "候選人": candidate
61
  } for i in range(random.randint(5, max_tweets_per_candidate))
62
  ])
63
 
@@ -78,93 +65,93 @@ def run_analysis():
78
  template_path = "templates/index.html"
79
  if not os.path.exists(template_path):
80
  logging.error(f"Template file {template_path} not found.")
81
- return f"<pre>❌ 模板檔案 {template_path} 不存在</pre>"
82
 
83
  # 抓貼文與情緒分析
84
  logging.info("Fetching and analyzing tweets...")
85
  all_df = pd.concat([fetch_tweets(c) for c in candidates], ignore_index=True)
86
- all_df['情緒'] = all_df['內容'].apply(lambda x: sentiment(x)['label'])
87
- all_df['信心度'] = all_df['內容'].apply(lambda x: sentiment(x)['score'])
88
 
89
  # 統計每日情緒
90
- summary = all_df.groupby(['候選人', '情緒']).size().unstack(fill_value=0)
91
- summary['總貼文'] = summary.sum(axis=1)
92
- summary['正面比率'] = summary.get('positive', 0) / summary['總貼文'].replace(0, 1)
93
- summary['負面比率'] = summary.get('negative', 0) / summary['總貼文'].replace(0, 1)
94
 
95
  # 更新歷史資料
96
  today_str = datetime.now().strftime('%Y-%m-%d')
97
- hist_row = summary[['正面比率', '負面比率']].copy()
98
- hist_row['日期'] = today_str
99
- hist_row['候選人'] = summary.index
100
  df_hist = pd.concat([pd.read_csv(history_file), hist_row], ignore_index=True) if os.path.exists(history_file) else hist_row
101
  df_hist.to_csv(history_file, index=False)
102
 
103
  # 圖表生成
104
- # 1. 當日情緒比例
105
  fig1 = plt.figure(figsize=(8, 5))
106
- summary[['正面比率', '負面比率']].plot(kind='bar', stacked=True, colormap='coolwarm', ax=fig1.gca())
107
- fig1.gca().set_title("候選人當日社群情緒比例")
108
- fig1.gca().set_ylabel("比例")
109
- fig1.gca().set_xlabel("候選人")
110
  img_b64_today = plot_to_base64(fig1)
111
 
112
- # 2. 歷史情緒趨勢
113
  fig2 = plt.figure(figsize=(10, 5))
114
  for c in candidates:
115
- temp = df_hist[df_hist['候選人'] == c]
116
- plt.plot(temp['日期'], temp['正面比率'], marker='o', label=f"{c} 正面")
117
- plt.plot(temp['日期'], temp['負面比率'], marker='x', label=f"{c} 負面")
118
- plt.title("候選人歷史情緒趨勢")
119
  plt.xticks(rotation=45)
120
- plt.ylabel("比例")
121
  plt.legend()
122
  img_b64_trend = plot_to_base64(fig2)
123
 
124
- # 3. 社群情感趨勢
125
- sentiment_trend = all_df.groupby([pd.Grouper(key='日期', freq='D'), '情緒']).size().unstack(fill_value=0)
126
  sentiment_trend = sentiment_trend.div(sentiment_trend.sum(axis=1), axis=0).fillna(0)
127
  fig3 = plt.figure(figsize=(8, 5))
128
  for s in ['positive', 'negative']:
129
  if s in sentiment_trend.columns:
130
- plt.plot(sentiment_trend.index, sentiment_trend[s], marker='o', label=s)
131
- plt.title("社群情感趨勢")
132
- plt.xlabel("日期")
133
- plt.ylabel("比例")
134
  plt.legend()
135
  img_social_sentiment = plot_to_base64(fig3)
136
 
137
- # 4. 各平台表現
138
  platforms = ["X", "Facebook", "Instagram", "PTT", "Line"]
139
  platform_counts = pd.Series({p: random.randint(10, 100) for p in platforms})
140
  fig4 = plt.figure(figsize=(8, 5))
141
  plt.bar(platforms, platform_counts, color='skyblue')
142
- plt.title("各平台貼文量")
143
- plt.xlabel("平台")
144
- plt.ylabel("貼文數量")
145
  img_platform_performance = plot_to_base64(fig4)
146
 
147
- # 5. 候選人社群量趨勢
148
- candidate_trend = all_df.groupby([pd.Grouper(key='日期', freq='D'), '候選人']).size().unstack(fill_value=0)
149
  fig5 = plt.figure(figsize=(8, 5))
150
  for c in candidates:
151
  if c in candidate_trend.columns:
152
  plt.plot(candidate_trend.index, candidate_trend[c], marker='o', label=c)
153
- plt.title("候選人社群量趨勢")
154
- plt.xlabel("日期")
155
- plt.ylabel("貼文數量")
156
  plt.legend()
157
  img_candidate_volume = plot_to_base64(fig5)
158
 
159
- # 6. 候選人社群量分析
160
  fig6 = plt.figure(figsize=(8, 5))
161
- summary[['正面比率', '負面比率']].plot(kind='bar', stacked=True, colormap='coolwarm', ax=fig6.gca())
162
- fig6.gca().set_title("候選人社群量分析(正/負面情緒)")
163
- fig6.gca().set_ylabel("比例")
164
- fig6.gca().set_xlabel("候選人")
165
  img_candidate_sentiment = plot_to_base64(fig6)
166
 
167
- # 7. 知識圖譜
168
  fig7, ax7 = plt.subplots(figsize=(8, 6))
169
  G = nx.Graph()
170
  for c in candidates:
@@ -177,23 +164,23 @@ def run_analysis():
177
  # 新聞資料
178
  if os.path.exists(news_file):
179
  df_news = pd.read_csv(news_file)
180
- news_summary = df_news.groupby('類別').size().to_dict()
181
  news_table = df_news.to_html(index=False, classes="min-w-full border border-gray-200")
182
  else:
183
  news_summary = {}
184
- news_table = "<p>未提供新聞資料</p>"
185
 
186
  # 社群參與表格
187
  engagement_table = f"""
188
  <table class="min-w-full bg-white border border-gray-200">
189
  <tr class="bg-gray-100 border-b">
190
- <th class="py-2 px-4 border-r">總參與數</th>
191
  <td class="py-2 px-4 border-r">{len(all_df)}</td>
192
- <th class="py-2 px-4 border-r">正面情緒比例</th>
193
- <td class="py-2 px-4 border-r">{all_df['情緒'].value_counts(normalize=True).get('positive', 0):.1%}</td>
194
- <th class="py-2 px-4 border-r">平均互動率</th>
195
  <td class="py-2 px-4 border-r">3.9%</td>
196
- <th class="py-2 px-4 border-r">活躍平台</th>
197
  <td class="py-2 px-4">{len(platforms)}</td>
198
  </tr></table>
199
  """
@@ -220,10 +207,10 @@ def run_analysis():
220
  return html_content
221
  except Exception as e:
222
  logging.error(f"Analysis failed: {str(e)}")
223
- return f"<pre>❌ 輿情分析執行失敗:\n{traceback.format_exc()}</pre>"
224
 
225
  # Gradio 前端
226
  if __name__ == "__main__":
227
  logging.info("Starting Gradio interface...")
228
- iface = gr.Interface(fn=run_analysis, inputs=[], outputs=gr.HTML(), title="高雄市長選戰輿情分析")
229
  iface.launch(server_name="0.0.0.0", server_port=7860)
 
9
  from datetime import datetime, timedelta
10
  import gradio as gr
11
  import logging
 
12
 
13
  # 設置日誌
14
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
15
 
16
+ # 字體設置 (使用標準字體以確保英文顯示)
17
+ plt.rcParams['font.sans-serif'] = ['Arial', 'DejaVu Sans']
 
 
 
 
 
 
 
 
 
 
 
 
18
  plt.rcParams['axes.unicode_minus'] = False
19
 
20
  # 參數設定
21
+ candidates = ["Xu Zhijie", "Qiu Yiying", "Lai Ruilong", "Lin Daihua", "Ke Zhien"] # English transliteration
22
  days_back = 7
23
  max_tweets_per_candidate = 20
24
  news_file = "news_sample.csv"
 
41
  logging.info(f"Fetching tweets for candidate: {candidate}")
42
  return pd.DataFrame([
43
  {
44
+ "Date": datetime.now() - timedelta(days=random.randint(0, days_back - 1)),
45
+ "User": f"user{random.randint(1, 100)}",
46
+ "Content": f"{candidate}'s post {i}",
47
+ "Candidate": candidate
48
  } for i in range(random.randint(5, max_tweets_per_candidate))
49
  ])
50
 
 
65
  template_path = "templates/index.html"
66
  if not os.path.exists(template_path):
67
  logging.error(f"Template file {template_path} not found.")
68
+ return f"<pre>❌ Template file {template_path} not found</pre>"
69
 
70
  # 抓貼文與情緒分析
71
  logging.info("Fetching and analyzing tweets...")
72
  all_df = pd.concat([fetch_tweets(c) for c in candidates], ignore_index=True)
73
+ all_df['Sentiment'] = all_df['Content'].apply(lambda x: sentiment(x)['label'])
74
+ all_df['Confidence'] = all_df['Content'].apply(lambda x: sentiment(x)['score'])
75
 
76
  # 統計每日情緒
77
+ summary = all_df.groupby(['Candidate', 'Sentiment']).size().unstack(fill_value=0)
78
+ summary['Total Posts'] = summary.sum(axis=1)
79
+ summary['Positive Ratio'] = summary.get('positive', 0) / summary['Total Posts'].replace(0, 1)
80
+ summary['Negative Ratio'] = summary.get('negative', 0) / summary['Total Posts'].replace(0, 1)
81
 
82
  # 更新歷史資料
83
  today_str = datetime.now().strftime('%Y-%m-%d')
84
+ hist_row = summary[['Positive Ratio', 'Negative Ratio']].copy()
85
+ hist_row['Date'] = today_str
86
+ hist_row['Candidate'] = summary.index
87
  df_hist = pd.concat([pd.read_csv(history_file), hist_row], ignore_index=True) if os.path.exists(history_file) else hist_row
88
  df_hist.to_csv(history_file, index=False)
89
 
90
  # 圖表生成
91
+ # 1. Daily Sentiment Proportion
92
  fig1 = plt.figure(figsize=(8, 5))
93
+ summary[['Positive Ratio', 'Negative Ratio']].plot(kind='bar', stacked=True, colormap='coolwarm', ax=fig1.gca())
94
+ fig1.gca().set_title("Candidate Daily Social Sentiment Proportion")
95
+ fig1.gca().set_ylabel("Proportion")
96
+ fig1.gca().set_xlabel("Candidate")
97
  img_b64_today = plot_to_base64(fig1)
98
 
99
+ # 2. Historical Sentiment Trend
100
  fig2 = plt.figure(figsize=(10, 5))
101
  for c in candidates:
102
+ temp = df_hist[df_hist['Candidate'] == c]
103
+ plt.plot(temp['Date'], temp['Positive Ratio'], marker='o', label=f"{c} Positive")
104
+ plt.plot(temp['Date'], temp['Negative Ratio'], marker='x', label=f"{c} Negative")
105
+ plt.title("Candidate Historical Sentiment Trend")
106
  plt.xticks(rotation=45)
107
+ plt.ylabel("Proportion")
108
  plt.legend()
109
  img_b64_trend = plot_to_base64(fig2)
110
 
111
+ # 3. Social Sentiment Trend
112
+ sentiment_trend = all_df.groupby([pd.Grouper(key='Date', freq='D'), 'Sentiment']).size().unstack(fill_value=0)
113
  sentiment_trend = sentiment_trend.div(sentiment_trend.sum(axis=1), axis=0).fillna(0)
114
  fig3 = plt.figure(figsize=(8, 5))
115
  for s in ['positive', 'negative']:
116
  if s in sentiment_trend.columns:
117
+ plt.plot(sentiment_trend.index, sentiment_trend[s], marker='o', label=s.capitalize())
118
+ plt.title("Social Sentiment Trend")
119
+ plt.xlabel("Date")
120
+ plt.ylabel("Proportion")
121
  plt.legend()
122
  img_social_sentiment = plot_to_base64(fig3)
123
 
124
+ # 4. Platform Performance
125
  platforms = ["X", "Facebook", "Instagram", "PTT", "Line"]
126
  platform_counts = pd.Series({p: random.randint(10, 100) for p in platforms})
127
  fig4 = plt.figure(figsize=(8, 5))
128
  plt.bar(platforms, platform_counts, color='skyblue')
129
+ plt.title("Platform Post Volume")
130
+ plt.xlabel("Platform")
131
+ plt.ylabel("Post Count")
132
  img_platform_performance = plot_to_base64(fig4)
133
 
134
+ # 5. Candidate Post Volume Trend
135
+ candidate_trend = all_df.groupby([pd.Grouper(key='Date', freq='D'), 'Candidate']).size().unstack(fill_value=0)
136
  fig5 = plt.figure(figsize=(8, 5))
137
  for c in candidates:
138
  if c in candidate_trend.columns:
139
  plt.plot(candidate_trend.index, candidate_trend[c], marker='o', label=c)
140
+ plt.title("Candidate Post Volume Trend")
141
+ plt.xlabel("Date")
142
+ plt.ylabel("Post Count")
143
  plt.legend()
144
  img_candidate_volume = plot_to_base64(fig5)
145
 
146
+ # 6. Candidate Sentiment Analysis
147
  fig6 = plt.figure(figsize=(8, 5))
148
+ summary[['Positive Ratio', 'Negative Ratio']].plot(kind='bar', stacked=True, colormap='coolwarm', ax=fig6.gca())
149
+ fig6.gca().set_title("Candidate Post Sentiment Analysis (Positive/Negative)")
150
+ fig6.gca().set_ylabel("Proportion")
151
+ fig6.gca().set_xlabel("Candidate")
152
  img_candidate_sentiment = plot_to_base64(fig6)
153
 
154
+ # 7. Knowledge Graph
155
  fig7, ax7 = plt.subplots(figsize=(8, 6))
156
  G = nx.Graph()
157
  for c in candidates:
 
164
  # 新聞資料
165
  if os.path.exists(news_file):
166
  df_news = pd.read_csv(news_file)
167
+ news_summary = df_news.groupby('Category').size().to_dict() # Assumes the news CSV uses the English column header 'Category'
168
  news_table = df_news.to_html(index=False, classes="min-w-full border border-gray-200")
169
  else:
170
  news_summary = {}
171
+ news_table = "<p>No news data provided</p>"
172
 
173
  # 社群參與表格
174
  engagement_table = f"""
175
  <table class="min-w-full bg-white border border-gray-200">
176
  <tr class="bg-gray-100 border-b">
177
+ <th class="py-2 px-4 border-r">Total Engagement</th>
178
  <td class="py-2 px-4 border-r">{len(all_df)}</td>
179
+ <th class="py-2 px-4 border-r">Positive Sentiment Proportion</th>
180
+ <td class="py-2 px-4 border-r">{all_df['Sentiment'].value_counts(normalize=True).get('positive', 0):.1%}</td>
181
+ <th class="py-2 px-4 border-r">Average Interaction Rate</th>
182
  <td class="py-2 px-4 border-r">3.9%</td>
183
+ <th class="py-2 px-4 border-r">Active Platforms</th>
184
  <td class="py-2 px-4">{len(platforms)}</td>
185
  </tr></table>
186
  """
 
207
  return html_content
208
  except Exception as e:
209
  logging.error(f"Analysis failed: {str(e)}")
210
+ return f"<pre>❌ Analysis failed:\n{traceback.format_exc()}</pre>"
211
 
212
  # Gradio 前端
213
  if __name__ == "__main__":
214
  logging.info("Starting Gradio interface...")
215
+ iface = gr.Interface(fn=run_analysis, inputs=[], outputs=gr.HTML(), title="Kaohsiung Mayoral Election Sentiment Analysis")
216
  iface.launch(server_name="0.0.0.0", server_port=7860)