shiue2000 committed on
Commit
28d89bb
·
verified ·
1 Parent(s): 9925690

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -78
app.py CHANGED
@@ -1,13 +1,20 @@
1
- # 1. 匯入套件與參數設定
2
- import pandas as pd, matplotlib.pyplot as plt, io, base64, os, traceback, random, networkx as nx
 
 
 
 
 
 
3
  from datetime import datetime, timedelta
4
- import gradio as gr, schedule, time, threading
5
 
6
- # 中文顯示
7
- plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei','Arial Unicode MS','SimHei','DejaVu Sans']
8
  plt.rcParams['axes.unicode_minus'] = False
9
 
10
- candidates = ["許智傑","邱議瑩","賴瑞隆","林岱樺","柯志恩"]
 
11
  days_back = 7
12
  max_tweets_per_candidate = 20
13
  news_file = "news_sample.csv"
@@ -18,19 +25,23 @@ try:
18
  from transformers import pipeline
19
  sentiment_pipeline = pipeline("sentiment-analysis", model="lxyuan/distilbert-base-multilingual-cased-sentiments-student")
20
  def sentiment(text): return sentiment_pipeline(text)[0]
21
- except:
22
- def sentiment(text): return {"label": random.choice(["positive","negative"]), "score":0.5}
23
 
24
  # 模擬抓貼文
25
  def fetch_tweets(candidate):
26
- return pd.DataFrame([{"日期": datetime.now()-timedelta(days=random.randint(0,6)),
27
- "使用者": f"user{random.randint(1,100)}",
28
- "內容": f"{candidate} 的貼文 {i}",
29
- "候選人": candidate} for i in range(random.randint(5,max_tweets_per_candidate))])
30
-
31
- # base64 圖片轉換
 
 
 
 
32
  def plot_to_base64(fig):
33
- buf=io.BytesIO()
34
  fig.savefig(buf, format='png', bbox_inches='tight')
35
  buf.seek(0)
36
  img_b64 = base64.b64encode(buf.read()).decode('utf-8')
@@ -47,36 +58,32 @@ def run_analysis():
47
  all_df['信心度'] = all_df['內容'].apply(lambda x: sentiment(x)['score'])
48
 
49
  # 統計每日情緒
50
- summary = all_df.groupby(['候選人','情緒']).size().unstack(fill_value=0)
51
  summary['總貼文'] = summary.sum(axis=1)
52
- summary['正面比率'] = summary.get('positive',0)/summary['總貼文']
53
- summary['負面比率'] = summary.get('negative',0)/summary['總貼文']
54
 
55
  # 更新歷史資料
56
  today_str = datetime.now().strftime('%Y-%m-%d')
57
- hist_row = summary[['正面比率','負面比率']].copy()
58
  hist_row['日期'] = today_str
59
  hist_row['候選人'] = summary.index
60
  df_hist = pd.concat([pd.read_csv(history_file), hist_row], ignore_index=True) if os.path.exists(history_file) else hist_row
61
- df_hist.to_csv(history_file,index=False)
62
 
63
- # ----------------- 圖表 -----------------
64
  # 1. 當日情緒比例
65
- plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei', 'Arial Unicode MS', 'SimHei', 'DejaVu Sans']
66
- plt.rcParams['axes.unicode_minus'] = False
67
-
68
- fig1 = plt.figure(figsize=(8,5))
69
- summary[['正面比率','負面比率']].plot(kind='bar', stacked=True, colormap='coolwarm', ax=fig1.gca())
70
  fig1.gca().set_title("候選人當日社群情緒比例")
 
 
71
  img_b64_today = plot_to_base64(fig1)
72
 
73
  # 2. 歷史情緒趨勢
74
- plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei', 'Arial Unicode MS', 'SimHei', 'DejaVu Sans']
75
- plt.rcParams['axes.unicode_minus'] = False
76
-
77
- fig2 = plt.figure(figsize=(10,5))
78
  for c in candidates:
79
- temp = df_hist[df_hist['候選人']==c]
80
  plt.plot(temp['日期'], temp['正面比率'], marker='o', label=f"{c} 正面")
81
  plt.plot(temp['日期'], temp['負面比率'], marker='x', label=f"{c} 負面")
82
  plt.title("候選人歷史情緒趨勢")
@@ -85,55 +92,56 @@ def run_analysis():
85
  plt.legend()
86
  img_b64_trend = plot_to_base64(fig2)
87
 
88
- # 3~8 其他圖表生成
89
- # 社群情感趨勢
90
- plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei', 'Arial Unicode MS', 'SimHei', 'DejaVu Sans']
91
- plt.rcParams['axes.unicode_minus'] = False
92
-
93
- fig3 = plt.figure(figsize=(8,5))
94
- plt.plot(range(7), [random.random() for _ in range(7)], marker='o', label="正面")
95
- plt.plot(range(7), [random.random() for _ in range(7)], marker='x', label="負面")
96
  plt.title("社群情感趨勢")
 
 
97
  plt.legend()
98
  img_social_sentiment = plot_to_base64(fig3)
99
 
100
- # 各平台表現
101
- plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei', 'Arial Unicode MS', 'SimHei', 'DejaVu Sans']
102
- plt.rcParams['axes.unicode_minus'] = False
103
-
104
- fig4 = plt.figure(figsize=(8,5))
105
- platforms=["X","Facebook","Instagram","PTT","Line"]
106
- plt.bar(platforms, [random.randint(10,100) for _ in platforms], color='skyblue')
107
  plt.title("各平台貼文量")
 
 
108
  img_platform_performance = plot_to_base64(fig4)
109
 
110
- # 候選人社群量趨勢
111
- plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei', 'Arial Unicode MS', 'SimHei', 'DejaVu Sans']
112
- plt.rcParams['axes.unicode_minus'] = False
113
-
114
- fig5 = plt.figure(figsize=(8,5))
115
- for c in candidates: plt.plot(range(7), [random.randint(5,20) for _ in range(7)], marker='o', label=c)
116
  plt.title("候選人社群量趨勢")
 
 
117
  plt.legend()
118
  img_candidate_volume = plot_to_base64(fig5)
119
 
120
- # 候選人社群量分析
121
- plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei', 'Arial Unicode MS', 'SimHei', 'DejaVu Sans']
122
- plt.rcParams['axes.unicode_minus'] = False
123
-
124
- fig6 = plt.figure(figsize=(8,5))
125
- summary[['正面比率','負面比率']].plot(kind='bar', stacked=True, colormap='coolwarm', ax=fig6.gca())
126
  fig6.gca().set_title("候選人社群量分析(正/負面情緒)")
 
 
127
  img_candidate_sentiment = plot_to_base64(fig6)
128
 
129
- # 知識圖譜
130
- plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei', 'Arial Unicode MS', 'SimHei', 'DejaVu Sans']
131
- plt.rcParams['axes.unicode_minus'] = False
132
-
133
- fig7, ax7 = plt.subplots(figsize=(8,6))
134
- G=nx.Graph()
135
- for c in candidates: G.add_node(c)
136
- for i in range(len(candidates)-1): G.add_edge(candidates[i], candidates[i+1])
137
  nx.draw(G, nx.spring_layout(G), with_labels=True, node_color='lightgreen', font_size=12, ax=ax7)
138
  img_knowledge_graph = plot_to_base64(fig7)
139
 
@@ -141,31 +149,29 @@ def run_analysis():
141
  if os.path.exists(news_file):
142
  df_news = pd.read_csv(news_file)
143
  news_summary = df_news.groupby('類別').size().to_dict()
144
- news_table = df_news.to_html(index=False)
145
  else:
146
  news_summary = {}
147
  news_table = "<p>未提供新聞資料</p>"
148
-
149
  # 社群參與表格
150
- engagement_table=f"""
151
  <table class="min-w-full bg-white border border-gray-200">
152
  <tr class="bg-gray-100 border-b">
153
  <th class="py-2 px-4 border-r">總參與數</th>
154
  <td class="py-2 px-4 border-r">{len(all_df)}</td>
155
  <th class="py-2 px-4 border-r">正面情緒比例</th>
156
- <td class="py-2 px-4 border-r">{all_df['情緒'].value_counts(normalize=True).get('positive',0):.1%}</td>
157
  <th class="py-2 px-4 border-r">平均互動率</th>
158
  <td class="py-2 px-4 border-r">3.9%</td>
159
  <th class="py-2 px-4 border-r">活躍平台</th>
160
- <td class="py-2 px-4">6</td>
161
  </tr></table>
162
  """
163
 
164
  # HTML template
165
- plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei', 'Arial Unicode MS', 'SimHei', 'DejaVu Sans']
166
- plt.rcParams['axes.unicode_minus'] = False
167
-
168
- html_template = open("templates/index.html").read()
169
  html_content = html_template.format(
170
  report_date=datetime.now().strftime('%Y-%m-%d %H:%M'),
171
  img_b64_today=img_b64_today,
@@ -180,8 +186,10 @@ def run_analysis():
180
  news_table=news_table
181
  )
182
  return html_content
183
- except Exception: return f"<pre>❌ 輿情分析執行失敗:\n{traceback.format_exc()}</pre>"
 
184
 
185
  # Gradio 前端
186
- iface = gr.Interface(fn=run_analysis, inputs=[], outputs=gr.HTML(), title="高雄市長選戰輿情分析")
187
- iface.launch(server_name="0.0.0.0", server_port=7860)
 
 
1
+ import pandas as pd
2
+ import matplotlib.pyplot as plt
3
+ import io
4
+ import base64
5
+ import os
6
+ import traceback
7
+ import random
8
+ import networkx as nx
9
  from datetime import datetime, timedelta
10
+ import gradio as gr
11
 
12
+ # 中文顯示設置 (只需設置一次)
13
+ plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei', 'Arial Unicode MS', 'SimHei', 'DejaVu Sans']
14
  plt.rcParams['axes.unicode_minus'] = False
15
 
16
+ # 參數設定
17
+ candidates = ["許智傑", "邱議瑩", "賴瑞隆", "林岱樺", "柯志恩"]
18
  days_back = 7
19
  max_tweets_per_candidate = 20
20
  news_file = "news_sample.csv"
 
25
  from transformers import pipeline
26
  sentiment_pipeline = pipeline("sentiment-analysis", model="lxyuan/distilbert-base-multilingual-cased-sentiments-student")
27
  def sentiment(text): return sentiment_pipeline(text)[0]
28
+ except:
29
+ def sentiment(text): return {"label": random.choice(["positive", "negative"]), "score": 0.5}
30
 
31
  # 模擬抓貼文
32
  def fetch_tweets(candidate):
33
+ return pd.DataFrame([
34
+ {
35
+ "日期": datetime.now() - timedelta(days=random.randint(0, days_back - 1)),
36
+ "使用者": f"user{random.randint(1, 100)}",
37
+ "內容": f"{candidate} 的貼文 {i}",
38
+ "候選人": candidate
39
+ } for i in range(random.randint(5, max_tweets_per_candidate))
40
+ ])
41
+
42
+ # Base64 圖片轉換
43
  def plot_to_base64(fig):
44
+ buf = io.BytesIO()
45
  fig.savefig(buf, format='png', bbox_inches='tight')
46
  buf.seek(0)
47
  img_b64 = base64.b64encode(buf.read()).decode('utf-8')
 
58
  all_df['信心度'] = all_df['內容'].apply(lambda x: sentiment(x)['score'])
59
 
60
  # 統計每日情緒
61
+ summary = all_df.groupby(['候選人', '情緒']).size().unstack(fill_value=0)
62
  summary['總貼文'] = summary.sum(axis=1)
63
+ summary['正面比率'] = summary.get('positive', 0) / summary['總貼文'].replace(0, 1) # Avoid division by zero
64
+ summary['負面比率'] = summary.get('negative', 0) / summary['總貼文'].replace(0, 1)
65
 
66
  # 更新歷史資料
67
  today_str = datetime.now().strftime('%Y-%m-%d')
68
+ hist_row = summary[['正面比率', '負面比率']].copy()
69
  hist_row['日期'] = today_str
70
  hist_row['候選人'] = summary.index
71
  df_hist = pd.concat([pd.read_csv(history_file), hist_row], ignore_index=True) if os.path.exists(history_file) else hist_row
72
+ df_hist.to_csv(history_file, index=False)
73
 
74
+ # 圖表生成
75
  # 1. 當日情緒比例
76
+ fig1 = plt.figure(figsize=(8, 5))
77
+ summary[['正面比率', '負面比率']].plot(kind='bar', stacked=True, colormap='coolwarm', ax=fig1.gca())
 
 
 
78
  fig1.gca().set_title("候選人當日社群情緒比例")
79
+ fig1.gca().set_ylabel("比例")
80
+ fig1.gca().set_xlabel("候選人")
81
  img_b64_today = plot_to_base64(fig1)
82
 
83
  # 2. 歷史情緒趨勢
84
+ fig2 = plt.figure(figsize=(10, 5))
 
 
 
85
  for c in candidates:
86
+ temp = df_hist[df_hist['候選人'] == c]
87
  plt.plot(temp['日期'], temp['正面比率'], marker='o', label=f"{c} 正面")
88
  plt.plot(temp['日期'], temp['負面比率'], marker='x', label=f"{c} 負面")
89
  plt.title("候選人歷史情緒趨勢")
 
92
  plt.legend()
93
  img_b64_trend = plot_to_base64(fig2)
94
 
95
+ # 3. 社群情感趨勢 (使用實際資料而非隨機)
96
+ sentiment_trend = all_df.groupby([pd.Grouper(key='日期', freq='D'), '情緒']).size().unstack(fill_value=0)
97
+ sentiment_trend = sentiment_trend.div(sentiment_trend.sum(axis=1), axis=0).fillna(0)
98
+ fig3 = plt.figure(figsize=(8, 5))
99
+ for s in ['positive', 'negative']:
100
+ if s in sentiment_trend.columns:
101
+ plt.plot(sentiment_trend.index, sentiment_trend[s], marker='o', label=s)
 
102
  plt.title("社群情感趨勢")
103
+ plt.xlabel("日期")
104
+ plt.ylabel("比例")
105
  plt.legend()
106
  img_social_sentiment = plot_to_base64(fig3)
107
 
108
+ # 4. 各平台表現 (模擬平台數據)
109
+ platforms = ["X", "Facebook", "Instagram", "PTT", "Line"]
110
+ platform_counts = pd.Series({p: random.randint(10, 100) for p in platforms})
111
+ fig4 = plt.figure(figsize=(8, 5))
112
+ plt.bar(platforms, platform_counts, color='skyblue')
 
 
113
  plt.title("各平台貼文量")
114
+ plt.xlabel("平台")
115
+ plt.ylabel("貼文數量")
116
  img_platform_performance = plot_to_base64(fig4)
117
 
118
+ # 5. 候選人社群量趨勢
119
+ candidate_trend = all_df.groupby([pd.Grouper(key='日期', freq='D'), '候選人']).size().unstack(fill_value=0)
120
+ fig5 = plt.figure(figsize=(8, 5))
121
+ for c in candidates:
122
+ if c in candidate_trend.columns:
123
+ plt.plot(candidate_trend.index, candidate_trend[c], marker='o', label=c)
124
  plt.title("候選人社群量趨勢")
125
+ plt.xlabel("日期")
126
+ plt.ylabel("貼文數量")
127
  plt.legend()
128
  img_candidate_volume = plot_to_base64(fig5)
129
 
130
+ # 6. 候選人社群量分析
131
+ fig6 = plt.figure(figsize=(8, 5))
132
+ summary[['正面比率', '負面比率']].plot(kind='bar', stacked=True, colormap='coolwarm', ax=fig6.gca())
 
 
 
133
  fig6.gca().set_title("候選人社群量分析(正/負面情緒)")
134
+ fig6.gca().set_ylabel("比例")
135
+ fig6.gca().set_xlabel("候選人")
136
  img_candidate_sentiment = plot_to_base64(fig6)
137
 
138
+ # 7. 知識圖譜
139
+ fig7, ax7 = plt.subplots(figsize=(8, 6))
140
+ G = nx.Graph()
141
+ for c in candidates:
142
+ G.add_node(c)
143
+ for i in range(len(candidates) - 1):
144
+ G.add_edge(candidates[i], candidates[i + 1])
 
145
  nx.draw(G, nx.spring_layout(G), with_labels=True, node_color='lightgreen', font_size=12, ax=ax7)
146
  img_knowledge_graph = plot_to_base64(fig7)
147
 
 
149
  if os.path.exists(news_file):
150
  df_news = pd.read_csv(news_file)
151
  news_summary = df_news.groupby('類別').size().to_dict()
152
+ news_table = df_news.to_html(index=False, classes="min-w-full border border-gray-200")
153
  else:
154
  news_summary = {}
155
  news_table = "<p>未提供新聞資料</p>"
156
+
157
  # 社群參與表格
158
+ engagement_table = f"""
159
  <table class="min-w-full bg-white border border-gray-200">
160
  <tr class="bg-gray-100 border-b">
161
  <th class="py-2 px-4 border-r">總參與數</th>
162
  <td class="py-2 px-4 border-r">{len(all_df)}</td>
163
  <th class="py-2 px-4 border-r">正面情緒比例</th>
164
+ <td class="py-2 px-4 border-r">{all_df['情緒'].value_counts(normalize=True).get('positive', 0):.1%}</td>
165
  <th class="py-2 px-4 border-r">平均互動率</th>
166
  <td class="py-2 px-4 border-r">3.9%</td>
167
  <th class="py-2 px-4 border-r">活躍平台</th>
168
+ <td class="py-2 px-4">{len(platforms)}</td>
169
  </tr></table>
170
  """
171
 
172
  # HTML template
173
+ with open("templates/index.html", encoding='utf-8') as f:
174
+ html_template = f.read()
 
 
175
  html_content = html_template.format(
176
  report_date=datetime.now().strftime('%Y-%m-%d %H:%M'),
177
  img_b64_today=img_b64_today,
 
186
  news_table=news_table
187
  )
188
  return html_content
189
+ except Exception as e:
190
+ return f"<pre>❌ 輿情分析執行失敗:\n{traceback.format_exc()}</pre>"
191
 
192
  # Gradio 前端
193
+ if __name__ == "__main__":
194
+ iface = gr.Interface(fn=run_analysis, inputs=[], outputs=gr.HTML(), title="高雄市長選戰輿情分析")
195
+ iface.launch(server_name="0.0.0.0", server_port=7860)