shiue2000 committed on
Commit
07dbcee
·
verified ·
1 Parent(s): af46a64

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -79
app.py CHANGED
@@ -13,19 +13,19 @@ import time
13
  import threading
14
  import traceback
15
  import networkx as nx
16
- import random # For fallback dummy data
17
 
18
  # -----------------------------
19
  # 參數設定
20
  # -----------------------------
21
  candidates = ["許智傑", "邱議瑩", "賴瑞隆", "林岱樺", "柯志恩"]
22
  days_back = 7
23
- max_tweets_per_candidate = 20 # Reduced for tool limits
24
  news_file = "news_sample.csv"
25
  history_file = "history_sentiment.csv"
26
 
27
  # -----------------------------
28
- # 情緒分析模型 (統一接口)
29
  # -----------------------------
30
  try:
31
  from transformers import pipeline
@@ -36,82 +36,59 @@ try:
36
  print("✅ 情緒分析模型加載成功")
37
 
38
  def sentiment(text):
39
- # 保證回傳 list of dict
40
- return sentiment_pipeline(text)
41
  except Exception as e:
42
- print(f"⚠️ 警告: {e}. 情緒分析將使用預設值 (positive/negative 隨機分配)。")
43
-
44
  def sentiment(text):
45
- # 回傳 list of dict,和 pipeline 統一接口
46
- return [{"label": random.choice(["positive", "negative"]), "score": 0.5}]
47
 
48
  # -----------------------------
49
- # X Tool Simulation (Replace with actual function calls in production)
50
  # -----------------------------
51
  def fetch_tweets_via_x_tools(candidate, since_date, until_date):
52
- print(f"🔍 Fetching tweets for {candidate} via X tools...")
53
  mock_tweets = [
54
- {
55
- 'id': 1970100275227869230,
56
- 'date': datetime(2025, 9, 22, 12, 18, 2),
57
- 'user': 'mai_ka_nng',
58
- 'content': '邱志偉(提案)、許智傑、陳其邁、劉建國、管碧玲、高志鵬、林淑芬、楊曜、許添財、葉宜津、陳節如、尤美女、姚文智、陳歐珀、陳唐山、蘇震清、蔡煌瑯'
59
- }
60
- ] if candidate == "許智傑" else []
61
-
62
- if not mock_tweets:
63
- mock_tweets = [
64
- {
65
- 'id': random.randint(1000000000000000000, 1999999999999999999),
66
- 'date': datetime.now() - timedelta(days=random.randint(1, days_back)),
67
- 'user': f'user_{random.randint(1, 1000)}',
68
- 'content': f'Sample tweet about {candidate} {random.choice(["supportive", "critical", "neutral"])} opinion.'
69
- }
70
- for _ in range(random.randint(1, max_tweets_per_candidate))
71
- ]
72
- print(f"ℹ️ Using fallback dummy data for {candidate}")
73
-
74
  print(f"✅ Fetched {len(mock_tweets)} tweets for {candidate}")
75
- return [[tweet['date'], tweet['user'], tweet['content'], candidate] for tweet in mock_tweets]
76
 
77
  # -----------------------------
78
  # 主分析函數
79
  # -----------------------------
80
  def run_analysis():
81
- print("🔄 開始執行輿情分析...")
82
  try:
83
- since_date = (datetime.now() - timedelta(days=days_back)).strftime('%Y-%m-%d')
84
- until_date = datetime.now().strftime('%Y-%m-%d')
85
-
86
  # 1. 抓貼文
87
  all_tweets = []
88
- for candidate in candidates:
89
- tweets = fetch_tweets_via_x_tools(candidate, since_date, until_date)
90
- all_tweets.extend(tweets)
91
-
92
- if not all_tweets:
93
- raise ValueError("No tweets fetched. Using full dummy data.")
94
 
95
- df_tweets = pd.DataFrame(all_tweets, columns=["日期", "使用者", "內容", "候選人"])
96
 
97
  # 2. 情緒分析
98
  df_tweets['情緒'] = df_tweets['內容'].apply(lambda x: sentiment(x)['label'])
99
  df_tweets['信心度'] = df_tweets['內容'].apply(lambda x: sentiment(x)['score'])
100
 
101
- # 統計每位候選人情緒比例
102
- summary = df_tweets.groupby(['候選人', '情緒']).size().unstack(fill_value=0)
103
  summary['總貼文'] = summary.sum(axis=1)
104
- summary['正面比率'] = summary.get('positive', 0) / summary['總貼文']
105
- summary['負面比率'] = summary.get('negative', 0) / summary['總貼文']
106
 
107
- # 3. 更新歷史資料
108
  summary['日期'] = datetime.now().strftime('%Y-%m-%d %H:%M %Z')
109
  if os.path.exists(history_file):
110
  df_history = pd.read_csv(history_file)
111
  df_history = pd.concat([df_history, summary.reset_index()[['日期','候選人','正面比率','負面比率']]], ignore_index=True)
112
  else:
113
  df_history = summary.reset_index()[['日期','候選人','正面比率','負面比率']]
114
- df_history.to_csv(history_file, index=False)
115
 
116
  # ----------------- 圖表生成 -----------------
117
  # 當日情緒比例
@@ -123,7 +100,7 @@ def run_analysis():
123
  plt.xticks(rotation=0)
124
  plt.tight_layout()
125
  buf = io.BytesIO()
126
- plt.savefig(buf, format='png')
127
  buf.seek(0)
128
  img_b64_today = base64.b64encode(buf.read()).decode('utf-8')
129
  buf.close()
@@ -140,35 +117,35 @@ def run_analysis():
140
  plt.legend()
141
  plt.tight_layout()
142
  buf = io.BytesIO()
143
- plt.savefig(buf, format='png')
144
  buf.seek(0)
145
  img_b64_trend = base64.b64encode(buf.read()).decode('utf-8')
146
  buf.close()
147
 
148
- # 其他圖表 placeholder(可自行生成圖表後轉 base64)
149
  img_social_sentiment = ""
150
  img_platform_performance = ""
151
  img_candidate_volume = ""
152
  img_candidate_sentiment = ""
153
  img_knowledge_graph = ""
154
 
155
- # 社群參與表格
156
  engagement_table = f"""
157
  <table class="min-w-full bg-white border border-gray-200">
158
- <tr class="bg-gray-100 border-b">
159
- <th class="py-2 px-4 border-r">總參與數</th>
160
- <td class="py-2 px-4 border-r">{len(df_tweets)}</td>
161
- <th class="py-2 px-4 border-r">正面情緒比例</th>
162
- <td class="py-2 px-4 border-r">{df_tweets['情緒'].value_counts(normalize=True).get('positive',0):.1%}</td>
163
- <th class="py-2 px-4 border-r">平均互動率</th>
164
- <td class="py-2 px-4 border-r">3.9%</td>
165
- <th class="py-2 px-4 border-r">活躍平台</th>
166
- <td class="py-2 px-4">6</td>
167
- </tr>
168
  </table>
169
  """
170
 
171
- # 新聞資料
172
  if os.path.exists(news_file):
173
  df_news = pd.read_csv(news_file)
174
  news_summary = df_news.groupby('類別').size().to_dict()
@@ -177,7 +154,7 @@ def run_analysis():
177
  news_summary = {}
178
  news_table = "<p>未提供新聞資料</p>"
179
 
180
- # ----------------- 內嵌 HTML 模板 -----------------
181
  html_template = """<!DOCTYPE html>
182
  <html lang="zh-TW">
183
  <head>
@@ -211,14 +188,12 @@ body {{
211
  <main class="grid grid-cols-1 md:grid-cols-2 gap-6">
212
  <div class="card">
213
  <h2 class="text-xl font-semibold mb-4">1. 當日社群貼文情緒</h2>
214
- <div class="chart-container">
215
- <img src="data:image/png;base64,{img_b64_today}" class="w-full">
216
- </div></div>
217
  <div class="card">
218
  <h2 class="text-xl font-semibold mb-4">2. 歷史情緒趨勢</h2>
219
- <div class="chart-container">
220
- <img src="data:image/png;base64,{img_b64_trend}" class="w-full">
221
- </div></div>
222
  <div class="card md:col-span-2">
223
  <h2 class="text-xl font-semibold mb-4">3. 社群媒體參與概況</h2>
224
  {engagement_table}
@@ -244,16 +219,13 @@ body {{
244
  news_table=news_table
245
  )
246
 
247
- print("✅ 輿情分析完成")
248
  return html_content
249
 
250
- except Exception as e:
251
- err_msg = f" 輿情分析執行失敗:\n{traceback.format_exc()}"
252
- print(err_msg)
253
- return f"<pre>{err_msg}</pre>"
254
 
255
  # -----------------------------
256
- # 自動排程設定 (每天 08:00 執行)
257
  # -----------------------------
258
  def schedule_daily_run():
259
  schedule.every().day.at("08:00").do(run_analysis)
@@ -269,7 +241,6 @@ threading.Thread(target=schedule_daily_run, daemon=True).start()
269
  # -----------------------------
270
  # Gradio 前端
271
  # -----------------------------
272
- print("🔄 啟動 Gradio 介面...")
273
  iface = gr.Interface(
274
  fn=run_analysis,
275
  inputs=[],
@@ -278,5 +249,4 @@ iface = gr.Interface(
278
  title="高雄市長選戰輿情分析",
279
  description="每日自動抓取 X 貼文 + 新聞議題分析 + 歷史情緒趨勢"
280
  )
281
- print("✅ Gradio 介面啟動,監聽 0.0.0.0:7860")
282
  iface.launch(server_name="0.0.0.0", server_port=7860)
 
13
  import threading
14
  import traceback
15
  import networkx as nx
16
+ import random
17
 
18
  # -----------------------------
19
  # 參數設定
20
  # -----------------------------
21
  candidates = ["許智傑", "邱議瑩", "賴瑞隆", "林岱樺", "柯志恩"]
22
  days_back = 7
23
+ max_tweets_per_candidate = 20
24
  news_file = "news_sample.csv"
25
  history_file = "history_sentiment.csv"
26
 
27
  # -----------------------------
28
+ # 情緒分析模型
29
  # -----------------------------
30
  try:
31
  from transformers import pipeline
 
36
  print("✅ 情緒分析模型加載成功")
37
 
38
  def sentiment(text):
39
+ return sentiment_pipeline(text)[0] # 保證回傳單 dict
 
40
  except Exception as e:
41
+ print(f"⚠️ 警告: {e}. 將使用隨機情緒")
 
42
  def sentiment(text):
43
+ return {"label": random.choice(["positive", "negative"]), "score": 0.5}
 
44
 
45
  # -----------------------------
46
+ # 模擬抓貼文
47
  # -----------------------------
def fetch_tweets_via_x_tools(candidate, since_date, until_date):
    """Generate placeholder tweet rows for one candidate.

    No real X request is made here: every row is randomly synthesised.
    ``since_date`` and ``until_date`` are accepted but never read; post
    dates are instead spread over the last ``days_back`` days.

    Args:
        candidate: Candidate name embedded in each sample text and returned
            as the last column of every row.
        since_date: Start of the requested window (currently unused).
        until_date: End of the requested window (currently unused).

    Returns:
        A list of ``[date, user, content, candidate]`` rows, between 1 and
        ``max_tweets_per_candidate`` entries long.
    """
    print(f"🔍 Fetching tweets for {candidate}...")

    rows = []
    tweet_count = random.randint(1, max_tweets_per_candidate)
    for _ in range(tweet_count):
        # An id is still drawn for each mock tweet (it is not part of the
        # returned row) so the random stream is consumed in the same order.
        _unused_id = random.randint(1000000000000000000, 1999999999999999999)

        now = datetime.now()
        posted_at = now - timedelta(days=random.randint(0, days_back))

        author = f'user_{random.randint(1,1000)}'
        stance = random.choice(["supportive", "critical", "neutral"])
        text = f'Sample tweet about {candidate} {stance}'

        rows.append([posted_at, author, text, candidate])

    print(f"✅ Fetched {len(rows)} tweets for {candidate}")
    return rows
59
 
60
  # -----------------------------
61
  # 主分析函數
62
  # -----------------------------
63
  def run_analysis():
 
64
  try:
 
 
 
65
  # 1. 抓貼文
66
  all_tweets = []
67
+ since_date = (datetime.now() - timedelta(days=days_back)).strftime('%Y-%m-%d')
68
+ until_date = datetime.now().strftime('%Y-%m-%d')
69
+ for c in candidates:
70
+ all_tweets.extend(fetch_tweets_via_x_tools(c, since_date, until_date))
 
 
71
 
72
+ df_tweets = pd.DataFrame(all_tweets, columns=["日期","使用者","內容","候選人"])
73
 
74
  # 2. 情緒分析
75
  df_tweets['情緒'] = df_tweets['內容'].apply(lambda x: sentiment(x)['label'])
76
  df_tweets['信心度'] = df_tweets['內容'].apply(lambda x: sentiment(x)['score'])
77
 
78
+ # 3. 統計每日情緒
79
+ summary = df_tweets.groupby(['候選人','情緒']).size().unstack(fill_value=0)
80
  summary['總貼文'] = summary.sum(axis=1)
81
+ summary['正面比率'] = summary.get('positive',0)/summary['總貼文']
82
+ summary['負面比率'] = summary.get('negative',0)/summary['總貼文']
83
 
84
+ # 4. 更新歷史資料
85
  summary['日期'] = datetime.now().strftime('%Y-%m-%d %H:%M %Z')
86
  if os.path.exists(history_file):
87
  df_history = pd.read_csv(history_file)
88
  df_history = pd.concat([df_history, summary.reset_index()[['日期','候選人','正面比率','負面比率']]], ignore_index=True)
89
  else:
90
  df_history = summary.reset_index()[['日期','候選人','正面比率','負面比率']]
91
+ df_history.to_csv(history_file,index=False)
92
 
93
  # ----------------- 圖表生成 -----------------
94
  # 當日情緒比例
 
100
  plt.xticks(rotation=0)
101
  plt.tight_layout()
102
  buf = io.BytesIO()
103
+ plt.savefig(buf,format='png')
104
  buf.seek(0)
105
  img_b64_today = base64.b64encode(buf.read()).decode('utf-8')
106
  buf.close()
 
117
  plt.legend()
118
  plt.tight_layout()
119
  buf = io.BytesIO()
120
+ plt.savefig(buf,format='png')
121
  buf.seek(0)
122
  img_b64_trend = base64.b64encode(buf.read()).decode('utf-8')
123
  buf.close()
124
 
125
+ # ----------------- 其他圖表 placeholder -----------------
126
  img_social_sentiment = ""
127
  img_platform_performance = ""
128
  img_candidate_volume = ""
129
  img_candidate_sentiment = ""
130
  img_knowledge_graph = ""
131
 
132
+ # ----------------- 社群參與表格 -----------------
133
  engagement_table = f"""
134
  <table class="min-w-full bg-white border border-gray-200">
135
+ <tr class="bg-gray-100 border-b">
136
+ <th class="py-2 px-4 border-r">總參與數</th>
137
+ <td class="py-2 px-4 border-r">{len(df_tweets)}</td>
138
+ <th class="py-2 px-4 border-r">正面情緒比例</th>
139
+ <td class="py-2 px-4 border-r">{df_tweets['情緒'].value_counts(normalize=True).get('positive',0):.1%}</td>
140
+ <th class="py-2 px-4 border-r">平均互動率</th>
141
+ <td class="py-2 px-4 border-r">3.9%</td>
142
+ <th class="py-2 px-4 border-r">活躍平台</th>
143
+ <td class="py-2 px-4">6</td>
144
+ </tr>
145
  </table>
146
  """
147
 
148
+ # ----------------- 新聞資料 -----------------
149
  if os.path.exists(news_file):
150
  df_news = pd.read_csv(news_file)
151
  news_summary = df_news.groupby('類別').size().to_dict()
 
154
  news_summary = {}
155
  news_table = "<p>未提供新聞資料</p>"
156
 
157
+ # ----------------- 內嵌 HTML -----------------
158
  html_template = """<!DOCTYPE html>
159
  <html lang="zh-TW">
160
  <head>
 
188
  <main class="grid grid-cols-1 md:grid-cols-2 gap-6">
189
  <div class="card">
190
  <h2 class="text-xl font-semibold mb-4">1. 當日社群貼文情緒</h2>
191
+ <div class="chart-container"><img src="data:image/png;base64,{img_b64_today}" class="w-full"></div>
192
+ </div>
 
193
  <div class="card">
194
  <h2 class="text-xl font-semibold mb-4">2. 歷史情緒趨勢</h2>
195
+ <div class="chart-container"><img src="data:image/png;base64,{img_b64_trend}" class="w-full"></div>
196
+ </div>
 
197
  <div class="card md:col-span-2">
198
  <h2 class="text-xl font-semibold mb-4">3. 社群媒體參與概況</h2>
199
  {engagement_table}
 
219
  news_table=news_table
220
  )
221
 
 
222
  return html_content
223
 
224
+ except Exception:
225
+ return f"<pre>❌ 輿情分析執行失敗:\n{traceback.format_exc()}</pre>"
 
 
226
 
227
  # -----------------------------
228
+ # 自動排程設定
229
  # -----------------------------
230
  def schedule_daily_run():
231
  schedule.every().day.at("08:00").do(run_analysis)
 
241
  # -----------------------------
242
  # Gradio 前端
243
  # -----------------------------
 
244
  iface = gr.Interface(
245
  fn=run_analysis,
246
  inputs=[],
 
249
  title="高雄市長選戰輿情分析",
250
  description="每日自動抓取 X 貼文 + 新聞議題分析 + 歷史情緒趨勢"
251
  )
 
252
  iface.launch(server_name="0.0.0.0", server_port=7860)