Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -13,19 +13,19 @@ import time
|
|
| 13 |
import threading
|
| 14 |
import traceback
|
| 15 |
import networkx as nx
|
| 16 |
-
import random
|
| 17 |
|
| 18 |
# -----------------------------
|
| 19 |
# 參數設定
|
| 20 |
# -----------------------------
|
| 21 |
candidates = ["許智傑", "邱議瑩", "賴瑞隆", "林岱樺", "柯志恩"]
|
| 22 |
days_back = 7
|
| 23 |
-
max_tweets_per_candidate = 20
|
| 24 |
news_file = "news_sample.csv"
|
| 25 |
history_file = "history_sentiment.csv"
|
| 26 |
|
| 27 |
# -----------------------------
|
| 28 |
-
# 情緒分析模型
|
| 29 |
# -----------------------------
|
| 30 |
try:
|
| 31 |
from transformers import pipeline
|
|
@@ -36,82 +36,59 @@ try:
|
|
| 36 |
print("✅ 情緒分析模型加載成功")
|
| 37 |
|
| 38 |
def sentiment(text):
|
| 39 |
-
#
|
| 40 |
-
return sentiment_pipeline(text)
|
| 41 |
except Exception as e:
|
| 42 |
-
print(f"⚠️ 警告: {e}.
|
| 43 |
-
|
| 44 |
def sentiment(text):
|
| 45 |
-
|
| 46 |
-
return [{"label": random.choice(["positive", "negative"]), "score": 0.5}]
|
| 47 |
|
| 48 |
# -----------------------------
|
| 49 |
-
#
|
| 50 |
# -----------------------------
|
| 51 |
def fetch_tweets_via_x_tools(candidate, since_date, until_date):
|
| 52 |
-
print(f"🔍 Fetching tweets for {candidate}
|
| 53 |
mock_tweets = [
|
| 54 |
-
{
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
] if candidate == "許智傑" else []
|
| 61 |
-
|
| 62 |
-
if not mock_tweets:
|
| 63 |
-
mock_tweets = [
|
| 64 |
-
{
|
| 65 |
-
'id': random.randint(1000000000000000000, 1999999999999999999),
|
| 66 |
-
'date': datetime.now() - timedelta(days=random.randint(1, days_back)),
|
| 67 |
-
'user': f'user_{random.randint(1, 1000)}',
|
| 68 |
-
'content': f'Sample tweet about {candidate} {random.choice(["supportive", "critical", "neutral"])} opinion.'
|
| 69 |
-
}
|
| 70 |
-
for _ in range(random.randint(1, max_tweets_per_candidate))
|
| 71 |
-
]
|
| 72 |
-
print(f"ℹ️ Using fallback dummy data for {candidate}")
|
| 73 |
-
|
| 74 |
print(f"✅ Fetched {len(mock_tweets)} tweets for {candidate}")
|
| 75 |
-
return [[
|
| 76 |
|
| 77 |
# -----------------------------
|
| 78 |
# 主分析函數
|
| 79 |
# -----------------------------
|
| 80 |
def run_analysis():
|
| 81 |
-
print("🔄 開始執行輿情分析...")
|
| 82 |
try:
|
| 83 |
-
since_date = (datetime.now() - timedelta(days=days_back)).strftime('%Y-%m-%d')
|
| 84 |
-
until_date = datetime.now().strftime('%Y-%m-%d')
|
| 85 |
-
|
| 86 |
# 1. 抓貼文
|
| 87 |
all_tweets = []
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
if not all_tweets:
|
| 93 |
-
raise ValueError("No tweets fetched. Using full dummy data.")
|
| 94 |
|
| 95 |
-
df_tweets = pd.DataFrame(all_tweets, columns=["日期",
|
| 96 |
|
| 97 |
# 2. 情緒分析
|
| 98 |
df_tweets['情緒'] = df_tweets['內容'].apply(lambda x: sentiment(x)['label'])
|
| 99 |
df_tweets['信心度'] = df_tweets['內容'].apply(lambda x: sentiment(x)['score'])
|
| 100 |
|
| 101 |
-
#
|
| 102 |
-
summary = df_tweets.groupby(['候選人',
|
| 103 |
summary['總貼文'] = summary.sum(axis=1)
|
| 104 |
-
summary['正面比率'] = summary.get('positive',
|
| 105 |
-
summary['負面比率'] = summary.get('negative',
|
| 106 |
|
| 107 |
-
#
|
| 108 |
summary['日期'] = datetime.now().strftime('%Y-%m-%d %H:%M %Z')
|
| 109 |
if os.path.exists(history_file):
|
| 110 |
df_history = pd.read_csv(history_file)
|
| 111 |
df_history = pd.concat([df_history, summary.reset_index()[['日期','候選人','正面比率','負面比率']]], ignore_index=True)
|
| 112 |
else:
|
| 113 |
df_history = summary.reset_index()[['日期','候選人','正面比率','負面比率']]
|
| 114 |
-
df_history.to_csv(history_file,
|
| 115 |
|
| 116 |
# ----------------- 圖表生成 -----------------
|
| 117 |
# 當日情緒比例
|
|
@@ -123,7 +100,7 @@ def run_analysis():
|
|
| 123 |
plt.xticks(rotation=0)
|
| 124 |
plt.tight_layout()
|
| 125 |
buf = io.BytesIO()
|
| 126 |
-
plt.savefig(buf,
|
| 127 |
buf.seek(0)
|
| 128 |
img_b64_today = base64.b64encode(buf.read()).decode('utf-8')
|
| 129 |
buf.close()
|
|
@@ -140,35 +117,35 @@ def run_analysis():
|
|
| 140 |
plt.legend()
|
| 141 |
plt.tight_layout()
|
| 142 |
buf = io.BytesIO()
|
| 143 |
-
plt.savefig(buf,
|
| 144 |
buf.seek(0)
|
| 145 |
img_b64_trend = base64.b64encode(buf.read()).decode('utf-8')
|
| 146 |
buf.close()
|
| 147 |
|
| 148 |
-
# 其他圖表 placeholder
|
| 149 |
img_social_sentiment = ""
|
| 150 |
img_platform_performance = ""
|
| 151 |
img_candidate_volume = ""
|
| 152 |
img_candidate_sentiment = ""
|
| 153 |
img_knowledge_graph = ""
|
| 154 |
|
| 155 |
-
# 社群參與表格
|
| 156 |
engagement_table = f"""
|
| 157 |
<table class="min-w-full bg-white border border-gray-200">
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
</table>
|
| 169 |
"""
|
| 170 |
|
| 171 |
-
# 新聞資料
|
| 172 |
if os.path.exists(news_file):
|
| 173 |
df_news = pd.read_csv(news_file)
|
| 174 |
news_summary = df_news.groupby('類別').size().to_dict()
|
|
@@ -177,7 +154,7 @@ def run_analysis():
|
|
| 177 |
news_summary = {}
|
| 178 |
news_table = "<p>未提供新聞資料</p>"
|
| 179 |
|
| 180 |
-
# ----------------- 內嵌 HTML
|
| 181 |
html_template = """<!DOCTYPE html>
|
| 182 |
<html lang="zh-TW">
|
| 183 |
<head>
|
|
@@ -211,14 +188,12 @@ body {{
|
|
| 211 |
<main class="grid grid-cols-1 md:grid-cols-2 gap-6">
|
| 212 |
<div class="card">
|
| 213 |
<h2 class="text-xl font-semibold mb-4">1. 當日社群貼文情緒</h2>
|
| 214 |
-
<div class="chart-container">
|
| 215 |
-
|
| 216 |
-
</div></div>
|
| 217 |
<div class="card">
|
| 218 |
<h2 class="text-xl font-semibold mb-4">2. 歷史情緒趨勢</h2>
|
| 219 |
-
<div class="chart-container">
|
| 220 |
-
|
| 221 |
-
</div></div>
|
| 222 |
<div class="card md:col-span-2">
|
| 223 |
<h2 class="text-xl font-semibold mb-4">3. 社群媒體參與概況</h2>
|
| 224 |
{engagement_table}
|
|
@@ -244,16 +219,13 @@ body {{
|
|
| 244 |
news_table=news_table
|
| 245 |
)
|
| 246 |
|
| 247 |
-
print("✅ 輿情分析完成")
|
| 248 |
return html_content
|
| 249 |
|
| 250 |
-
except Exception
|
| 251 |
-
|
| 252 |
-
print(err_msg)
|
| 253 |
-
return f"<pre>{err_msg}</pre>"
|
| 254 |
|
| 255 |
# -----------------------------
|
| 256 |
-
# 自動排程設定
|
| 257 |
# -----------------------------
|
| 258 |
def schedule_daily_run():
|
| 259 |
schedule.every().day.at("08:00").do(run_analysis)
|
|
@@ -269,7 +241,6 @@ threading.Thread(target=schedule_daily_run, daemon=True).start()
|
|
| 269 |
# -----------------------------
|
| 270 |
# Gradio 前端
|
| 271 |
# -----------------------------
|
| 272 |
-
print("🔄 啟動 Gradio 介面...")
|
| 273 |
iface = gr.Interface(
|
| 274 |
fn=run_analysis,
|
| 275 |
inputs=[],
|
|
@@ -278,5 +249,4 @@ iface = gr.Interface(
|
|
| 278 |
title="高雄市長選戰輿情分析",
|
| 279 |
description="每日自動抓取 X 貼文 + 新聞議題分析 + 歷史情緒趨勢"
|
| 280 |
)
|
| 281 |
-
print("✅ Gradio 介面啟動,監聽 0.0.0.0:7860")
|
| 282 |
iface.launch(server_name="0.0.0.0", server_port=7860)
|
|
|
|
| 13 |
import threading
|
| 14 |
import traceback
|
| 15 |
import networkx as nx
|
| 16 |
+
import random
|
| 17 |
|
| 18 |
# -----------------------------
|
| 19 |
# 參數設定
|
| 20 |
# -----------------------------
|
| 21 |
candidates = ["許智傑", "邱議瑩", "賴瑞隆", "林岱樺", "柯志恩"]
|
| 22 |
days_back = 7
|
| 23 |
+
max_tweets_per_candidate = 20
|
| 24 |
news_file = "news_sample.csv"
|
| 25 |
history_file = "history_sentiment.csv"
|
| 26 |
|
| 27 |
# -----------------------------
|
| 28 |
+
# 情緒分析模型
|
| 29 |
# -----------------------------
|
| 30 |
try:
|
| 31 |
from transformers import pipeline
|
|
|
|
| 36 |
print("✅ 情緒分析模型加載成功")
|
| 37 |
|
| 38 |
def sentiment(text):
    """Classify *text* with the loaded sentiment pipeline.

    The pipeline's return value is indexed at position 0 so that callers
    receive one result and can read its ``'label'`` and ``'score'`` entries
    directly.
    """
    predictions = sentiment_pipeline(text)
    # Hand back only the first entry so the caller gets a single dict.
    return predictions[0]
|
|
|
|
| 40 |
except Exception as e:
|
| 41 |
+
print(f"⚠️ 警告: {e}. 將使用隨機情緒")
|
|
|
|
| 42 |
def sentiment(text):
    """Fallback classifier used when the model could not be loaded.

    *text* is ignored. The result has the same two keys the callers read
    (``'label'`` and ``'score'``): the label is picked at random between
    ``"positive"`` and ``"negative"`` and the score is fixed at 0.5.
    """
    label = random.choice(["positive", "negative"])
    return {"label": label, "score": 0.5}
|
|
|
|
| 44 |
|
| 45 |
# -----------------------------
|
| 46 |
+
# 模擬抓貼文
|
| 47 |
# -----------------------------
|
| 48 |
def fetch_tweets_via_x_tools(candidate, since_date, until_date):
    """Generate mock posts about *candidate* and return them as rows.

    No external service is contacted: every post is synthesised with
    ``random``. ``since_date`` and ``until_date`` are accepted but not used;
    post dates are drawn from the module-level ``days_back`` window instead.

    Returns a list of ``[date, user, content, candidate]`` lists containing
    between 1 and ``max_tweets_per_candidate`` entries.
    """
    print(f"🔍 Fetching tweets for {candidate}...")

    # The number of posts is drawn first, before any per-post values.
    tweet_count = random.randint(1, max_tweets_per_candidate)

    rows = []
    for _ in range(tweet_count):
        # The id is drawn but never returned; the draw is kept so the
        # sequence of random values consumed per post stays the same.
        random.randint(1000000000000000000,1999999999999999999)
        posted_at = datetime.now() - timedelta(days=random.randint(0, days_back))
        author = f'user_{random.randint(1,1000)}'
        body = f'Sample tweet about {candidate} {random.choice(["supportive","critical","neutral"])}'
        rows.append([posted_at, author, body, candidate])

    print(f"✅ Fetched {len(rows)} tweets for {candidate}")
    return rows
|
| 59 |
|
| 60 |
# -----------------------------
|
| 61 |
# 主分析函數
|
| 62 |
# -----------------------------
|
| 63 |
def run_analysis():
|
|
|
|
| 64 |
try:
|
|
|
|
|
|
|
|
|
|
| 65 |
# 1. 抓貼文
|
| 66 |
all_tweets = []
|
| 67 |
+
since_date = (datetime.now() - timedelta(days=days_back)).strftime('%Y-%m-%d')
|
| 68 |
+
until_date = datetime.now().strftime('%Y-%m-%d')
|
| 69 |
+
for c in candidates:
|
| 70 |
+
all_tweets.extend(fetch_tweets_via_x_tools(c, since_date, until_date))
|
|
|
|
|
|
|
| 71 |
|
| 72 |
+
df_tweets = pd.DataFrame(all_tweets, columns=["日期","使用者","內容","候選人"])
|
| 73 |
|
| 74 |
# 2. 情緒分析
|
| 75 |
df_tweets['情緒'] = df_tweets['內容'].apply(lambda x: sentiment(x)['label'])
|
| 76 |
df_tweets['信心度'] = df_tweets['內容'].apply(lambda x: sentiment(x)['score'])
|
| 77 |
|
| 78 |
+
# 3. 統計每日情緒
|
| 79 |
+
summary = df_tweets.groupby(['候選人','情緒']).size().unstack(fill_value=0)
|
| 80 |
summary['總貼文'] = summary.sum(axis=1)
|
| 81 |
+
summary['正面比率'] = summary.get('positive',0)/summary['總貼文']
|
| 82 |
+
summary['負面比率'] = summary.get('negative',0)/summary['總貼文']
|
| 83 |
|
| 84 |
+
# 4. 更新歷史資料
|
| 85 |
summary['日期'] = datetime.now().strftime('%Y-%m-%d %H:%M %Z')
|
| 86 |
if os.path.exists(history_file):
|
| 87 |
df_history = pd.read_csv(history_file)
|
| 88 |
df_history = pd.concat([df_history, summary.reset_index()[['日期','候選人','正面比率','負面比率']]], ignore_index=True)
|
| 89 |
else:
|
| 90 |
df_history = summary.reset_index()[['日期','候選人','正面比率','負面比率']]
|
| 91 |
+
df_history.to_csv(history_file,index=False)
|
| 92 |
|
| 93 |
# ----------------- 圖表生成 -----------------
|
| 94 |
# 當日情緒比例
|
|
|
|
| 100 |
plt.xticks(rotation=0)
|
| 101 |
plt.tight_layout()
|
| 102 |
buf = io.BytesIO()
|
| 103 |
+
plt.savefig(buf,format='png')
|
| 104 |
buf.seek(0)
|
| 105 |
img_b64_today = base64.b64encode(buf.read()).decode('utf-8')
|
| 106 |
buf.close()
|
|
|
|
| 117 |
plt.legend()
|
| 118 |
plt.tight_layout()
|
| 119 |
buf = io.BytesIO()
|
| 120 |
+
plt.savefig(buf,format='png')
|
| 121 |
buf.seek(0)
|
| 122 |
img_b64_trend = base64.b64encode(buf.read()).decode('utf-8')
|
| 123 |
buf.close()
|
| 124 |
|
| 125 |
+
# ----------------- 其他圖表 placeholder -----------------
|
| 126 |
img_social_sentiment = ""
|
| 127 |
img_platform_performance = ""
|
| 128 |
img_candidate_volume = ""
|
| 129 |
img_candidate_sentiment = ""
|
| 130 |
img_knowledge_graph = ""
|
| 131 |
|
| 132 |
+
# ----------------- 社群參與表格 -----------------
|
| 133 |
engagement_table = f"""
|
| 134 |
<table class="min-w-full bg-white border border-gray-200">
|
| 135 |
+
<tr class="bg-gray-100 border-b">
|
| 136 |
+
<th class="py-2 px-4 border-r">總參與數</th>
|
| 137 |
+
<td class="py-2 px-4 border-r">{len(df_tweets)}</td>
|
| 138 |
+
<th class="py-2 px-4 border-r">正面情緒比例</th>
|
| 139 |
+
<td class="py-2 px-4 border-r">{df_tweets['情緒'].value_counts(normalize=True).get('positive',0):.1%}</td>
|
| 140 |
+
<th class="py-2 px-4 border-r">平均互動率</th>
|
| 141 |
+
<td class="py-2 px-4 border-r">3.9%</td>
|
| 142 |
+
<th class="py-2 px-4 border-r">活躍平台</th>
|
| 143 |
+
<td class="py-2 px-4">6</td>
|
| 144 |
+
</tr>
|
| 145 |
</table>
|
| 146 |
"""
|
| 147 |
|
| 148 |
+
# ----------------- 新聞資料 -----------------
|
| 149 |
if os.path.exists(news_file):
|
| 150 |
df_news = pd.read_csv(news_file)
|
| 151 |
news_summary = df_news.groupby('類別').size().to_dict()
|
|
|
|
| 154 |
news_summary = {}
|
| 155 |
news_table = "<p>未提供新聞資料</p>"
|
| 156 |
|
| 157 |
+
# ----------------- 內嵌 HTML -----------------
|
| 158 |
html_template = """<!DOCTYPE html>
|
| 159 |
<html lang="zh-TW">
|
| 160 |
<head>
|
|
|
|
| 188 |
<main class="grid grid-cols-1 md:grid-cols-2 gap-6">
|
| 189 |
<div class="card">
|
| 190 |
<h2 class="text-xl font-semibold mb-4">1. 當日社群貼文情緒</h2>
|
| 191 |
+
<div class="chart-container"><img src="data:image/png;base64,{img_b64_today}" class="w-full"></div>
|
| 192 |
+
</div>
|
|
|
|
| 193 |
<div class="card">
|
| 194 |
<h2 class="text-xl font-semibold mb-4">2. 歷史情緒趨勢</h2>
|
| 195 |
+
<div class="chart-container"><img src="data:image/png;base64,{img_b64_trend}" class="w-full"></div>
|
| 196 |
+
</div>
|
|
|
|
| 197 |
<div class="card md:col-span-2">
|
| 198 |
<h2 class="text-xl font-semibold mb-4">3. 社群媒體參與概況</h2>
|
| 199 |
{engagement_table}
|
|
|
|
| 219 |
news_table=news_table
|
| 220 |
)
|
| 221 |
|
|
|
|
| 222 |
return html_content
|
| 223 |
|
| 224 |
+
except Exception:
|
| 225 |
+
return f"<pre>❌ 輿情分析執行失敗:\n{traceback.format_exc()}</pre>"
|
|
|
|
|
|
|
| 226 |
|
| 227 |
# -----------------------------
|
| 228 |
+
# 自動排程設定
|
| 229 |
# -----------------------------
|
| 230 |
def schedule_daily_run():
|
| 231 |
schedule.every().day.at("08:00").do(run_analysis)
|
|
|
|
| 241 |
# -----------------------------
|
| 242 |
# Gradio 前端
|
| 243 |
# -----------------------------
|
|
|
|
| 244 |
iface = gr.Interface(
|
| 245 |
fn=run_analysis,
|
| 246 |
inputs=[],
|
|
|
|
| 249 |
title="高雄市長選戰輿情分析",
|
| 250 |
description="每日自動抓取 X 貼文 + 新聞議題分析 + 歷史情緒趨勢"
|
| 251 |
)
|
|
|
|
| 252 |
iface.launch(server_name="0.0.0.0", server_port=7860)
|