Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ==========================================
|
| 2 |
+
# 高雄市長選戰輕量化輿情分析 (HF Spaces)
|
| 3 |
+
# ==========================================
|
| 4 |
+
|
| 5 |
+
import snscrape.modules.twitter as sntwitter
|
| 6 |
+
import pandas as pd
|
| 7 |
+
from datetime import datetime, timedelta
|
| 8 |
+
from transformers import pipeline
|
| 9 |
+
import matplotlib.pyplot as plt
|
| 10 |
+
import io
|
| 11 |
+
import base64
|
| 12 |
+
import gradio as gr
|
| 13 |
+
|
| 14 |
+
# -----------------------------
|
| 15 |
+
# 參數設定
|
| 16 |
+
# -----------------------------
|
| 17 |
+
# Candidate names used verbatim as the search keyword for each scrape query.
candidates = ["許智傑", "邱議瑩", "賴瑞隆", "林岱樺", "柯志恩"]
# Width of the search window, in days, counted back from the current date.
days_back = 7
# Upper bound on posts collected per candidate; kept small for the hosted deployment.
max_tweets_per_candidate = 100
|
| 20 |
+
|
| 21 |
+
# -----------------------------
|
| 22 |
+
# 中文情緒分析模型
|
| 23 |
+
# -----------------------------
|
| 24 |
+
# Build the transformers "sentiment-analysis" pipeline once at import time so
# every run_analysis() call reuses the same loaded model.
# NOTE(review): the model id below could not be verified from this file —
# confirm it exists on the Hub and which label strings it emits.
sentiment = pipeline("sentiment-analysis", model="uer/roberta-base-finetuned-chinanews-chinese-sentiment")
|
| 25 |
+
|
| 26 |
+
# -----------------------------
|
| 27 |
+
# 主分析函數
|
| 28 |
+
# -----------------------------
|
| 29 |
+
def run_analysis():
    """Scrape recent posts per candidate, score sentiment, and render an HTML report.

    Reads the module-level ``candidates``, ``days_back``,
    ``max_tweets_per_candidate`` and ``sentiment`` objects.

    Returns:
        str: An HTML fragment containing a per-candidate sentiment summary
        table and an inline base64-encoded PNG bar chart. When no posts were
        collected, a short notice is returned instead of the table and chart.
    """
    # Take a single timestamp so the query window and the report heading
    # cannot disagree if the call straddles midnight.
    now = datetime.now()
    since_date = (now - timedelta(days=days_back)).strftime('%Y-%m-%d')
    until_date = now.strftime('%Y-%m-%d')

    # 1. Collect posts for every candidate.
    all_tweets = []
    for candidate in candidates:
        query = f'{candidate} since:{since_date} until:{until_date}'
        for i, tweet in enumerate(sntwitter.TwitterSearchScraper(query).get_items()):
            if i >= max_tweets_per_candidate:
                break
            all_tweets.append([tweet.date, tweet.user.username, tweet.content, candidate])
    df_tweets = pd.DataFrame(all_tweets, columns=["日期", "使用者", "內容", "候選人"])

    # Nothing scraped: the grouping and plotting below would fail on an empty
    # frame, so return a readable notice instead.
    if df_tweets.empty:
        return f"""
    <h2>高雄市長選戰輿情摘要 ({until_date})</h2>
    <p>查無符合條件的貼文,無法產生輿情報告。</p>
    """

    # 2. Sentiment analysis. Run the model once per post and reuse the result
    # for both the label and the confidence score.
    predictions = [sentiment(text)[0] for text in df_tweets['內容']]
    df_tweets['情緒'] = [pred['label'] for pred in predictions]
    df_tweets['信心度'] = [pred['score'] for pred in predictions]

    summary = df_tweets.groupby(['候選人', '情緒']).size().unstack(fill_value=0)
    summary['總貼文'] = summary.sum(axis=1)
    # NOTE(review): assumes the model emits labels exactly 'positive' and
    # 'negative'; if it does not, both ratios fall back to 0 — confirm.
    summary['正面比率'] = summary.get('positive', 0) / summary['總貼文']
    summary['負面比率'] = summary.get('negative', 0) / summary['總貼文']

    # 3. Visualisation. Draw on an explicit Axes so the requested figure size
    # is the one that gets saved, and always close the figure so repeated
    # requests do not accumulate open figures.
    fig, ax = plt.subplots(figsize=(8, 5))
    try:
        summary[['正面比率', '負面比率']].plot(
            kind='bar', stacked=True, colormap='coolwarm', ax=ax, rot=0
        )
        ax.set_title("候選人社群情緒比例")
        ax.set_ylabel("比例")
        ax.set_xlabel("候選人")
        fig.tight_layout()

        with io.BytesIO() as buf:
            fig.savefig(buf, format="png")
            img_b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
    finally:
        plt.close(fig)

    # 4. Live report (HTML) with the chart embedded as a data URI.
    html_report = f"""
    <h2>高雄市長選戰輿情摘要 ({until_date})</h2>
    <h3>1. 社群貼文聲量與情緒</h3>
    {summary.to_html()}
    <h3>2. 情緒圖表</h3>
    <img src='data:image/png;base64,{img_b64}' width='600'>
    """
    return html_report
|
| 76 |
+
|
| 77 |
+
# -----------------------------
|
| 78 |
+
# Gradio 前端
|
| 79 |
+
# -----------------------------
|
| 80 |
+
# Gradio front end: a no-input interface whose submit action runs
# run_analysis() and renders the returned string as HTML.
iface = gr.Interface(
    fn=run_analysis,
    inputs=[],
    # Pass a component instance; Interface documents instances or string
    # shortcuts for `outputs`, not the bare class.
    outputs=gr.HTML(),
    live=False,
    title="高雄市長選戰輿情分析",
    description="抓取 X(Twitter)貼文、分析情緒比例,並生成即時輿情報告",
)

# Start the web server when the script is executed.
iface.launch()
|