shiue2000 committed on
Commit
9200c2b
·
verified ·
1 Parent(s): c34b06e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -0
app.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# ==========================================
# Kaohsiung mayoral race: lightweight public-opinion analysis (HF Spaces)
# ==========================================
4
+
5
+ import snscrape.modules.twitter as sntwitter
6
+ import pandas as pd
7
+ from datetime import datetime, timedelta
8
+ from transformers import pipeline
9
+ import matplotlib.pyplot as plt
10
+ import io
11
+ import base64
12
+ import gradio as gr
13
+
14
# -----------------------------
# Settings
# -----------------------------
# Candidate names; each one is used verbatim as a search keyword.
candidates = ["許智傑", "邱議瑩", "賴瑞隆", "林岱樺", "柯志恩"]
# Size of the look-back window, in days.
days_back = 7
# Per-candidate cap on fetched posts; keep it small for a Hugging Face deployment.
max_tweets_per_candidate = 100
20
+
21
# -----------------------------
# Chinese sentiment-analysis model
# -----------------------------
# Built once at import time so that every call to run_analysis() reuses the
# same loaded pipeline instead of reloading the model.
sentiment = pipeline(
    "sentiment-analysis",
    model="uer/roberta-base-finetuned-chinanews-chinese-sentiment",
)
25
+
26
# -----------------------------
# Main analysis function
# -----------------------------
def _fetch_tweets(since_date: str, until_date: str) -> "pd.DataFrame":
    """Scrape up to ``max_tweets_per_candidate`` posts for each candidate.

    Returns a DataFrame with the columns 日期 / 使用者 / 內容 / 候選人
    (date / username / content / candidate); it is empty when nothing is found.
    """
    from itertools import islice  # local import keeps this block self-contained

    limit = max(max_tweets_per_candidate, 0)
    rows = []
    for candidate in candidates:
        query = f'{candidate} since:{since_date} until:{until_date}'
        found = sntwitter.TwitterSearchScraper(query).get_items()
        # islice stops at the cap without pulling one extra item from the scraper.
        for tweet in islice(found, limit):
            rows.append([tweet.date, tweet.user.username, tweet.content, candidate])
    return pd.DataFrame(rows, columns=["日期", "使用者", "內容", "候選人"])


def _score_sentiment(df_tweets: "pd.DataFrame") -> None:
    """Add the 情緒 (label) and 信心度 (score) columns to ``df_tweets`` in place."""
    texts = df_tweets['內容'].tolist()
    # One batched call yields both label and score; the model no longer runs
    # twice per post. truncation=True guards against over-long posts.
    results = sentiment(texts, truncation=True)
    df_tweets['情緒'] = [result['label'] for result in results]
    df_tweets['信心度'] = [result['score'] for result in results]


def _summarize_sentiment(df_tweets: "pd.DataFrame") -> "pd.DataFrame":
    """Count posts per candidate and label, then add total and ratio columns."""
    summary = df_tweets.groupby(['候選人', '情緒']).size().unstack(fill_value=0)
    label_columns = list(summary.columns)

    def label_count(prefix):
        # NOTE(review): the exact label strings emitted by the model are not
        # visible here; matching on a case-insensitive prefix still accepts a
        # plain 'positive' / 'negative' and tolerates longer label names.
        matching = [c for c in label_columns if str(c).lower().startswith(prefix)]
        return summary[matching].sum(axis=1)

    positive = label_count('positive')
    negative = label_count('negative')
    total = summary[label_columns].sum(axis=1)

    summary['總貼文'] = total
    summary['正面比率'] = positive / total
    summary['負面比率'] = negative / total
    return summary


def _render_chart(summary: "pd.DataFrame") -> str:
    """Draw the stacked ratio bar chart and return it as a base64-encoded PNG."""
    # Draw on an explicit Axes: DataFrame.plot would otherwise open its own
    # figure, ignoring the requested size and leaving a stray figure behind.
    fig, ax = plt.subplots(figsize=(8, 5))
    try:
        summary[['正面比率', '負面比率']].plot(
            kind='bar', stacked=True, colormap='coolwarm', rot=0, ax=ax
        )
        ax.set_title("候選人社群情緒比例")
        ax.set_ylabel("比例")
        ax.set_xlabel("候選人")
        fig.tight_layout()
        with io.BytesIO() as buf:
            fig.savefig(buf, format="png")
            return base64.b64encode(buf.getvalue()).decode("utf-8")
    finally:
        # Always release the figure so repeated requests do not accumulate them.
        plt.close(fig)


def run_analysis() -> str:
    """Build the HTML public-opinion report for the configured candidates.

    Scrapes recent posts mentioning each candidate, classifies each post with
    the module-level ``sentiment`` pipeline, aggregates per-candidate label
    counts and positive/negative ratios, and renders a bar chart.

    Returns:
        An HTML fragment containing the summary table and an inline PNG chart,
        or a short "no posts found" fragment when the scrape returns nothing.
    """
    # Read the clock once so the search window and the heading agree.
    now = datetime.now()
    since_date = (now - timedelta(days=days_back)).strftime('%Y-%m-%d')
    # NOTE(review): the search operator `until:` is believed to be exclusive,
    # so posts from today may be left out - confirm and extend by one day if so.
    until_date = now.strftime('%Y-%m-%d')
    heading = f"<h2>高雄市長選戰輿情摘要 ({until_date})</h2>"

    # 1. Fetch posts
    df_tweets = _fetch_tweets(since_date, until_date)
    if df_tweets.empty:
        # Nothing to classify or plot; plotting an empty frame would raise.
        return f"{heading}\n<p>查無符合條件的貼文。</p>"

    # 2. Sentiment analysis
    _score_sentiment(df_tweets)
    summary = _summarize_sentiment(df_tweets)

    # 3. Visualization
    img_b64 = _render_chart(summary)

    # 4. Live report (HTML)
    html_report = f"""
    {heading}
    <h3>1. 社群貼文聲量與情緒</h3>
    {summary.to_html()}
    <h3>2. 情緒圖表</h3>
    <img src='data:image/png;base64,{img_b64}' width='600'>
    """
    return html_report
76
+
77
# -----------------------------
# Gradio front end
# -----------------------------
# The interface takes no inputs: pressing the button runs the full
# scrape -> classify -> report flow and shows the returned HTML.
iface = gr.Interface(
    fn=run_analysis,
    inputs=[],
    # Pass a component instance (not the class); instances and string
    # shortcuts are the documented forms for `outputs`.
    outputs=gr.HTML(),
    live=False,
    title="高雄市長選戰輿情分析",
    description="抓取 X(Twitter)貼文、分析情緒比例,並生成即時輿情報告",
)

iface.launch()