KYTHY committed · Commit 5455f6f · verified · 1 parent: 259000b

Update app.py

Files changed (1):
  1. app.py +141 -372

app.py CHANGED
@@ -1,392 +1,161 @@
  import streamlit as st
- from transformers import pipeline
- from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
  import pandas as pd
- from datetime import datetime, timedelta
- import plotly.graph_objects as go
- from wordcloud import WordCloud
- import base64
- from io import BytesIO
  import nltk
- from textblob import TextBlob
- import os
  import time
- from functools import lru_cache
- import numpy as np
- from sklearn.linear_model import Ridge
- from sklearn.preprocessing import PolynomialFeatures
- from sklearn.pipeline import make_pipeline
- import feedparser
-
- # --------------------------
- # Initial Setup
- # --------------------------
-
- st.set_page_config(
-     page_title="🚀 SentimentSync Pro",
-     page_icon="📈",
-     layout="wide"
- )
-
- # --------------------------
- # Performance Optimizations
- # --------------------------

  @st.cache_resource
- def load_models():
-     """Load models with progress indicators"""
-     progress = st.progress(0, text="Loading sentiment models...")
-
-     try:
-         with st.spinner("Loading BERT model..."):
-             bert_sentiment = pipeline(
-                 "sentiment-analysis",
-                 model="nlptown/bert-base-multilingual-uncased-sentiment"
-             )
-         progress.progress(50)
-
-         with st.spinner("Loading VADER analyzer..."):
-             vader_analyzer = SentimentIntensityAnalyzer()
-         progress.progress(100)
-
-         return bert_sentiment, vader_analyzer
-     except Exception as e:
-         st.error(f"Model loading failed: {str(e)}")
-         return None, None
-
- # --------------------------
- # Fetch Financial News
- # --------------------------

  @st.cache_data(ttl=3600, show_spinner="Fetching financial news...")
- def fetch_financial_news(keyword, limit=30):
-     """Fetch recent financial news (past 7 days) using Google News RSS"""
-     try:
-         base_url = "https://news.google.com/rss/search"
-         query = f"{keyword}+finance+stock"
-         feed_url = f"{base_url}?q={query}&hl=en-US&gl=US&ceid=US:en"
-
-         feed = feedparser.parse(feed_url)
-         seven_days_ago = datetime.now() - timedelta(days=7)
-
-         articles = []
-         for entry in feed.entries:
-             published = None
-             if hasattr(entry, 'published_parsed') and entry.published_parsed:
-                 published = datetime(*entry.published_parsed[:6])
-             elif hasattr(entry, 'updated_parsed') and entry.updated_parsed:
-                 published = datetime(*entry.updated_parsed[:6])
-             else:
-                 continue
-
-             if published < seven_days_ago:
-                 continue
-
-             text = f"{entry.title}\n{entry.summary}" if hasattr(entry, 'summary') else entry.title
-
-             articles.append({
-                 'date': published,
-                 'text': text,
-                 'source': 'Financial News',
-                 'url': entry.link
-             })
-
-             if len(articles) >= limit:
-                 break
-
-         return pd.DataFrame(articles)
-
-     except Exception as e:
-         st.error(f"News fetch error: {str(e)}")
          return pd.DataFrame()
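For context on what's being removed: the old fetcher relied on feedparser's `published_parsed`, a `time.struct_time`, which is why dates were rebuilt with `datetime(*...[:6])`. A minimal sketch of that pattern (hypothetical query; guards that the app code had are omitted):

```python
# Minimal feedparser sketch of the removed RSS path. Real entries may lack
# published_parsed, which the removed code guarded against with hasattr checks.
import feedparser
from datetime import datetime

feed = feedparser.parse(
    "https://news.google.com/rss/search?q=Tesla+finance+stock&hl=en-US&gl=US&ceid=US:en"
)
for entry in feed.entries[:3]:
    published = datetime(*entry.published_parsed[:6])  # struct_time -> datetime
    print(published.date(), entry.title)
```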
 
- # --------------------------
- # Sentiment Analysis
- # --------------------------
-
- def analyze_text(text, models):
-     bert_sentiment, vader_analyzer = models
-     truncated_text = text[:2000] if text else ""
-
-     try:
-         if not truncated_text.strip():
-             return {
-                 'vader': 0,
-                 'bert': 0,
-                 'textblob': 0,
-                 'bert_label': 'Neutral',
-                 'bert_confidence': 0
-             }
-
-         vader_score = vader_analyzer.polarity_scores(truncated_text)['compound']
-         textblob_score = TextBlob(truncated_text).sentiment.polarity
-
-         bert_result = bert_sentiment(truncated_text[:512])[0]
-
-         label_map = {
-             '1 star': -1,
-             '2 stars': -0.5,
-             '3 stars': 0,
-             '4 stars': 0.5,
-             '5 stars': 1
-         }
-         bert_num = label_map.get(bert_result['label'], 0)
-
-         return {
-             'vader': vader_score,
-             'bert': bert_num,
-             'textblob': textblob_score,
-             'bert_label': bert_result['label'],
-             'bert_confidence': bert_result['score']
-         }
-     except Exception as e:
-         st.error(f"Analysis error: {str(e)}")
-         return {
-             'vader': 0,
-             'bert': 0,
-             'textblob': 0,
-             'bert_label': 'Error',
-             'bert_confidence': 0
-         }
-
- # --------------------------
- # Visualization
- # --------------------------
-
- def generate_wordcloud(text):
-     try:
-         if not text.strip():
-             return ""
-
-         wordcloud = WordCloud(
-             width=800,
-             height=400,
-             background_color='white',
-             collocations=False,
-             stopwords=nltk.corpus.stopwords.words('english')
-         ).generate(text)
-
-         img = BytesIO()
-         wordcloud.to_image().save(img, format='PNG')
-         return base64.b64encode(img.getvalue()).decode()
-     except Exception as e:
-         st.error(f"Word cloud generation error: {str(e)}")
-         return ""
-
- # --------------------------
- # Prediction & Plotting
- # --------------------------
-
- def prepare_data_for_prediction(data):
-     try:
-         if data.empty:
-             st.warning("No data available for prediction")
-             return None
-
-         data = data.sort_values('date')
-         data = data.dropna(subset=['average'])
-         daily_data = data.groupby(pd.Grouper(key='date', freq='D'))['average'].mean().reset_index()
-         daily_data = daily_data.dropna(subset=['average'])
-
-         if len(daily_data) < 5:
-             st.warning("Insufficient valid data points for prediction (minimum 5 required)")
-             return None
-
-         daily_data['days'] = (daily_data['date'] - daily_data['date'].min()).dt.days
-         return daily_data
-     except Exception as e:
-         st.error(f"Data preparation error: {str(e)}")
-         return None
-
- def train_sentiment_model(data):
-     try:
-         if data is None or len(data) < 5:
-             return None, None
-
-         X = data['days'].values.reshape(-1, 1)
-         y = data['average'].values
-
-         model = make_pipeline(PolynomialFeatures(degree=2), Ridge(alpha=1.0))
-         model.fit(X, y)
-
-         return model, data
-     except Exception as e:
-         st.error(f"Model training error: {str(e)}")
-         return None, None
-
- def predict_future_sentiment(model, training_data, days_to_predict=15):
-     try:
-         if model is None or training_data is None:
-             return None
-
-         last_date = training_data['date'].max()
-         future_dates = [last_date + timedelta(days=i) for i in range(1, days_to_predict + 1)]
-         min_date = training_data['date'].min()
-         future_days = [(date - min_date).days for date in future_dates]
-         X_future = np.array(future_days).reshape(-1, 1)
-
-         predictions = model.predict(X_future)
-
-         pred_df = pd.DataFrame({
-             'date': future_dates,
-             'average': predictions,
-             'type': 'prediction'
          })

-         training_df = training_data.copy()
-         training_df['type'] = 'actual'

-         return pd.concat([training_df, pred_df], ignore_index=True)
-     except Exception as e:
-         st.error(f"Prediction error: {str(e)}")
-         return None
-
- def plot_sentiment(data, keyword):
      try:
-         if data is None or data.empty:
-             st.warning("No data available for plotting sentiment trends")
-             return None
-
-         actual_data = data[data['type'] == 'actual']
-         pred_data = data[data['type'] == 'prediction']
-
-         fig = go.Figure()
-
-         if not actual_data.empty:
-             fig.add_trace(go.Scatter(
-                 x=actual_data['date'],
-                 y=actual_data['average'],
-                 name='Actual Sentiment',
-                 mode='lines+markers',
-                 line=dict(color='#636EFA')
-             ))
-
-         if not pred_data.empty:
-             fig.add_trace(go.Scatter(
-                 x=pred_data['date'],
-                 y=pred_data['average'],
-                 name='Predicted Sentiment',
-                 mode='lines+markers',
-                 line=dict(color='#EF553B', dash='dot')
-             ))
-
-         fig.update_layout(
-             title=f'Sentiment Analysis and Prediction for "{keyword}"',
-             xaxis_title="Date",
-             yaxis_title="Sentiment Score",
-             hovermode="x unified",
-             legend_title="Data Type"
          )

-         return fig
-     except Exception as e:
-         st.error(f"Plotting error: {str(e)}")
-         return None
-
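The removed forecaster fit a degree-2 polynomial with Ridge regularization. A toy sketch (made-up scores) shows why its 15-day extrapolations could swing sharply, which the replacement's plain LinearRegression avoids:

```python
# Toy data: a quadratic Ridge fit curves with the data but also extrapolates
# quadratically, so far-out forecasts can leave the [-1, 1] sentiment range.
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline

days = np.arange(7).reshape(-1, 1)  # one week of daily averages
scores = np.array([0.10, 0.15, 0.20, 0.10, 0.00, -0.10, -0.05])
model = make_pipeline(PolynomialFeatures(degree=2), Ridge(alpha=1.0))
model.fit(days, scores)
print(model.predict(np.array([[14], [21]])))  # days 14 and 21, extrapolated
```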
- # --------------------------
- # Main App
- # --------------------------
-
- def main():
-     st.title("🚀 SentimentSync Pro - Financial News Sentiment Dashboard")
-
-     with st.sidebar:
-         st.header("🔧 Analysis Controls")
-         analysis_mode = st.radio(
-             "Mode",
-             ["Text Analysis", "Financial News Analysis"],
-             index=1
-         )
-
-         if analysis_mode == "Text Analysis":
-             user_input = st.text_area("Enter text to analyze", height=200, placeholder="Paste your content here...")
-             analyze_btn = st.button("Analyze Now")
-         else:
-             keyword = st.text_input("Enter keyword (e.g., Apple, Tesla, Bitcoin)")
-             analyze_btn = st.button("Fetch & Analyze")
-
-         st.markdown("---")
-         show_details = st.checkbox("Show detailed results", value=False)
-         enable_prediction = st.checkbox("Enable sentiment prediction", value=True)
-         st.markdown("---")
-
-     if analyze_btn:
-         models = load_models()
-         if not all(models):
-             st.error("Model loading failed")
-             return
-
-         if analysis_mode == "Text Analysis":
-             if not user_input.strip():
-                 st.warning("Please enter some text")
-                 return
-
-             with st.spinner("Analyzing..."):
-                 result = analyze_text(user_input, models)
-                 st.success("✅ Analysis completed")
-
-                 cols = st.columns(3)
-                 cols[0].metric("VADER Score", f"{result['vader']:.2f}")
-                 cols[1].metric("BERT Label", result['bert_label'])
-                 cols[2].metric("TextBlob", f"{result['textblob']:.2f}")
-
-                 st.subheader("📊 Word Cloud")
-                 wc_img = f"data:image/png;base64,{generate_wordcloud(user_input)}"
-                 st.image(wc_img, use_column_width=True)
-
-         else:
-             if not keyword.strip():
-                 st.warning("Please enter a keyword")
-                 return
-
-             with st.spinner(f"Fetching financial news for '{keyword}'..."):
-                 start_time = time.time()
-                 news_data = fetch_financial_news(keyword)
-                 if news_data.empty:
-                     st.error("No news found for the past 7 days.")
-                     return
-
-                 analysis_results = []
-                 for _, row in news_data.iterrows():
-                     analysis_results.append(analyze_text(row['text'], models))
-
-                 news_data['vader'] = [r['vader'] for r in analysis_results]
-                 news_data['bert'] = [r['bert'] for r in analysis_results]
-                 news_data['textblob'] = [r['textblob'] for r in analysis_results]
-                 news_data['average'] = news_data[['vader', 'bert', 'textblob']].mean(axis=1)
-
-                 processing_time = time.time() - start_time
-                 st.success(f"Analyzed {len(news_data)} articles in {processing_time:.2f}s")
-
-                 avg_sentiment = news_data['average'].mean()
-                 cols = st.columns(3)
-                 cols[0].metric("Avg Sentiment", f"{avg_sentiment:.2f}")
-                 cols[1].metric("Positive", f"{(news_data['average'] > 0.1).mean() * 100:.1f}%")
-                 cols[2].metric("Negative", f"{(news_data['average'] < -0.1).mean() * 100:.1f}%")
-
-                 all_text = " ".join(news_data['text'])
-                 wc_img = f"data:image/png;base64,{generate_wordcloud(all_text)}"
-                 st.subheader("📊 Word Cloud")
-                 st.image(wc_img, use_column_width=True)
-
-                 if enable_prediction:
-                     daily_data = prepare_data_for_prediction(news_data)
-                     model, training_data = train_sentiment_model(daily_data)
-                     if model is not None:
-                         full_data = predict_future_sentiment(model, training_data)
-                         fig = plot_sentiment(full_data, keyword)
-                         st.plotly_chart(fig, use_container_width=True)
-
-                 if show_details:
-                     st.subheader("📰 Detailed News Data")
-                     st.dataframe(news_data[['date', 'source', 'text', 'average', 'url']], use_container_width=True)
-
-
- if __name__ == "__main__":
-     try:
-         nltk.data.path.append(os.path.join(os.path.expanduser("~"), "nltk_data"))
-         nltk.download('stopwords', quiet=True)
-     except:
-         pass
-
-     main()

  import streamlit as st
  import pandas as pd
+ import requests
+ import numpy as np
+ import plotly.express as px
  import nltk
  import time
+ from datetime import datetime, timedelta
+ from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
+ from textblob import TextBlob
+ from transformers import pipeline
+ from wordcloud import WordCloud
+ from sklearn.linear_model import LinearRegression
+ import matplotlib.pyplot as plt

+ # =========================================
+ # SETUP
+ # =========================================
+ st.set_page_config(page_title="Financial Sentiment Analyzer", layout="wide")

+ nltk.download("stopwords", quiet=True)

+ # Load the BERT sentiment model
  @st.cache_resource
+ def load_bert_model():
+     return pipeline("sentiment-analysis", model="nlptown/bert-base-multilingual-uncased-sentiment")

+ bert_model = load_bert_model()
+ vader = SentimentIntensityAnalyzer()
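For reference (not part of the commit), the nlptown pipeline returns one star-rating label per input, which is what `analyze_sentiment` below keys on:

```python
# Hypothetical sanity check of the pipeline's output shape; scores vary by input.
from transformers import pipeline

clf = pipeline("sentiment-analysis",
               model="nlptown/bert-base-multilingual-uncased-sentiment")
print(clf("Shares rallied after a strong earnings report")[0])
# -> e.g. {'label': '5 stars', 'score': ...}; labels run from '1 star' to '5 stars'
```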
 
 
+ # Put your API key here
+ API_KEY = st.secrets["NEWS_API_KEY"]  # set in .streamlit/secrets.toml
+ # or, if running locally:
+ # API_KEY = "88bc396d4eab4be494a4b86ec842db47"

+ # =========================================
+ # FUNCTION: fetch news from NewsAPI.org
+ # =========================================
  @st.cache_data(ttl=3600, show_spinner="Fetching financial news...")
+ def fetch_financial_news(keyword, days=7, limit=50):
+     """Fetch recent financial news from NewsAPI.org"""
+     to_date = datetime.now().strftime('%Y-%m-%d')
+     from_date = (datetime.now() - timedelta(days=days)).strftime('%Y-%m-%d')
+
+     url = (
+         f"https://newsapi.org/v2/everything?"
+         f"q={keyword}+finance+stock&"
+         f"from={from_date}&to={to_date}&"
+         f"language=en&"
+         f"sortBy=publishedAt&"
+         f"pageSize={limit}&"
+         f"apiKey={API_KEY}"
+     )
+
+     response = requests.get(url)
+     data = response.json()
+
+     if data.get("status") != "ok":
+         st.error(f"Error fetching news: {data.get('message', 'Unknown error')}")
          return pd.DataFrame()
+
+     articles = []
+     for a in data["articles"]:
+         articles.append({
+             "date": pd.to_datetime(a["publishedAt"]),
+             "text": f"{a['title']}\n{a.get('description', '')}",
+             "source": a["source"]["name"],
+             "url": a["url"]
          })
+
+     return pd.DataFrame(articles)
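The query string above is assembled by hand, so a keyword containing spaces or special characters is not URL-encoded. A sketch of the same NewsAPI `everything` request that lets requests handle encoding (`fetch_raw` is a hypothetical helper name):

```python
# Sketch: same request via requests' params handling; the keyword gets URL-encoded.
import requests

def fetch_raw(keyword, from_date, to_date, api_key, limit=50):
    params = {
        "q": f"{keyword} finance stock",
        "from": from_date,
        "to": to_date,
        "language": "en",
        "sortBy": "publishedAt",
        "pageSize": limit,
        "apiKey": api_key,
    }
    resp = requests.get("https://newsapi.org/v2/everything", params=params, timeout=10)
    resp.raise_for_status()
    return resp.json()
```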
 

+ # =========================================
+ # FUNCTION: analyze sentiment
+ # =========================================
+ def analyze_sentiment(text):
+     """Combine the scores from BERT, VADER, and TextBlob"""
      try:
+         bert_label = bert_model(text[:512])[0]["label"]
+         vader_score = vader.polarity_scores(text)["compound"]
+         blob_score = TextBlob(text).sentiment.polarity
+
+         bert_score = (
+             1 if "5" in bert_label or "4" in bert_label
+             else -1 if "1" in bert_label or "2" in bert_label
+             else 0
          )
+
+         final_score = np.mean([bert_score, np.sign(vader_score), np.sign(blob_score)])
+         return final_score
+     except Exception:
+         return 0
+
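Each article's score is therefore the mean of three coarse votes in {-1, 0, 1}. A worked example with made-up model outputs:

```python
# Hypothetical outputs: BERT "4 stars" -> +1, VADER compound 0.42 -> sign +1,
# TextBlob polarity -0.10 -> sign -1; two of the three voters lean positive.
import numpy as np

bert_score = 1
vader_score, blob_score = 0.42, -0.10
final = np.mean([bert_score, np.sign(vader_score), np.sign(blob_score)])
print(round(float(final), 3))  # 0.333
```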
+ # =========================================
+ # FUNCTION: build the word cloud
+ # =========================================
+ def create_wordcloud(texts):
+     text = " ".join(texts)
+     wc = WordCloud(width=800, height=400, background_color="white",
+                    stopwords=set(nltk.corpus.stopwords.words("english"))).generate(text)
+     return wc
+
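`matplotlib.pyplot` is imported above but the app renders the cloud via `st.image(wc.to_array())`. An equivalent matplotlib rendering (a sketch; `show_wordcloud` is a hypothetical helper) would be:

```python
# Sketch: render the WordCloud with matplotlib instead of st.image.
import matplotlib.pyplot as plt

def show_wordcloud(wc):
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.imshow(wc, interpolation="bilinear")  # WordCloud objects are imshow-able
    ax.axis("off")
    return fig  # in Streamlit: st.pyplot(fig)
```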
+ # =========================================
+ # FUNCTION: forecast the sentiment trend
+ # =========================================
+ def forecast_sentiment_trend(df):
+     df = df.sort_values("date")
+     df["timestamp"] = (df["date"] - df["date"].min()).dt.days
+     model = LinearRegression()
+     model.fit(df[["timestamp"]], df["sentiment"])
+     future = pd.DataFrame({"timestamp": np.arange(df["timestamp"].max()+1, df["timestamp"].max()+8)})
+     pred = model.predict(future)
+     return pred
+
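`forecast_sentiment_trend` returns a bare array, so the `st.line_chart(forecast)` call below plots it against indices 0..6 rather than dates. A sketch (with a hypothetical helper name) that reattaches calendar dates:

```python
# Sketch: pair the 7 predicted values with the future dates they refer to.
import pandas as pd

def forecast_with_dates(df, pred):
    last_date = df["date"].max().normalize()
    future_dates = pd.date_range(last_date + pd.Timedelta(days=1), periods=len(pred))
    return pd.Series(pred, index=future_dates, name="forecast")
```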
+ # =========================================
+ # MAIN APP
+ # =========================================
+ st.title("💹 Financial News Sentiment Analyzer (NewsAPI.org version)")
+ st.markdown("Analyze the sentiment of recent financial news from **NewsAPI.org** using BERT + VADER + TextBlob")
+
+ keyword = st.text_input("🔍 Enter a company / stock / search term", "Tesla")
+ limit = st.slider("Number of articles to fetch", 10, 100, 50)
+
+ if st.button("Start analyzing the news"):
+     with st.spinner(f"Fetching news about '{keyword}' ..."):
+         news_df = fetch_financial_news(keyword, days=7, limit=limit)
+
+     if news_df.empty:
+         st.error("No news found for the past 7 days")
+         st.stop()
+
+     st.success(f"✅ Fetched {len(news_df)} articles from NewsAPI.org")
+
+     # Analyze the sentiment of each article
+     st.info("🔎 Analyzing the sentiment of each article...")
+     news_df["sentiment"] = news_df["text"].apply(analyze_sentiment)
+
+     # Overall summary
+     avg_sentiment = news_df["sentiment"].mean()
+     st.metric("📊 Overall average sentiment", f"{avg_sentiment:.2f}")
+
+     # Trend chart
+     fig = px.line(news_df.sort_values("date"), x="date", y="sentiment",
+                   title=f"Sentiment trend for '{keyword}' news",
+                   markers=True)
+     st.plotly_chart(fig, use_container_width=True)
+
+     # Word cloud
+     st.subheader("☁️ Frequently used words in the news")
+     wc = create_wordcloud(news_df["text"].tolist())
+     st.image(wc.to_array())
+
+     # Forecast the trend
+     st.subheader("📈 Sentiment trend forecast for the next 7 days")
+     forecast = forecast_sentiment_trend(news_df)
+     st.line_chart(forecast)
+
+     # Show the source articles
+     st.subheader("📰 Articles used in the analysis")
+     for _, row in news_df.iterrows():
+         st.markdown(f"**[{row['source']}]({row['url']})** {row['date'].strftime('%Y-%m-%d')}  \n{row['text']}")
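Note that `st.secrets["NEWS_API_KEY"]` resolves only if `.streamlit/secrets.toml` defines a `NEWS_API_KEY` entry. A rough headless smoke test of the new pipeline, a sketch assuming the functions above are available in the same Python session and a valid key is configured:

```python
# Sketch: exercise fetch -> analyze -> forecast outside the Streamlit UI.
df = fetch_financial_news("Tesla", days=7, limit=20)
if not df.empty:
    df["sentiment"] = df["text"].apply(analyze_sentiment)
    print(df[["date", "source", "sentiment"]].head())
    print("7-day forecast:", forecast_sentiment_trend(df))
```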