KYTHY committed on
Commit
26d9a95
·
verified ·
1 Parent(s): 6bf60df

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -82
app.py CHANGED
@@ -3,16 +3,13 @@ import requests
3
  import pandas as pd
4
  from datetime import datetime, timedelta
5
  import nltk
6
- from wordcloud import WordCloud
7
- import base64
8
- from io import BytesIO
9
  import numpy as np
10
  from sklearn.linear_model import LinearRegression
11
  import plotly.graph_objects as go
12
  from plotly.subplots import make_subplots
13
  import yfinance as yf
14
  import torch
15
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
16
 
17
  # --------------------------
18
  # CONFIG
@@ -31,11 +28,21 @@ def load_finbert():
31
 
32
  tokenizer, model = load_finbert()
33
 
 
 
 
 
 
 
 
 
 
 
34
  # --------------------------
35
  # UTILITIES
36
  # --------------------------
37
  def analyze_text(text):
38
- """วิเคราะห์อารมณ์ของข่าว"""
39
  if not text or not text.strip():
40
  return 0
41
 
@@ -57,13 +64,15 @@ def analyze_text(text):
57
  return float(score)
58
 
59
 
60
- def generate_wordcloud(text):
61
- stopwords = nltk.corpus.stopwords.words('english')
62
- wordcloud = WordCloud(width=800, height=400, background_color="white", stopwords=stopwords).generate(text)
63
- buf = BytesIO()
64
- wordcloud.to_image().save(buf, format="PNG")
65
- return base64.b64encode(buf.getvalue()).decode()
66
-
 
 
67
 
68
  # --------------------------
69
  # แปลงชื่อ/ตัวย่อ → (Company Name, Symbol)
@@ -96,7 +105,6 @@ def resolve_company_symbol(keyword: str):
96
 
97
  return name, ticker
98
 
99
-
100
  # --------------------------
101
  # ดึงข่าว 7 วัน
102
  # --------------------------
@@ -143,7 +151,6 @@ def fetch_financial_news(keyword):
143
 
144
  return pd.DataFrame(all_articles)
145
 
146
-
147
  # --------------------------
148
  # ดึงราคาหุ้น
149
  # --------------------------
@@ -169,7 +176,6 @@ def fetch_stock_price(symbol, start_date, end_date):
169
  st.warning(f"ดึงราคาหุ้นล้มเหลว: {e}")
170
  return pd.DataFrame()
171
 
172
-
173
  # --------------------------
174
  # MAIN APP
175
  # --------------------------
@@ -208,14 +214,16 @@ def main():
208
  col2.metric("ข่าวเชิงบวก", f"{pos_pct:.1f}%")
209
  col3.metric("ข่าวเชิงลบ", f"{neg_pct:.1f}%")
210
 
211
- # WordCloud
212
- st.subheader("☁️ Word Cloud")
213
- all_text = " ".join(news_df["text"].tolist())
214
- img = generate_wordcloud(all_text)
215
- st.image(f"data:image/png;base64,{img}", use_column_width=True)
 
 
216
 
217
  # ---------------------------------------------------------
218
- # เตรียมข้อมูลสำหรับกราฟ Sentiment & Price
219
  # ---------------------------------------------------------
220
  st.subheader("📈 แนวโน้มอารมณ์ของข่าว & ราคาหุ้น")
221
 
@@ -243,28 +251,21 @@ def main():
243
  # ดึงราคาหุ้น
244
  _, symbol = resolve_company_symbol(keyword)
245
  min_date, max_date = df_sorted["date_day"].min(), df_sorted["date_day"].max()
246
-
247
  st.info(f"กำลังดึงราคาหุ้น {symbol} ...")
248
  stock_df = fetch_stock_price(symbol, min_date, max_date)
249
 
250
  plot_data = pd.merge(df_sorted, stock_df, left_on="date_day", right_on="date", how="left")
251
 
252
- # ---------------------------------------------------------
253
  # Correlation
254
- # ---------------------------------------------------------
255
  correlation = plot_data['price'].corr(plot_data['avg_sentiment'])
256
-
257
  corr_text = "ไม่มีความสัมพันธ์"
258
  if correlation > 0.5:
259
  corr_text = "มีความสัมพันธ์ในทิศทางเดียวกัน"
260
  elif correlation < -0.5:
261
  corr_text = "มีความสัมพันธ์ในทิศทางตรงข้าม"
 
262
 
263
- st.metric("วิเคราะห์ความสัมพันธ์ระหว่างอารมณ์ของข่าวกับราคาหุ้น", corr_text, f"{correlation:.2f}")
264
-
265
- # ---------------------------------------------------------
266
  # Forecast Sentiment
267
- # ---------------------------------------------------------
268
  plot_data["timestamp"] = (plot_data["date_day"] - plot_data["date_day"].min()).dt.days
269
  train_data = plot_data.dropna(subset=['avg_sentiment'])
270
 
@@ -280,87 +281,51 @@ def main():
280
  future_dates = [plot_data["date_day"].max() + timedelta(days=i) for i in range(1, future_days + 1)]
281
  future_preds = model_lr.predict(future_timestamps.reshape(-1, 1))
282
 
283
- # ---------------------------------------------------------
284
  # Plot
285
- # ---------------------------------------------------------
286
  fig = make_subplots(rows=2, cols=1, specs=[[{"secondary_y": True}], [{}]],
287
  row_heights=[0.7, 0.3], vertical_spacing=0.1,
288
  shared_xaxes=True)
289
 
290
  # ราคาหุ้น
291
- fig.add_trace(
292
- go.Scatter(
293
- x=plot_data["date_day"], y=plot_data["price"],
294
- name=f"{symbol} Price", mode="lines+markers", line=dict(color="orange")
295
- ),
296
- row=1, col=1, secondary_y=False
297
- )
298
-
299
  # Sentiment จริง
300
- fig.add_trace(
301
- go.Scatter(
302
- x=plot_data["date_day"], y=plot_data["avg_sentiment"],
303
- name="Actual Sentiment", mode="lines+markers", line=dict(color="blue")
304
- ),
305
- row=1, col=1, secondary_y=True
306
- )
307
-
308
  # Sentiment พยากรณ์
309
  if "future_preds" in locals():
310
- fig.add_trace(
311
- go.Scatter(
312
- x=future_dates, y=future_preds,
313
- name="Predicted Sentiment", mode="lines+markers", line=dict(color="#05a0fa", dash="dash")
314
- ),
315
- row=1, col=1, secondary_y=True
316
- )
317
-
318
- # ---------------------------------------------------------
319
  # เส้นเชื่อม Actual -> Predicted
320
- # ---------------------------------------------------------
321
  last_actual_date = plot_data["date_day"].max()
322
  last_actual_value = plot_data["avg_sentiment"].iloc[-1]
323
  first_pred_date = future_dates[0]
324
  first_pred_value = future_preds[0]
325
-
326
- fig.add_trace(
327
- go.Scatter(
328
- x=[last_actual_date, first_pred_date],
329
- y=[last_actual_value, first_pred_value],
330
- mode="lines",
331
- line=dict(color="#05a0fa", dash="dot"),
332
- name="Connector Actual→Predicted"
333
- ),
334
- row=1, col=1, secondary_y=True
335
- )
336
 
337
  # จำนวนข่าว
338
  for col in ["neutral", "negative", "positive"]:
339
  if col not in plot_data.columns:
340
  plot_data[col] = 0
341
-
342
  fig.add_trace(go.Bar(x=plot_data["date_day"], y=plot_data["neutral"], name="Neutral",
343
- marker_color='rgba(128, 128, 128, 0.7)'), row=2, col=1)
344
  fig.add_trace(go.Bar(x=plot_data["date_day"], y=plot_data["negative"], name="Negative",
345
- marker_color='rgba(255, 0, 0, 0.7)'), row=2, col=1)
346
  fig.add_trace(go.Bar(x=plot_data["date_day"], y=plot_data["positive"], name="Positive",
347
- marker_color='rgba(0, 128, 0, 0.7)'), row=2, col=1)
348
-
349
 
350
- fig.update_layout(
351
- title=f"แนวโน้มอารมณ์ของข่าว + ราคาหุ้น ({symbol})",
352
- barmode="stack",
353
- height=650,
354
- hovermode="x unified",
355
- template="plotly_white"
356
- )
357
 
358
  st.plotly_chart(fig, use_container_width=True)
359
 
360
  # แสดงรายการข่าว
361
  st.subheader("📰 รายการข่าวทั้งหมด")
362
- st.dataframe(news_df[["date", "source", "text", "sentiment", "url"]], use_container_width=True)
363
-
364
 
365
  # ---------------------------------------------------------
366
  # RUN APP
 
3
  import pandas as pd
4
  from datetime import datetime, timedelta
5
  import nltk
 
 
 
6
  import numpy as np
7
  from sklearn.linear_model import LinearRegression
8
  import plotly.graph_objects as go
9
  from plotly.subplots import make_subplots
10
  import yfinance as yf
11
  import torch
12
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
13
 
14
  # --------------------------
15
  # CONFIG
 
28
 
29
  tokenizer, model = load_finbert()
30
 
31
+ # --------------------------
32
+ # โหลด Zero-shot classifier สำหรับธีมข่าว
33
+ # --------------------------
34
@st.cache_resource
def load_theme_classifier():
    """Create the zero-shot classification pipeline used for news themes.

    The pipeline is built from the ``facebook/bart-large-mnli`` checkpoint.
    The ``st.cache_resource`` decorator is applied so the object is meant to
    be created once and reused rather than rebuilt on each call.

    Returns:
        The object returned by ``transformers.pipeline`` for the
        ``"zero-shot-classification"`` task.
    """
    zero_shot = pipeline(
        "zero-shot-classification",
        model="facebook/bart-large-mnli",
    )
    return zero_shot
37
+
38
+ theme_classifier = load_theme_classifier()
39
+ candidate_labels = ["Stock Movement", "Earnings", "M&A", "Regulation", "Product Launch", "Market Analysis"]
40
+
41
  # --------------------------
42
  # UTILITIES
43
  # --------------------------
44
  def analyze_text(text):
45
+ """วิเคราะห์อารมณ์ของข่าวด้วย FinBERT"""
46
  if not text or not text.strip():
47
  return 0
48
 
 
64
  return float(score)
65
 
66
 
67
def summarize_themes(news_texts, classifier=None, labels=None):
    """Label every news text with its top zero-shot theme.

    The result always has exactly one entry per input text, in the same
    order, so it can be assigned directly as a DataFrame column. Texts that
    cannot be classified (non-strings such as ``None``/NaN, or strings that
    are empty or whitespace-only) yield ``None`` instead of being dropped;
    dropping them would make the result shorter than the input and break
    column assignment.

    Args:
        news_texts: Iterable of article texts.
        classifier: Callable invoked as ``classifier(text, labels)`` that
            returns a mapping with a ``"labels"`` sequence. Defaults to the
            module-level ``theme_classifier``.
        labels: Candidate theme labels passed to the classifier. Defaults to
            the module-level ``candidate_labels``.

    Returns:
        list: One theme (the first entry of the classifier's ``"labels"``)
        or ``None`` per input text.
    """
    # Resolve the module-level defaults lazily so explicit arguments work
    # even where those globals are not defined.
    if classifier is None:
        classifier = theme_classifier
    if labels is None:
        labels = candidate_labels

    themes = []
    for text in news_texts:
        # Keep a placeholder for unusable texts to stay aligned with input.
        if not isinstance(text, str) or not text.strip():
            themes.append(None)
            continue
        result = classifier(text, labels)
        themes.append(result["labels"][0])
    return themes
76
 
77
  # --------------------------
78
  # แปลงชื่อ/ตัวย่อ → (Company Name, Symbol)
 
105
 
106
  return name, ticker
107
 
 
108
  # --------------------------
109
  # ดึงข่าว 7 วัน
110
  # --------------------------
 
151
 
152
  return pd.DataFrame(all_articles)
153
 
 
154
  # --------------------------
155
  # ดึงราคาหุ้น
156
  # --------------------------
 
176
  st.warning(f"ดึงราคาหุ้นล้มเหลว: {e}")
177
  return pd.DataFrame()
178
 
 
179
  # --------------------------
180
  # MAIN APP
181
  # --------------------------
 
214
  col2.metric("ข่าวเชิงบวก", f"{pos_pct:.1f}%")
215
  col3.metric("ข่าวเชิงลบ", f"{neg_pct:.1f}%")
216
 
217
+ # ---------------------------------------------------------
218
+ # ธีมข่าวแทน Word Cloud
219
+ # ---------------------------------------------------------
220
+ st.subheader("📰 ธีมข่าว (Top Theme per Article)")
221
+ news_df["theme"] = summarize_themes(news_df["text"].tolist())
222
+ theme_counts = news_df["theme"].value_counts()
223
+ st.bar_chart(theme_counts)
224
 
225
  # ---------------------------------------------------------
226
+ # ส่วนกราฟ Sentiment & Price (เหมือนเดิม)
227
  # ---------------------------------------------------------
228
  st.subheader("📈 แนวโน้มอารมณ์ของข่าว & ราคาหุ้น")
229
 
 
251
  # ดึงราคาหุ้น
252
  _, symbol = resolve_company_symbol(keyword)
253
  min_date, max_date = df_sorted["date_day"].min(), df_sorted["date_day"].max()
 
254
  st.info(f"กำลังดึงราคาหุ้น {symbol} ...")
255
  stock_df = fetch_stock_price(symbol, min_date, max_date)
256
 
257
  plot_data = pd.merge(df_sorted, stock_df, left_on="date_day", right_on="date", how="left")
258
 
 
259
  # Correlation
 
260
  correlation = plot_data['price'].corr(plot_data['avg_sentiment'])
 
261
  corr_text = "ไม่มีความสัมพันธ์"
262
  if correlation > 0.5:
263
  corr_text = "มีความสัมพันธ์ในทิศทางเดียวกัน"
264
  elif correlation < -0.5:
265
  corr_text = "มีความสัมพันธ์ในทิศทางตรงข้าม"
266
+ st.metric("วิเคราะห์ความสัมพันธ์ระหว่างอารมณ์ของข่าวกับราคาหุ้น (Correlation)", corr_text, f"{correlation:.2f}")
267
 
 
 
 
268
  # Forecast Sentiment
 
269
  plot_data["timestamp"] = (plot_data["date_day"] - plot_data["date_day"].min()).dt.days
270
  train_data = plot_data.dropna(subset=['avg_sentiment'])
271
 
 
281
  future_dates = [plot_data["date_day"].max() + timedelta(days=i) for i in range(1, future_days + 1)]
282
  future_preds = model_lr.predict(future_timestamps.reshape(-1, 1))
283
 
 
284
  # Plot
 
285
  fig = make_subplots(rows=2, cols=1, specs=[[{"secondary_y": True}], [{}]],
286
  row_heights=[0.7, 0.3], vertical_spacing=0.1,
287
  shared_xaxes=True)
288
 
289
  # ราคาหุ้น
290
+ fig.add_trace(go.Scatter(x=plot_data["date_day"], y=plot_data["price"], name=f"{symbol} Price",
291
+ mode="lines+markers", line=dict(color="orange")), row=1, col=1, secondary_y=False)
 
 
 
 
 
 
292
  # Sentiment จริง
293
+ fig.add_trace(go.Scatter(x=plot_data["date_day"], y=plot_data["avg_sentiment"], name="Actual Sentiment",
294
+ mode="lines+markers", line=dict(color="blue")), row=1, col=1, secondary_y=True)
 
 
 
 
 
 
295
  # Sentiment พยากรณ์
296
  if "future_preds" in locals():
297
+ fig.add_trace(go.Scatter(x=future_dates, y=future_preds, name="Predicted Sentiment",
298
+ mode="lines+markers", line=dict(color="#05a0fa", dash="dash")), row=1, col=1, secondary_y=True)
 
 
 
 
 
 
 
299
  # เส้นเชื่อม Actual -> Predicted
 
300
  last_actual_date = plot_data["date_day"].max()
301
  last_actual_value = plot_data["avg_sentiment"].iloc[-1]
302
  first_pred_date = future_dates[0]
303
  first_pred_value = future_preds[0]
304
+ fig.add_trace(go.Scatter(x=[last_actual_date, first_pred_date],
305
+ y=[last_actual_value, first_pred_value],
306
+ mode="lines",
307
+ line=dict(color="#05a0fa", dash="dot"),
308
+ name="Connector Actual→Predicted"), row=1, col=1, secondary_y=True)
 
 
 
 
 
 
309
 
310
  # จำนวนข่าว
311
  for col in ["neutral", "negative", "positive"]:
312
  if col not in plot_data.columns:
313
  plot_data[col] = 0
 
314
  fig.add_trace(go.Bar(x=plot_data["date_day"], y=plot_data["neutral"], name="Neutral",
315
+ marker_color='rgba(128, 128, 128, 0.7)'), row=2, col=1)
316
  fig.add_trace(go.Bar(x=plot_data["date_day"], y=plot_data["negative"], name="Negative",
317
+ marker_color='rgba(255, 0, 0, 0.7)'), row=2, col=1)
318
  fig.add_trace(go.Bar(x=plot_data["date_day"], y=plot_data["positive"], name="Positive",
319
+ marker_color='rgba(0, 128, 0, 0.7)'), row=2, col=1)
 
320
 
321
+ fig.update_layout(title=f"แนวโน้มอารมณ์ของข่าว + ราคาหุ้น ({symbol})",
322
+ barmode="stack", height=650, hovermode="x unified", template="plotly_white")
 
 
 
 
 
323
 
324
  st.plotly_chart(fig, use_container_width=True)
325
 
326
  # แสดงรายการข่าว
327
  st.subheader("📰 รายการข่าวทั้งหมด")
328
+ st.dataframe(news_df[["date", "source", "text", "sentiment", "theme", "url"]], use_container_width=True)
 
329
 
330
  # ---------------------------------------------------------
331
  # RUN APP