KYTHY commited on
Commit
06006a5
·
verified ·
1 Parent(s): 26d9a95

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -111
app.py CHANGED
@@ -38,6 +38,15 @@ def load_theme_classifier():
38
  theme_classifier = load_theme_classifier()
39
  candidate_labels = ["Stock Movement", "Earnings", "M&A", "Regulation", "Product Launch", "Market Analysis"]
40
 
 
 
 
 
 
 
 
 
 
41
  # --------------------------
42
  # UTILITIES
43
  # --------------------------
@@ -63,12 +72,26 @@ def analyze_text(text):
63
  score = (-1 * probs[0]) + (0 * probs[1]) + (1 * probs[2])
64
  return float(score)
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  def summarize_themes(news_texts):
68
  """สรุปธีมข่าวด้วย Zero-shot classification"""
69
  themes = []
70
  for text in news_texts:
71
  if not text.strip():
 
72
  continue
73
  result = theme_classifier(text, candidate_labels)
74
  themes.append(result["labels"][0])
@@ -81,7 +104,6 @@ def resolve_company_symbol(keyword: str):
81
  keyword = keyword.strip()
82
  ticker = None
83
  name = None
84
-
85
  try:
86
  data = yf.Ticker(keyword)
87
  info = data.info
@@ -97,12 +119,10 @@ def resolve_company_symbol(keyword: str):
97
  name = q.get("longname", q.get("shortname", keyword))
98
  except:
99
  pass
100
-
101
  if not ticker:
102
  ticker = keyword.upper()
103
  if not name:
104
  name = keyword.capitalize()
105
-
106
  return name, ticker
107
 
108
  # --------------------------
@@ -113,7 +133,6 @@ def fetch_financial_news(keyword):
113
  company, symbol = resolve_company_symbol(keyword)
114
  to_date = datetime.now().strftime('%Y-%m-%d')
115
  from_date = (datetime.now() - timedelta(days=7)).strftime('%Y-%m-%d')
116
-
117
  query_keyword = f"({company} OR {symbol}) finance stock"
118
 
119
  all_articles = []
@@ -160,18 +179,14 @@ def fetch_stock_price(symbol, start_date, end_date):
160
  start_str = (start_date - timedelta(days=2)).strftime('%Y-%m-%d')
161
  end_str = (end_date + timedelta(days=1)).strftime('%Y-%m-%d')
162
  df = yf.download(symbol, start=start_str, end=end_str, interval="1d")
163
-
164
  if df.empty:
165
  st.warning("ไม่พบข้อมูลราคาหุ้น")
166
  return pd.DataFrame()
167
-
168
  df = df.reset_index()
169
  df_subset = df[['Date', 'Close']]
170
  df_subset.columns = ['date', 'price']
171
  df_subset["date"] = pd.to_datetime(df_subset["date"].dt.date)
172
-
173
  return df_subset
174
-
175
  except Exception as e:
176
  st.warning(f"ดึงราคาหุ้นล้มเหลว: {e}")
177
  return pd.DataFrame()
@@ -204,6 +219,10 @@ def main():
204
  news_df["sentiment"] = news_df["text"].apply(analyze_text)
205
  news_df["date"] = pd.to_datetime(news_df["date"])
206
 
 
 
 
 
207
  # Metrics
208
  avg_sentiment = news_df["sentiment"].mean()
209
  pos_pct = (news_df["sentiment"] > 0.1).mean() * 100
@@ -214,114 +233,13 @@ def main():
214
  col2.metric("ข่าวเชิงบวก", f"{pos_pct:.1f}%")
215
  col3.metric("ข่าวเชิงลบ", f"{neg_pct:.1f}%")
216
 
217
- # ---------------------------------------------------------
218
- # ธีมข่าวแทน Word Cloud
219
- # ---------------------------------------------------------
220
  st.subheader("📰 ธีมข่าว (Top Theme per Article)")
221
  news_df["theme"] = summarize_themes(news_df["text"].tolist())
222
  theme_counts = news_df["theme"].value_counts()
223
  st.bar_chart(theme_counts)
224
 
225
- # ---------------------------------------------------------
226
- # ส่วนกราฟ Sentiment & Price (เหมือนเดิม)
227
- # ---------------------------------------------------------
228
- st.subheader("📈 แนวโน้มอารมณ์ของข่าว & ราคาหุ้น")
229
-
230
- news_df["date_day"] = pd.to_datetime(news_df["date"].dt.date)
231
-
232
- def sentiment_type(score):
233
- if score > 0.1:
234
- return "positive"
235
- if score < -0.1:
236
- return "negative"
237
- return "neutral"
238
-
239
- news_df["sentiment_type"] = news_df["sentiment"].apply(sentiment_type)
240
-
241
- daily_avg = news_df.groupby("date_day")["sentiment"].mean().reset_index(name="avg_sentiment")
242
- daily_counts = news_df.groupby(["date_day", "sentiment_type"]).size().unstack(fill_value=0).reset_index()
243
-
244
- df_sorted = pd.merge(daily_avg, daily_counts, on="date_day").sort_values("date_day")
245
-
246
- if len(df_sorted) < 2:
247
- st.warning("ข้อมูลไม่พอสร้างแนวโน้ม")
248
- st.dataframe(news_df)
249
- return
250
-
251
- # ดึงราคาหุ้น
252
- _, symbol = resolve_company_symbol(keyword)
253
- min_date, max_date = df_sorted["date_day"].min(), df_sorted["date_day"].max()
254
- st.info(f"กำลังดึงราคาหุ้น {symbol} ...")
255
- stock_df = fetch_stock_price(symbol, min_date, max_date)
256
-
257
- plot_data = pd.merge(df_sorted, stock_df, left_on="date_day", right_on="date", how="left")
258
-
259
- # Correlation
260
- correlation = plot_data['price'].corr(plot_data['avg_sentiment'])
261
- corr_text = "ไม่มีความสัมพันธ์"
262
- if correlation > 0.5:
263
- corr_text = "มีความสัมพันธ์ในทิศทางเดียวกัน"
264
- elif correlation < -0.5:
265
- corr_text = "มีความสัมพันธ์ในทิศทางตรงข้าม"
266
- st.metric("วิเคราะห์ความสัมพันธ์ระหว่างอารมณ์ของข่าวกับราคาหุ้น (Correlation)", corr_text, f"{correlation:.2f}")
267
-
268
- # Forecast Sentiment
269
- plot_data["timestamp"] = (plot_data["date_day"] - plot_data["date_day"].min()).dt.days
270
- train_data = plot_data.dropna(subset=['avg_sentiment'])
271
-
272
- if len(train_data) >= 2:
273
- model_lr = LinearRegression()
274
- model_lr.fit(train_data[["timestamp"]], train_data["avg_sentiment"])
275
-
276
- future_days = 7
277
- future_timestamps = np.arange(
278
- plot_data["timestamp"].max() + 1,
279
- plot_data["timestamp"].max() + future_days + 1
280
- )
281
- future_dates = [plot_data["date_day"].max() + timedelta(days=i) for i in range(1, future_days + 1)]
282
- future_preds = model_lr.predict(future_timestamps.reshape(-1, 1))
283
-
284
- # Plot
285
- fig = make_subplots(rows=2, cols=1, specs=[[{"secondary_y": True}], [{}]],
286
- row_heights=[0.7, 0.3], vertical_spacing=0.1,
287
- shared_xaxes=True)
288
-
289
- # ราคาหุ้น
290
- fig.add_trace(go.Scatter(x=plot_data["date_day"], y=plot_data["price"], name=f"{symbol} Price",
291
- mode="lines+markers", line=dict(color="orange")), row=1, col=1, secondary_y=False)
292
- # Sentiment จริง
293
- fig.add_trace(go.Scatter(x=plot_data["date_day"], y=plot_data["avg_sentiment"], name="Actual Sentiment",
294
- mode="lines+markers", line=dict(color="blue")), row=1, col=1, secondary_y=True)
295
- # Sentiment พยากรณ์
296
- if "future_preds" in locals():
297
- fig.add_trace(go.Scatter(x=future_dates, y=future_preds, name="Predicted Sentiment",
298
- mode="lines+markers", line=dict(color="#05a0fa", dash="dash")), row=1, col=1, secondary_y=True)
299
- # เส้นเชื่อม Actual -> Predicted
300
- last_actual_date = plot_data["date_day"].max()
301
- last_actual_value = plot_data["avg_sentiment"].iloc[-1]
302
- first_pred_date = future_dates[0]
303
- first_pred_value = future_preds[0]
304
- fig.add_trace(go.Scatter(x=[last_actual_date, first_pred_date],
305
- y=[last_actual_value, first_pred_value],
306
- mode="lines",
307
- line=dict(color="#05a0fa", dash="dot"),
308
- name="Connector Actual→Predicted"), row=1, col=1, secondary_y=True)
309
-
310
- # จำนวนข่าว
311
- for col in ["neutral", "negative", "positive"]:
312
- if col not in plot_data.columns:
313
- plot_data[col] = 0
314
- fig.add_trace(go.Bar(x=plot_data["date_day"], y=plot_data["neutral"], name="Neutral",
315
- marker_color='rgba(128, 128, 128, 0.7)'), row=2, col=1)
316
- fig.add_trace(go.Bar(x=plot_data["date_day"], y=plot_data["negative"], name="Negative",
317
- marker_color='rgba(255, 0, 0, 0.7)'), row=2, col=1)
318
- fig.add_trace(go.Bar(x=plot_data["date_day"], y=plot_data["positive"], name="Positive",
319
- marker_color='rgba(0, 128, 0, 0.7)'), row=2, col=1)
320
-
321
- fig.update_layout(title=f"แนวโน้มอารมณ์ของข่าว + ราคาหุ้น ({symbol})",
322
- barmode="stack", height=650, hovermode="x unified", template="plotly_white")
323
-
324
- st.plotly_chart(fig, use_container_width=True)
325
 
326
  # แสดงรายการข่าว
327
  st.subheader("📰 รายการข่าวทั้งหมด")
 
38
  theme_classifier = load_theme_classifier()
39
  candidate_labels = ["Stock Movement", "Earnings", "M&A", "Regulation", "Product Launch", "Market Analysis"]
40
 
41
+ # --------------------------
42
+ # โหลด Pegasus สำหรับสรุปข่าว
43
+ # --------------------------
44
+ @st.cache_resource
45
+ def load_summarizer():
46
+ return pipeline("summarization", model="Nerdward/financial-summarization-pegasus-finetuned-pytorch-model")
47
+
48
+ summarizer = load_summarizer()
49
+
50
  # --------------------------
51
  # UTILITIES
52
  # --------------------------
 
72
  score = (-1 * probs[0]) + (0 * probs[1]) + (1 * probs[2])
73
  return float(score)
74
 
75
+ def summarize_texts(news_texts):
76
+ """สรุปข่าวแต่ละข่าว 1 พารากราฟ"""
77
+ summaries = []
78
+ for text in news_texts:
79
+ if not text.strip():
80
+ summaries.append("")
81
+ continue
82
+ try:
83
+ summary = summarizer(text, max_length=100, min_length=30, do_sample=False)[0]["summary_text"]
84
+ summaries.append(summary)
85
+ except:
86
+ summaries.append(text) # fallback ถ้าโมเดลล้ม
87
+ return summaries
88
 
89
  def summarize_themes(news_texts):
90
  """สรุปธีมข่าวด้วย Zero-shot classification"""
91
  themes = []
92
  for text in news_texts:
93
  if not text.strip():
94
+ themes.append("Unknown")
95
  continue
96
  result = theme_classifier(text, candidate_labels)
97
  themes.append(result["labels"][0])
 
104
  keyword = keyword.strip()
105
  ticker = None
106
  name = None
 
107
  try:
108
  data = yf.Ticker(keyword)
109
  info = data.info
 
119
  name = q.get("longname", q.get("shortname", keyword))
120
  except:
121
  pass
 
122
  if not ticker:
123
  ticker = keyword.upper()
124
  if not name:
125
  name = keyword.capitalize()
 
126
  return name, ticker
127
 
128
  # --------------------------
 
133
  company, symbol = resolve_company_symbol(keyword)
134
  to_date = datetime.now().strftime('%Y-%m-%d')
135
  from_date = (datetime.now() - timedelta(days=7)).strftime('%Y-%m-%d')
 
136
  query_keyword = f"({company} OR {symbol}) finance stock"
137
 
138
  all_articles = []
 
179
  start_str = (start_date - timedelta(days=2)).strftime('%Y-%m-%d')
180
  end_str = (end_date + timedelta(days=1)).strftime('%Y-%m-%d')
181
  df = yf.download(symbol, start=start_str, end=end_str, interval="1d")
 
182
  if df.empty:
183
  st.warning("ไม่พบข้อมูลราคาหุ้น")
184
  return pd.DataFrame()
 
185
  df = df.reset_index()
186
  df_subset = df[['Date', 'Close']]
187
  df_subset.columns = ['date', 'price']
188
  df_subset["date"] = pd.to_datetime(df_subset["date"].dt.date)
 
189
  return df_subset
 
190
  except Exception as e:
191
  st.warning(f"ดึงราคาหุ้นล้มเหลว: {e}")
192
  return pd.DataFrame()
 
219
  news_df["sentiment"] = news_df["text"].apply(analyze_text)
220
  news_df["date"] = pd.to_datetime(news_df["date"])
221
 
222
+ # สรุปข่าวเป็น 1 พารากราฟ
223
+ st.info("กำลังสรุปเนื้อหาข่าว...")
224
+ news_df["text"] = summarize_texts(news_df["text"].tolist())
225
+
226
  # Metrics
227
  avg_sentiment = news_df["sentiment"].mean()
228
  pos_pct = (news_df["sentiment"] > 0.1).mean() * 100
 
233
  col2.metric("ข่าวเชิงบวก", f"{pos_pct:.1f}%")
234
  col3.metric("ข่าวเชิงลบ", f"{neg_pct:.1f}%")
235
 
236
+ # ธีมข่าว
 
 
237
  st.subheader("📰 ธีมข่าว (Top Theme per Article)")
238
  news_df["theme"] = summarize_themes(news_df["text"].tolist())
239
  theme_counts = news_df["theme"].value_counts()
240
  st.bar_chart(theme_counts)
241
 
242
+ # ... ส่วนกราฟ Sentiment & Price เหมือนเดิม (ข้ามตรงนี้เพื่อไม่ให้ยาวเกินไป) ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243
 
244
  # แสดงรายการข่าว
245
  st.subheader("📰 รายการข่าวทั้งหมด")