SentimentAnalyzerOriginal

Sleeping

App Files Files Community

KYTHY committed on Nov 15, 2025

Commit

5665b46

verified ·

1 Parent(s): 1cc85d5

Update app.py

Browse files

Files changed (1) hide show

app.py +132 -66

app.py CHANGED Viewed

@@ -9,7 +9,7 @@ import plotly.graph_objects as go
 from plotly.subplots import make_subplots
 import yfinance as yf
 import torch
-from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
 # --------------------------
 # CONFIG
@@ -29,21 +29,19 @@ def load_finbert():
 tokenizer, model = load_finbert()
 # --------------------------
-# โหลด Zero-shot classifier สำหรับธีมข่าว
-# --------------------------
-@st.cache_resource
-def load_theme_classifier():
-    return pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
-theme_classifier = load_theme_classifier()
-candidate_labels = ["Stock Movement", "Earnings", "M&A", "Regulation", "Product Launch", "Market Analysis"]
-# --------------------------
-# โหลด Pegasus สำหรับสรุปข่าว
 # --------------------------
 @st.cache_resource
 def load_summarizer():
-    return pipeline("summarization", model="Nerdward/financial-summarization-pegasus-finetuned-pytorch-model")
 summarizer = load_summarizer()
@@ -72,49 +70,12 @@ def analyze_text(text):
     score = (-1 * probs[0]) + (0 * probs[1]) + (1 * probs[2])
     return float(score)
-def summarize_texts(news_texts):
-    """สรุปข่าวแต่ละข่าว 1 พารากราฟ พร้อม progress bar"""
-    summaries = []
-    progress_text = st.empty()
-    progress_bar = st.progress(0)
-    total = len(news_texts)
-    for i, text in enumerate(news_texts):
-        if not text.strip():
-            summaries.append("")
-        else:
-            try:
-                summary = summarizer(text, max_length=100, min_length=30, do_sample=False)[0]["summary_text"]
-                summaries.append(summary)
-            except:
-                summaries.append(text)
-        progress_text.text(f"กำลังสรุปข่าว {i+1}/{total}")
-        progress_bar.progress((i+1)/total)
-    progress_bar.empty()
-    progress_text.empty()
-    return summaries
-def summarize_themes(news_texts):
-    """สรุปธีมข่าวแต่ละข่าว พร้อม progress bar"""
-    themes = []
-    progress_text = st.empty()
-    progress_bar = st.progress(0)
-    total = len(news_texts)
-    for i, text in enumerate(news_texts):
-        if not text.strip():
-            themes.append("Unknown")
-        else:
-            try:
-                result = theme_classifier(text, candidate_labels)
-                themes.append(result["labels"][0])
-            except:
-                themes.append("Unknown")
-        progress_text.text(f"กำลังสรุปธีมข่าว {i+1}/{total}")
-        progress_bar.progress((i+1)/total)
-    progress_bar.empty()
-    progress_text.empty()
-    return themes
 # --------------------------
 # แปลงชื่อ/ตัวย่อ → (Company Name, Symbol)
@@ -123,6 +84,7 @@ def resolve_company_symbol(keyword: str):
     keyword = keyword.strip()
     ticker = None
     name = None
     try:
         data = yf.Ticker(keyword)
         info = data.info
@@ -138,10 +100,12 @@ def resolve_company_symbol(keyword: str):
                 name = q.get("longname", q.get("shortname", keyword))
     except:
         pass
     if not ticker:
         ticker = keyword.upper()
     if not name:
         name = keyword.capitalize()
     return name, ticker
 # --------------------------
@@ -152,6 +116,7 @@ def fetch_financial_news(keyword):
     company, symbol = resolve_company_symbol(keyword)
     to_date = datetime.now().strftime('%Y-%m-%d')
     from_date = (datetime.now() - timedelta(days=7)).strftime('%Y-%m-%d')
     query_keyword = f"({company} OR {symbol}) finance stock"
     all_articles = []
@@ -198,14 +163,18 @@ def fetch_stock_price(symbol, start_date, end_date):
         start_str = (start_date - timedelta(days=2)).strftime('%Y-%m-%d')
         end_str = (end_date + timedelta(days=1)).strftime('%Y-%m-%d')
         df = yf.download(symbol, start=start_str, end=end_str, interval="1d")
         if df.empty:
             st.warning("ไม่พบข้อมูลราคาหุ้น")
             return pd.DataFrame()
         df = df.reset_index()
         df_subset = df[['Date', 'Close']]
         df_subset.columns = ['date', 'price']
         df_subset["date"] = pd.to_datetime(df_subset["date"].dt.date)
         return df_subset
     except Exception as e:
         st.warning(f"ดึงราคาหุ้นล้มเหลว: {e}")
         return pd.DataFrame()
@@ -238,14 +207,6 @@ def main():
     news_df["sentiment"] = news_df["text"].apply(analyze_text)
     news_df["date"] = pd.to_datetime(news_df["date"])
-    # สรุปข่าวเป็น 1 พารากราฟ พร้อม progress bar
-    st.info("กำลังสรุปเนื้อหาข่าว...")
-    news_df["text"] = summarize_texts(news_df["text"].tolist())
-    # สรุปธีมข่าวพร้อม progress bar
-    st.info("กำลังสรุปธีมข่าว...")
-    news_df["theme"] = summarize_themes(news_df["text"].tolist())
     # Metrics
     avg_sentiment = news_df["sentiment"].mean()
     pos_pct = (news_df["sentiment"] > 0.1).mean() * 100
@@ -256,9 +217,114 @@ def main():
     col2.metric("ข่าวเชิงบวก", f"{pos_pct:.1f}%")
     col3.metric("ข่าวเชิงลบ", f"{neg_pct:.1f}%")
-    # แสดงรายการข่าว
     st.subheader("📰 รายการข่าวทั้งหมด")
-    st.dataframe(news_df[["date", "source", "text", "sentiment", "theme", "url"]], use_container_width=True)
 # ---------------------------------------------------------
 # RUN APP

 from plotly.subplots import make_subplots
 import yfinance as yf
 import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForSeq2SeqLM, pipeline
 # --------------------------
 # CONFIG
 tokenizer, model = load_finbert()
 # --------------------------
+# Load the Pegasus summarizer (slow tokenizer)
 # --------------------------
 @st.cache_resource
 def load_summarizer():
+    """Build the financial-news summarization pipeline.
+
+    The function is wrapped in ``st.cache_resource``.
+
+    Returns:
+        A ``transformers`` ``"summarization"`` pipeline that wraps the
+        Pegasus checkpoint together with its slow tokenizer.
+    """
+    checkpoint = "Nerdward/financial-summarization-pegasus-finetuned-pytorch-model"
+    # use_fast=False requests the slow tokenizer implementation.
+    slow_tokenizer = AutoTokenizer.from_pretrained(checkpoint, use_fast=False)
+    seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
+    return pipeline("summarization", model=seq2seq_model, tokenizer=slow_tokenizer)
 summarizer = load_summarizer()
     score = (-1 * probs[0]) + (0 * probs[1]) + (1 * probs[2])
     return float(score)
+def summarize_article(text):
+    """Summarize one news article into a single paragraph.
+
+    Args:
+        text: Raw article text.
+
+    Returns:
+        The summary produced by the module-level ``summarizer`` pipeline.
+        Returns ``""`` when ``text`` is not a string or is blank, and the
+        original ``text`` unchanged when summarization fails.
+    """
+    # Non-string cells (e.g. None or NaN) have no .strip(); treat as empty.
+    if not isinstance(text, str) or not text.strip():
+        return ""
+    try:
+        # truncation=True clips over-long inputs to the model's maximum
+        # length instead of passing them through unbounded.
+        summary_list = summarizer(
+            text,
+            max_length=150,
+            min_length=50,
+            do_sample=False,
+            truncation=True,
+        )
+        return summary_list[0]["summary_text"]
+    except Exception:
+        # Best effort: a single failing article must not abort the whole
+        # per-row run, so fall back to the unsummarized text.
+        return text
 # --------------------------
 # แปลงชื่อ/ตัวย่อ → (Company Name, Symbol)
     keyword = keyword.strip()
     ticker = None
     name = None
     try:
         data = yf.Ticker(keyword)
         info = data.info
                 name = q.get("longname", q.get("shortname", keyword))
     except:
         pass
     if not ticker:
         ticker = keyword.upper()
     if not name:
         name = keyword.capitalize()
     return name, ticker
 # --------------------------
     company, symbol = resolve_company_symbol(keyword)
     to_date = datetime.now().strftime('%Y-%m-%d')
     from_date = (datetime.now() - timedelta(days=7)).strftime('%Y-%m-%d')
     query_keyword = f"({company} OR {symbol}) finance stock"
     all_articles = []
         start_str = (start_date - timedelta(days=2)).strftime('%Y-%m-%d')
         end_str = (end_date + timedelta(days=1)).strftime('%Y-%m-%d')
         df = yf.download(symbol, start=start_str, end=end_str, interval="1d")
         if df.empty:
             st.warning("ไม่พบข้อมูลราคาหุ้น")
             return pd.DataFrame()
         df = df.reset_index()
         df_subset = df[['Date', 'Close']]
         df_subset.columns = ['date', 'price']
         df_subset["date"] = pd.to_datetime(df_subset["date"].dt.date)
         return df_subset
     except Exception as e:
         st.warning(f"ดึงราคาหุ้นล้มเหลว: {e}")
         return pd.DataFrame()
     news_df["sentiment"] = news_df["text"].apply(analyze_text)
     news_df["date"] = pd.to_datetime(news_df["date"])
     # Metrics
     avg_sentiment = news_df["sentiment"].mean()
     pos_pct = (news_df["sentiment"] > 0.1).mean() * 100
     col2.metric("ข่าวเชิงบวก", f"{pos_pct:.1f}%")
     col3.metric("ข่าวเชิงลบ", f"{neg_pct:.1f}%")
+    # สรุปข่าว 1 พารากราฟ
+    st.info("กำลังสรุปข่าวเป็น 1 พารากราฟต่อข่าว...")
+    news_df["text"] = news_df["text"].apply(lambda x: summarize_article(x) if x.strip() else "")
+    # ---------------------------------------------------------
+    # ส่วนกราฟ Sentiment & Price (เหมือนเดิม)
+    # ---------------------------------------------------------
+    st.subheader("📈 แนวโน้มอารมณ์ของข่าว & ราคาหุ้น")
+    news_df["date_day"] = pd.to_datetime(news_df["date"].dt.date)
+    def sentiment_type(score):
+        if score > 0.1:
+            return "positive"
+        if score < -0.1:
+            return "negative"
+        return "neutral"
+    news_df["sentiment_type"] = news_df["sentiment"].apply(sentiment_type)
+    daily_avg = news_df.groupby("date_day")["sentiment"].mean().reset_index(name="avg_sentiment")
+    daily_counts = news_df.groupby(["date_day", "sentiment_type"]).size().unstack(fill_value=0).reset_index()
+    df_sorted = pd.merge(daily_avg, daily_counts, on="date_day").sort_values("date_day")
+    if len(df_sorted) < 2:
+        st.warning("ข้อมูลไม่พอสร้างแนวโน้ม")
+        st.dataframe(news_df)
+        return
+    # ดึงราคาหุ้น
+    _, symbol = resolve_company_symbol(keyword)
+    min_date, max_date = df_sorted["date_day"].min(), df_sorted["date_day"].max()
+    st.info(f"กำลังดึงราคาหุ้น {symbol} ...")
+    stock_df = fetch_stock_price(symbol, min_date, max_date)
+    plot_data = pd.merge(df_sorted, stock_df, left_on="date_day", right_on="date", how="left")
+    # Correlation
+    correlation = plot_data['price'].corr(plot_data['avg_sentiment'])
+    corr_text = "ไม่มีความสัมพันธ์"
+    if correlation > 0.5:
+        corr_text = "มีความสัมพันธ์ในทิศทางเดียวกัน"
+    elif correlation < -0.5:
+        corr_text = "มีความสัมพันธ์ในทิศทางตรงข้าม"
+    st.metric("วิเคราะห์ความสัมพันธ์ระหว่างอารมณ์ของข่าวกับราคาหุ้น (Correlation)", corr_text, f"{correlation:.2f}")
+    # Forecast Sentiment
+    plot_data["timestamp"] = (plot_data["date_day"] - plot_data["date_day"].min()).dt.days
+    train_data = plot_data.dropna(subset=['avg_sentiment'])
+    if len(train_data) >= 2:
+        model_lr = LinearRegression()
+        model_lr.fit(train_data[["timestamp"]], train_data["avg_sentiment"])
+        future_days = 7
+        future_timestamps = np.arange(
+            plot_data["timestamp"].max() + 1,
+            plot_data["timestamp"].max() + future_days + 1
+        )
+        future_dates = [plot_data["date_day"].max() + timedelta(days=i) for i in range(1, future_days + 1)]
+        future_preds = model_lr.predict(future_timestamps.reshape(-1, 1))
+    # Plot
+    fig = make_subplots(rows=2, cols=1, specs=[[{"secondary_y": True}], [{}]],
+                        row_heights=[0.7, 0.3], vertical_spacing=0.1,
+                        shared_xaxes=True)
+    # ราคาหุ้น
+    fig.add_trace(go.Scatter(x=plot_data["date_day"], y=plot_data["price"], name=f"{symbol} Price",
+                             mode="lines+markers", line=dict(color="orange")), row=1, col=1)
+    # Sentiment จริง
+    fig.add_trace(go.Scatter(x=plot_data["date_day"], y=plot_data["avg_sentiment"], name="Actual Sentiment",
+                             mode="lines+markers", line=dict(color="blue")), row=1, col=1, secondary_y=True)
+    # Sentiment พยากรณ์
+    if "future_preds" in locals():
+        fig.add_trace(go.Scatter(x=future_dates, y=future_preds, name="Predicted Sentiment",
+                                 mode="lines+markers", line=dict(color="#05a0fa", dash="dash")), row=1, col=1, secondary_y=True)
+        # เส้นเชื่อม Actual -> Predicted
+        last_actual_date = plot_data["date_day"].max()
+        last_actual_value = plot_data["avg_sentiment"].iloc[-1]
+        first_pred_date = future_dates[0]
+        first_pred_value = future_preds[0]
+        fig.add_trace(go.Scatter(x=[last_actual_date, first_pred_date],
+                                 y=[last_actual_value, first_pred_value],
+                                 mode="lines",
+                                 line=dict(color="#05a0fa", dash="dot"),
+                                 name="Connector Actual→Predicted"), row=1, col=1, secondary_y=True)
+    # จำนวนข่าว
+    for col in ["neutral", "negative", "positive"]:
+        if col not in plot_data.columns:
+            plot_data[col] = 0
+    fig.add_trace(go.Bar(x=plot_data["date_day"], y=plot_data["neutral"], name="Neutral",
+                         marker_color='rgba(128, 128, 128, 0.7)'), row=2, col=1)
+    fig.add_trace(go.Bar(x=plot_data["date_day"], y=plot_data["negative"], name="Negative",
+                         marker_color='rgba(255, 0, 0, 0.7)'), row=2, col=1)
+    fig.add_trace(go.Bar(x=plot_data["date_day"], y=plot_data["positive"], name="Positive",
+                         marker_color='rgba(0, 128, 0, 0.7)'), row=2, col=1)
+    fig.update_layout(title=f"แนวโน้มอารมณ์ของข่าว + ราคาหุ้น ({symbol})",
+                      barmode="stack", height=650, hovermode="x unified", template="plotly_white")
+    st.plotly_chart(fig, use_container_width=True)
+    # แสดงรายการข่าวทั้งหมด (text เป็นสรุปแล้ว)
     st.subheader("📰 รายการข่าวทั้งหมด")
+    st.dataframe(news_df[["date", "source", "text", "sentiment", "url"]], use_container_width=True)
 # ---------------------------------------------------------
 # RUN APP