SentimentAnalyzerFinbert

Sleeping

App Files Files Community

KYTHY committed on Nov 15, 2025

Commit

6a9e024

verified ·

1 Parent(s): 96780bb

Update app.py

Browse files

Files changed (1) hide show

app.py +150 -110

app.py CHANGED Viewed

@@ -29,7 +29,7 @@ def load_finbert():
 tokenizer, model = load_finbert()
 # --------------------------
-# Zero-shot classifier สำหรับธีมข่าว
 # --------------------------
 @st.cache_resource
 def load_theme_classifier():
@@ -39,29 +39,38 @@ theme_classifier = load_theme_classifier()
 candidate_labels = ["Stock Movement", "Earnings", "M&A", "Regulation", "Product Launch", "Market Analysis"]
 # --------------------------
-# Summarizer model
 # --------------------------
 @st.cache_resource
 def load_summarizer():
     return pipeline("summarization", model="Nerdward/financial-summarization-pegasus-finetuned-pytorch-model")
 summarizer = load_summarizer()
 # --------------------------
-# Utilities
 # --------------------------
 def analyze_text(text):
     if not text or not text.strip():
         return 0
     inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
     with torch.no_grad():
         outputs = model(**inputs)
-        logits = outputs.logits
-        probs = torch.softmax(logits, dim=1).numpy()[0]
     score = (-1 * probs[0]) + (0 * probs[1]) + (1 * probs[2])
     return float(score)
 def summarize_themes(news_texts):
     themes = []
     for text in news_texts:
         if not text.strip():
@@ -70,20 +79,8 @@ def summarize_themes(news_texts):
         themes.append(result["labels"][0])
     return themes
-@st.cache_data(ttl=3600)
-def summarize_news(texts):
-    """สรุปข่าวทีละข่าว ใช้ caching"""
-    summaries = []
-    for t in texts:
-        if not t.strip():
-            summaries.append("")
-            continue
-        summ = summarizer(t, max_length=150, min_length=50, do_sample=False)
-        summaries.append(summ[0]["summary_text"])
-    return summaries
 # --------------------------
-# Yahoo Finance helpers
 # --------------------------
 def resolve_company_symbol(keyword: str):
     keyword = keyword.strip()
@@ -110,6 +107,9 @@ def resolve_company_symbol(keyword: str):
         name = keyword.capitalize()
     return name, ticker
 @st.cache_data(ttl=3600)
 def fetch_financial_news(keyword):
     company, symbol = resolve_company_symbol(keyword)
@@ -147,6 +147,9 @@ def fetch_financial_news(keyword):
         page += 1
     return pd.DataFrame(all_articles)
 @st.cache_data(ttl=3600)
 def fetch_stock_price(symbol, start_date, end_date):
     try:
@@ -170,108 +173,145 @@ def fetch_stock_price(symbol, start_date, end_date):
 # --------------------------
 def main():
     st.title("📰 News Sentiment Analysis for Young Investor")
-    st.markdown("วิเคราะห์ข่าวย้อนหลัง 7 วัน พร้อมราคาหุ้น")
     # Sidebar
     with st.sidebar:
-        keyword = st.text_input("ค้นหา Stock Symbol (เช่น AAPL, TSLA):", "")
-        analyze_btn = st.button("วิเคราะห์ sentiment & ราคา")
-        summarize_btn = st.button("สรุปข่าว")
-    if not keyword:
-        st.info("กรอกคำค้นแล้วกดปุ่ม")
-        return
-    # ดึงข่าว (ใช้ cache เดียวกันสำหรับทั้งสองปุ่ม)
-    news_df = fetch_financial_news(keyword)
-    if news_df.empty:
-        st.warning("ไม่พบบทความข่าว")
-        return
     if analyze_btn:
-        # วิเคราะห์ sentiment
-        st.info("กำลังวิเคราะห์อารมณ์ของข่าว...")
-        news_df["sentiment"] = news_df["text"].apply(analyze_text)
-        news_df["date"] = pd.to_datetime(news_df["date"])
-        # Metrics
-        avg_sentiment = news_df["sentiment"].mean()
-        pos_pct = (news_df["sentiment"] > 0.1).mean() * 100
-        neg_pct = (news_df["sentiment"] < -0.1).mean() * 100
-        col1, col2, col3 = st.columns(3)
-        col1.metric("ค่าเฉลี่ยอารมณ์ข่าว", f"{avg_sentiment:.2f}")
-        col2.metric("ข่าวเชิงบวก", f"{pos_pct:.1f}%")
-        col3.metric("ข่าวเชิงลบ", f"{neg_pct:.1f}%")
-        # ธีมข่าว
-        news_df["theme"] = summarize_themes(news_df["text"].tolist())
-        # ส่วนกราฟ sentiment & price
-        st.subheader("📈 แนวโน้มอารมณ์ของข่าว & ราคาหุ้น")
-        news_df["date_day"] = pd.to_datetime(news_df["date"].dt.date)
-        def sentiment_type(score):
-            if score > 0.1: return "positive"
-            if score < -0.1: return "negative"
-            return "neutral"
-        news_df["sentiment_type"] = news_df["sentiment"].apply(sentiment_type)
-        daily_avg = news_df.groupby("date_day")["sentiment"].mean().reset_index(name="avg_sentiment")
-        daily_counts = news_df.groupby(["date_day", "sentiment_type"]).size().unstack(fill_value=0).reset_index()
-        df_sorted = pd.merge(daily_avg, daily_counts, on="date_day").sort_values("date_day")
-        if len(df_sorted) < 2:
-            st.warning("ข้อมูลไม่พอสร้างแนวโน้ม")
-            st.dataframe(news_df)
-            return
-        _, symbol = resolve_company_symbol(keyword)
-        min_date, max_date = df_sorted["date_day"].min(), df_sorted["date_day"].max()
-        st.info(f"กำลังดึงราคาหุ้น {symbol} ...")
-        stock_df = fetch_stock_price(symbol, min_date, max_date)
-        plot_data = pd.merge(df_sorted, stock_df, left_on="date_day", right_on="date", how="left")
-        correlation = plot_data['price'].corr(plot_data['avg_sentiment'])
-        corr_text = "ไม่มีความสัมพันธ์"
-        if correlation > 0.5:
-            corr_text = "มีความสัมพันธ์ในทิศทางเดียวกัน"
-        elif correlation < -0.5:
-            corr_text = "มีความสัมพันธ์ในทิศทางตรงข้าม"
-        st.metric("Correlation อารมณ์ข่าว vs ราคาหุ้น", corr_text, f"{correlation:.2f}")
-        # Forecast Sentiment
-        plot_data["timestamp"] = (plot_data["date_day"] - plot_data["date_day"].min()).dt.days
-        train_data = plot_data.dropna(subset=['avg_sentiment'])
-        if len(train_data) >= 2:
-            model_lr = LinearRegression()
-            model_lr.fit(train_data[["timestamp"]], train_data["avg_sentiment"])
-            future_days = 7
-            future_timestamps = np.arange(plot_data["timestamp"].max()+1, plot_data["timestamp"].max()+future_days+1)
-            future_dates = [plot_data["date_day"].max() + timedelta(days=i) for i in range(1, future_days+1)]
-            future_preds = model_lr.predict(future_timestamps.reshape(-1,1))
-        # Plot
-        fig = make_subplots(rows=2, cols=1, specs=[[{"secondary_y": True}], [{}]], row_heights=[0.7,0.3], vertical_spacing=0.1, shared_xaxes=True)
-        fig.add_trace(go.Scatter(x=plot_data["date_day"], y=plot_data["price"], name=f"{symbol} Price", mode="lines+markers", line=dict(color="orange")), row=1, col=1, secondary_y=False)
-        fig.add_trace(go.Scatter(x=plot_data["date_day"], y=plot_data["avg_sentiment"], name="Actual Sentiment", mode="lines+markers", line=dict(color="blue")), row=1, col=1, secondary_y=True)
-        if "future_preds" in locals():
-            fig.add_trace(go.Scatter(x=future_dates, y=future_preds, name="Predicted Sentiment", mode="lines+markers", line=dict(color="#05a0fa", dash="dash")), row=1, col=1, secondary_y=True)
-        for col in ["neutral","negative","positive"]:
-            if col not in plot_data.columns: plot_data[col]=0
-        fig.add_trace(go.Bar(x=plot_data["date_day"], y=plot_data["neutral"], name="Neutral", marker_color='rgba(128,128,128,0.7)'), row=2, col=1)
-        fig.add_trace(go.Bar(x=plot_data["date_day"], y=plot_data["negative"], name="Negative", marker_color='rgba(255,0,0,0.7)'), row=2, col=1)
-        fig.add_trace(go.Bar(x=plot_data["date_day"], y=plot_data["positive"], name="Positive", marker_color='rgba(0,128,0,0.7)'), row=2, col=1)
-        fig.update_layout(title=f"แนวโน้มอารมณ์ของข่าว + ราคาหุ้น ({symbol})", barmode="stack", height=650, hovermode="x unified", template="plotly_white")
-        st.plotly_chart(fig, use_container_width=True)
     if summarize_btn:
-        st.info("กำลังสรุปข่าวแต่ละข่าว...")
-        news_df["text"] = summarize_news(news_df["text"].tolist())
-    # แสดงรายการข่าว (เหมือนกันทั้งสองปุ่ม)
-    st.subheader("📰 รายการข่าวทั้งหมด")
-    st.dataframe(news_df[["date","source","text","sentiment"]].fillna(""), use_container_width=True)
-# --------------------------
-# RUN APP
-# --------------------------
 if __name__ == "__main__":
     nltk.download("stopwords", quiet=True)
     main()

 tokenizer, model = load_finbert()
 # --------------------------
+# โหลด Zero-shot classifier สำหรับธีมข่าว
 # --------------------------
 @st.cache_resource
 def load_theme_classifier():
 candidate_labels = ["Stock Movement", "Earnings", "M&A", "Regulation", "Product Launch", "Market Analysis"]
 # --------------------------
+# โหลด summarization model
 # --------------------------
 @st.cache_resource
 def load_summarizer():
+    # เปลี่ยนเป็นโมเดลสรุปข่าวสายการเงิน
     return pipeline("summarization", model="Nerdward/financial-summarization-pegasus-finetuned-pytorch-model")
 summarizer = load_summarizer()
 # --------------------------
+# UTILITIES
 # --------------------------
 def analyze_text(text):
+    """วิเคราะห์อารมณ์ของข่าวด้วย FinBERT"""
     if not text or not text.strip():
         return 0
     inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
     with torch.no_grad():
         outputs = model(**inputs)
+        probs = torch.softmax(outputs.logits, dim=1).numpy()[0]
     score = (-1 * probs[0]) + (0 * probs[1]) + (1 * probs[2])
     return float(score)
+def summarize_text(text):
+    """สรุปข่าวเป็นย่อหน้าเดียว"""
+    if not text or not text.strip():
+        return ""
+    result = summarizer(text, max_length=150, min_length=50, do_sample=False)
+    return result[0]["summary_text"]
 def summarize_themes(news_texts):
+    """สรุปธีมข่าวด้วย Zero-shot classification"""
     themes = []
     for text in news_texts:
         if not text.strip():
         themes.append(result["labels"][0])
     return themes
 # --------------------------
+# แปลงชื่อ/ตัวย่อ → (Company Name, Symbol)
 # --------------------------
 def resolve_company_symbol(keyword: str):
     keyword = keyword.strip()
         name = keyword.capitalize()
     return name, ticker
+# --------------------------
+# ดึงข่าว 7 วัน
+# --------------------------
 @st.cache_data(ttl=3600)
 def fetch_financial_news(keyword):
     company, symbol = resolve_company_symbol(keyword)
         page += 1
     return pd.DataFrame(all_articles)
+# --------------------------
+# ดึงราคาหุ้น
+# --------------------------
 @st.cache_data(ttl=3600)
 def fetch_stock_price(symbol, start_date, end_date):
     try:
 # --------------------------
 def main():
     st.title("📰 News Sentiment Analysis for Young Investor")
+    st.markdown("วิเคราะห์แนวโน้มอารมณ์ของข่าวย้อนหลัง 7 วัน พร้อมราคาหุ้น และสรุปข่าว")
     # Sidebar
     with st.sidebar:
+        st.header("ปุ่มวิเคราะห์ข่าว")
+        keyword1 = st.text_input("ค้นหา Stock Symbol สำหรับวิเคราะห์:", key="keyword1")
+        analyze_btn = st.button("วิเคราะห์ข่าว + Sentiment + ราคาหุ้น", key="analyze_btn")
+        st.markdown("---")
+        st.header("ปุ่มสรุปข่าว")
+        keyword2 = st.text_input("ค้นหา Stock Symbol สำหรับสรุปข่าว:", key="keyword2")
+        from_date = st.date_input("จากวันที่", datetime.now() - timedelta(days=7), key="from_date")
+        to_date = st.date_input("ถึงวันที่", datetime.now(), key="to_date")
+        max_news = st.number_input("จำนวนข่าวสูงสุดที่จะสรุป", min_value=1, max_value=50, value=10, key="max_news")
+        summarize_btn = st.button("สรุปข่าว", key="summarize_btn")
+    # ------------------ ปุ่ม 1 ------------------
     if analyze_btn:
+        if not keyword1:
+            st.warning("กรุณากรอก Stock Symbol")
+        else:
+            st.info(f"กำลังดึงข่าวย้อนหลัง 7 วันสำหรับ '{keyword1}'...")
+            news_df = fetch_financial_news(keyword1)
+            if news_df.empty:
+                st.warning("ไม่พบบทความข่าว")
+            else:
+                st.info("กำลังวิเคราะห์อารมณ์ของข่าว...")
+                news_df["sentiment"] = news_df["text"].apply(analyze_text)
+                news_df["date"] = pd.to_datetime(news_df["date"])
+                # Metrics
+                avg_sentiment = news_df["sentiment"].mean()
+                pos_pct = (news_df["sentiment"] > 0.1).mean() * 100
+                neg_pct = (news_df["sentiment"] < -0.1).mean() * 100
+                col1, col2, col3 = st.columns(3)
+                col1.metric("ค่าเฉลี่ยอารมณ์ข่าว", f"{avg_sentiment:.2f}")
+                col2.metric("ข่าวเชิงบวก", f"{pos_pct:.1f}%")
+                col3.metric("ข่าวเชิงลบ", f"{neg_pct:.1f}%")
+                # Theme
+                news_df["theme"] = summarize_themes(news_df["text"].tolist())
+                # Sentiment & Price
+                news_df["date_day"] = pd.to_datetime(news_df["date"].dt.date)
+                def sentiment_type(score):
+                    if score > 0.1: return "positive"
+                    if score < -0.1: return "negative"
+                    return "neutral"
+                news_df["sentiment_type"] = news_df["sentiment"].apply(sentiment_type)
+                daily_avg = news_df.groupby("date_day")["sentiment"].mean().reset_index(name="avg_sentiment")
+                daily_counts = news_df.groupby(["date_day", "sentiment_type"]).size().unstack(fill_value=0).reset_index()
+                df_sorted = pd.merge(daily_avg, daily_counts, on="date_day").sort_values("date_day")
+                # ราคาหุ้น
+                _, symbol = resolve_company_symbol(keyword1)
+                min_date, max_date = df_sorted["date_day"].min(), df_sorted["date_day"].max()
+                st.info(f"กำลังดึงราคาหุ้น {symbol} ...")
+                stock_df = fetch_stock_price(symbol, min_date, max_date)
+                plot_data = pd.merge(df_sorted, stock_df, left_on="date_day", right_on="date", how="left")
+                # Correlation
+                correlation = plot_data['price'].corr(plot_data['avg_sentiment'])
+                corr_text = "ไม่มีความสัมพันธ์"
+                if correlation > 0.5:
+                    corr_text = "มีความสัมพันธ์ในทิศทางเดียวกัน"
+                elif correlation < -0.5:
+                    corr_text = "มีความสัมพันธ์ในทิศทางตรงข้าม"
+                st.metric("วิเคราะห์ความสัมพันธ์ระหว่างอารมณ์ของข่าวกับราคาหุ้น (Correlation)", corr_text, f"{correlation:.2f}")
+                # Forecast Sentiment
+                plot_data["timestamp"] = (plot_data["date_day"] - plot_data["date_day"].min()).dt.days
+                train_data = plot_data.dropna(subset=['avg_sentiment'])
+                if len(train_data) >= 2:
+                    model_lr = LinearRegression()
+                    model_lr.fit(train_data[["timestamp"]], train_data["avg_sentiment"])
+                    future_days = 7
+                    future_timestamps = np.arange(plot_data["timestamp"].max() + 1, plot_data["timestamp"].max() + future_days + 1)
+                    future_dates = [plot_data["date_day"].max() + timedelta(days=i) for i in range(1, future_days + 1)]
+                    future_preds = model_lr.predict(future_timestamps.reshape(-1, 1))
+                # Plot
+                fig = make_subplots(rows=2, cols=1, specs=[[{"secondary_y": True}], [{}]],
+                                    row_heights=[0.7, 0.3], vertical_spacing=0.1, shared_xaxes=True)
+                fig.add_trace(go.Scatter(x=plot_data["date_day"], y=plot_data["price"], name=f"{symbol} Price",
+                                         mode="lines+markers", line=dict(color="orange")), row=1, col=1, secondary_y=False)
+                fig.add_trace(go.Scatter(x=plot_data["date_day"], y=plot_data["avg_sentiment"], name="Actual Sentiment",
+                                         mode="lines+markers", line=dict(color="blue")), row=1, col=1, secondary_y=True)
+                if "future_preds" in locals():
+                    fig.add_trace(go.Scatter(x=future_dates, y=future_preds, name="Predicted Sentiment",
+                                             mode="lines+markers", line=dict(color="#05a0fa", dash="dash")), row=1, col=1, secondary_y=True)
+                    last_actual_date = plot_data["date_day"].max()
+                    last_actual_value = plot_data["avg_sentiment"].iloc[-1]
+                    first_pred_date = future_dates[0]
+                    first_pred_value = future_preds[0]
+                    fig.add_trace(go.Scatter(x=[last_actual_date, first_pred_date],
+                                             y=[last_actual_value, first_pred_value],
+                                             mode="lines", line=dict(color="#05a0fa", dash="dot"),
+                                             name="Connector Actual→Predicted"), row=1, col=1, secondary_y=True)
+                for col in ["neutral", "negative", "positive"]:
+                    if col not in plot_data.columns:
+                        plot_data[col] = 0
+                fig.add_trace(go.Bar(x=plot_data["date_day"], y=plot_data["neutral"], name="Neutral",
+                                     marker_color='rgba(128, 128, 128, 0.7)'), row=2, col=1)
+                fig.add_trace(go.Bar(x=plot_data["date_day"], y=plot_data["negative"], name="Negative",
+                                     marker_color='rgba(255, 0, 0, 0.7)'), row=2, col=1)
+                fig.add_trace(go.Bar(x=plot_data["date_day"], y=plot_data["positive"], name="Positive",
+                                     marker_color='rgba(0, 128, 0, 0.7)'), row=2, col=1)
+                fig.update_layout(title=f"แนวโน้มอารมณ์ของข่าว + ราคาหุ้น ({symbol})",
+                                  barmode="stack", height=650, hovermode="x unified", template="plotly_white")
+                st.plotly_chart(fig, use_container_width=True)
+                st.subheader("📰 รายการข่าวทั้งหมด")
+                st.dataframe(news_df[["date", "source", "text", "sentiment", "theme", "url"]], use_container_width=True)
+    # ------------------ ปุ่ม 2 ------------------
     if summarize_btn:
+        if not keyword2:
+            st.warning("กรุณากรอก Stock Symbol")
+        else:
+            news_df = fetch_financial_news(keyword2)
+            if news_df.empty:
+                st.warning("ไม่พบบทความข่าว")
+            else:
+                # กรองตามช่วงวันที่
+                news_df = news_df[(news_df["date"].dt.date >= from_date) & (news_df["date"].dt.date <= to_date)]
+                news_df = news_df.head(max_news)
+                # สรุปข่าว
+                st.info("กำลังสรุปข่าว...")
+                news_df["summary"] = news_df["text"].apply(summarize_text)
+                st.subheader("📰 ข่าวที่สรุปแล้ว")
+                st.dataframe(news_df[["date", "source", "summary", "url"]], use_container_width=True)
 if __name__ == "__main__":
     nltk.download("stopwords", quiet=True)
     main()