SentimentAnalyzerFinbert

Sleeping

App Files Files Community

KYTHY committed on Nov 15, 2025

Commit

c40e7ab

verified ·

1 Parent(s): 6a9e024

Update app.py

Browse files

Files changed (1) hide show

app.py +193 -169

app.py CHANGED Viewed

@@ -38,16 +38,6 @@ def load_theme_classifier():
 theme_classifier = load_theme_classifier()
 candidate_labels = ["Stock Movement", "Earnings", "M&A", "Regulation", "Product Launch", "Market Analysis"]
-# --------------------------
-# โหลด summarization model
-# --------------------------
-@st.cache_resource
-def load_summarizer():
-    # เปลี่ยนเป็นโมเดลสรุปข่าวสายการเงิน
-    return pipeline("summarization", model="Nerdward/financial-summarization-pegasus-finetuned-pytorch-model")
-summarizer = load_summarizer()
 # --------------------------
 # UTILITIES
 # --------------------------
@@ -55,33 +45,20 @@ def analyze_text(text):
     """วิเคราะห์อารมณ์ของข่าวด้วย FinBERT"""
     if not text or not text.strip():
         return 0
-    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
     with torch.no_grad():
         outputs = model(**inputs)
-        probs = torch.softmax(outputs.logits, dim=1).numpy()[0]
-    score = (-1 * probs[0]) + (0 * probs[1]) + (1 * probs[2])
-    return float(score)
-def summarize_text(text):
-    """สรุปข่าวเป็นย่อหน้าเดียว"""
-    if not text or not text.strip():
-        return ""
-    result = summarizer(text, max_length=150, min_length=50, do_sample=False)
-    return result[0]["summary_text"]
-def summarize_themes(news_texts):
-    """สรุปธีมข่าวด้วย Zero-shot classification"""
-    themes = []
-    for text in news_texts:
-        if not text.strip():
-            continue
-        result = theme_classifier(text, candidate_labels)
-        themes.append(result["labels"][0])
-    return themes
-# --------------------------
-# แปลงชื่อ/ตัวย่อ → (Company Name, Symbol)
-# --------------------------
 def resolve_company_symbol(keyword: str):
     keyword = keyword.strip()
     ticker = None
@@ -108,7 +85,7 @@ def resolve_company_symbol(keyword: str):
     return name, ticker
 # --------------------------
-# ดึงข่าว 7 วัน
 # --------------------------
 @st.cache_data(ttl=3600)
 def fetch_financial_news(keyword):
@@ -118,6 +95,8 @@ def fetch_financial_news(keyword):
     query_keyword = f"({company} OR {symbol}) finance stock"
     all_articles = []
     page = 1
     while True:
         url = (
             f"https://newsapi.org/v2/everything?"
@@ -142,11 +121,44 @@ def fetch_financial_news(keyword):
                     "source": a["source"]["name"],
                     "url": a["url"]
                 })
         if len(articles) < 100:
             break
         page += 1
     return pd.DataFrame(all_articles)
 # --------------------------
 # ดึงราคาหุ้น
 # --------------------------
@@ -173,145 +185,157 @@ def fetch_stock_price(symbol, start_date, end_date):
 # --------------------------
 def main():
     st.title("📰 News Sentiment Analysis for Young Investor")
-    st.markdown("วิเคราะห์แนวโน้มอารมณ์ของข่าวย้อนหลัง 7 วัน พร้อมราคาหุ้น และสรุปข่าว")
     # Sidebar
-    with st.sidebar:
-        st.header("ปุ่มวิเคราะห์ข่าว")
-        keyword1 = st.text_input("ค้นหา Stock Symbol สำหรับวิเคราะห์:", key="keyword1")
-        analyze_btn = st.button("วิเคราะห์ข่าว + Sentiment + ราคาหุ้น", key="analyze_btn")
-        st.markdown("---")
-        st.header("ปุ่มสรุปข่าว")
-        keyword2 = st.text_input("ค้นหา Stock Symbol สำหรับสรุปข่าว:", key="keyword2")
-        from_date = st.date_input("จากวันที่", datetime.now() - timedelta(days=7), key="from_date")
-        to_date = st.date_input("ถึงวันที่", datetime.now(), key="to_date")
-        max_news = st.number_input("จำนวนข่าวสูงสุดที่จะสรุป", min_value=1, max_value=50, value=10, key="max_news")
-        summarize_btn = st.button("สรุปข่าว", key="summarize_btn")
-    # ------------------ ปุ่ม 1 ------------------
-    if analyze_btn:
-        if not keyword1:
-            st.warning("กรุณากรอก Stock Symbol")
-        else:
-            st.info(f"กำลังดึงข่าวย้อนหลัง 7 วันสำหรับ '{keyword1}'...")
-            news_df = fetch_financial_news(keyword1)
-            if news_df.empty:
-                st.warning("ไม่พบบทความข่าว")
-            else:
-                st.info("กำลังวิเคราะห์อารมณ์ของข่าว...")
-                news_df["sentiment"] = news_df["text"].apply(analyze_text)
-                news_df["date"] = pd.to_datetime(news_df["date"])
-                # Metrics
-                avg_sentiment = news_df["sentiment"].mean()
-                pos_pct = (news_df["sentiment"] > 0.1).mean() * 100
-                neg_pct = (news_df["sentiment"] < -0.1).mean() * 100
-                col1, col2, col3 = st.columns(3)
-                col1.metric("ค่าเฉลี่ยอารมณ์ข่าว", f"{avg_sentiment:.2f}")
-                col2.metric("ข่าวเชิงบวก", f"{pos_pct:.1f}%")
-                col3.metric("ข่าวเชิงลบ", f"{neg_pct:.1f}%")
-                # Theme
-                news_df["theme"] = summarize_themes(news_df["text"].tolist())
-                # Sentiment & Price
-                news_df["date_day"] = pd.to_datetime(news_df["date"].dt.date)
-                def sentiment_type(score):
-                    if score > 0.1: return "positive"
-                    if score < -0.1: return "negative"
-                    return "neutral"
-                news_df["sentiment_type"] = news_df["sentiment"].apply(sentiment_type)
-                daily_avg = news_df.groupby("date_day")["sentiment"].mean().reset_index(name="avg_sentiment")
-                daily_counts = news_df.groupby(["date_day", "sentiment_type"]).size().unstack(fill_value=0).reset_index()
-                df_sorted = pd.merge(daily_avg, daily_counts, on="date_day").sort_values("date_day")
-                # ราคาหุ้น
-                _, symbol = resolve_company_symbol(keyword1)
-                min_date, max_date = df_sorted["date_day"].min(), df_sorted["date_day"].max()
-                st.info(f"กำลังดึงราคาหุ้น {symbol} ...")
-                stock_df = fetch_stock_price(symbol, min_date, max_date)
-                plot_data = pd.merge(df_sorted, stock_df, left_on="date_day", right_on="date", how="left")
-                # Correlation
-                correlation = plot_data['price'].corr(plot_data['avg_sentiment'])
-                corr_text = "ไม่มีความสัมพันธ์"
-                if correlation > 0.5:
-                    corr_text = "มีความสัมพันธ์ในทิศทางเดียวกัน"
-                elif correlation < -0.5:
-                    corr_text = "มีความสัมพันธ์ในทิศทางตรงข้าม"
-                st.metric("วิเคราะห์ความสัมพันธ์ระหว่างอารมณ์ของข่าวกับราคาหุ้น (Correlation)", corr_text, f"{correlation:.2f}")
-                # Forecast Sentiment
-                plot_data["timestamp"] = (plot_data["date_day"] - plot_data["date_day"].min()).dt.days
-                train_data = plot_data.dropna(subset=['avg_sentiment'])
-                if len(train_data) >= 2:
-                    model_lr = LinearRegression()
-                    model_lr.fit(train_data[["timestamp"]], train_data["avg_sentiment"])
-                    future_days = 7
-                    future_timestamps = np.arange(plot_data["timestamp"].max() + 1, plot_data["timestamp"].max() + future_days + 1)
-                    future_dates = [plot_data["date_day"].max() + timedelta(days=i) for i in range(1, future_days + 1)]
-                    future_preds = model_lr.predict(future_timestamps.reshape(-1, 1))
-                # Plot
-                fig = make_subplots(rows=2, cols=1, specs=[[{"secondary_y": True}], [{}]],
-                                    row_heights=[0.7, 0.3], vertical_spacing=0.1, shared_xaxes=True)
-                fig.add_trace(go.Scatter(x=plot_data["date_day"], y=plot_data["price"], name=f"{symbol} Price",
-                                         mode="lines+markers", line=dict(color="orange")), row=1, col=1, secondary_y=False)
-                fig.add_trace(go.Scatter(x=plot_data["date_day"], y=plot_data["avg_sentiment"], name="Actual Sentiment",
-                                         mode="lines+markers", line=dict(color="blue")), row=1, col=1, secondary_y=True)
-                if "future_preds" in locals():
-                    fig.add_trace(go.Scatter(x=future_dates, y=future_preds, name="Predicted Sentiment",
-                                             mode="lines+markers", line=dict(color="#05a0fa", dash="dash")), row=1, col=1, secondary_y=True)
-                    last_actual_date = plot_data["date_day"].max()
-                    last_actual_value = plot_data["avg_sentiment"].iloc[-1]
-                    first_pred_date = future_dates[0]
-                    first_pred_value = future_preds[0]
-                    fig.add_trace(go.Scatter(x=[last_actual_date, first_pred_date],
-                                             y=[last_actual_value, first_pred_value],
-                                             mode="lines", line=dict(color="#05a0fa", dash="dot"),
-                                             name="Connector Actual→Predicted"), row=1, col=1, secondary_y=True)
-                for col in ["neutral", "negative", "positive"]:
-                    if col not in plot_data.columns:
-                        plot_data[col] = 0
-                fig.add_trace(go.Bar(x=plot_data["date_day"], y=plot_data["neutral"], name="Neutral",
-                                     marker_color='rgba(128, 128, 128, 0.7)'), row=2, col=1)
-                fig.add_trace(go.Bar(x=plot_data["date_day"], y=plot_data["negative"], name="Negative",
-                                     marker_color='rgba(255, 0, 0, 0.7)'), row=2, col=1)
-                fig.add_trace(go.Bar(x=plot_data["date_day"], y=plot_data["positive"], name="Positive",
-                                     marker_color='rgba(0, 128, 0, 0.7)'), row=2, col=1)
-                fig.update_layout(title=f"แนวโน้มอารมณ์ของข่าว + ราคาหุ้น ({symbol})",
-                                  barmode="stack", height=650, hovermode="x unified", template="plotly_white")
-                st.plotly_chart(fig, use_container_width=True)
-                st.subheader("📰 รายการข่าวทั้งหมด")
-                st.dataframe(news_df[["date", "source", "text", "sentiment", "theme", "url"]], use_container_width=True)
-    # ------------------ ปุ่ม 2 ------------------
-    if summarize_btn:
-        if not keyword2:
-            st.warning("กรุณากรอก Stock Symbol")
-        else:
-            news_df = fetch_financial_news(keyword2)
-            if news_df.empty:
-                st.warning("ไม่พบบทความข่าว")
-            else:
-                # กรองตามช่วงวันที่
-                news_df = news_df[(news_df["date"].dt.date >= from_date) & (news_df["date"].dt.date <= to_date)]
-                news_df = news_df.head(max_news)
-                # สรุปข่าว
-                st.info("กำลังสรุปข่าว...")
-                news_df["summary"] = news_df["text"].apply(summarize_text)
-                st.subheader("📰 ข่าวที่สรุปแล้ว")
-                st.dataframe(news_df[["date", "source", "summary", "url"]], use_container_width=True)
 if __name__ == "__main__":
     nltk.download("stopwords", quiet=True)
     main()

 theme_classifier = load_theme_classifier()
 candidate_labels = ["Stock Movement", "Earnings", "M&A", "Regulation", "Product Launch", "Market Analysis"]
 # --------------------------
 # UTILITIES
 # --------------------------
     """วิเคราะห์อารมณ์ของข่าวด้วย FinBERT"""
     if not text or not text.strip():
         return 0
+    inputs = tokenizer(
+        text,
+        return_tensors="pt",
+        padding=True,
+        truncation=True,
+        max_length=512
+    )
     with torch.no_grad():
         outputs = model(**inputs)
+        logits = outputs.logits
+        probs = torch.softmax(logits, dim=1).numpy()[0]  # FinBERT = [negative, neutral, positive]
+        score = (-1 * probs[0]) + (0 * probs[1]) + (1 * probs[2])
+        return float(score)
 def resolve_company_symbol(keyword: str):
     keyword = keyword.strip()
     ticker = None
     return name, ticker
 # --------------------------
+# ดึงข่าว 7 วัน (เพิ่ม progress bar)
 # --------------------------
 @st.cache_data(ttl=3600)
 def fetch_financial_news(keyword):
     query_keyword = f"({company} OR {symbol}) finance stock"
     all_articles = []
     page = 1
+    progress_bar = st.progress(0)
     while True:
         url = (
             f"https://newsapi.org/v2/everything?"
                     "source": a["source"]["name"],
                     "url": a["url"]
                 })
+        progress_bar.progress(min(page * 10, 100))  # อัปเดต progress bar แบบหยาบ ๆ
         if len(articles) < 100:
             break
         page += 1
+    progress_bar.progress(100)
     return pd.DataFrame(all_articles)
+# --------------------------
+# วิเคราะห์ Sentiment (เพิ่ม progress bar)
+# --------------------------
+def analyze_news_sentiment(news_df):
+    sentiments = []
+    progress_bar = st.progress(0)
+    total = len(news_df)
+    for i, text in enumerate(news_df["text"]):
+        sentiments.append(analyze_text(text))
+        progress_bar.progress(int((i + 1) / total * 100))
+    progress_bar.progress(100)
+    return sentiments
+# --------------------------
+# สรุปธีมข่าว (เพิ่ม progress bar)
+# --------------------------
+def summarize_themes(news_texts):
+    themes = []
+    progress_bar = st.progress(0)
+    total = len(news_texts)
+    for i, text in enumerate(news_texts):
+        if not text.strip():
+            themes.append("Unknown")
+        else:
+            result = theme_classifier(text, candidate_labels)
+            themes.append(result["labels"][0])
+        progress_bar.progress(int((i + 1) / total * 100))
+    progress_bar.progress(100)
+    return themes
 # --------------------------
 # ดึงราคาหุ้น
 # --------------------------
 # --------------------------
 def main():
     st.title("📰 News Sentiment Analysis for Young Investor")
+    st.markdown("วิเคราะห์แนวโน้มอารมณ์ของข่าวย้อนหลัง 7 วัน พร้อมราคาหุ้น")
     # Sidebar
+    keyword = st.text_input("ค้นหา Stock Symbol (เช่น AAPL, TSLA):", "")
+    analyze_btn = st.button("วิเคราะห์เลย")
+    if not analyze_btn:
+        st.info("กรอกคำค้นแล้วกด 'วิเคราะห์เลย'")
+        return
+    # ดึงข่าว
+    st.info(f"กำลังดึงข่าวย้อนหลัง 7 วันสำหรับ '{keyword}'...")
+    news_df = fetch_financial_news(keyword)
+    if news_df.empty:
+        st.warning("ไม่พบบทความข่าว")
+        return
+    # วิเคราะห์ Sentiment
+    st.info("กำลังวิเคราะห์อารมณ์ของข่าว...")
+    news_df["sentiment"] = analyze_news_sentiment(news_df)
+    # สรุปธีมข่าว
+    st.info("กำลังสรุปธีมข่าว...")
+    news_df["theme"] = summarize_themes(news_df["text"].tolist())
+    news_df["date"] = pd.to_datetime(news_df["date"])
+    # Metrics
+    avg_sentiment = news_df["sentiment"].mean()
+    pos_pct = (news_df["sentiment"] > 0.1).mean() * 100
+    neg_pct = (news_df["sentiment"] < -0.1).mean() * 100
+    col1, col2, col3 = st.columns(3)
+    col1.metric("ค่าเฉลี่ยอารมณ์ข่าว", f"{avg_sentiment:.2f}")
+    col2.metric("ข่าวเชิงบวก", f"{pos_pct:.1f}%")
+    col3.metric("ข่าวเชิงลบ", f"{neg_pct:.1f}%")
+    # ส่วนกราฟ Sentiment & Price
+    st.subheader("📈 แนวโน้มอารมณ์ของข่าว & ราคาหุ้น")
+    news_df["date_day"] = pd.to_datetime(news_df["date"].dt.date)
+    def sentiment_type(score):
+        if score > 0.1:
+            return "positive"
+        if score < -0.1:
+            return "negative"
+        return "neutral"
+    news_df["sentiment_type"] = news_df["sentiment"].apply(sentiment_type)
+    daily_avg = news_df.groupby("date_day")["sentiment"].mean().reset_index(name="avg_sentiment")
+    daily_counts = news_df.groupby(["date_day", "sentiment_type"]).size().unstack(fill_value=0).reset_index()
+    df_sorted = pd.merge(daily_avg, daily_counts, on="date_day").sort_values("date_day")
+    if len(df_sorted) < 2:
+        st.warning("ข้อมูลไม่พอสร้างแนวโน้ม")
+        st.dataframe(news_df)
+        return
+    # ดึงราคาหุ้น
+    _, symbol = resolve_company_symbol(keyword)
+    min_date, max_date = df_sorted["date_day"].min(), df_sorted["date_day"].max()
+    st.info(f"กำลังดึงราคาหุ้น {symbol} ...")
+    stock_df = fetch_stock_price(symbol, min_date, max_date)
+    plot_data = pd.merge(df_sorted, stock_df, left_on="date_day", right_on="date", how="left")
+    # Correlation
+    correlation = plot_data['price'].corr(plot_data['avg_sentiment'])
+    corr_text = "ไม่มีความสัมพันธ์"
+    if correlation > 0.5:
+        corr_text = "มีความสัมพันธ์ในทิศทางเดียวกัน"
+    elif correlation < -0.5:
+        corr_text = "มีความสัมพันธ์ในทิศทางตรงข้าม"
+    st.metric("วิเคราะห์ความสัมพันธ์ระหว่างอารมณ์ของข่าวกับราคาหุ้น (Correlation)", corr_text, f"{correlation:.2f}")
+    # Forecast Sentiment
+    plot_data["timestamp"] = (plot_data["date_day"] - plot_data["date_day"].min()).dt.days
+    train_data = plot_data.dropna(subset=['avg_sentiment'])
+    if len(train_data) >= 2:
+        model_lr = LinearRegression()
+        model_lr.fit(train_data[["timestamp"]], train_data["avg_sentiment"])
+        future_days = 7
+        future_timestamps = np.arange(
+            plot_data["timestamp"].max() + 1,
+            plot_data["timestamp"].max() + future_days + 1
+        )
+        future_dates = [plot_data["date_day"].max() + timedelta(days=i) for i in range(1, future_days + 1)]
+        future_preds = model_lr.predict(future_timestamps.reshape(-1, 1))
+    # Plot
+    fig = make_subplots(
+        rows=2, cols=1,
+        specs=[[{"secondary_y": True}], [{}]],
+        row_heights=[0.7, 0.3],
+        vertical_spacing=0.1,
+        shared_xaxes=True
+    )
+    # ราคาหุ้น
+    fig.add_trace(
+        go.Scatter(x=plot_data["date_day"], y=plot_data["price"], name=f"{symbol} Price",
+                   mode="lines+markers", line=dict(color="orange")),
+        row=1, col=1, secondary_y=False
+    )
+    # Sentiment จริง
+    fig.add_trace(
+        go.Scatter(x=plot_data["date_day"], y=plot_data["avg_sentiment"], name="Actual Sentiment",
+                   mode="lines+markers", line=dict(color="blue")),
+        row=1, col=1, secondary_y=True
+    )
+    # Sentiment พยากรณ์
+    if "future_preds" in locals():
+        fig.add_trace(
+            go.Scatter(x=future_dates, y=future_preds, name="Predicted Sentiment",
+                       mode="lines+markers", line=dict(color="#05a0fa", dash="dash")),
+            row=1, col=1, secondary_y=True
+        )
+        # เส้นเชื่อม Actual -> Predicted
+        last_actual_date = plot_data["date_day"].max()
+        last_actual_value = plot_data["avg_sentiment"].iloc[-1]
+        first_pred_date = future_dates[0]
+        first_pred_value = future_preds[0]
+        fig.add_trace(
+            go.Scatter(x=[last_actual_date, first_pred_date], y=[last_actual_value, first_pred_value],
+                       mode="lines", line=dict(color="#05a0fa", dash="dot"),
+                       name="Connector Actual→Predicted"),
+            row=1, col=1, secondary_y=True
+        )
+    # จำนวนข่าว
+    for col in ["neutral", "negative", "positive"]:
+        if col not in plot_data.columns:
+            plot_data[col] = 0
+    fig.add_trace(go.Bar(x=plot_data["date_day"], y=plot_data["neutral"], name="Neutral",
+                         marker_color='rgba(128, 128, 128, 0.7)'), row=2, col=1)
+    fig.add_trace(go.Bar(x=plot_data["date_day"], y=plot_data["negative"], name="Negative",
+                         marker_color='rgba(255, 0, 0, 0.7)'), row=2, col=1)
+    fig.add_trace(go.Bar(x=plot_data["date_day"], y=plot_data["positive"], name="Positive",
+                         marker_color='rgba(0, 128, 0, 0.7)'), row=2, col=1)
+    fig.update_layout(title=f"แนวโน้มอารมณ์ของข่าว + ราคาหุ้น ({symbol})",
+                      barmode="stack", height=650, hovermode="x unified", template="plotly_white")
+    st.plotly_chart(fig, use_container_width=True)
+    # แสดงรายการข่าว
+    st.subheader("📰 รายการข่าวทั้งหมด")
+    st.dataframe(news_df[["date", "source", "text", "sentiment", "theme", "url"]], use_container_width=True)
+# --------------------------
+# RUN APP
+# --------------------------
 if __name__ == "__main__":
+    # Download the NLTK stopwords corpus quietly before starting the app.
+    # NOTE(review): no stopwords usage is visible in this diff — confirm the download is still needed.
     nltk.download("stopwords", quiet=True)
     main()