KYTHY committed on
Commit
3970759
·
verified ·
1 Parent(s): 34ffce3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +169 -186
app.py CHANGED
@@ -1,220 +1,203 @@
1
  import streamlit as st
2
- import pandas as pd
3
  import requests
4
- import yfinance as yf
 
5
  from transformers import pipeline
6
  from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
7
  from textblob import TextBlob
8
- from datetime import datetime, timedelta
9
- import plotly.graph_objects as go
10
  import nltk
 
 
 
11
  import numpy as np
12
- from sklearn.linear_model import Ridge
13
- from sklearn.preprocessing import PolynomialFeatures
14
- from sklearn.pipeline import make_pipeline
15
- import os
16
-
17
- # -------------------------------
18
- # 🔧 CONFIG
19
- # -------------------------------
20
- st.set_page_config(page_title="📈 News Sentiment & Stock Tracker", layout="wide")
21
  API_KEY = "88bc396d4eab4be494a4b86ec842db47"
22
 
23
- # -------------------------------
24
- # 📦 โหลดโมเดล
25
- # -------------------------------
26
  @st.cache_resource
27
  def load_models():
28
- bert = pipeline("sentiment-analysis", model="nlptown/bert-base-multilingual-uncased-sentiment")
 
29
  vader = SentimentIntensityAnalyzer()
30
- return bert, vader
31
-
32
- bert_model, vader_analyzer = load_models()
33
-
34
- # -------------------------------
35
- # 🧠 ฟังก์ชันแปลงชื่อบริษัท <-> ตัวย่อหุ้น
36
- # -------------------------------
37
- @st.cache_data(ttl=86400)
38
- def resolve_company_symbol(keyword: str):
39
- keyword = keyword.strip()
40
- ticker = None
41
- name = None
42
- try:
43
- data = yf.Ticker(keyword)
44
- info = data.info
45
- if "symbol" in info and info["symbol"]:
46
- ticker = info["symbol"]
47
- name = info.get("longName", info.get("shortName", keyword))
48
- else:
49
- url = f"https://query2.finance.yahoo.com/v1/finance/search?q={keyword}"
50
- res = requests.get(url).json()
51
- if "quotes" in res and len(res["quotes"]) > 0:
52
- q = res["quotes"][0]
53
- ticker = q.get("symbol")
54
- name = q.get("longname", q.get("shortname", keyword))
55
- except Exception as e:
56
- st.warning(f"⚠️ ไม่สามารถค้นหาข้อมูลบริษัทได้: {e}")
57
-
58
- if not ticker:
59
- ticker = keyword.upper()
60
- if not name:
61
- name = keyword.capitalize()
62
- return name, ticker
63
-
64
- # -------------------------------
65
- # 📰 ดึงข่าวย้อนหลัง 7 วัน
66
- # -------------------------------
67
- @st.cache_data(ttl=3600)
68
- def fetch_news(company, symbol):
69
- to_date = datetime.utcnow()
70
- from_date = to_date - timedelta(days=7)
71
- query = f"({company} OR {symbol}) finance stock"
72
-
73
- url = (
74
- f"https://newsapi.org/v2/everything?"
75
- f"q={query}&from={from_date.date()}&to={to_date.isoformat()}&"
76
- f"language=en&sortBy=publishedAt&pageSize=100&apiKey={API_KEY}"
77
- )
78
 
79
- res = requests.get(url)
80
- data = res.json()
81
- if data.get("status") != "ok":
82
- st.error("❌ ดึงข้อมูลข่าวไม่สำเร็จ")
83
- return pd.DataFrame()
84
-
85
- articles = data.get("articles", [])
86
- df = pd.DataFrame([{
87
- "date": datetime.fromisoformat(a["publishedAt"].replace("Z", "+00:00")),
88
- "title": a["title"],
89
- "description": a["description"],
90
- "source": a["source"]["name"],
91
- "url": a["url"],
92
- } for a in articles])
93
-
94
- df["text"] = df["title"].fillna('') + " " + df["description"].fillna('')
95
- df["company"] = company
96
- df["symbol"] = symbol
97
- return df
98
-
99
- # -------------------------------
100
- # 💬 วิเคราะห์อารมณ์ข่าว
101
- # -------------------------------
102
- def analyze_sentiment(text, models):
103
- bert, vader = models
104
  if not text.strip():
105
  return 0
106
- try:
107
- vader_score = vader.polarity_scores(text)["compound"]
108
- tb_score = TextBlob(text).sentiment.polarity
109
- bert_res = bert(text[:512])[0]
110
- label_map = {
111
- "1 star": -1, "2 stars": -0.5, "3 stars": 0,
112
- "4 stars": 0.5, "5 stars": 1
113
- }
114
- bert_score = label_map.get(bert_res["label"], 0)
115
- return np.mean([vader_score, tb_score, bert_score])
116
- except Exception:
117
- return 0
118
 
119
- # -------------------------------
120
- # 📈 สร้างโมเดลพยากรณ์
121
- # -------------------------------
122
- def forecast_sentiment_trend(df):
123
- # ensure datetime format
124
- df["date"] = pd.to_datetime(df["date"], errors="coerce")
125
- df = df.dropna(subset=["date"])
126
- df_daily = df.groupby(df["date"].dt.date)["sentiment"].mean().reset_index()
127
- df_daily["date"] = pd.to_datetime(df_daily["date"])
128
- df_daily["days"] = (df_daily["date"] - df_daily["date"].min()).dt.days
129
-
130
- X = df_daily["days"].values.reshape(-1, 1)
131
- y = df_daily["sentiment"].values
132
- model = make_pipeline(PolynomialFeatures(2), Ridge(alpha=1.0))
133
- model.fit(X, y)
134
-
135
- last_day = df_daily["days"].max()
136
- future_days = np.arange(last_day + 1, last_day + 8).reshape(-1, 1)
137
- future_preds = model.predict(future_days)
138
- future_dates = [df_daily["date"].max() + timedelta(days=i) for i in range(1, 8)]
139
- forecast_df = pd.DataFrame({"date": future_dates, "predicted_sentiment": future_preds})
140
-
141
- return df_daily, forecast_df
142
-
143
- # -------------------------------
144
- # 📊 ส่วนแสดงผลหลัก
145
- # -------------------------------
146
- st.title("📈 News Sentiment & Stock Tracker")
147
-
148
- keyword = st.text_input("🔍 ค้นหาบริษัทหรือตัวย่อหุ้น (เช่น Apple หรือ AAPL):", "AAPL")
149
- if st.button("Analyze"):
150
- company, symbol = resolve_company_symbol(keyword)
151
- st.info(f"📊 กำลังวิเคราะห์ข่าวของ **{company} ({symbol})**...")
152
-
153
- news_df = fetch_news(company, symbol)
154
- if news_df.empty:
155
- st.warning("ไม่พบข่าวในช่วง 7 วันที่ผ่านมา")
156
- st.stop()
157
 
158
- news_df["sentiment"] = news_df["text"].apply(lambda x: analyze_sentiment(x, (bert_model, vader_analyzer)))
159
 
160
- avg_sent = news_df["sentiment"].mean()
161
- st.metric("📈 ค่าเฉลี่ยอารมณ์ข่าว (7 วัน)", f"{avg_sent:.2f}",
162
- "Positive" if avg_sent > 0 else "Negative" if avg_sent < 0 else "Neutral")
163
-
164
- # -------------------------------
165
- # 📈 แนวโน้มอารมณ์ + ราคาหุ้น
166
- # -------------------------------
167
- st.subheader("📊 แนวโน้มอารมณ์ข่าว & ราคาหุ้น")
168
-
169
- df_actual, df_forecast = forecast_sentiment_trend(news_df)
170
-
171
- # ดึงราคาหุ้นจาก yfinance
172
- price_df = yf.download(symbol, period="14d", interval="1d")
173
- price_df = price_df.reset_index()[["Date", "Close"]]
174
- price_df.rename(columns={"Date": "date"}, inplace=True)
175
- price_df["date"] = pd.to_datetime(price_df["date"]).dt.date
176
- df_actual["date"] = pd.to_datetime(df_actual["date"]).dt.date
177
- df_forecast["date"] = pd.to_datetime(df_forecast["date"]).dt.date
178
-
179
- # สร้างกราฟรวม
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  fig = go.Figure()
 
181
  fig.add_trace(go.Scatter(
182
- x=df_actual["date"], y=df_actual["sentiment"],
183
- mode="lines+markers", name="Actual Sentiment", line=dict(color="blue")
 
184
  ))
 
185
  fig.add_trace(go.Scatter(
186
- x=df_forecast["date"], y=df_forecast["predicted_sentiment"],
187
- mode="lines+markers", name="Predicted Sentiment (Next 7 Days)",
188
  line=dict(color="orange", dash="dash")
189
  ))
 
190
  fig.add_trace(go.Scatter(
191
- x=price_df["date"], y=price_df["Close"],
192
- mode="lines+markers", name=f"{symbol} Stock Price",
193
- line=dict(color="green"), yaxis="y2"
 
 
 
194
  ))
195
 
196
  fig.update_layout(
197
- title=f"📈 แนวโน้มอารมณ์ข่าว & ราคาหุ้น ({symbol})",
198
- xaxis=dict(title="วันที่"),
199
- yaxis=dict(title="Sentiment", side="left", range=[-1, 1]),
200
- yaxis2=dict(title="Stock Price (USD)", overlaying="y", side="right", showgrid=False),
201
- legend=dict(x=0, y=1.1, orientation="h"),
202
  hovermode="x unified",
203
  template="plotly_white"
204
  )
205
  st.plotly_chart(fig, use_container_width=True)
206
 
207
- # -------------------------------
208
- # 📰 แสดงข่าวที่ใช้วิเคราะห์
209
- # -------------------------------
210
- st.subheader("📰 ข่าวที่ใช้วิเคราะห์")
211
- st.dataframe(news_df[["date", "source", "title", "sentiment"]])
212
-
213
- # -------------------------------
214
- # โหลด NLTK
215
- # -------------------------------
216
- try:
217
- nltk.download("punkt", quiet=True)
218
  nltk.download("stopwords", quiet=True)
219
- except:
220
- pass
 
1
  import streamlit as st
 
2
  import requests
3
+ import pandas as pd
4
+ from datetime import datetime, timedelta
5
  from transformers import pipeline
6
  from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
7
  from textblob import TextBlob
 
 
8
  import nltk
9
+ from wordcloud import WordCloud
10
+ import base64
11
+ from io import BytesIO
12
  import numpy as np
13
+ from sklearn.linear_model import LinearRegression
14
+ import plotly.graph_objects as go
15
+
16
+ # --------------------------
17
+ # CONFIG
18
+ # --------------------------
19
+ st.set_page_config(page_title="📰 SentimentSync NewsAI", layout="wide")
 
 
20
  API_KEY = "88bc396d4eab4be494a4b86ec842db47"
21
 
22
+ # --------------------------
23
+ # UTILITIES
24
+ # --------------------------
25
  @st.cache_resource
26
  def load_models():
27
+ st.info("Loading sentiment models...")
28
+ bert_model = pipeline("sentiment-analysis", model="nlptown/bert-base-multilingual-uncased-sentiment")
29
  vader = SentimentIntensityAnalyzer()
30
+ return bert_model, vader
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
+
33
+ def analyze_text(text, bert_model, vader):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  if not text.strip():
35
  return 0
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
+ vader_score = vader.polarity_scores(text)["compound"]
38
+ textblob_score = TextBlob(text).sentiment.polarity
39
+ bert_result = bert_model(text[:512])[0]
40
+ label_map = {
41
+ "1 star": -1,
42
+ "2 stars": -0.5,
43
+ "3 stars": 0,
44
+ "4 stars": 0.5,
45
+ "5 stars": 1
46
+ }
47
+ bert_score = label_map.get(bert_result["label"], 0)
48
+ return np.mean([vader_score, textblob_score, bert_score])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
 
50
 
51
+ @st.cache_data(ttl=3600)
52
+ def fetch_financial_news(keyword):
53
+ """ดึงข่าวย้อนหลัง 7 วันจาก NewsAPI.org"""
54
+ to_date = datetime.now().strftime('%Y-%m-%d')
55
+ from_date = (datetime.now() - timedelta(days=7)).strftime('%Y-%m-%d')
56
+
57
+ all_articles = []
58
+ page = 1
59
+ while True:
60
+ url = (
61
+ f"https://newsapi.org/v2/everything?"
62
+ f"q={keyword}+finance+stock&"
63
+ f"from={from_date}&to={to_date}&"
64
+ f"language=en&sortBy=publishedAt&"
65
+ f"pageSize=100&page={page}&apiKey={API_KEY}"
66
+ )
67
+ r = requests.get(url)
68
+ data = r.json()
69
+
70
+ if data.get("status") != "ok":
71
+ st.error(f"API Error: {data}")
72
+ break
73
+
74
+ articles = data.get("articles", [])
75
+ if not articles:
76
+ break
77
+
78
+ for a in articles:
79
+ if a["description"]:
80
+ all_articles.append({
81
+ "date": pd.to_datetime(a["publishedAt"]),
82
+ "text": f"{a['title']} {a['description']}",
83
+ "source": a["source"]["name"],
84
+ "url": a["url"]
85
+ })
86
+
87
+ if len(articles) < 100:
88
+ break # หมดแล้ว
89
+ page += 1
90
+
91
+ return pd.DataFrame(all_articles)
92
+
93
+
94
+ def generate_wordcloud(text):
95
+ stopwords = nltk.corpus.stopwords.words('english')
96
+ wordcloud = WordCloud(width=800, height=400, background_color="white", stopwords=stopwords).generate(text)
97
+ buf = BytesIO()
98
+ wordcloud.to_image().save(buf, format="PNG")
99
+ return base64.b64encode(buf.getvalue()).decode()
100
+
101
+
102
+ # --------------------------
103
+ # MAIN APP
104
+ # --------------------------
105
+ def main():
106
+ st.title("📰 SentimentSync NewsAI")
107
+ st.markdown("วิเคราะห์แนวโน้มอารมณ์ของข่าวการเงินย้อนหลัง 7 วัน พร้อมพยากรณ์แนวโน้มในอนาคต")
108
+
109
+ # Sidebar
110
+ with st.sidebar:
111
+ keyword = st.text_input("ค้นหาคำ (เช่น Tesla, Bitcoin, Inflation):", "")
112
+ analyze_btn = st.button("วิเคราะห์เลย")
113
+
114
+ if not analyze_btn:
115
+ st.info("กรอกคำค้นแล้วกด 'วิเคราะห์เลย' เพื่อเริ่มต้น")
116
+ return
117
+
118
+ bert_model, vader = load_models()
119
+
120
+ # ดึงข่าว
121
+ st.info(f"กำลังดึงข่าวย้อนหลัง 7 วันจาก NewsAPI.org สำหรับ '{keyword}' ...")
122
+ news_df = fetch_financial_news(keyword)
123
+ if news_df.empty:
124
+ st.warning("ไม่พบบทความข่าวในช่วง 7 วันที่ผ่านมา")
125
+ return
126
+
127
+ # วิเคราะห์ sentiment
128
+ st.info("กำลังวิเคราะห์อารมณ์ของข่าว...")
129
+ news_df["sentiment"] = news_df["text"].apply(lambda x: analyze_text(x, bert_model, vader))
130
+ news_df["date"] = pd.to_datetime(news_df["date"])
131
+
132
+ avg_sentiment = news_df["sentiment"].mean()
133
+ pos_pct = (news_df["sentiment"] > 0.1).mean() * 100
134
+ neg_pct = (news_df["sentiment"] < -0.1).mean() * 100
135
+
136
+ col1, col2, col3 = st.columns(3)
137
+ col1.metric("ค่าเฉลี่ยอารมณ์ข่าว", f"{avg_sentiment:.2f}",
138
+ "Positive" if avg_sentiment > 0 else "Negative" if avg_sentiment < 0 else "Neutral")
139
+ col2.metric("ข่าวเชิงบวก", f"{pos_pct:.1f}%")
140
+ col3.metric("ข่าวเชิงลบ", f"{neg_pct:.1f}%")
141
+
142
+ # Wordcloud
143
+ st.subheader("☁️ Word Cloud ของข่าว")
144
+ all_text = " ".join(news_df["text"].tolist())
145
+ img = generate_wordcloud(all_text)
146
+ st.image(f"data:image/png;base64,{img}", use_column_width=True)
147
+
148
+ # แนวโน้มและพยากรณ์ในกราฟเดียว
149
+ st.subheader("📈 แนวโน้มและพยากรณ์อารมณ์ของข่าว")
150
+
151
+ df_sorted = news_df.sort_values("date").copy()
152
+ df_sorted["timestamp"] = (df_sorted["date"] - df_sorted["date"].min()).dt.days
153
+
154
+ # Train model
155
+ model = LinearRegression()
156
+ model.fit(df_sorted[["timestamp"]], df_sorted["sentiment"])
157
+
158
+ # Forecast next 7 days
159
+ future_days = 7
160
+ future_timestamps = np.arange(df_sorted["timestamp"].max() + 1, df_sorted["timestamp"].max() + future_days + 1)
161
+ future_dates = [df_sorted["date"].max() + timedelta(days=i) for i in range(1, future_days + 1)]
162
+ future_preds = model.predict(future_timestamps.reshape(-1, 1))
163
+
164
+ # Plot both actual + prediction
165
  fig = go.Figure()
166
+
167
  fig.add_trace(go.Scatter(
168
+ x=df_sorted["date"], y=df_sorted["sentiment"],
169
+ mode="lines+markers", name="Actual Sentiment",
170
+ line=dict(color="blue")
171
  ))
172
+
173
  fig.add_trace(go.Scatter(
174
+ x=future_dates, y=future_preds,
175
+ mode="lines+markers", name="Predicted Sentiment (7-day Forecast)",
176
  line=dict(color="orange", dash="dash")
177
  ))
178
+
179
  fig.add_trace(go.Scatter(
180
+ x=future_dates + future_dates[::-1],
181
+ y=list(future_preds + 0.1) + list((future_preds - 0.1)[::-1]),
182
+ fill='toself', fillcolor='rgba(255,165,0,0.2)',
183
+ line=dict(color='rgba(255,255,255,0)'),
184
+ hoverinfo="skip",
185
+ showlegend=False
186
  ))
187
 
188
  fig.update_layout(
189
+ title=f"แนวโน้มและพยากรณ์อารมณ์ของข่าว '{keyword}'",
190
+ xaxis_title="วันที่",
191
+ yaxis_title="ค่าอารมณ์ (Sentiment)",
 
 
192
  hovermode="x unified",
193
  template="plotly_white"
194
  )
195
  st.plotly_chart(fig, use_container_width=True)
196
 
197
+ st.subheader("📰 รายการข่าว")
198
+ st.dataframe(news_df[["date", "source", "text", "sentiment", "url"]], use_container_width=True)
199
+
200
+
201
+ if __name__ == "__main__":
 
 
 
 
 
 
202
  nltk.download("stopwords", quiet=True)
203
+ main()