KYTHY committed
Commit 023dc07 · verified · 1 Parent(s): 003cbcb

Update app.py

Files changed (1): app.py (+146, -304)
app.py CHANGED
@@ -3,15 +3,12 @@ from transformers import pipeline
 from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
 import pandas as pd
 from datetime import datetime, timedelta
-import plotly.express as px
 import plotly.graph_objects as go
 from wordcloud import WordCloud
 import base64
 from io import BytesIO
 import nltk
 from textblob import TextBlob
-import praw
-from googleapiclient.discovery import build
 import os
 import time
 from functools import lru_cache
@@ -19,6 +16,7 @@ import numpy as np
 from sklearn.linear_model import Ridge
 from sklearn.preprocessing import PolynomialFeatures
 from sklearn.pipeline import make_pipeline
+import feedparser
 
 # --------------------------
 # Initial Setup
@@ -38,54 +36,78 @@ st.set_page_config(
 def load_models():
     """Load models with progress indicators"""
     progress = st.progress(0, text="Loading sentiment models...")
-
+
     try:
         with st.spinner("Loading BERT model..."):
             bert_sentiment = pipeline(
-                "sentiment-analysis",
+                "sentiment-analysis",
                 model="nlptown/bert-base-multilingual-uncased-sentiment"
             )
         progress.progress(50)
-
+
         with st.spinner("Loading VADER analyzer..."):
            vader_analyzer = SentimentIntensityAnalyzer()
         progress.progress(100)
-
+
        return bert_sentiment, vader_analyzer
     except Exception as e:
         st.error(f"Model loading failed: {str(e)}")
         return None, None
 
-@st.cache_resource
-def setup_api_clients():
-    """Initialize API clients with error handling"""
+# --------------------------
+# Fetch Financial News
+# --------------------------
+
+@st.cache_data(ttl=3600, show_spinner="Fetching financial news...")
+def fetch_financial_news(keyword, limit=30):
+    """Fetch recent financial news (past 7 days) using Google News RSS"""
     try:
-        with st.spinner("Initializing Reddit API..."):
-            reddit = praw.Reddit(
-                client_id="S7pTXhj5JDFGDb3-_zrJEA",
-                client_secret="QP3NYN4lrAKVLrBamzLGrpFywiVg8w",
-                user_agent="SentimentSync/1.0"
-            )
-
-        with st.spinner("Initializing YouTube API..."):
-            youtube = build('youtube', 'v3', developerKey="AIzaSyDcUAkcoPvkTwN_tksmiW0dVPI5Bse7qos")
-
-        return reddit, youtube
+        base_url = "https://news.google.com/rss/search"
+        query = f"{keyword}+finance+stock"
+        feed_url = f"{base_url}?q={query}&hl=en-US&gl=US&ceid=US:en"
+
+        feed = feedparser.parse(feed_url)
+        seven_days_ago = datetime.now() - timedelta(days=7)
+
+        articles = []
+        for entry in feed.entries:
+            published = None
+            if hasattr(entry, 'published_parsed') and entry.published_parsed:
+                published = datetime(*entry.published_parsed[:6])
+            elif hasattr(entry, 'updated_parsed') and entry.updated_parsed:
+                published = datetime(*entry.updated_parsed[:6])
+            else:
+                continue
+
+            if published < seven_days_ago:
+                continue
+
+            text = f"{entry.title}\n{entry.summary}" if hasattr(entry, 'summary') else entry.title
+
+            articles.append({
+                'date': published,
+                'text': text,
+                'source': 'Financial News',
+                'url': entry.link
+            })
+
+            if len(articles) >= limit:
+                break
+
+        return pd.DataFrame(articles)
+
     except Exception as e:
-        st.error(f"API initialization failed: {str(e)}")
-        return None, None
+        st.error(f"News fetch error: {str(e)}")
+        return pd.DataFrame()
 
 # --------------------------
-# Core Functions
+# Sentiment Analysis
 # --------------------------
 
 def analyze_text(text, models):
-    """Optimized text analysis with batch processing"""
     bert_sentiment, vader_analyzer = models
-
-    # Truncate very long texts to improve performance
     truncated_text = text[:2000] if text else ""
-
+
     try:
         if not truncated_text.strip():
             return {
@@ -95,12 +117,12 @@ def analyze_text(text, models):
                 'bert_label': 'Neutral',
                 'bert_confidence': 0
             }
-
+
         vader_score = vader_analyzer.polarity_scores(truncated_text)['compound']
         textblob_score = TextBlob(truncated_text).sentiment.polarity
-
-        bert_result = bert_sentiment(truncated_text[:512])[0]  # BERT 512 token limit
-
+
+        bert_result = bert_sentiment(truncated_text[:512])[0]
+
         label_map = {
             '1 star': -1,
             '2 stars': -0.5,
@@ -109,7 +131,7 @@ def analyze_text(text, models):
             '5 stars': 1
         }
         bert_num = label_map.get(bert_result['label'], 0)
-
+
         return {
             'vader': vader_score,
             'bert': bert_num,
@@ -127,72 +149,23 @@ def analyze_text(text, models):
             'bert_confidence': 0
         }
 
-@st.cache_data(ttl=3600, show_spinner="Fetching data...")
-def fetch_reddit_data(keyword, limit=30):
-    """Optimized Reddit data fetching"""
-    try:
-        reddit, _ = setup_api_clients()
-        if not reddit:
-            return pd.DataFrame()
-
-        posts = list(reddit.subreddit("all").search(keyword, limit=limit))
-
-        return pd.DataFrame([{
-            'date': datetime.fromtimestamp(post.created_utc),
-            'text': f"{post.title}\n{post.selftext}",
-            'source': 'Reddit',
-            'url': f"https://reddit.com{post.permalink}"
-        } for post in posts])
-
-    except Exception as e:
-        st.error(f"Reddit fetch error: {str(e)}")
-        return pd.DataFrame()
-
-@st.cache_data(ttl=3600, show_spinner="Fetching data...")
-def fetch_youtube_data(keyword, limit=30):
-    """Optimized YouTube data fetching"""
-    try:
-        _, youtube = setup_api_clients()
-        if not youtube:
-            return pd.DataFrame()
-
-        response = youtube.search().list(
-            q=keyword,
-            part="snippet",
-            maxResults=limit,
-            type="video",
-            order="relevance"
-        ).execute()
-
-        return pd.DataFrame([{
-            'date': datetime.strptime(item['snippet']['publishedAt'], '%Y-%m-%dT%H:%M:%SZ'),
-            'text': f"{item['snippet']['title']}\n{item['snippet']['description']}",
-            'source': 'YouTube',
-            'url': f"https://youtube.com/watch?v={item['id']['videoId']}"
-        } for item in response['items']])
-
-    except Exception as e:
-        st.error(f"YouTube fetch error: {str(e)}")
-        return pd.DataFrame()
-
 # --------------------------
-# Visualization Functions
+# Visualization
 # --------------------------
 
 def generate_wordcloud(text):
-    """Fast word cloud generation"""
     try:
         if not text.strip():
             return ""
-
+
         wordcloud = WordCloud(
-            width=800,
-            height=400,
+            width=800,
+            height=400,
             background_color='white',
-            collocations=False,  # Faster processing
+            collocations=False,
             stopwords=nltk.corpus.stopwords.words('english')
         ).generate(text)
-
+
         img = BytesIO()
         wordcloud.to_image().save(img, format='PNG')
         return base64.b64encode(img.getvalue()).decode()
@@ -201,124 +174,84 @@ def generate_wordcloud(text):
         return ""
 
 # --------------------------
-# Prediction Functions
+# Prediction & Plotting
 # --------------------------
 
 def prepare_data_for_prediction(data):
-    """Prepare time series data for prediction, handling NaN values"""
     try:
         if data.empty:
             st.warning("No data available for prediction")
             return None
-
-        # Ensure data is sorted by date
+
        data = data.sort_values('date')
-
-        # Filter out rows with invalid sentiment scores
         data = data.dropna(subset=['average'])
-
-        # Create daily aggregates
         daily_data = data.groupby(pd.Grouper(key='date', freq='D'))['average'].mean().reset_index()
-
-        # Remove any remaining NaN values from aggregation
         daily_data = daily_data.dropna(subset=['average'])
-
-        # Check if enough data points remain
+
         if len(daily_data) < 5:
             st.warning("Insufficient valid data points for prediction (minimum 5 required)")
             return None
-
-        # Create numerical features (days since first date)
+
         daily_data['days'] = (daily_data['date'] - daily_data['date'].min()).dt.days
-
         return daily_data
     except Exception as e:
         st.error(f"Data preparation error: {str(e)}")
         return None
 
 def train_sentiment_model(data):
-    """Train Ridge regression model, ensuring valid input"""
     try:
-        if data is None:
-            st.warning("No valid data for model training")
-            return None, None
-
-        # Verify sufficient data points
-        if len(data) < 5:
-            st.warning("Not enough data points for reliable prediction (minimum 5 required)")
+        if data is None or len(data) < 5:
             return None, None
-
-        # Extract features and target
+
         X = data['days'].values.reshape(-1, 1)
         y = data['average'].values
-
-        # Check for NaN values
-        if np.any(np.isnan(X)) or np.any(np.isnan(y)):
-            st.warning("Invalid values detected in data. Skipping prediction.")
-            return None, None
-
-        # Train polynomial Ridge regression
-        model = make_pipeline(
-            PolynomialFeatures(degree=2),
-            Ridge(alpha=1.0)
-        )
-
+
+        model = make_pipeline(PolynomialFeatures(degree=2), Ridge(alpha=1.0))
         model.fit(X, y)
-
+
         return model, data
     except Exception as e:
         st.error(f"Model training error: {str(e)}")
         return None, None
 
 def predict_future_sentiment(model, training_data, days_to_predict=15):
-    """Predict future sentiment using trained model"""
     try:
         if model is None or training_data is None:
-            st.warning("No valid model or data for prediction")
             return None
-
-        # Create future dates
+
         last_date = training_data['date'].max()
-        future_dates = [last_date + timedelta(days=i) for i in range(1, days_to_predict+1)]
-
-        # Create feature matrix for future dates
+        future_dates = [last_date + timedelta(days=i) for i in range(1, days_to_predict + 1)]
         min_date = training_data['date'].min()
         future_days = [(date - min_date).days for date in future_dates]
         X_future = np.array(future_days).reshape(-1, 1)
-
-        # Make predictions
+
         predictions = model.predict(X_future)
-
-        # Create prediction dataframe
+
         pred_df = pd.DataFrame({
             'date': future_dates,
             'average': predictions,
             'type': 'prediction'
         })
-
-        # Add training data for plotting
+
         training_df = training_data.copy()
         training_df['type'] = 'actual'
-
+
         return pd.concat([training_df, pred_df], ignore_index=True)
     except Exception as e:
         st.error(f"Prediction error: {str(e)}")
         return None
 
 def plot_sentiment(data, keyword):
-    """Plot sentiment trends, handling missing data"""
     try:
         if data is None or data.empty:
             st.warning("No data available for plotting sentiment trends")
             return None
-
-        # Separate actual and predicted data
+
         actual_data = data[data['type'] == 'actual']
         pred_data = data[data['type'] == 'prediction']
-
+
         fig = go.Figure()
-
-        # Add actual data
+
         if not actual_data.empty:
             fig.add_trace(go.Scatter(
                 x=actual_data['date'],
@@ -327,8 +260,7 @@ def plot_sentiment(data, keyword):
                 mode='lines+markers',
                 line=dict(color='#636EFA')
             ))
-
-        # Add predicted data if available
+
         if not pred_data.empty:
             fig.add_trace(go.Scatter(
                 x=pred_data['date'],
@@ -337,27 +269,7 @@ def plot_sentiment(data, keyword):
                 mode='lines+markers',
                 line=dict(color='#EF553B', dash='dot')
             ))
-
-            # Add confidence interval
-            fig.add_trace(go.Scatter(
-                x=pred_data['date'],
-                y=pred_data['average'] + 0.1,
-                mode='lines',
-                line=dict(width=0),
-                showlegend=False,
-                hoverinfo='skip'
-            ))
-
-            fig.add_trace(go.Scatter(
-                x=pred_data['date'],
-                y=pred_data['average'] - 0.1,
-                mode='lines',
-                fill='tonexty',
-                line=dict(width=0),
-                fillcolor='rgba(239, 85, 59, 0.2)',
-                name='Prediction Range'
-            ))
-
+
         fig.update_layout(
             title=f'Sentiment Analysis and Prediction for "{keyword}"',
             xaxis_title="Date",
@@ -365,186 +277,116 @@ def plot_sentiment(data, keyword):
             hovermode="x unified",
             legend_title="Data Type"
         )
-
+
        return fig
     except Exception as e:
         st.error(f"Plotting error: {str(e)}")
         return None
 
 # --------------------------
-# Main Application
+# Main App
 # --------------------------
 
 def main():
-    st.title("🚀 SentimentSync Pro - Real-time Analysis Dashboard")
-
-    # Sidebar controls
+    st.title("🚀 SentimentSync Pro - Financial News Sentiment Dashboard")
+
     with st.sidebar:
         st.header("🔧 Analysis Controls")
         analysis_mode = st.radio(
             "Mode",
-            ["Text Analysis", "Live Data Analysis"],
-            index=0
+            ["Text Analysis", "Financial News Analysis"],
+            index=1
         )
-
+
         if analysis_mode == "Text Analysis":
-            user_input = st.text_area(
-                "Enter text to analyze",
-                height=200,
-                placeholder="Paste your content here..."
-            )
+            user_input = st.text_area("Enter text to analyze", height=200, placeholder="Paste your content here...")
             analyze_btn = st.button("Analyze Now")
         else:
-            keyword = st.text_input(
-                "Search keyword",
-                placeholder="e.g., Apple, Tesla, etc."
-            )
+            keyword = st.text_input("Enter keyword (e.g., Apple, Tesla, Bitcoin)")
            analyze_btn = st.button("Fetch & Analyze")
-
+
         st.markdown("---")
-        st.markdown("### Options")
         show_details = st.checkbox("Show detailed results", value=False)
         enable_prediction = st.checkbox("Enable sentiment prediction", value=True)
         st.markdown("---")
-
-    # Main content
+
     if analyze_btn:
         models = load_models()
         if not all(models):
-            st.error("Required models failed to load")
+            st.error("Model loading failed")
            return
-
+
        if analysis_mode == "Text Analysis":
            if not user_input.strip():
-                st.warning("Please enter some text to analyze")
+                st.warning("Please enter some text")
                return
-
-            with st.spinner("Analyzing content..."):
-                start_time = time.time()
+
+            with st.spinner("Analyzing..."):
                result = analyze_text(user_input, models)
-                processing_time = time.time() - start_time
-
-            st.success(f"Analysis completed in {processing_time:.2f} seconds")
-
+                st.success("✅ Analysis completed")
+
            cols = st.columns(3)
-            cols[0].metric("VADER Score", f"{result['vader']:.2f}",
-                           "Positive" if result['vader'] > 0 else "Negative" if result['vader'] < 0 else "Neutral")
-            cols[1].metric("BERT Sentiment", result['bert_label'], f"Confidence: {result['bert_confidence']:.2f}")
-            cols[2].metric("TextBlob Score", f"{result['textblob']:.2f}",
-                           "Positive" if result['textblob'] > 0 else "Negative" if result['textblob'] < 0 else "Neutral")
-
-            st.subheader("📊 Text Visualization")
-            wordcloud_img = f'data:image/png;base64,{generate_wordcloud(user_input)}'
-            if wordcloud_img:
-                st.image(wordcloud_img, use_column_width=True)
-            else:
-                st.info("No word cloud generated due to insufficient text")
-
-        else:  # Live Data Analysis
+            cols[0].metric("VADER Score", f"{result['vader']:.2f}")
+            cols[1].metric("BERT Label", result['bert_label'])
+            cols[2].metric("TextBlob", f"{result['textblob']:.2f}")
+
+            st.subheader("📊 Word Cloud")
+            wc_img = f"data:image/png;base64,{generate_wordcloud(user_input)}"
+            st.image(wc_img, use_column_width=True)
+
+        else:
            if not keyword.strip():
-                st.warning("Please enter a search keyword")
+                st.warning("Please enter a keyword")
                return
-
-            with st.spinner(f"Gathering data for '{keyword}'..."):
+
+            with st.spinner(f"Fetching financial news for '{keyword}'..."):
                start_time = time.time()
-
-                reddit_data = fetch_reddit_data(keyword)
-                youtube_data = fetch_youtube_data(keyword)
-
-                if reddit_data.empty and youtube_data.empty:
-                    st.error("No data found. Try a different keyword.")
+                news_data = fetch_financial_news(keyword)
+                if news_data.empty:
+                    st.error("No news found for the past 7 days.")
                    return
-
-                combined_data = pd.concat([reddit_data, youtube_data], ignore_index=True)
-
-                # Filter out empty or invalid texts
-                combined_data = combined_data[combined_data['text'].str.strip() != '']
-
-                # Analyze in batches
+
                analysis_results = []
-                for _, row in combined_data.iterrows():
+                for _, row in news_data.iterrows():
                    analysis_results.append(analyze_text(row['text'], models))
-
-                # Add results to dataframe
-                combined_data['vader'] = [r['vader'] for r in analysis_results]
-                combined_data['bert'] = [r['bert'] for r in analysis_results]
-                combined_data['textblob'] = [r['textblob'] for r in analysis_results]
-
-                # Ensure no NaN values in sentiment scores
-                combined_data = combined_data.dropna(subset=['vader', 'bert', 'textblob'])
-                combined_data['average'] = combined_data[['vader', 'bert', 'textblob']].mean(axis=1)
-
+
+                news_data['vader'] = [r['vader'] for r in analysis_results]
+                news_data['bert'] = [r['bert'] for r in analysis_results]
+                news_data['textblob'] = [r['textblob'] for r in analysis_results]
+                news_data['average'] = news_data[['vader', 'bert', 'textblob']].mean(axis=1)
+
                processing_time = time.time() - start_time
-            st.success(f"Analyzed {len(combined_data)} sources in {processing_time:.2f} seconds")
-
-            st.subheader(f"📈 Overall Sentiment for '{keyword}'")
-
+            st.success(f"Analyzed {len(news_data)} articles in {processing_time:.2f}s")
+
+            avg_sentiment = news_data['average'].mean()
            cols = st.columns(3)
-            avg_sentiment = combined_data['average'].mean()
-            pos_pct = (combined_data['average'] > 0.1).mean() * 100
-            neg_pct = (combined_data['average'] < -0.1).mean() * 100
-
-            cols[0].metric("Avg Sentiment", f"{avg_sentiment:.2f}",
-                           "Positive" if avg_sentiment > 0 else "Negative" if avg_sentiment < 0 else "Neutral")
-            cols[1].metric("Positive Content", f"{pos_pct:.1f}%")
-            cols[2].metric("Negative Content", f"{neg_pct:.1f}%")
-
-            st.subheader("📊 Content Visualization")
-            all_text = " ".join(combined_data['text'])
-            wordcloud_img = f'data:image/png;base64,{generate_wordcloud(all_text)}'
-            if wordcloud_img:
-                st.image(wordcloud_img, use_column_width=True)
-            else:
-                st.info("No word cloud generated due to insufficient text")
-
-            # Filter recent data
-            combined_data['date'] = pd.to_datetime(combined_data['date'])
-            recent_data = combined_data[combined_data['date'] >= (datetime.now() - timedelta(days=60))]
-
-            if not recent_data.empty:
-                st.subheader("📅 Sentiment Over Time")
-
-                if enable_prediction:
-                    with st.spinner("Training prediction model..."):
-                        daily_data = prepare_data_for_prediction(recent_data)
-                        model, training_data = train_sentiment_model(daily_data)
-
-                    if model is not None and training_data is not None:
-                        full_data = predict_future_sentiment(model, training_data)
-                        fig = plot_sentiment(full_data, keyword)
-                    else:
-                        daily_data = daily_data if daily_data is not None else recent_data[['date', 'average']].assign(type='actual')
-                        fig = plot_sentiment(daily_data, keyword)
-                else:
-                    daily_data = prepare_data_for_prediction(recent_data)
-                    fig = plot_sentiment(daily_data.assign(type='actual') if daily_data is not None else recent_data[['date', 'average']].assign(type='actual'), keyword)
-
-                if fig:
+            cols[0].metric("Avg Sentiment", f"{avg_sentiment:.2f}")
+            cols[1].metric("Positive", f"{(news_data['average'] > 0.1).mean() * 100:.1f}%")
+            cols[2].metric("Negative", f"{(news_data['average'] < -0.1).mean() * 100:.1f}%")
+
+            all_text = " ".join(news_data['text'])
+            wc_img = f"data:image/png;base64,{generate_wordcloud(all_text)}"
+            st.subheader("📊 Word Cloud")
+            st.image(wc_img, use_column_width=True)
+
+            if enable_prediction:
+                daily_data = prepare_data_for_prediction(news_data)
+                model, training_data = train_sentiment_model(daily_data)
+                if model is not None:
+                    full_data = predict_future_sentiment(model, training_data)
+                    fig = plot_sentiment(full_data, keyword)
                    st.plotly_chart(fig, use_container_width=True)
-
-                if enable_prediction and 'full_data' in locals() and full_data is not None:
-                    last_actual = full_data[full_data['type'] == 'actual']['average'].iloc[-1]
-                    last_pred = full_data[full_data['type'] == 'prediction']['average'].iloc[-1]
-
-                    if last_pred > last_actual + 0.1:
-                        st.success("📈 Prediction: Sentiment is expected to improve in the next 15 days")
-                    elif last_pred < last_actual - 0.1:
-                        st.warning("📉 Prediction: Sentiment is expected to decline in the next 15 days")
-                    else:
-                        st.info("📊 Prediction: Sentiment is expected to remain stable in the next 15 days")
-
-                if show_details:
-                    st.subheader("🔍 Detailed Results")
-                    st.dataframe(recent_data[['date', 'source', 'text', 'average']], use_container_width=True)
-            else:
-                st.info("No recent data found (within last 60 days).")
+
+            if show_details:
+                st.subheader("📰 Detailed News Data")
+                st.dataframe(news_data[['date', 'source', 'text', 'average', 'url']], use_container_width=True)
+
 
 if __name__ == "__main__":
     try:
         nltk.data.path.append(os.path.join(os.path.expanduser("~"), "nltk_data"))
-        nltk.download('punkt', quiet=True)
         nltk.download('stopwords', quiet=True)
     except:
         pass
-
+
     main()
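The change above swaps the authenticated Reddit and YouTube clients for Google News RSS via feedparser, so no API keys are needed. A minimal sketch of that fetch path outside Streamlit; the query shape and entry fields mirror the diff, while the fetch_news helper name and the Tesla query are illustrative only:

# Standalone sketch of the Google News RSS path used by fetch_financial_news.
# Assumes `pip install feedparser pandas`; fetch_news is a hypothetical helper.
from datetime import datetime, timedelta

import feedparser
import pandas as pd


def fetch_news(keyword, limit=5):
    # Same query shape as the diff: keyword plus finance/stock qualifiers.
    url = f"https://news.google.com/rss/search?q={keyword}+finance+stock&hl=en-US&gl=US&ceid=US:en"
    feed = feedparser.parse(url)  # feedparser downloads and parses the feed itself
    cutoff = datetime.now() - timedelta(days=7)

    rows = []
    for entry in feed.entries:
        parsed = getattr(entry, "published_parsed", None)
        if not parsed:
            continue
        published = datetime(*parsed[:6])  # time.struct_time -> datetime
        if published < cutoff:
            continue  # keep only the past 7 days, as in the diff
        rows.append({"date": published, "title": entry.title, "url": entry.link})
        if len(rows) >= limit:
            break
    return pd.DataFrame(rows)


if __name__ == "__main__":
    print(fetch_news("Tesla"))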
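For reference, analyze_text fuses three scorers on a shared [-1, 1] scale: VADER's compound score, TextBlob polarity, and the nlptown star label mapped through label_map. A toy walk-through of that fusion; the three scores are hypothetical stand-ins for real model output:

# Toy walk-through of the score fusion in analyze_text; the scores below
# are hypothetical stand-ins for real model output.
label_map = {'1 star': -1, '2 stars': -0.5, '3 stars': 0, '4 stars': 0.5, '5 stars': 1}

vader_score = 0.42                      # VADER 'compound', already in [-1, 1]
textblob_score = 0.30                   # TextBlob polarity, already in [-1, 1]
bert_num = label_map.get('4 stars', 0)  # star label mapped to 0.5

average = (vader_score + bert_num + textblob_score) / 3
print(f"average sentiment: {average:.2f}")  # -> average sentiment: 0.41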
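The prediction path aggregates scores into per-day means, fits a degree-2 polynomial Ridge regression on day offsets, and extrapolates 15 days ahead (train_sentiment_model / predict_future_sentiment). A self-contained sketch of that fit on synthetic daily averages; worth noting that a quadratic can swing sharply when extrapolated, which is presumably why the app requires at least 5 data points:

# Self-contained sketch of the degree-2 Ridge trend fit and 15-day
# extrapolation used by the app; the daily averages here are synthetic.
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures

days = np.arange(10).reshape(-1, 1)  # day offsets from the first date
rng = np.random.default_rng(0)
daily_avg = 0.2 + 0.01 * days.ravel() + rng.normal(0, 0.05, size=10)  # noisy upward trend

model = make_pipeline(PolynomialFeatures(degree=2), Ridge(alpha=1.0))
model.fit(days, daily_avg)

future = np.arange(10, 25).reshape(-1, 1)  # the next 15 days
print(model.predict(future).round(3))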