Soundaryasos committed on
Commit
20d33b1
·
verified ·
1 Parent(s): a2129f9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +164 -18
app.py CHANGED
@@ -15,6 +15,11 @@ from googleapiclient.discovery import build
15
  import os
16
  import time
17
  from functools import lru_cache
 
 
 
 
 
18
 
19
  # --------------------------
20
  # Initial Setup
@@ -166,6 +171,85 @@ def fetch_youtube_data(keyword, limit=30):
166
  st.error(f"YouTube fetch error: {str(e)}")
167
  return pd.DataFrame()
168
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
  # --------------------------
170
  # Visualization Functions
171
  # --------------------------
@@ -191,25 +275,59 @@ def generate_wordcloud(text):
191
  def plot_sentiment(data, keyword):
192
  """Optimized plotting function"""
193
  try:
194
- fig = px.line(
195
- data,
196
- x='date',
197
- y=['vader', 'bert', 'textblob', 'average'],
198
- title=f'Sentiment Analysis for "{keyword}"',
199
- labels={'value': 'Sentiment Score', 'date': 'Date'},
200
- color_discrete_map={
201
- "vader": "#636EFA",
202
- "bert": "#EF553B",
203
- "textblob": "#00CC96",
204
- "average": "#AB63FA"
205
- }
206
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
  fig.update_layout(
208
- hovermode="x unified",
209
  xaxis_title="Date",
210
  yaxis_title="Sentiment Score",
211
- legend_title="Metric"
 
212
  )
 
213
  return fig
214
  except Exception as e:
215
  st.error(f"Plotting error: {str(e)}")
@@ -248,6 +366,7 @@ def main():
248
  st.markdown("---")
249
  st.markdown("### Options")
250
  show_details = st.checkbox("Show detailed results", value=False)
 
251
  st.markdown("---")
252
 
253
  # Main content
@@ -335,21 +454,48 @@ def main():
335
 
336
  # Filter recent data
337
  combined_data['date'] = pd.to_datetime(combined_data['date'])
338
- recent_data = combined_data[combined_data['date'] >= (datetime.now() - timedelta(days=14))]
339
 
340
  if not recent_data.empty:
341
  # Sentiment trends
342
  st.subheader("πŸ“… Sentiment Over Time")
343
- fig = plot_sentiment(recent_data, keyword)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
344
  if fig:
345
  st.plotly_chart(fig, use_container_width=True)
346
 
 
 
 
 
 
 
 
 
 
 
 
 
347
  # Show details if enabled
348
  if show_details:
349
  st.subheader("πŸ” Detailed Results")
350
  st.dataframe(recent_data[['date', 'source', 'text', 'average']], use_container_width=True)
351
  else:
352
- st.info("No recent data found (within last 14 days).")
353
 
354
  if __name__ == "__main__":
355
  # Initialize NLTK data
 
15
  import os
16
  import time
17
  from functools import lru_cache
18
+ import numpy as np
19
+ from sklearn.linear_model import Ridge
20
+ from sklearn.preprocessing import PolynomialFeatures
21
+ from sklearn.pipeline import make_pipeline
22
+ from sklearn.model_selection import train_test_split
23
 
24
  # --------------------------
25
  # Initial Setup
 
171
  st.error(f"YouTube fetch error: {str(e)}")
172
  return pd.DataFrame()
173
 
174
+ # --------------------------
175
+ # Prediction Functions
176
+ # --------------------------
177
+
178
def prepare_data_for_prediction(data):
    """Aggregate sentiment observations into one row per day with data.

    Args:
        data: DataFrame with a datetime ``date`` column and a numeric
            ``average`` column (one row per observation).

    Returns:
        DataFrame with columns ``date`` (day bucket), ``average`` (mean of
        that day's observations) and ``days`` (whole days elapsed since the
        earliest bucket), sorted by date. Returns ``None`` if preparation
        fails; the failure is reported through ``st.error``.
    """
    try:
        # Sort first so the daily buckets come out in chronological order.
        ordered = data.sort_values('date')

        # Mean sentiment per calendar day.
        daily_data = (
            ordered.groupby(pd.Grouper(key='date', freq='D'))['average']
            .mean()
            .reset_index()
        )

        # The daily grouper emits a row for every calendar day in the range,
        # so days without observations get a NaN mean. Drop them: they carry
        # no information and a regressor cannot be fitted on NaN targets.
        daily_data = daily_data.dropna(subset=['average']).reset_index(drop=True)

        # Numeric time feature: days since the first day that has data.
        first_day = daily_data['date'].min()
        daily_data['days'] = (daily_data['date'] - first_day).dt.days

        return daily_data
    except Exception as e:
        st.error(f"Data preparation error: {str(e)}")
        return None
194
+
195
def train_sentiment_model(data):
    """Fit a degree-2 polynomial Ridge regression of sentiment over time.

    Args:
        data: Daily DataFrame with a numeric ``days`` column (feature) and an
            ``average`` column (target). May be ``None`` if the preparation
            step failed upstream.

    Returns:
        Tuple ``(model, training_data)`` where ``model`` is the fitted
        pipeline and ``training_data`` is the subset of ``data`` that was
        actually used for fitting (rows with a missing feature or target are
        excluded). Returns ``(None, None)`` when fewer than 5 usable rows are
        available or when training fails.
    """
    try:
        # Rows with NaN cannot be fitted, so they must not count towards the
        # minimum-size check either. A None input is treated as "no data".
        usable = None if data is None else data.dropna(subset=['days', 'average'])

        if usable is None or len(usable) < 5:
            st.warning("Not enough data points for reliable prediction (minimum 5 days required)")
            return None, None

        # Single feature (elapsed days) as a column vector; target is the
        # daily mean sentiment.
        X = usable['days'].values.reshape(-1, 1)
        y = usable['average'].values

        # Quadratic features allow a simple curve; Ridge regularisation keeps
        # the fit from swinging wildly on short series.
        model = make_pipeline(
            PolynomialFeatures(degree=2),
            Ridge(alpha=1.0),
        )
        model.fit(X, y)

        return model, usable
    except Exception as e:
        st.error(f"Model training error: {str(e)}")
        return None, None
218
+
219
def predict_future_sentiment(model, training_data, days_to_predict=15):
    """Extend the daily sentiment series with model predictions.

    Args:
        model: Fitted estimator exposing ``predict(X)``, where ``X`` is an
            ``(n, 1)`` array of day offsets counted from the earliest date in
            ``training_data``. If ``None``, nothing is predicted.
        training_data: Daily DataFrame with ``date`` and ``average`` columns
            that the model was trained on. It is not modified.
        days_to_predict: Number of consecutive days after the last training
            date to forecast. Values <= 0 yield no prediction rows.

    Returns:
        DataFrame containing the training rows labelled ``type='actual'``
        followed by the forecast rows labelled ``type='prediction'``, or
        ``None`` if ``model`` is ``None`` or prediction fails (the failure is
        reported through ``st.error``).
    """
    try:
        if model is None:
            return None

        # Label the history on a copy so the caller's frame stays untouched.
        training_df = training_data.copy()
        training_df['type'] = 'actual'

        # With an empty horizon there is nothing to ask the model for;
        # calling predict on zero samples would fail, so return the labelled
        # history on its own.
        if days_to_predict <= 0:
            return training_df

        # Consecutive daily dates strictly after the last observed date.
        last_date = training_data['date'].max()
        future_dates = pd.date_range(
            start=last_date, periods=days_to_predict + 1, freq='D'
        )[1:]

        # Same feature definition as training: whole days since first date.
        min_date = training_data['date'].min()
        X_future = (future_dates - min_date).days.to_numpy().reshape(-1, 1)

        predictions = model.predict(X_future)

        pred_df = pd.DataFrame({
            'date': future_dates,
            'average': predictions,
            'type': 'prediction',
        })

        return pd.concat([training_df, pred_df], ignore_index=True)
    except Exception as e:
        st.error(f"Prediction error: {str(e)}")
        return None
252
+
253
  # --------------------------
254
  # Visualization Functions
255
  # --------------------------
 
275
  def plot_sentiment(data, keyword):
276
  """Optimized plotting function"""
277
  try:
278
+ # Separate actual and predicted data
279
+ actual_data = data[data['type'] == 'actual']
280
+ pred_data = data[data['type'] == 'prediction']
281
+
282
+ fig = go.Figure()
283
+
284
+ # Add actual data
285
+ fig.add_trace(go.Scatter(
286
+ x=actual_data['date'],
287
+ y=actual_data['average'],
288
+ name='Actual Sentiment',
289
+ mode='lines+markers',
290
+ line=dict(color='#636EFA')
291
+ ))
292
+
293
+ # Add predicted data if available
294
+ if not pred_data.empty:
295
+ fig.add_trace(go.Scatter(
296
+ x=pred_data['date'],
297
+ y=pred_data['average'],
298
+ name='Predicted Sentiment',
299
+ mode='lines+markers',
300
+ line=dict(color='#EF553B', dash='dot')
301
+ ))
302
+
303
+ # Add confidence interval (simple version)
304
+ fig.add_trace(go.Scatter(
305
+ x=pred_data['date'],
306
+ y=pred_data['average'] + 0.1,
307
+ mode='lines',
308
+ line=dict(width=0),
309
+ showlegend=False,
310
+ hoverinfo='skip'
311
+ ))
312
+
313
+ fig.add_trace(go.Scatter(
314
+ x=pred_data['date'],
315
+ y=pred_data['average'] - 0.1,
316
+ mode='lines',
317
+ fill='tonexty',
318
+ line=dict(width=0),
319
+ fillcolor='rgba(239, 85, 59, 0.2)',
320
+ name='Prediction Range'
321
+ ))
322
+
323
  fig.update_layout(
324
+ title=f'Sentiment Analysis and Prediction for "{keyword}"',
325
  xaxis_title="Date",
326
  yaxis_title="Sentiment Score",
327
+ hovermode="x unified",
328
+ legend_title="Data Type"
329
  )
330
+
331
  return fig
332
  except Exception as e:
333
  st.error(f"Plotting error: {str(e)}")
 
366
  st.markdown("---")
367
  st.markdown("### Options")
368
  show_details = st.checkbox("Show detailed results", value=False)
369
+ enable_prediction = st.checkbox("Enable sentiment prediction", value=True)
370
  st.markdown("---")
371
 
372
  # Main content
 
454
 
455
  # Filter recent data
456
  combined_data['date'] = pd.to_datetime(combined_data['date'])
457
+ recent_data = combined_data[combined_data['date'] >= (datetime.now() - timedelta(days=60))] # Increased to 60 days for better prediction
458
 
459
  if not recent_data.empty:
460
  # Sentiment trends
461
  st.subheader("πŸ“… Sentiment Over Time")
462
+
463
+ # Prepare data for prediction if enabled
464
+ if enable_prediction and len(recent_data) >= 5:
465
+ with st.spinner("Training prediction model..."):
466
+ daily_data = prepare_data_for_prediction(recent_data)
467
+ model, training_data = train_sentiment_model(daily_data)
468
+
469
+ if model is not None:
470
+ full_data = predict_future_sentiment(model, training_data)
471
+ fig = plot_sentiment(full_data, keyword)
472
+ else:
473
+ fig = plot_sentiment(training_data, keyword)
474
+ else:
475
+ daily_data = prepare_data_for_prediction(recent_data)
476
+ fig = plot_sentiment(daily_data.assign(type='actual'), keyword)
477
+
478
  if fig:
479
  st.plotly_chart(fig, use_container_width=True)
480
 
481
+ # Show prediction insights
482
+ if enable_prediction and 'full_data' in locals() and full_data is not None:
483
+ last_actual = full_data[full_data['type'] == 'actual']['average'].iloc[-1]
484
+ last_pred = full_data[full_data['type'] == 'prediction']['average'].iloc[-1]
485
+
486
+ if last_pred > last_actual + 0.1:
487
+ st.success("πŸ“ˆ Prediction: Sentiment is expected to improve in the next 15 days")
488
+ elif last_pred < last_actual - 0.1:
489
+ st.warning("πŸ“‰ Prediction: Sentiment is expected to decline in the next 15 days")
490
+ else:
491
+ st.info("πŸ“Š Prediction: Sentiment is expected to remain stable in the next 15 days")
492
+
493
  # Show details if enabled
494
  if show_details:
495
  st.subheader("πŸ” Detailed Results")
496
  st.dataframe(recent_data[['date', 'source', 'text', 'average']], use_container_width=True)
497
  else:
498
+ st.info("No recent data found (within last 60 days).")
499
 
500
  if __name__ == "__main__":
501
  # Initialize NLTK data