Soundaryasos committed on
Commit
430b0cc
·
verified ·
1 Parent(s): 44001d1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +187 -3
app.py CHANGED
@@ -6,6 +6,9 @@ import pandas as pd
6
  from datetime import datetime, timedelta
7
  import plotly.express as px
8
  from sklearn.linear_model import Ridge
 
 
 
9
  from wordcloud import WordCloud
10
  import base64
11
  from io import BytesIO
@@ -14,6 +17,8 @@ from textblob import TextBlob
14
  import praw
15
  from googleapiclient.discovery import build
16
  import os
 
 
17
 
18
  # --------------------------
19
  # Initial Setup & Configuration
@@ -21,7 +26,7 @@ import os
21
 
22
  # Set page config
23
  st.set_page_config(
24
- page_title="🌟 SentimentSync: Live Sentiment Analysis Dashboard",
25
  page_icon="📊",
26
  layout="wide"
27
  )
@@ -141,6 +146,149 @@ def generate_wordcloud(text):
141
  st.error(f"Error generating word cloud: {str(e)}")
142
  return ""
143
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  # --------------------------
145
  # Data Fetching Functions
146
  # --------------------------
@@ -246,7 +394,7 @@ def plot_sentiment_distribution(df, keyword):
246
  # --------------------------
247
 
248
  def main():
249
- st.title("🌟 SentimentSync: Live Sentiment Analysis Dashboard")
250
 
251
  # Sidebar controls
252
  with st.sidebar:
@@ -274,6 +422,7 @@ def main():
274
  st.markdown("---")
275
  st.markdown("### Settings")
276
  show_raw_data = st.checkbox("Show raw data", value=False)
 
277
  st.markdown("---")
278
  st.button("🔄 Reset Analysis")
279
 
@@ -374,7 +523,7 @@ def main():
374
  st.subheader("📝 Word Cloud")
375
  combined_text = " ".join(df['text'])
376
  wordcloud_img = f'data:image/png;base64,{generate_wordcloud(combined_text)}'
377
- st.image(wordcloud_img, use_column_width=True)
378
 
379
  # Filter recent data (last 14 days)
380
  df['date'] = pd.to_datetime(df['date'])
@@ -390,6 +539,41 @@ def main():
390
  st.subheader("📊 Sentiment Distribution")
391
  plot_sentiment_distribution(df_recent, keyword)
392
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
393
  # Raw data (if enabled)
394
  if show_raw_data:
395
  st.subheader("📋 Raw Data")
 
6
  from datetime import datetime, timedelta
7
  import plotly.express as px
8
  from sklearn.linear_model import Ridge
9
+ from sklearn.ensemble import RandomForestRegressor
10
+ from sklearn.model_selection import train_test_split
11
+ from sklearn.metrics import mean_absolute_error
12
  from wordcloud import WordCloud
13
  import base64
14
  from io import BytesIO
 
17
  import praw
18
  from googleapiclient.discovery import build
19
  import os
20
+ from statsmodels.tsa.arima.model import ARIMA
21
+ from prophet import Prophet
22
 
23
  # --------------------------
24
  # Initial Setup & Configuration
 
26
 
27
  # Set page config
28
  st.set_page_config(
29
+ page_title="🌟 SentimentSync: Live Sentiment Analysis & Prediction Dashboard",
30
  page_icon="📊",
31
  layout="wide"
32
  )
 
146
  st.error(f"Error generating word cloud: {str(e)}")
147
  return ""
148
 
149
def prepare_time_series_data(df):
    """Aggregate sentiment scores into a gap-free daily series with calendar features.

    The rows of ``df`` are grouped into calendar days, each score column is
    averaged per day, and days without any rows inherit the previous day's
    values (forward fill).

    Args:
        df: DataFrame with a 'date' column (datetime values or anything
            ``pd.to_datetime`` can parse) and one or more of the score
            columns 'Average', 'VADER', 'BERT', 'TextBlob'. Score columns
            that are absent are skipped instead of failing the whole call.

    Returns:
        A DataFrame with one row per day containing 'date', the available
        score columns, and the derived features 'day_of_week' (Monday=0),
        'day_of_month' and 'days_since_start'. Returns None (after reporting
        the problem through ``st.error``) if the data cannot be prepared.
    """
    score_columns = ('Average', 'VADER', 'BERT', 'TextBlob')
    try:
        # Aggregate only the score columns that exist so that one missing
        # analyzer does not abort the whole preparation step.
        present = [col for col in score_columns if col in df.columns]
        if not present:
            raise ValueError("no sentiment score columns found")

        # Work on a copy: the caller's frame must not be mutated.
        frame = df[['date'] + present].copy()
        # resample() needs a real DatetimeIndex; string/object dates would
        # otherwise raise a TypeError.
        frame['date'] = pd.to_datetime(frame['date'])
        frame = frame.dropna(subset=['date'])

        # Resample to daily data and fill empty days from the previous day
        ts_df = (
            frame.set_index('date')
            .resample('D')
            .agg({col: 'mean' for col in present})
            .ffill()
            .reset_index()
        )

        # Create features
        ts_df['day_of_week'] = ts_df['date'].dt.dayofweek
        ts_df['day_of_month'] = ts_df['date'].dt.day
        ts_df['days_since_start'] = (ts_df['date'] - ts_df['date'].min()).dt.days

        return ts_df
    except Exception as e:
        # UI boundary: surface the problem in the app and signal failure.
        st.error(f"Error preparing time series data: {str(e)}")
        return None
169
+
170
def predict_sentiment_prophet(df, periods=15):
    """Forecast the 'Average' sentiment column with a Prophet model.

    Args:
        df: DataFrame providing a 'date' column and an 'Average' column;
            they are handed to Prophet as 'ds' and 'y'.
        periods: Number of periods passed to ``make_future_dataframe``.

    Returns:
        DataFrame with the columns 'date', 'predicted_sentiment',
        'lower_bound' and 'upper_bound', or None if any step raises (the
        error is reported through ``st.error``).
    """
    # Column mappings into and out of Prophet's naming scheme.
    to_prophet = {'date': 'ds', 'Average': 'y'}
    from_prophet = {
        'ds': 'date',
        'yhat': 'predicted_sentiment',
        'yhat_lower': 'lower_bound',
        'yhat_upper': 'upper_bound',
    }

    try:
        training = df[list(to_prophet)].rename(columns=to_prophet)

        forecaster = Prophet(
            daily_seasonality=True,
            weekly_seasonality=True,
            yearly_seasonality=False,
        )
        forecaster.fit(training)

        # NOTE(review): make_future_dataframe presumably keeps the training
        # dates too, so the result likely spans history plus the horizon --
        # confirm against the Prophet documentation.
        horizon = forecaster.make_future_dataframe(periods=periods)
        result = forecaster.predict(horizon)

        return result[list(from_prophet)].rename(columns=from_prophet)
    except Exception as e:
        st.error(f"Error with Prophet prediction: {str(e)}")
        return None
199
+
200
def predict_sentiment_arima(df, periods=15):
    """Forecast the 'Average' sentiment column with an ARIMA(2, 1, 2) model.

    Args:
        df: DataFrame with an 'Average' column to fit on and a 'date'
            column whose maximum anchors the forecast dates.
        periods: Number of steps to forecast; one date per step is
            generated, one day apart, starting the day after the last date.

    Returns:
        DataFrame with the columns 'date', 'predicted_sentiment' and
        'model' (always 'ARIMA'), or None if fitting or forecasting raises
        (the error is reported through ``st.error``).
    """
    try:
        fitted = ARIMA(df['Average'], order=(2, 1, 2)).fit()
        predicted = fitted.forecast(steps=periods)

        # Build the calendar for the forecast horizon.
        anchor = df['date'].max()
        horizon = []
        for offset in range(1, periods + 1):
            horizon.append(anchor + timedelta(days=offset))

        result = {
            'date': horizon,
            'predicted_sentiment': predicted,
            'model': 'ARIMA',
        }
        return pd.DataFrame(result)
    except Exception as e:
        st.error(f"Error with ARIMA prediction: {str(e)}")
        return None
222
+
223
def predict_sentiment_rf(df, periods=15):
    """Forecast the 'Average' sentiment column with a random forest regressor.

    The frame is first passed through ``prepare_time_series_data``; the
    model is then trained on the calendar features 'days_since_start',
    'day_of_week' and 'day_of_month' and asked to predict the same
    features for the days following the last prepared date.

    Args:
        df: DataFrame accepted by ``prepare_time_series_data``.
        periods: Number of future days to predict.

    Returns:
        DataFrame with the columns 'date', 'predicted_sentiment' and
        'model' (always 'Random Forest'). Returns None when preparation
        fails, when fewer than 10 daily rows are available, or when any
        step raises (the error is reported through ``st.error``).
    """
    feature_names = ['days_since_start', 'day_of_week', 'day_of_month']

    try:
        ts_df = prepare_time_series_data(df)
        if ts_df is None or len(ts_df) < 10:
            return None

        # Fixed seed keeps the forest reproducible between reruns.
        regressor = RandomForestRegressor(n_estimators=100, random_state=42)
        regressor.fit(ts_df[feature_names], ts_df['Average'])

        # Future calendar, expressed with the same features used in training.
        first_day = ts_df['date'].min()
        last_day = ts_df['date'].max()
        upcoming = [last_day + timedelta(days=step) for step in range(1, periods + 1)]

        future_features = pd.DataFrame({
            'days_since_start': [(day - first_day).days for day in upcoming],
            'day_of_week': [day.weekday() for day in upcoming],
            'day_of_month': [day.day for day in upcoming],
        })

        return pd.DataFrame({
            'date': upcoming,
            'predicted_sentiment': regressor.predict(future_features),
            'model': 'Random Forest',
        })
    except Exception as e:
        st.error(f"Error with Random Forest prediction: {str(e)}")
        return None
260
+
261
def plot_sentiment_predictions(history_df, predictions):
    """Plot the daily historical sentiment together with model forecasts.

    Args:
        history_df: DataFrame with a datetime 'date' column and an
            'Average' column; it is averaged per day before plotting.
        predictions: Mapping of model name to a prediction DataFrame (or
            None). Each frame needs 'date' and 'predicted_sentiment'; when
            it also has 'lower_bound' and 'upper_bound', a shaded
            confidence band is drawn behind the prediction line.

    Returns:
        The Plotly figure, or None if plotting raises (the error is
        reported through ``st.error``).
    """
    try:
        # Prepare historical data
        daily_history = (
            history_df.set_index('date')
            .resample('D')['Average']
            .mean()
            .reset_index()
        )

        # Create figure
        fig = px.line(daily_history, x='date', y='Average',
                      title='Historical Sentiment & Future Predictions',
                      labels={'Average': 'Sentiment Score'})

        # Add prediction traces
        for model_name, pred_df in predictions.items():
            if pred_df is None:
                continue

            # Confidence interval, if available. px.area() has no
            # y_upper/y_lower arguments (passing them raises TypeError and
            # used to discard the whole chart), so the band is built from
            # two invisible boundary lines: 'tonexty' fills the area between
            # a trace and the trace added immediately before it.
            if {'lower_bound', 'upper_bound'}.issubset(pred_df.columns):
                fig.add_scatter(x=pred_df['date'], y=pred_df['upper_bound'],
                                mode='lines', line=dict(width=0),
                                hoverinfo='skip', showlegend=False,
                                name=f'{model_name} upper bound')
                fig.add_scatter(x=pred_df['date'], y=pred_df['lower_bound'],
                                mode='lines', line=dict(width=0),
                                fill='tonexty',
                                fillcolor='rgba(128, 128, 128, 0.2)',
                                hoverinfo='skip', showlegend=False,
                                name=f'{model_name} lower bound')

            # Prediction line is added last so it is drawn on top of the band.
            fig.add_scatter(x=pred_df['date'], y=pred_df['predicted_sentiment'],
                            mode='lines', name=f'{model_name} Prediction',
                            line=dict(dash='dot'))

        fig.update_layout(hovermode="x unified", showlegend=True)
        return fig
    except Exception as e:
        st.error(f"Error plotting predictions: {str(e)}")
        return None
291
+
292
  # --------------------------
293
  # Data Fetching Functions
294
  # --------------------------
 
394
  # --------------------------
395
 
396
  def main():
397
+ st.title("🌟 SentimentSync: Live Sentiment Analysis & Prediction Dashboard")
398
 
399
  # Sidebar controls
400
  with st.sidebar:
 
422
  st.markdown("---")
423
  st.markdown("### Settings")
424
  show_raw_data = st.checkbox("Show raw data", value=False)
425
+ enable_prediction = st.checkbox("Enable sentiment prediction", value=True)
426
  st.markdown("---")
427
  st.button("🔄 Reset Analysis")
428
 
 
523
  st.subheader("📝 Word Cloud")
524
  combined_text = " ".join(df['text'])
525
  wordcloud_img = f'data:image/png;base64,{generate_wordcloud(combined_text)}'
526
+ st.image(wordcloud_img, use_container_width=True)
527
 
528
  # Filter recent data (last 14 days)
529
  df['date'] = pd.to_datetime(df['date'])
 
539
  st.subheader("📊 Sentiment Distribution")
540
  plot_sentiment_distribution(df_recent, keyword)
541
 
542
+ # Sentiment prediction
543
+ if enable_prediction and len(df_recent) >= 7: # Need at least 7 days of data
544
+ st.subheader("🔮 Sentiment Prediction (Next 15 Days)")
545
+
546
+ with st.spinner("Training prediction models..."):
547
+ # Prepare time series data
548
+ ts_df = prepare_time_series_data(df_recent)
549
+
550
+ if ts_df is not None and len(ts_df) >= 7:
551
+ # Get predictions from different models
552
+ predictions = {
553
+ 'Prophet': predict_sentiment_prophet(ts_df),
554
+ 'ARIMA': predict_sentiment_arima(ts_df),
555
+ 'Random Forest': predict_sentiment_rf(ts_df)
556
+ }
557
+
558
+ # Filter out None predictions
559
+ valid_predictions = {k: v for k, v in predictions.items() if v is not None}
560
+
561
+ if valid_predictions:
562
+ # Plot predictions
563
+ fig = plot_sentiment_predictions(df_recent, valid_predictions)
564
+ if fig:
565
+ st.plotly_chart(fig, use_container_width=True)
566
+
567
+ # Show prediction details
568
+ st.subheader("📋 Prediction Details")
569
+ for model_name, pred_df in valid_predictions.items():
570
+ st.markdown(f"**{model_name} Prediction**")
571
+ st.dataframe(pred_df.set_index('date').style.format("{:.2f}"), use_container_width=True)
572
+ else:
573
+ st.warning("Could not generate predictions with the available data.")
574
+ else:
575
+ st.warning("Not enough data points for reliable prediction. Need at least 7 days of data.")
576
+
577
  # Raw data (if enabled)
578
  if show_raw_data:
579
  st.subheader("📋 Raw Data")