Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,6 +6,9 @@ import pandas as pd
|
|
| 6 |
from datetime import datetime, timedelta
|
| 7 |
import plotly.express as px
|
| 8 |
from sklearn.linear_model import Ridge
|
|
|
|
|
|
|
|
|
|
| 9 |
from wordcloud import WordCloud
|
| 10 |
import base64
|
| 11 |
from io import BytesIO
|
|
@@ -14,6 +17,8 @@ from textblob import TextBlob
|
|
| 14 |
import praw
|
| 15 |
from googleapiclient.discovery import build
|
| 16 |
import os
|
|
|
|
|
|
|
| 17 |
|
| 18 |
# --------------------------
|
| 19 |
# Initial Setup & Configuration
|
|
@@ -21,7 +26,7 @@ import os
|
|
| 21 |
|
| 22 |
# Set page config
|
| 23 |
st.set_page_config(
|
| 24 |
-
page_title="๐ SentimentSync: Live Sentiment Analysis Dashboard",
|
| 25 |
page_icon="๐",
|
| 26 |
layout="wide"
|
| 27 |
)
|
|
@@ -141,6 +146,149 @@ def generate_wordcloud(text):
|
|
| 141 |
st.error(f"Error generating word cloud: {str(e)}")
|
| 142 |
return ""
|
| 143 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
# --------------------------
|
| 145 |
# Data Fetching Functions
|
| 146 |
# --------------------------
|
|
@@ -246,7 +394,7 @@ def plot_sentiment_distribution(df, keyword):
|
|
| 246 |
# --------------------------
|
| 247 |
|
| 248 |
def main():
|
| 249 |
-
st.title("๐ SentimentSync: Live Sentiment Analysis Dashboard")
|
| 250 |
|
| 251 |
# Sidebar controls
|
| 252 |
with st.sidebar:
|
|
@@ -274,6 +422,7 @@ def main():
|
|
| 274 |
st.markdown("---")
|
| 275 |
st.markdown("### Settings")
|
| 276 |
show_raw_data = st.checkbox("Show raw data", value=False)
|
|
|
|
| 277 |
st.markdown("---")
|
| 278 |
st.button("๐ Reset Analysis")
|
| 279 |
|
|
@@ -374,7 +523,7 @@ def main():
|
|
| 374 |
st.subheader("๐ Word Cloud")
|
| 375 |
combined_text = " ".join(df['text'])
|
| 376 |
wordcloud_img = f'data:image/png;base64,{generate_wordcloud(combined_text)}'
|
| 377 |
-
st.image(wordcloud_img,
|
| 378 |
|
| 379 |
# Filter recent data (last 14 days)
|
| 380 |
df['date'] = pd.to_datetime(df['date'])
|
|
@@ -390,6 +539,41 @@ def main():
|
|
| 390 |
st.subheader("๐ Sentiment Distribution")
|
| 391 |
plot_sentiment_distribution(df_recent, keyword)
|
| 392 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 393 |
# Raw data (if enabled)
|
| 394 |
if show_raw_data:
|
| 395 |
st.subheader("๐ Raw Data")
|
|
|
|
| 6 |
from datetime import datetime, timedelta
|
| 7 |
import plotly.express as px
|
| 8 |
from sklearn.linear_model import Ridge
|
| 9 |
+
from sklearn.ensemble import RandomForestRegressor
|
| 10 |
+
from sklearn.model_selection import train_test_split
|
| 11 |
+
from sklearn.metrics import mean_absolute_error
|
| 12 |
from wordcloud import WordCloud
|
| 13 |
import base64
|
| 14 |
from io import BytesIO
|
|
|
|
| 17 |
import praw
|
| 18 |
from googleapiclient.discovery import build
|
| 19 |
import os
|
| 20 |
+
from statsmodels.tsa.arima.model import ARIMA
|
| 21 |
+
from prophet import Prophet
|
| 22 |
|
| 23 |
# --------------------------
|
| 24 |
# Initial Setup & Configuration
|
|
|
|
| 26 |
|
| 27 |
# Set page config
|
| 28 |
st.set_page_config(
|
| 29 |
+
page_title="๐ SentimentSync: Live Sentiment Analysis & Prediction Dashboard",
|
| 30 |
page_icon="๐",
|
| 31 |
layout="wide"
|
| 32 |
)
|
|
|
|
| 146 |
st.error(f"Error generating word cloud: {str(e)}")
|
| 147 |
return ""
|
| 148 |
|
| 149 |
+
def prepare_time_series_data(df):
    """Aggregate sentiment scores into a daily series with calendar features.

    Args:
        df: DataFrame with a 'date' column and numeric 'Average', 'VADER',
            'BERT' and 'TextBlob' columns. 'date' may already be datetime
            or hold values ``pd.to_datetime`` can parse (e.g. ISO strings).

    Returns:
        A DataFrame with one row per calendar day between the first and
        last date, holding the daily mean of each score (days without
        data are forward-filled from the previous day) plus the feature
        columns 'day_of_week', 'day_of_month' and 'days_since_start'.
        Returns None, after reporting through ``st.error``, if anything
        goes wrong.
    """
    try:
        # resample() requires a DatetimeIndex. Parse the dates on a new frame
        # so string/object dates work and the caller's frame is not mutated.
        dated = df.assign(date=pd.to_datetime(df['date']))

        # Resample to daily data
        ts_df = dated.set_index('date').resample('D').agg({
            'Average': 'mean',
            'VADER': 'mean',
            'BERT': 'mean',
            'TextBlob': 'mean'
        }).ffill().reset_index()

        # Create features
        ts_df['day_of_week'] = ts_df['date'].dt.dayofweek
        ts_df['day_of_month'] = ts_df['date'].dt.day
        ts_df['days_since_start'] = (ts_df['date'] - ts_df['date'].min()).dt.days

        return ts_df
    except Exception as e:
        # UI boundary: surface the problem in the dashboard instead of crashing it.
        st.error(f"Error preparing time series data: {str(e)}")
        return None
|
| 169 |
+
|
| 170 |
+
def predict_sentiment_prophet(df, periods=15):
    """Forecast the 'Average' sentiment score with a Prophet model.

    Args:
        df: DataFrame with a 'date' column and an 'Average' column.
        periods: Number of future periods passed to
            ``make_future_dataframe``.

    Returns:
        A DataFrame with columns 'date', 'predicted_sentiment',
        'lower_bound' and 'upper_bound', or None (after ``st.error``)
        if fitting or predicting fails.
        NOTE(review): make_future_dataframe presumably keeps the historical
        dates as well, so the result likely covers history plus the
        forecast horizon - confirm against the Prophet docs.
    """
    try:
        # Prophet expects the time column as 'ds' and the target as 'y'.
        training = df[['date', 'Average']].rename(
            columns={'date': 'ds', 'Average': 'y'}
        )

        forecaster = Prophet(
            daily_seasonality=True,
            weekly_seasonality=True,
            yearly_seasonality=False,
        )
        forecaster.fit(training)

        horizon = forecaster.make_future_dataframe(periods=periods)
        predicted = forecaster.predict(horizon)

        # Map Prophet's output names onto the names the dashboard uses.
        output_names = {
            'ds': 'date',
            'yhat': 'predicted_sentiment',
            'yhat_lower': 'lower_bound',
            'yhat_upper': 'upper_bound',
        }
        return predicted[list(output_names)].rename(columns=output_names)
    except Exception as e:
        st.error(f"Error with Prophet prediction: {str(e)}")
        return None
|
| 199 |
+
|
| 200 |
+
def predict_sentiment_arima(df, periods=15):
    """Forecast the 'Average' sentiment score with an ARIMA(2, 1, 2) model.

    Args:
        df: DataFrame with a 'date' column (datetime-like) and an
            'Average' column, one row per time step.
        periods: Number of future days to forecast.

    Returns:
        A DataFrame with columns 'date', 'predicted_sentiment' and
        'model' (always 'ARIMA'), one row per forecast day and a plain
        0..periods-1 index, or None (after ``st.error``) on failure.
    """
    try:
        # Fit ARIMA model
        model_fit = ARIMA(df['Average'], order=(2, 1, 2)).fit()

        # Make predictions
        forecast = model_fit.forecast(steps=periods)

        # Create future dates, one per day after the last observed date
        last_date = df['date'].max()
        future_dates = [last_date + timedelta(days=i) for i in range(1, periods + 1)]

        return pd.DataFrame({
            'date': future_dates,
            # The forecast may come back as a Series carrying its own index
            # (continuing from the training data). Pass plain values so the
            # frame gets a positional index and never depends on alignment.
            'predicted_sentiment': list(forecast),
            'model': 'ARIMA'
        })
    except Exception as e:
        st.error(f"Error with ARIMA prediction: {str(e)}")
        return None
|
| 222 |
+
|
| 223 |
+
def predict_sentiment_rf(df, periods=15):
    """Forecast the 'Average' sentiment score with a random forest regressor.

    The model is trained on calendar features produced by
    ``prepare_time_series_data`` ('days_since_start', 'day_of_week',
    'day_of_month') and then evaluated on the same features computed for
    the days following the last observed date.

    Args:
        df: DataFrame accepted by ``prepare_time_series_data``.
        periods: Number of future days to forecast.

    Returns:
        A DataFrame with columns 'date', 'predicted_sentiment' and
        'model' (always 'Random Forest'); None when fewer than 10 daily
        rows are available or, after ``st.error``, when an error occurs.
    """
    try:
        daily = prepare_time_series_data(df)
        if daily is None or len(daily) < 10:
            return None

        feature_columns = ['days_since_start', 'day_of_week', 'day_of_month']

        # Fixed seed keeps the forecast reproducible between reruns.
        regressor = RandomForestRegressor(n_estimators=100, random_state=42)
        regressor.fit(daily[feature_columns], daily['Average'])

        first_day = daily['date'].min()
        last_day = daily['date'].max()
        horizon = [last_day + timedelta(days=step) for step in range(1, periods + 1)]

        # Same feature columns, in the same order, as used for training.
        future_features = pd.DataFrame({
            'days_since_start': [(day - first_day).days for day in horizon],
            'day_of_week': [day.weekday() for day in horizon],
            'day_of_month': [day.day for day in horizon]
        })

        return pd.DataFrame({
            'date': horizon,
            'predicted_sentiment': regressor.predict(future_features),
            'model': 'Random Forest'
        })
    except Exception as e:
        st.error(f"Error with Random Forest prediction: {str(e)}")
        return None
|
| 260 |
+
|
| 261 |
+
def plot_sentiment_predictions(history_df, predictions):
    """Plot daily historical sentiment together with model forecasts.

    Args:
        history_df: DataFrame with a datetime 'date' column and an
            'Average' column; it is averaged per calendar day before
            plotting.
        predictions: Mapping of model name to a prediction DataFrame (or
            None). Each frame needs 'date' and 'predicted_sentiment';
            frames that also carry 'lower_bound' and 'upper_bound' get a
            shaded band between the two.

    Returns:
        The Plotly figure, or None (after ``st.error``) if plotting fails.
    """
    try:
        # Prepare historical data
        history_df = history_df.set_index('date').resample('D')['Average'].mean().reset_index()

        # Create figure
        fig = px.line(history_df, x='date', y='Average',
                      title='Historical Sentiment & Future Predictions',
                      labels={'Average': 'Sentiment Score'})

        # Add prediction traces
        for model_name, pred_df in predictions.items():
            if pred_df is None:
                continue

            fig.add_scatter(x=pred_df['date'], y=pred_df['predicted_sentiment'],
                            mode='lines', name=f'{model_name} Prediction',
                            line=dict(dash='dot'))

            # Add confidence interval if available.
            # px.area() has no y_upper / y_lower arguments, so the band is
            # drawn as two invisible lines with the gap between them filled:
            # the lower trace fills up to the trace added just before it.
            if 'lower_bound' in pred_df.columns and 'upper_bound' in pred_df.columns:
                fig.add_scatter(x=pred_df['date'], y=pred_df['upper_bound'],
                                mode='lines', line=dict(width=0),
                                name=f'{model_name} Upper Bound',
                                showlegend=False, hoverinfo='skip')
                fig.add_scatter(x=pred_df['date'], y=pred_df['lower_bound'],
                                mode='lines', line=dict(width=0),
                                fill='tonexty',
                                fillcolor='rgba(99, 110, 250, 0.2)',
                                name=f'{model_name} Lower Bound',
                                showlegend=False, hoverinfo='skip')

        fig.update_layout(hovermode="x unified", showlegend=True)
        return fig
    except Exception as e:
        st.error(f"Error plotting predictions: {str(e)}")
        return None
|
| 291 |
+
|
| 292 |
# --------------------------
|
| 293 |
# Data Fetching Functions
|
| 294 |
# --------------------------
|
|
|
|
| 394 |
# --------------------------
|
| 395 |
|
| 396 |
def main():
|
| 397 |
+
st.title("๐ SentimentSync: Live Sentiment Analysis & Prediction Dashboard")
|
| 398 |
|
| 399 |
# Sidebar controls
|
| 400 |
with st.sidebar:
|
|
|
|
| 422 |
st.markdown("---")
|
| 423 |
st.markdown("### Settings")
|
| 424 |
show_raw_data = st.checkbox("Show raw data", value=False)
|
| 425 |
+
enable_prediction = st.checkbox("Enable sentiment prediction", value=True)
|
| 426 |
st.markdown("---")
|
| 427 |
st.button("๐ Reset Analysis")
|
| 428 |
|
|
|
|
| 523 |
st.subheader("๐ Word Cloud")
|
| 524 |
combined_text = " ".join(df['text'])
|
| 525 |
wordcloud_img = f'data:image/png;base64,{generate_wordcloud(combined_text)}'
|
| 526 |
+
st.image(wordcloud_img, use_container_width=True)
|
| 527 |
|
| 528 |
# Filter recent data (last 14 days)
|
| 529 |
df['date'] = pd.to_datetime(df['date'])
|
|
|
|
| 539 |
st.subheader("๐ Sentiment Distribution")
|
| 540 |
plot_sentiment_distribution(df_recent, keyword)
|
| 541 |
|
| 542 |
+
# Sentiment prediction
|
| 543 |
+
if enable_prediction and len(df_recent) >= 7: # Need at least 7 days of data
|
| 544 |
+
st.subheader("๐ฎ Sentiment Prediction (Next 15 Days)")
|
| 545 |
+
|
| 546 |
+
with st.spinner("Training prediction models..."):
|
| 547 |
+
# Prepare time series data
|
| 548 |
+
ts_df = prepare_time_series_data(df_recent)
|
| 549 |
+
|
| 550 |
+
if ts_df is not None and len(ts_df) >= 7:
|
| 551 |
+
# Get predictions from different models
|
| 552 |
+
predictions = {
|
| 553 |
+
'Prophet': predict_sentiment_prophet(ts_df),
|
| 554 |
+
'ARIMA': predict_sentiment_arima(ts_df),
|
| 555 |
+
'Random Forest': predict_sentiment_rf(ts_df)
|
| 556 |
+
}
|
| 557 |
+
|
| 558 |
+
# Filter out None predictions
|
| 559 |
+
valid_predictions = {k: v for k, v in predictions.items() if v is not None}
|
| 560 |
+
|
| 561 |
+
if valid_predictions:
|
| 562 |
+
# Plot predictions
|
| 563 |
+
fig = plot_sentiment_predictions(df_recent, valid_predictions)
|
| 564 |
+
if fig:
|
| 565 |
+
st.plotly_chart(fig, use_container_width=True)
|
| 566 |
+
|
| 567 |
+
# Show prediction details
|
| 568 |
+
st.subheader("๐ Prediction Details")
|
| 569 |
+
for model_name, pred_df in valid_predictions.items():
|
| 570 |
+
st.markdown(f"**{model_name} Prediction**")
|
| 571 |
+
st.dataframe(pred_df.set_index('date').style.format("{:.2f}"), use_container_width=True)
|
| 572 |
+
else:
|
| 573 |
+
st.warning("Could not generate predictions with the available data.")
|
| 574 |
+
else:
|
| 575 |
+
st.warning("Not enough data points for reliable prediction. Need at least 7 days of data.")
|
| 576 |
+
|
| 577 |
# Raw data (if enabled)
|
| 578 |
if show_raw_data:
|
| 579 |
st.subheader("๐ Raw Data")
|