Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -15,6 +15,11 @@ from googleapiclient.discovery import build
|
|
| 15 |
import os
|
| 16 |
import time
|
| 17 |
from functools import lru_cache
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
# --------------------------
|
| 20 |
# Initial Setup
|
|
@@ -166,6 +171,85 @@ def fetch_youtube_data(keyword, limit=30):
|
|
| 166 |
st.error(f"YouTube fetch error: {str(e)}")
|
| 167 |
return pd.DataFrame()
|
| 168 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
# --------------------------
|
| 170 |
# Visualization Functions
|
| 171 |
# --------------------------
|
|
@@ -191,25 +275,59 @@ def generate_wordcloud(text):
|
|
| 191 |
def plot_sentiment(data, keyword):
|
| 192 |
"""Optimized plotting function"""
|
| 193 |
try:
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
fig.update_layout(
|
| 208 |
-
|
| 209 |
xaxis_title="Date",
|
| 210 |
yaxis_title="Sentiment Score",
|
| 211 |
-
|
|
|
|
| 212 |
)
|
|
|
|
| 213 |
return fig
|
| 214 |
except Exception as e:
|
| 215 |
st.error(f"Plotting error: {str(e)}")
|
|
@@ -248,6 +366,7 @@ def main():
|
|
| 248 |
st.markdown("---")
|
| 249 |
st.markdown("### Options")
|
| 250 |
show_details = st.checkbox("Show detailed results", value=False)
|
|
|
|
| 251 |
st.markdown("---")
|
| 252 |
|
| 253 |
# Main content
|
|
@@ -335,21 +454,48 @@ def main():
|
|
| 335 |
|
| 336 |
# Filter recent data
|
| 337 |
combined_data['date'] = pd.to_datetime(combined_data['date'])
|
| 338 |
-
recent_data = combined_data[combined_data['date'] >= (datetime.now() - timedelta(days=
|
| 339 |
|
| 340 |
if not recent_data.empty:
|
| 341 |
# Sentiment trends
|
| 342 |
st.subheader("π
Sentiment Over Time")
|
| 343 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 344 |
if fig:
|
| 345 |
st.plotly_chart(fig, use_container_width=True)
|
| 346 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 347 |
# Show details if enabled
|
| 348 |
if show_details:
|
| 349 |
st.subheader("π Detailed Results")
|
| 350 |
st.dataframe(recent_data[['date', 'source', 'text', 'average']], use_container_width=True)
|
| 351 |
else:
|
| 352 |
-
st.info("No recent data found (within last
|
| 353 |
|
| 354 |
if __name__ == "__main__":
|
| 355 |
# Initialize NLTK data
|
|
|
|
| 15 |
import os
|
| 16 |
import time
|
| 17 |
from functools import lru_cache
|
| 18 |
+
import numpy as np
|
| 19 |
+
from sklearn.linear_model import Ridge
|
| 20 |
+
from sklearn.preprocessing import PolynomialFeatures
|
| 21 |
+
from sklearn.pipeline import make_pipeline
|
| 22 |
+
from sklearn.model_selection import train_test_split
|
| 23 |
|
| 24 |
# --------------------------
|
| 25 |
# Initial Setup
|
|
|
|
| 171 |
st.error(f"YouTube fetch error: {str(e)}")
|
| 172 |
return pd.DataFrame()
|
| 173 |
|
| 174 |
+
# --------------------------
|
| 175 |
+
# Prediction Functions
|
| 176 |
+
# --------------------------
|
| 177 |
+
|
| 178 |
+
def prepare_data_for_prediction(data):
    """Aggregate raw sentiment rows into a daily time series for model training.

    Parameters
    ----------
    data : pd.DataFrame
        Must contain a datetime-like 'date' column and a numeric 'average'
        (sentiment score) column.

    Returns
    -------
    pd.DataFrame or None
        One row per observed calendar day with columns 'date', 'average', and
        'days' (integer offset from the earliest date); None on failure.
    """
    try:
        # Ensure data is sorted by date so the daily grouping is monotonic
        data = data.sort_values('date')

        # Create daily aggregates (mean sentiment per calendar day)
        daily_data = data.groupby(pd.Grouper(key='date', freq='D'))['average'].mean().reset_index()

        # Fix: pd.Grouper emits a row for every day in the span, so days with
        # no observations come back with NaN 'average'. Those rows would make
        # the downstream Ridge fit fail (sklearn rejects NaN targets), so drop
        # them before feature construction.
        daily_data = daily_data.dropna(subset=['average']).reset_index(drop=True)

        # Create numerical features (days since first date)
        daily_data['days'] = (daily_data['date'] - daily_data['date'].min()).dt.days

        return daily_data
    except Exception as e:
        st.error(f"Data preparation error: {str(e)}")
        return None
|
| 194 |
+
|
| 195 |
+
def train_sentiment_model(data):
    """Fit a degree-2 polynomial Ridge regression to the daily sentiment series.

    Parameters
    ----------
    data : pd.DataFrame or None
        Output of prepare_data_for_prediction: needs numeric 'days' and
        'average' columns. None is tolerated (upstream preparation failure).

    Returns
    -------
    tuple
        (fitted sklearn Pipeline, training DataFrame), or (None, None) when
        training is skipped or fails.
    """
    try:
        # Fix: prepare_data_for_prediction returns None on failure; the
        # original `len(data)` then raised TypeError and surfaced a
        # misleading "Model training error". Treat None the same as
        # not-enough-data.
        if data is None or len(data) < 5:
            st.warning("Not enough data points for reliable prediction (minimum 5 days required)")
            return None, None

        # Split data into features (days) and target (sentiment)
        X = data['days'].values.reshape(-1, 1)
        y = data['average'].values

        # Polynomial features (degree=2) allow simple curved trends while the
        # Ridge penalty (alpha=1.0) keeps the small-sample fit from oscillating.
        model = make_pipeline(
            PolynomialFeatures(degree=2),
            Ridge(alpha=1.0)
        )

        model.fit(X, y)

        return model, data
    except Exception as e:
        st.error(f"Model training error: {str(e)}")
        return None, None
|
| 218 |
+
|
| 219 |
+
def predict_future_sentiment(model, training_data, days_to_predict=15):
    """Extend the sentiment series `days_to_predict` days past the last
    observed date using a fitted regression model.

    Parameters
    ----------
    model : fitted estimator or None
        Pipeline from train_sentiment_model; None short-circuits to None.
    training_data : pd.DataFrame
        Frame with 'date' and 'average' columns the model was trained on.
    days_to_predict : int
        Forecast horizon in days (default 15).

    Returns
    -------
    pd.DataFrame or None
        Historical rows (type='actual') followed by forecast rows
        (type='prediction'), or None on failure.
    """
    try:
        if model is None:
            return None

        # One forecast row per day immediately following the last observation.
        horizon_start = training_data['date'].max()
        horizon = [horizon_start + timedelta(days=offset)
                   for offset in range(1, days_to_predict + 1)]

        # Features must use the same origin the model was trained with:
        # integer day offsets from the earliest training date.
        origin = training_data['date'].min()
        offsets = [(day - origin).days for day in horizon]
        features = np.array(offsets).reshape(-1, 1)

        forecast = model.predict(features)

        # Tag forecast rows so plotting can tell them apart from history.
        forecast_df = pd.DataFrame({
            'date': horizon,
            'average': forecast,
            'type': 'prediction'
        })

        history_df = training_data.copy()
        history_df['type'] = 'actual'

        return pd.concat([history_df, forecast_df], ignore_index=True)
    except Exception as e:
        st.error(f"Prediction error: {str(e)}")
        return None
|
| 252 |
+
|
| 253 |
# --------------------------
|
| 254 |
# Visualization Functions
|
| 255 |
# --------------------------
|
|
|
|
| 275 |
def plot_sentiment(data, keyword):
    """Plot actual vs. predicted sentiment over time.

    Parameters
    ----------
    data : pd.DataFrame
        Combined frame with 'date', 'average', and 'type' columns, where
        'type' is 'actual' or 'prediction'.
    keyword : str
        Search keyword, used only in the chart title.

    Returns
    -------
    plotly.graph_objects.Figure or None
        The assembled figure, or None (implicitly) on error.
    """
    try:
        # Separate actual and predicted data
        actual_data = data[data['type'] == 'actual']
        pred_data = data[data['type'] == 'prediction']

        fig = go.Figure()

        # Observed sentiment line
        fig.add_trace(go.Scatter(
            x=actual_data['date'],
            y=actual_data['average'],
            name='Actual Sentiment',
            mode='lines+markers',
            line=dict(color='#636EFA')
        ))

        # Forecast line and band only when a forecast exists. Fix: the band
        # traces are guarded by the same check so an empty forecast doesn't
        # add empty traces and a stray 'Prediction Range' legend entry.
        if not pred_data.empty:
            fig.add_trace(go.Scatter(
                x=pred_data['date'],
                y=pred_data['average'],
                name='Predicted Sentiment',
                mode='lines+markers',
                line=dict(color='#EF553B', dash='dot')
            ))

            # Simple fixed-width (+/- 0.1) band around the forecast. The
            # invisible upper edge must be added first so the lower edge's
            # fill='tonexty' shades the region between the two traces.
            fig.add_trace(go.Scatter(
                x=pred_data['date'],
                y=pred_data['average'] + 0.1,
                mode='lines',
                line=dict(width=0),
                showlegend=False,
                hoverinfo='skip'
            ))

            fig.add_trace(go.Scatter(
                x=pred_data['date'],
                y=pred_data['average'] - 0.1,
                mode='lines',
                fill='tonexty',
                line=dict(width=0),
                fillcolor='rgba(239, 85, 59, 0.2)',
                name='Prediction Range'
            ))

        fig.update_layout(
            title=f'Sentiment Analysis and Prediction for "{keyword}"',
            xaxis_title="Date",
            yaxis_title="Sentiment Score",
            hovermode="x unified",
            legend_title="Data Type"
        )

        return fig
    except Exception as e:
        st.error(f"Plotting error: {str(e)}")
|
|
|
|
| 366 |
st.markdown("---")
|
| 367 |
st.markdown("### Options")
|
| 368 |
show_details = st.checkbox("Show detailed results", value=False)
|
| 369 |
+
enable_prediction = st.checkbox("Enable sentiment prediction", value=True)
|
| 370 |
st.markdown("---")
|
| 371 |
|
| 372 |
# Main content
|
|
|
|
| 454 |
|
| 455 |
# Filter recent data
|
| 456 |
combined_data['date'] = pd.to_datetime(combined_data['date'])
|
| 457 |
+
recent_data = combined_data[combined_data['date'] >= (datetime.now() - timedelta(days=60))] # Increased to 60 days for better prediction
|
| 458 |
|
| 459 |
if not recent_data.empty:
|
| 460 |
# Sentiment trends
|
| 461 |
st.subheader("π
Sentiment Over Time")
|
| 462 |
+
|
| 463 |
+
# Prepare data for prediction if enabled
|
| 464 |
+
if enable_prediction and len(recent_data) >= 5:
|
| 465 |
+
with st.spinner("Training prediction model..."):
|
| 466 |
+
daily_data = prepare_data_for_prediction(recent_data)
|
| 467 |
+
model, training_data = train_sentiment_model(daily_data)
|
| 468 |
+
|
| 469 |
+
if model is not None:
|
| 470 |
+
full_data = predict_future_sentiment(model, training_data)
|
| 471 |
+
fig = plot_sentiment(full_data, keyword)
|
| 472 |
+
else:
|
| 473 |
+
fig = plot_sentiment(training_data, keyword)
|
| 474 |
+
else:
|
| 475 |
+
daily_data = prepare_data_for_prediction(recent_data)
|
| 476 |
+
fig = plot_sentiment(daily_data.assign(type='actual'), keyword)
|
| 477 |
+
|
| 478 |
if fig:
|
| 479 |
st.plotly_chart(fig, use_container_width=True)
|
| 480 |
|
| 481 |
+
# Show prediction insights
|
| 482 |
+
if enable_prediction and 'full_data' in locals() and full_data is not None:
|
| 483 |
+
last_actual = full_data[full_data['type'] == 'actual']['average'].iloc[-1]
|
| 484 |
+
last_pred = full_data[full_data['type'] == 'prediction']['average'].iloc[-1]
|
| 485 |
+
|
| 486 |
+
if last_pred > last_actual + 0.1:
|
| 487 |
+
st.success("π Prediction: Sentiment is expected to improve in the next 15 days")
|
| 488 |
+
elif last_pred < last_actual - 0.1:
|
| 489 |
+
st.warning("π Prediction: Sentiment is expected to decline in the next 15 days")
|
| 490 |
+
else:
|
| 491 |
+
st.info("π Prediction: Sentiment is expected to remain stable in the next 15 days")
|
| 492 |
+
|
| 493 |
# Show details if enabled
|
| 494 |
if show_details:
|
| 495 |
st.subheader("π Detailed Results")
|
| 496 |
st.dataframe(recent_data[['date', 'source', 'text', 'average']], use_container_width=True)
|
| 497 |
else:
|
| 498 |
+
st.info("No recent data found (within last 60 days).")
|
| 499 |
|
| 500 |
if __name__ == "__main__":
|
| 501 |
# Initialize NLTK data
|