Update app.py
app.py CHANGED
@@ -19,10 +19,16 @@ from nltk.tokenize import word_tokenize
 from nltk.stem import WordNetLemmatizer
 import re
 import json
-import os
-import pickle
 from textblob import TextBlob
 
+# Page configuration
+st.set_page_config(
+    page_title="SentiMind Pro - Advanced Sentiment Analysis",
+    page_icon="📊",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+
 # Download necessary NLTK data
 try:
     nltk.data.find('tokenizers/punkt')
@@ -34,14 +40,6 @@ except LookupError:
     nltk.download('stopwords')
     nltk.download('wordnet')
 
-# Page configuration
-st.set_page_config(
-    page_title="SentiMind Pro - Advanced Sentiment Analysis",
-    page_icon="📊",
-    layout="wide",
-    initial_sidebar_state="expanded"
-)
-
 # Custom CSS
 st.markdown("""
 <style>
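Review note: this hunk and the first one together hoist `st.set_page_config(...)` from below the NLTK download block to directly after the imports. Streamlit requires `set_page_config` to be the first Streamlit command a script executes, so the move hardens against ordering bugs such as (hypothetical):

    # Under the old ordering, a Streamlit call sneaking into the download block,
    # e.g. st.warning("Fetching NLTK data..."), would run before
    # st.set_page_config() and make it raise StreamlitAPIException.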
@@ -99,8 +97,6 @@ if 'initialized' not in st.session_state:
     st.session_state.initialized = False
     st.session_state.user_input = ""
     st.session_state.analysis_done = False
-    st.session_state.historical_data = None
-    st.session_state.sentiment_models = {}
     st.session_state.historical_inputs = []
     st.session_state.historical_results = []
 
@@ -108,35 +104,28 @@ if 'initialized' not in st.session_state:
 
 def preprocess_text(text):
     """Preprocess text for sentiment analysis"""
-    # Convert to lowercase
     text = text.lower()
-    # Remove URLs
-    text = re.sub(r'http\S+|www\S+|https\S+', '', text)
-    # Remove mentions and hashtags
-    text = re.sub(r'@\w+|#\w+', '', text)
-    # Remove punctuation
-    text = re.sub(r'[^\w\s]', '', text)
-    # Remove extra whitespace
-    text = re.sub(r'\s+', ' ', text).strip()
-
-    # Tokenize
-    tokens = word_tokenize(text)
+    text = re.sub(r'http\S+|www\S+|https\S+', '', text)  # Remove URLs
+    text = re.sub(r'@\w+|#\w+', '', text)  # Remove mentions and hashtags
+    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
+    text = re.sub(r'\s+', ' ', text).strip()  # Remove extra whitespace
 
-    # Remove stopwords
+    tokens = word_tokenize(text)  # Tokenize
     stop_words = set(stopwords.words('english'))
-    tokens = [word for word in tokens if word not in stop_words]
+    tokens = [word for word in tokens if word not in stop_words]  # Remove stopwords
 
-    # Lemmatize
     lemmatizer = WordNetLemmatizer()
-    tokens = [lemmatizer.lemmatize(word) for word in tokens]
+    tokens = [lemmatizer.lemmatize(word) for word in tokens]  # Lemmatize
 
     return ' '.join(tokens)
 
 def initialize_models():
     """Initialize sentiment analysis models with loading spinner"""
     with st.spinner('Initializing sentiment analysis models...'):
-
-
+        st.session_state.sentiment_models = {
+            'vader': SentimentIntensityAnalyzer(),
+            'textblob': TextBlob
+        }
 
         # BERT Sentiment Analysis
         try:
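Review note: the hunk above registers `SentimentIntensityAnalyzer()` under the `'vader'` key, but no matching import or lexicon download shows up in this diff. If they are not already present elsewhere in app.py, something along these lines would be needed (a sketch using NLTK's standard VADER API; placement assumed):

    from nltk.sentiment.vader import SentimentIntensityAnalyzer

    try:
        nltk.data.find('sentiment/vader_lexicon.zip')
    except LookupError:
        nltk.download('vader_lexicon')  # VADER's lexicon is a separate NLTK resource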
@@ -147,9 +136,6 @@ def initialize_models():
     except Exception as e:
         st.error(f"Error loading BERT model: {e}")
         st.session_state.sentiment_models['bert'] = pipeline("sentiment-analysis")
-
-        # TextBlob for additional analysis
-        st.session_state.sentiment_models['textblob'] = TextBlob
 
 def generate_sample_data():
     """Generate realistic sample data for demonstration"""
@@ -157,7 +143,6 @@ def generate_sample_data():
     start_date = end_date - timedelta(days=30)
     dates = pd.date_range(start=start_date, end=end_date, freq='D')
 
-    # Generate more realistic sentiment patterns
     weekday_effect = np.array([0.1 if d.weekday() >= 5 else 0 for d in dates])
     trend = np.linspace(-0.2, 0.3, len(dates))
     seasonal = np.array([-0.15 if d.weekday() == 0 else 0.05 if d.weekday() == 4 else 0 for d in dates])
@@ -178,54 +163,9 @@ def generate_sample_data():
 
     return df
 
-def train_prediction_models(df):
-    """Train multiple prediction models and return the best one"""
-    X = df.copy()
-    X['day_of_week'] = X['Date'].dt.dayofweek
-    X['day_of_month'] = X['Date'].dt.day
-    X['month'] = X['Date'].dt.month
-    X['trend'] = np.arange(len(X))
-
-    features = ['day_of_week', 'day_of_month', 'month', 'trend']
-    X_train = X[features].values
-    y_train = X['Sentiment Score'].values
-
-    models = {
-        'Linear Regression': LinearRegression(),
-        'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42)
-    }
-
-    for name, model in models.items():
-        model.fit(X_train, y_train)
-
-    future_dates = pd.date_range(
-        start=df['Date'].max() + timedelta(days=1),
-        periods=14,
-        freq='D'
-    )
-
-    X_future = pd.DataFrame({
-        'Date': future_dates,
-        'day_of_week': future_dates.dayofweek,
-        'day_of_month': future_dates.day,
-        'month': future_dates.month,
-        'trend': np.arange(len(X_train), len(X_train) + len(future_dates))
-    })
-
-    predictions = {}
-    for name, model in models.items():
-        y_pred = model.predict(X_future[features].values)
-        predictions[name] = pd.DataFrame({
-            'Date': future_dates,
-            'Predicted Sentiment': np.clip(y_pred, -1, 1)
-        })
-
-    return models['Random Forest'], predictions
-
 def generate_wordcloud(text, sentiment_score):
     """Generate a wordcloud colored by sentiment"""
     text = preprocess_text(text)
-
     stopwords = set(STOPWORDS)
 
     def color_func(word, font_size, position, orientation, random_state=None, **kwargs):
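Review note: a quick sanity check of the consolidated `preprocess_text` (illustrative input and output; assumes the punkt/stopwords/wordnet resources above are installed):

    sample = "Check out https://example.com @user #awesome This product is amazing!!!"
    print(preprocess_text(sample))
    # -> "check product amazing"
    # The URL, mention, hashtag, and punctuation are stripped; "out", "this",
    # and "is" fall out as stopwords; the surviving tokens lemmatize to themselves.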
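Review note: with `train_prediction_models` deleted, the scikit-learn estimators it used are presumably orphaned. If nothing else in app.py references them, the matching imports (assumed to sit near the top of the file) can go in a follow-up commit:

    from sklearn.linear_model import LinearRegression      # likely unused now
    from sklearn.ensemble import RandomForestRegressor     # likely unused now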