Soundaryasos committed on
Commit
977fdbb
·
verified ·
1 Parent(s): fa1d910

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -79
app.py CHANGED
@@ -19,10 +19,16 @@ from nltk.tokenize import word_tokenize
19
  from nltk.stem import WordNetLemmatizer
20
  import re
21
  import json
22
- import os
23
- import pickle
24
  from textblob import TextBlob
25
 
 
 
 
 
 
 
 
 
26
  # Download necessary NLTK data
27
  try:
28
  nltk.data.find('tokenizers/punkt')
@@ -34,14 +40,6 @@ except LookupError:
34
  nltk.download('stopwords')
35
  nltk.download('wordnet')
36
 
37
- # Page configuration
38
- st.set_page_config(
39
- page_title="SentiMind Pro - Advanced Sentiment Analysis",
40
- page_icon="📊",
41
- layout="wide",
42
- initial_sidebar_state="expanded"
43
- )
44
-
45
  # Custom CSS
46
  st.markdown("""
47
  <style>
@@ -99,8 +97,6 @@ if 'initialized' not in st.session_state:
99
  st.session_state.initialized = False
100
  st.session_state.user_input = ""
101
  st.session_state.analysis_done = False
102
- st.session_state.historical_data = None
103
- st.session_state.sentiment_models = {}
104
  st.session_state.historical_inputs = []
105
  st.session_state.historical_results = []
106
 
@@ -108,35 +104,28 @@ if 'initialized' not in st.session_state:
108
 
109
  def preprocess_text(text):
110
  """Preprocess text for sentiment analysis"""
111
- # Convert to lowercase
112
  text = text.lower()
113
- # Remove URLs
114
- text = re.sub(r'http\S+|www\S+|https\S+', '', text)
115
- # Remove mentions and hashtags
116
- text = re.sub(r'@\w+|#\w+', '', text)
117
- # Remove punctuation
118
- text = re.sub(r'[^\w\s]', '', text)
119
- # Remove extra whitespace
120
- text = re.sub(r'\s+', ' ', text).strip()
121
-
122
- # Tokenize
123
- tokens = word_tokenize(text)
124
 
125
- # Remove stopwords
126
  stop_words = set(stopwords.words('english'))
127
- tokens = [word for word in tokens if word not in stop_words]
128
 
129
- # Lemmatize
130
  lemmatizer = WordNetLemmatizer()
131
- tokens = [lemmatizer.lemmatize(word) for word in tokens]
132
 
133
  return ' '.join(tokens)
134
 
135
  def initialize_models():
136
  """Initialize sentiment analysis models with loading spinner"""
137
  with st.spinner('Initializing sentiment analysis models...'):
138
- # VADER Sentiment Analysis
139
- st.session_state.sentiment_models['vader'] = SentimentIntensityAnalyzer()
 
 
140
 
141
  # BERT Sentiment Analysis
142
  try:
@@ -147,9 +136,6 @@ def initialize_models():
147
  except Exception as e:
148
  st.error(f"Error loading BERT model: {e}")
149
  st.session_state.sentiment_models['bert'] = pipeline("sentiment-analysis")
150
-
151
- # TextBlob for additional analysis
152
- st.session_state.sentiment_models['textblob'] = TextBlob
153
 
154
  def generate_sample_data():
155
  """Generate realistic sample data for demonstration"""
@@ -157,7 +143,6 @@ def generate_sample_data():
157
  start_date = end_date - timedelta(days=30)
158
  dates = pd.date_range(start=start_date, end=end_date, freq='D')
159
 
160
- # Generate more realistic sentiment patterns
161
  weekday_effect = np.array([0.1 if d.weekday() >= 5 else 0 for d in dates])
162
  trend = np.linspace(-0.2, 0.3, len(dates))
163
  seasonal = np.array([-0.15 if d.weekday() == 0 else 0.05 if d.weekday() == 4 else 0 for d in dates])
@@ -178,54 +163,9 @@ def generate_sample_data():
178
 
179
  return df
180
 
181
- def train_prediction_models(df):
182
- """Train multiple prediction models and return the best one"""
183
- X = df.copy()
184
- X['day_of_week'] = X['Date'].dt.dayofweek
185
- X['day_of_month'] = X['Date'].dt.day
186
- X['month'] = X['Date'].dt.month
187
- X['trend'] = np.arange(len(X))
188
-
189
- features = ['day_of_week', 'day_of_month', 'month', 'trend']
190
- X_train = X[features].values
191
- y_train = X['Sentiment Score'].values
192
-
193
- models = {
194
- 'Linear Regression': LinearRegression(),
195
- 'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42)
196
- }
197
-
198
- for name, model in models.items():
199
- model.fit(X_train, y_train)
200
-
201
- future_dates = pd.date_range(
202
- start=df['Date'].max() + timedelta(days=1),
203
- periods=14,
204
- freq='D'
205
- )
206
-
207
- X_future = pd.DataFrame({
208
- 'Date': future_dates,
209
- 'day_of_week': future_dates.dayofweek,
210
- 'day_of_month': future_dates.day,
211
- 'month': future_dates.month,
212
- 'trend': np.arange(len(X_train), len(X_train) + len(future_dates))
213
- })
214
-
215
- predictions = {}
216
- for name, model in models.items():
217
- y_pred = model.predict(X_future[features].values)
218
- predictions[name] = pd.DataFrame({
219
- 'Date': future_dates,
220
- 'Predicted Sentiment': np.clip(y_pred, -1, 1)
221
- })
222
-
223
- return models['Random Forest'], predictions
224
-
225
  def generate_wordcloud(text, sentiment_score):
226
  """Generate a wordcloud colored by sentiment"""
227
  text = preprocess_text(text)
228
-
229
  stopwords = set(STOPWORDS)
230
 
231
  def color_func(word, font_size, position, orientation, random_state=None, **kwargs):
 
19
  from nltk.stem import WordNetLemmatizer
20
  import re
21
  import json
 
 
22
  from textblob import TextBlob
23
 
24
+ # Page configuration
25
+ st.set_page_config(
26
+ page_title="SentiMind Pro - Advanced Sentiment Analysis",
27
+ page_icon="📊",
28
+ layout="wide",
29
+ initial_sidebar_state="expanded"
30
+ )
31
+
32
  # Download necessary NLTK data
33
  try:
34
  nltk.data.find('tokenizers/punkt')
 
40
  nltk.download('stopwords')
41
  nltk.download('wordnet')
42
 
 
 
 
 
 
 
 
 
43
  # Custom CSS
44
  st.markdown("""
45
  <style>
 
97
  st.session_state.initialized = False
98
  st.session_state.user_input = ""
99
  st.session_state.analysis_done = False
 
 
100
  st.session_state.historical_inputs = []
101
  st.session_state.historical_results = []
102
 
 
104
 
105
def preprocess_text(text):
    """Normalize raw text for sentiment analysis.

    Lowercases the input, strips URLs, @mentions/#hashtags, punctuation and
    extra whitespace, then tokenizes, drops English stopwords, and lemmatizes
    what remains. Returns the surviving tokens joined by single spaces.
    """
    cleaned = text.lower()
    cleaned = re.sub(r'http\S+|www\S+|https\S+', '', cleaned)  # URLs
    cleaned = re.sub(r'@\w+|#\w+', '', cleaned)                # mentions / hashtags
    cleaned = re.sub(r'[^\w\s]', '', cleaned)                  # punctuation
    cleaned = re.sub(r'\s+', ' ', cleaned).strip()             # collapse whitespace

    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    # Filter stopwords first, then lemmatize — same order as filtering
    # followed by a separate lemmatization pass.
    kept = [
        lemmatizer.lemmatize(tok)
        for tok in word_tokenize(cleaned)
        if tok not in stop_words
    ]
    return ' '.join(kept)
121
 
122
  def initialize_models():
123
  """Initialize sentiment analysis models with loading spinner"""
124
  with st.spinner('Initializing sentiment analysis models...'):
125
+ st.session_state.sentiment_models = {
126
+ 'vader': SentimentIntensityAnalyzer(),
127
+ 'textblob': TextBlob
128
+ }
129
 
130
  # BERT Sentiment Analysis
131
  try:
 
136
  except Exception as e:
137
  st.error(f"Error loading BERT model: {e}")
138
  st.session_state.sentiment_models['bert'] = pipeline("sentiment-analysis")
 
 
 
139
 
140
  def generate_sample_data():
141
  """Generate realistic sample data for demonstration"""
 
143
  start_date = end_date - timedelta(days=30)
144
  dates = pd.date_range(start=start_date, end=end_date, freq='D')
145
 
 
146
  weekday_effect = np.array([0.1 if d.weekday() >= 5 else 0 for d in dates])
147
  trend = np.linspace(-0.2, 0.3, len(dates))
148
  seasonal = np.array([-0.15 if d.weekday() == 0 else 0.05 if d.weekday() == 4 else 0 for d in dates])
 
163
 
164
  return df
165
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  def generate_wordcloud(text, sentiment_score):
167
  """Generate a wordcloud colored by sentiment"""
168
  text = preprocess_text(text)
 
169
  stopwords = set(STOPWORDS)
170
 
171
  def color_func(word, font_size, position, orientation, random_state=None, **kwargs):