Soundaryasos committed on
Commit
2535960
·
verified ·
1 Parent(s): 212a4d5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +370 -105
app.py CHANGED
@@ -1,120 +1,385 @@
1
  import streamlit as st
2
- from transformers import pipeline
3
- from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
4
- import numpy as np
5
  import pandas as pd
 
6
  from datetime import datetime, timedelta
7
  import plotly.express as px
 
 
8
  from sklearn.linear_model import LinearRegression
9
- from wordcloud import WordCloud
10
- import base64
 
 
 
11
  from io import BytesIO
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
- # Initialize sentiment models
14
- bert_sentiment = pipeline("sentiment-analysis")
15
- vader_analyzer = SentimentIntensityAnalyzer()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
- # Generate sample past sentiment data
18
- dates = [datetime.today() - timedelta(days=i) for i in range(14)]
19
- sentiment_scores = np.random.uniform(-1, 1, len(dates))
20
- df = pd.DataFrame({"Date": dates, "Sentiment Score": sentiment_scores})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
- # Train a regression model
23
- X = np.array(range(len(df))).reshape(-1, 1)
24
- y = df["Sentiment Score"]
25
- model = LinearRegression()
26
- model.fit(X, y)
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
- # Predict for next 7 days
29
- future_dates = [datetime.today() + timedelta(days=i) for i in range(1, 8)]
30
- X_future = np.array(range(len(df), len(df) + 7)).reshape(-1, 1)
31
- predictions = model.predict(X_future)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
- future_df = pd.DataFrame({"Date": future_dates, "Predicted Sentiment": predictions})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
- # Generate Word Cloud
36
- def generate_wordcloud(text):
37
- wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  img = BytesIO()
39
- wordcloud.to_image().save(img, format='PNG')
 
 
 
 
 
 
40
  return base64.b64encode(img.getvalue()).decode()
41
 
42
- # Streamlit app setup
43
- st.title("Interactive Sentiment Analysis Dashboard")
44
-
45
- # Sidebar for navigation and settings
46
- st.sidebar.header("Sentiment Analysis Controls")
47
- st.sidebar.subheader("Input")
48
- user_input = st.sidebar.text_input('Enter text for sentiment analysis')
49
-
50
- # Display sentiment analysis results
51
- def display_sentiment_analysis(vader_score, bert_result):
52
- st.subheader("Sentiment Analysis Results:")
53
- st.write(f"**VADER Sentiment Score**: {vader_score:.2f}")
54
- st.write(f"**BERT Sentiment**: {bert_result['label']} ({bert_result['score']:.2f})")
55
-
56
- sentiment_data = {'Positive': max(0, vader_score), 'Negative': min(0, vader_score), 'Neutral': 1 - abs(vader_score)}
57
- sentiment_df = pd.DataFrame(list(sentiment_data.items()), columns=["Sentiment", "Score"])
58
- st.bar_chart(sentiment_df.set_index("Sentiment"))
59
-
60
- wordcloud_img = f'data:image/png;base64,{generate_wordcloud(user_input)}'
61
- st.image(wordcloud_img, use_column_width=True)
62
-
63
- # Analyze sentiment on button click
64
- if st.sidebar.button('Analyze Sentiment'):
65
- if user_input:
66
- with st.spinner('Analyzing text...'):
67
- vader_score = vader_analyzer.polarity_scores(user_input)['compound']
68
- bert_result = bert_sentiment(user_input)[0]
69
- display_sentiment_analysis(vader_score, bert_result)
70
- else:
71
- st.warning("Please enter some text for analysis.")
72
-
73
- # Past sentiment trends
74
- st.subheader("Past Sentiment Trends (Last 14 Days)")
75
- fig1 = px.line(df, x='Date', y='Sentiment Score', title='Past Sentiment Trends', markers=True, line_shape='spline')
76
- st.plotly_chart(fig1)
77
-
78
- # Future sentiment predictions
79
- st.subheader("Sentiment Prediction for Next 7 Days")
80
- fig2 = px.line(future_df, x='Date', y='Predicted Sentiment', title='Sentiment Prediction for Next 7 Days', markers=True, line_shape='spline')
81
- st.plotly_chart(fig2)
82
-
83
- # Sentiment distribution pie chart
84
- st.subheader("Sentiment Distribution")
85
- fig3 = px.pie(values=[sum(df['Sentiment Score'] > 0), sum(df['Sentiment Score'] <= 0)],
86
- names=['Positive', 'Negative'], title='Sentiment Distribution', hole=0.3)
87
- st.plotly_chart(fig3)
88
-
89
- # Histogram of Sentiment Scores
90
- st.subheader("Sentiment Score Distribution (Past 14 Days)")
91
- fig4 = px.histogram(df, x='Sentiment Score', nbins=20, title="Sentiment Score Distribution")
92
- st.plotly_chart(fig4)
93
-
94
- # Sentiment heatmap (corrected version)
95
- st.subheader("Sentiment Heatmap (Past 14 Days)")
96
- df['Day'] = df['Date'].dt.dayofweek # Monday=0, Sunday=6
97
- df['Hour'] = np.random.randint(0, 24, len(df)) # Simulating hourly data
98
- heatmap_data = df.pivot(index='Day', columns='Hour', values='Sentiment Score')
99
- fig5 = px.imshow(
100
- heatmap_data,
101
- title="Heatmap of Sentiment Over Time",
102
- labels={'x': 'Hour of Day', 'y': 'Day of Week'},
103
- color_continuous_scale='RdBu'
104
- )
105
- st.plotly_chart(fig5)
106
-
107
- # Sentiment scatter plot
108
- st.subheader("Sentiment Scatter Plot (Past 14 Days)")
109
- fig6 = px.scatter(df, x='Date', y='Sentiment Score', title='Sentiment Over Time')
110
- st.plotly_chart(fig6)
111
-
112
- # Rolling average sentiment
113
- st.subheader("Rolling Average of Sentiment (7-Day Window)")
114
- df['Rolling Avg Sentiment'] = df['Sentiment Score'].rolling(window=7).mean()
115
- fig7 = px.line(df, x='Date', y='Rolling Avg Sentiment', title="Rolling Average of Sentiment (7-Day Window)")
116
- st.plotly_chart(fig7)
117
-
118
- # Reset button
119
- if st.sidebar.button('Reset Analysis'):
120
- st.experimental_rerun()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
 
 
 
2
  import pandas as pd
3
+ import numpy as np
4
  from datetime import datetime, timedelta
5
  import plotly.express as px
6
+ import plotly.graph_objects as go
7
+ from plotly.subplots import make_subplots
8
  from sklearn.linear_model import LinearRegression
9
+ from sklearn.ensemble import RandomForestRegressor
10
+ from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
11
+ from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
12
+ from wordcloud import WordCloud, STOPWORDS
13
+ import matplotlib.pyplot as plt
14
  from io import BytesIO
15
+ import base64
16
+ import nltk
17
+ from nltk.corpus import stopwords
18
+ from nltk.tokenize import word_tokenize
19
+ from nltk.stem import WordNetLemmatizer
20
+ import re
21
+ import json
22
+ import os
23
+ import pickle
24
+ from textblob import TextBlob
25
+
26
# Page configuration.
# st.set_page_config must be the first Streamlit command executed in the
# script, so it is issued ahead of the NLTK bootstrap below, which may call
# st.info() when resources have to be downloaded.
st.set_page_config(
    page_title="SentiMind Pro - Advanced Sentiment Analysis",
    page_icon="📊",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Download necessary NLTK data, fetching only the resources that are missing.
# Maps the nltk.download() package name to the path nltk.data.find() looks up.
# NOTE(review): recent NLTK releases may load tokenizer data from 'punkt_tab'
# instead of 'punkt' - confirm against the pinned NLTK version.
_NLTK_RESOURCES = {
    'punkt': 'tokenizers/punkt',
    'stopwords': 'corpora/stopwords',
    'wordnet': 'corpora/wordnet',
}

_missing_nltk_packages = []
for _package, _resource_path in _NLTK_RESOURCES.items():
    try:
        nltk.data.find(_resource_path)
    except LookupError:
        _missing_nltk_packages.append(_package)

if _missing_nltk_packages:
    st.info("Downloading NLTK resources...")
    for _package in _missing_nltk_packages:
        nltk.download(_package)
44
+
45
# Custom CSS: inject the dashboard stylesheet into the page as a raw <style>
# element (hence unsafe_allow_html=True). It defines the classes .main-header,
# .sub-header, .description, .results-container, .metric-card, .metric-value,
# .metric-label and .footer.
# NOTE(review): in the code visible here only the 'footer' class is referenced
# (by main()); confirm whether the other classes are still needed.
st.markdown("""
<style>
.main-header {
font-size: 2.5rem;
color: #1E88E5;
text-align: center;
margin-bottom: 1rem;
font-weight: bold;
}
.sub-header {
font-size: 1.5rem;
color: #0D47A1;
margin-top: 2rem;
margin-bottom: 1rem;
font-weight: bold;
}
.description {
font-size: 1rem;
color: #424242;
margin-bottom: 2rem;
}
.results-container {
background-color: #f5f5f5;
padding: 1.5rem;
border-radius: 10px;
margin-bottom: 2rem;
}
.metric-card {
background-color: white;
padding: 1rem;
border-radius: 10px;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
text-align: center;
}
.metric-value {
font-size: 1.8rem;
font-weight: bold;
color: #1E88E5;
}
.metric-label {
font-size: 0.9rem;
color: #616161;
}
.footer {
text-align: center;
margin-top: 3rem;
color: #616161;
}
</style>
""", unsafe_allow_html=True)
96
+
97
# Session state initialization: populate every key with its default value,
# but only when the 'initialized' marker key is not present yet.
if 'initialized' not in st.session_state:
    _session_defaults = {
        'initialized': False,
        'user_input': "",
        'analysis_done': False,
        'historical_data': None,
        'sentiment_models': {},
        'historical_inputs': [],
        'historical_results': [],
    }
    for _state_key, _state_value in _session_defaults.items():
        st.session_state[_state_key] = _state_value
106
+
107
+ # ----------- HELPER FUNCTIONS -----------
108
 
109
def preprocess_text(text):
    """Normalize raw text into a space-joined string of lemmatized tokens.

    The steps run in this order: lowercase; remove URLs; remove @mentions and
    #hashtags; remove punctuation; collapse whitespace; tokenize with NLTK;
    drop English stopwords; lemmatize each remaining token.

    Args:
        text: The raw input string.

    Returns:
        The surviving tokens joined by single spaces (possibly an empty
        string when nothing survives the filtering).
    """
    # (pattern, replacement) pairs applied sequentially; order matters because
    # URLs and mentions must be removed before punctuation is stripped.
    cleanup_rules = (
        (r'http\S+|www\S+|https\S+', ''),
        (r'@\w+|#\w+', ''),
        (r'[^\w\s]', ''),
        (r'\s+', ' '),
    )

    cleaned = text.lower()
    for pattern, replacement in cleanup_rules:
        cleaned = re.sub(pattern, replacement, cleaned)
    cleaned = cleaned.strip()

    raw_tokens = word_tokenize(cleaned)

    english_stopwords = set(stopwords.words('english'))
    content_tokens = [tok for tok in raw_tokens if tok not in english_stopwords]

    lemmatize = WordNetLemmatizer().lemmatize
    return ' '.join(lemmatize(tok) for tok in content_tokens)
134
 
135
def initialize_models():
    """Load the sentiment analyzers into ``st.session_state.sentiment_models``.

    Registers three entries while a Streamlit spinner is shown:

    * ``'vader'``    - a ``SentimentIntensityAnalyzer`` instance.
    * ``'bert'``     - a ``sentiment-analysis`` pipeline built from the
      DistilBERT SST-2 checkpoint; if building it raises, the error is shown
      with ``st.error`` and the default ``sentiment-analysis`` pipeline is
      used instead.
    * ``'textblob'`` - the ``TextBlob`` class itself (instantiated per text by
      the caller).
    """
    registry = st.session_state.sentiment_models

    with st.spinner('Initializing sentiment analysis models...'):
        # VADER: rule-based analyzer, no model download involved here.
        registry['vader'] = SentimentIntensityAnalyzer()

        # BERT: tokenizer is loaded first, then the classification model.
        try:
            checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
            bert_pipeline = pipeline(
                "sentiment-analysis",
                tokenizer=AutoTokenizer.from_pretrained(checkpoint),
                model=AutoModelForSequenceClassification.from_pretrained(checkpoint),
            )
        except Exception as e:
            st.error(f"Error loading BERT model: {e}")
            bert_pipeline = pipeline("sentiment-analysis")
        registry['bert'] = bert_pipeline

        # TextBlob: store the class; callers construct a blob per input text.
        registry['textblob'] = TextBlob
153
 
154
def generate_sample_data(days=30, seed=None):
    """Generate synthetic daily sentiment data for demonstration.

    The score for each day is the sum of a weekend bump (+0.1 on Saturday and
    Sunday), a linear trend from -0.2 to 0.3 across the window, a weekday
    effect (-0.15 on Mondays, +0.05 on Fridays) and Gaussian noise
    (mean 0, std 0.2), clipped to [-1, 1].

    Args:
        days: Length of the look-back window in days. The range is inclusive
            of both ends, so ``days + 1`` rows are produced (31 by default).
        seed: Optional integer seed. When given, the random components are
            drawn from a dedicated ``RandomState`` so the output is
            reproducible; when ``None`` the global NumPy random state is used,
            exactly as before.

    Returns:
        A DataFrame with columns ``Date``, ``Sentiment Score``, ``Volume``,
        ``Day``, ``Hour``, ``Weekday`` and ``Month``.

    Raises:
        ValueError: If ``days`` is negative.
    """
    if days < 0:
        raise ValueError("days must be non-negative")

    # np.random (module) and RandomState expose the same normal/randint API.
    rng = np.random if seed is None else np.random.RandomState(seed)

    end_date = datetime.today()
    start_date = end_date - timedelta(days=days)
    dates = pd.date_range(start=start_date, end=end_date, freq='D')
    n_days = len(dates)

    # Monday=0 ... Sunday=6
    day_of_week = np.asarray(dates.dayofweek)

    weekday_effect = np.where(day_of_week >= 5, 0.1, 0.0)
    trend = np.linspace(-0.2, 0.3, n_days)
    seasonal = np.select(
        [day_of_week == 0, day_of_week == 4],
        [-0.15, 0.05],
        default=0.0,
    )
    noise = rng.normal(0, 0.2, n_days)

    sentiment_scores = np.clip(weekday_effect + trend + seasonal + noise, -1, 1)

    df = pd.DataFrame({
        "Date": dates,
        "Sentiment Score": sentiment_scores,
        "Volume": rng.randint(50, 500, n_days),  # Simulated volume
    })

    df['Day'] = df['Date'].dt.dayofweek
    df['Hour'] = rng.randint(0, 24, len(df))  # Simulated hour of day
    df['Weekday'] = df['Date'].dt.day_name()
    df['Month'] = df['Date'].dt.month_name()

    return df
180
 
181
def train_prediction_models(df):
    """Fit two regressors on calendar features and forecast the next 14 days.

    A linear regression and a 100-tree random forest (``random_state=42``) are
    both trained on day-of-week, day-of-month, month and a running trend index
    derived from ``df['Date']``, with ``df['Sentiment Score']`` as the target.

    Args:
        df: DataFrame with a datetime ``Date`` column and a numeric
            ``Sentiment Score`` column.

    Returns:
        A tuple ``(model, predictions)`` where ``model`` is the fitted random
        forest (no model comparison is performed - it is always the one
        returned) and ``predictions`` maps each model name to a DataFrame with
        ``Date`` and ``Predicted Sentiment`` (clipped to [-1, 1]) columns.
    """
    feature_cols = ['day_of_week', 'day_of_month', 'month', 'trend']

    # Derive the calendar features on a copy so the caller's frame is untouched.
    history = df.copy()
    history['day_of_week'] = history['Date'].dt.dayofweek
    history['day_of_month'] = history['Date'].dt.day
    history['month'] = history['Date'].dt.month
    history['trend'] = np.arange(len(history))

    train_X = history[feature_cols].values
    train_y = history['Sentiment Score'].values

    regressors = {
        'Linear Regression': LinearRegression(),
        'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
    }
    for regressor in regressors.values():
        regressor.fit(train_X, train_y)

    # Forecast horizon starts the day after the last observed date.
    horizon = pd.date_range(
        start=df['Date'].max() + timedelta(days=1),
        periods=14,
        freq='D',
    )
    n_observed = len(train_X)
    future_frame = pd.DataFrame({
        'Date': horizon,
        'day_of_week': horizon.dayofweek,
        'day_of_month': horizon.day,
        'month': horizon.month,
        # The trend index continues where the training data stopped.
        'trend': np.arange(n_observed, n_observed + len(horizon)),
    })
    future_X = future_frame[feature_cols].values

    forecasts = {}
    for label, regressor in regressors.items():
        forecasts[label] = pd.DataFrame({
            'Date': horizon,
            'Predicted Sentiment': np.clip(regressor.predict(future_X), -1, 1),
        })

    return regressors['Random Forest'], forecasts
224
 
225
def generate_wordcloud(text, sentiment_score):
    """Render a word cloud of ``text`` tinted by ``sentiment_score``.

    The text is passed through ``preprocess_text`` first. All words share one
    colour chosen from the score: dark green (> 0.5), light green (> 0),
    orange (> -0.5), otherwise red.

    Args:
        text: Raw input text.
        sentiment_score: Numeric score used only to pick the colour.

    Returns:
        The rendered PNG as a base64-encoded ASCII string. When no words are
        left to plot, a placeholder image with a short message is returned
        instead of raising.
    """
    cleaned_text = preprocess_text(text)

    # The colour depends only on the score, so resolve it once up front.
    if sentiment_score > 0.5:
        cloud_color = "rgb(0, 128, 0)"  # Green
    elif sentiment_score > 0:
        cloud_color = "rgb(0, 255, 0)"  # Light green
    elif sentiment_score > -0.5:
        cloud_color = "rgb(255, 165, 0)"  # Orange
    else:
        cloud_color = "rgb(255, 0, 0)"  # Red

    def color_func(word=None, font_size=None, position=None, orientation=None,
                   random_state=None, **kwargs):
        return cloud_color

    wc = WordCloud(
        width=800,
        height=400,
        background_color='white',
        max_words=100,
        # Named cloud_stopwords-style inline value to avoid shadowing the
        # module-level nltk `stopwords` import.
        stopwords=set(STOPWORDS),
        contour_width=3,
        contour_color='steelblue'
    )

    # Use an explicit Figure/Axes rather than pyplot's implicit "current
    # figure", and always close it so figures do not accumulate across reruns.
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        try:
            wordcloud = wc.generate(cleaned_text)
        except ValueError:
            # WordCloud raises ValueError when there is not at least one word
            # to plot (e.g. the input consisted solely of stopwords).
            ax.text(0.5, 0.5, "No words to display",
                    ha='center', va='center', fontsize=20, color='gray')
        else:
            wordcloud.recolor(color_func=color_func)
            ax.imshow(wordcloud, interpolation='bilinear')

        ax.axis('off')
        fig.tight_layout()

        img = BytesIO()
        fig.savefig(img, format='PNG', bbox_inches='tight')
    finally:
        plt.close(fig)

    return base64.b64encode(img.getvalue()).decode()
263
 
264
def analyze_sentiment(text):
    """Perform sentiment analysis on ``text`` using multiple models.

    VADER, the BERT pipeline and TextBlob (all read from
    ``st.session_state.sentiment_models``) each score the raw text; the scores
    are blended as ``0.4 * vader + 0.4 * bert + 0.2 * textblob``. The BERT
    score is the pipeline confidence, negated unless the label is
    ``'POSITIVE'``.

    Args:
        text: Raw input text.

    Returns:
        A dict with keys ``raw_text``, ``processed_text``, ``vader``,
        ``bert``, ``textblob``, ``combined_score``, ``key_phrases``,
        ``emotions`` and ``timestamp`` (``%Y-%m-%d %H:%M:%S`` local time).
    """
    models = st.session_state.sentiment_models
    processed_text = preprocess_text(text)

    vader_result = models['vader'].polarity_scores(text)
    vader_score = vader_result['compound']

    # truncation=True is forwarded to the tokenizer so inputs longer than the
    # model's maximum sequence length are cut off instead of raising.
    bert_result = models['bert'](text, truncation=True)[0]
    bert_confidence = bert_result['score']
    bert_score = bert_confidence if bert_result['label'] == 'POSITIVE' else -bert_confidence

    blob = models['textblob'](text)
    textblob_score = blob.sentiment.polarity

    combined_score = (0.4 * vader_score + 0.4 * bert_score + 0.2 * textblob_score)

    key_phrases = extract_key_phrases(text)
    emotions = analyze_emotions(text)

    sentiment_results = {
        'raw_text': text,
        'processed_text': processed_text,
        'vader': {
            'score': vader_score,
            'breakdown': vader_result
        },
        'bert': {
            'score': bert_score,
            'label': bert_result['label'],
            'confidence': bert_confidence
        },
        'textblob': {
            'score': textblob_score,
            'subjectivity': blob.sentiment.subjectivity
        },
        'combined_score': combined_score,
        'key_phrases': key_phrases,
        'emotions': emotions,
        'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    }

    return sentiment_results
305
+
306
def extract_key_phrases(text, num_phrases=5):
    """Extract up to ``num_phrases`` distinct key phrases from ``text``.

    TextBlob noun phrases are used first. When there are fewer than
    ``num_phrases`` of them, lowercase word bigrams are appended and every
    candidate consisting solely of English stopwords is discarded.

    Duplicates are removed while keeping first-occurrence order, so the same
    input always yields the same phrases in the same order (a plain ``set``
    would make both the selection and the order vary between runs).

    Args:
        text: Raw input text.
        num_phrases: Maximum number of phrases to return.

    Returns:
        A list of at most ``num_phrases`` unique phrases.
    """
    candidates = list(TextBlob(text).noun_phrases)

    if len(candidates) < num_phrases:
        tokens = word_tokenize(text.lower())
        candidates.extend(' '.join(pair) for pair in nltk.bigrams(tokens))

        stop_words = set(stopwords.words('english'))
        candidates = [
            phrase for phrase in candidates
            if not all(word in stop_words for word in phrase.split())
        ]

    # dict.fromkeys de-duplicates while preserving insertion order.
    return list(dict.fromkeys(candidates))[:num_phrases]
327
+
328
def analyze_emotions(text):
    """Count emotion keywords occurring in ``text``.

    Matching is case-insensitive and ignores surrounding punctuation, so
    ``"Happy!"`` counts towards ``joy``.

    Args:
        text: Raw input text.

    Returns:
        A dict mapping each of ``joy``, ``sadness``, ``anger``, ``fear`` and
        ``surprise`` to the number of keyword occurrences found.
    """
    emotion_dict = {
        'joy': ['happy', 'delighted', 'pleased', 'glad', 'joy', 'love', 'excellent', 'wonderful'],
        'sadness': ['sad', 'unhappy', 'sorrow', 'depressed', 'down', 'gloomy'],
        'anger': ['angry', 'mad', 'furious', 'irritated', 'annoyed'],
        'fear': ['afraid', 'scared', 'fearful', 'terrified', 'worried'],
        'surprise': ['surprised', 'amazed', 'astonished', 'shocked'],
    }

    # Reverse index (keyword -> emotions) so each word is a single dict lookup.
    keyword_to_emotions = {}
    for emotion, keywords in emotion_dict.items():
        for keyword in keywords:
            keyword_to_emotions.setdefault(keyword, []).append(emotion)

    emotions = dict.fromkeys(emotion_dict, 0)

    # Lowercase first, then pull out alphabetic runs so case and adjacent
    # punctuation cannot hide a keyword.
    for word in re.findall(r"[a-z]+", text.lower()):
        for emotion in keyword_to_emotions.get(word, ()):
            emotions[emotion] += 1

    return emotions
346
+
347
+ # Main application logic
348
def main():
    """Render the app: input box, analysis results, history and footer.

    On the first run of a session the models are loaded via
    ``initialize_models``. Clicking "Analyze Sentiment" with non-blank text
    runs ``analyze_sentiment``, stores the input and result in session state,
    and shows the result JSON plus a word cloud. Once at least one analysis
    has been done, every stored input/result pair is listed under
    "Historical Analysis".
    """
    st.title("SentiMind Pro - Advanced Sentiment Analysis")

    if not st.session_state.initialized:
        initialize_models()
        st.session_state.initialized = True

    st.subheader("Enter Text for Sentiment Analysis")
    user_input = st.text_area("Input Text", height=150)

    if st.button("Analyze Sentiment"):
        # Whitespace-only input is truthy but contains nothing to analyze, so
        # it is treated the same as empty input.
        if user_input and user_input.strip():
            sentiment_results = analyze_sentiment(user_input)
            st.session_state.historical_inputs.append(user_input)
            st.session_state.historical_results.append(sentiment_results)
            st.session_state.analysis_done = True

            # Display results
            st.markdown("### Sentiment Analysis Results")
            st.json(sentiment_results)

            # Generate Word Cloud
            wordcloud_image = generate_wordcloud(user_input, sentiment_results['combined_score'])
            st.image(f"data:image/png;base64,{wordcloud_image}", use_column_width=True)
        else:
            st.warning("Please enter some text for analysis.")

    if st.session_state.analysis_done:
        st.subheader("Historical Analysis")
        # Inputs and results are appended together, so pair them directly
        # instead of indexing one list by the other's position.
        history = zip(
            st.session_state.historical_inputs,
            st.session_state.historical_results,
        )
        for position, (past_input, past_result) in enumerate(history, start=1):
            st.markdown(f"**Input Text {position}:** {past_input}")
            st.json(past_result)

    st.markdown("<footer class='footer'>© 2023 SentiMind Pro. All rights reserved.</footer>", unsafe_allow_html=True)
383
+
384
+ if __name__ == "__main__":
385
+ main()