Soundaryasos committed on
Commit
2df2711
·
verified ·
1 Parent(s): e77bbae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +440 -101
app.py CHANGED
@@ -1,116 +1,455 @@
1
  import streamlit as st
2
  import pandas as pd
 
3
  import matplotlib.pyplot as plt
4
  import seaborn as sns
5
  import nltk
6
  from textblob import TextBlob
7
- import numpy as np
8
- from wordcloud import WordCloud
9
  import plotly.express as px
 
 
10
  from datetime import datetime, timedelta
11
- from transformers import pipeline
12
  from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
13
  from sklearn.linear_model import LinearRegression
 
14
  from sklearn.model_selection import train_test_split
15
- from sklearn.preprocessing import LabelEncoder
 
16
  from io import BytesIO
17
  import base64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  # Ensure necessary NLTK data is available
20
- nltk.download('punkt')
21
-
22
- st.set_page_config(page_title="Advanced Sentiment Analyzer", layout="wide")
23
-
24
- st.title("🔍 Advanced Sentiment Analysis Dashboard")
25
- st.markdown("Analyze sentiments with deep insights and visualizations")
26
-
27
- # Sidebar for user input
28
- st.sidebar.subheader("Enter Text for Sentiment Analysis")
29
- user_input = st.sidebar.text_area("Type or paste text here", "The product is amazing!")
30
-
31
- # Initialize sentiment analyzers
32
- analyzer = SentimentIntensityAnalyzer()
33
- bert_sentiment = pipeline("sentiment-analysis")
34
-
35
- def analyze_vader_sentiment(text):
36
- score = analyzer.polarity_scores(text)['compound']
37
- return "Positive" if score > 0.05 else "Negative" if score < -0.05 else "Neutral"
38
-
39
- def analyze_bert_sentiment(text):
40
- result = bert_sentiment(text)[0]
41
- return result['label']
42
-
43
- def analyze_textblob_sentiment(text):
44
- return "Positive" if TextBlob(text).sentiment.polarity > 0 else "Negative" if TextBlob(text).sentiment.polarity < 0 else "Neutral"
45
-
46
- if user_input:
47
- vader_result = analyze_vader_sentiment(user_input)
48
- bert_result = analyze_bert_sentiment(user_input)
49
- textblob_result = analyze_textblob_sentiment(user_input)
50
-
51
- st.sidebar.markdown(f"**VADER Sentiment:** {vader_result}")
52
- st.sidebar.markdown(f"**BERT Sentiment:** {bert_result}")
53
- st.sidebar.markdown(f"**TextBlob Sentiment:** {textblob_result}")
54
-
55
- # Simulated past sentiment data
56
- dates = [datetime.today() - timedelta(days=i) for i in range(14)]
57
- sentiment_scores = np.random.uniform(-1, 1, len(dates))
58
- df = pd.DataFrame({"Date": dates, "Sentiment Score": sentiment_scores})
59
-
60
- # Train a regression model
61
- X = np.array(range(len(df))).reshape(-1, 1)
62
- y = df["Sentiment Score"]
63
- model = LinearRegression()
64
- model.fit(X, y)
65
-
66
- # Predict for next 7 days
67
- future_dates = [datetime.today() + timedelta(days=i) for i in range(1, 8)]
68
- X_future = np.array(range(len(df), len(df) + 7)).reshape(-1, 1)
69
- predictions = model.predict(X_future)
70
- future_df = pd.DataFrame({"Date": future_dates, "Predicted Sentiment": predictions})
71
-
72
- # Past sentiment trends
73
- st.subheader("📅 Past Sentiment Trends (Last 14 Days)")
74
- fig1 = px.line(df, x='Date', y='Sentiment Score', title='Sentiment Over Time', markers=True, line_shape='spline')
75
- st.plotly_chart(fig1)
76
-
77
- # Future sentiment predictions
78
- st.subheader("🔮 Sentiment Prediction for Next 7 Days")
79
- fig2 = px.line(future_df, x='Date', y='Predicted Sentiment', title='Predicted Sentiment Trend', markers=True, line_shape='spline')
80
- st.plotly_chart(fig2)
81
-
82
- # Sentiment distribution pie chart
83
- st.subheader("📊 Sentiment Distribution")
84
- fig3 = px.pie(values=[sum(df['Sentiment Score'] > 0), sum(df['Sentiment Score'] <= 0)], names=['Positive', 'Negative'], title='Sentiment Distribution', hole=0.3)
85
- st.plotly_chart(fig3)
86
-
87
- # Sentiment scatter plot
88
- st.subheader("🔎 Sentiment Scatter Plot (Last 14 Days)")
89
- fig4 = px.scatter(df, x='Date', y='Sentiment Score', title='Sentiment Over Time')
90
- st.plotly_chart(fig4)
91
-
92
- # Rolling average sentiment
93
- st.subheader("📈 Rolling Average of Sentiment (7-Day Window)")
94
- df['Rolling Avg Sentiment'] = df['Sentiment Score'].rolling(window=7).mean()
95
- fig5 = px.line(df, x='Date', y='Rolling Avg Sentiment', title="7-Day Rolling Average Sentiment")
96
- st.plotly_chart(fig5)
97
-
98
- # Generate Word Cloud
99
- def generate_wordcloud(text):
100
- wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)
101
- img = BytesIO()
102
- wordcloud.to_image().save(img, format='PNG')
103
- return base64.b64encode(img.getvalue()).decode()
104
-
105
- # Word Cloud
106
- st.subheader("☁️ Word Cloud")
107
- if user_input:
108
- wordcloud_img = f'data:image/png;base64,{generate_wordcloud(user_input)}'
109
- st.image(wordcloud_img, use_column_width=True)
110
-
111
- # Download Report as CSV
112
- st.subheader("📄 Download Report")
113
- csv = df.to_csv(index=False).encode('utf-8')
114
- st.download_button(label="Download CSV", data=csv, file_name="sentiment_analysis.csv", mime='text/csv')
115
-
116
- st.sidebar.markdown("Developed with ❤️")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import pandas as pd
3
+ import numpy as np
4
  import matplotlib.pyplot as plt
5
  import seaborn as sns
6
  import nltk
7
  from textblob import TextBlob
8
+ from wordcloud import WordCloud, STOPWORDS
 
9
  import plotly.express as px
10
+ import plotly.graph_objects as go
11
+ from plotly.subplots import make_subplots
12
  from datetime import datetime, timedelta
 
13
  from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
14
  from sklearn.linear_model import LinearRegression
15
+ from sklearn.ensemble import RandomForestRegressor
16
  from sklearn.model_selection import train_test_split
17
+ from sklearn.preprocessing import LabelEncoder, MinMaxScaler
18
+ from sklearn.metrics import mean_squared_error, r2_score
19
  from io import BytesIO
20
  import base64
21
+ import re
22
+ import json
23
+ import altair as alt
24
+ import time
25
+ import requests
26
+ from PIL import Image
27
+ from collections import Counter
28
+ import spacy
29
+ import emoji
30
+ import warnings
31
+ warnings.filterwarnings('ignore')
32
+
33
# Page Configuration
# Streamlit documents set_page_config as needing to be the first Streamlit
# command on a page, so it runs before the spaCy fallback below (which may
# call st.warning).
st.set_page_config(
    page_title="Sentiment Pulse | Advanced Sentiment Analyzer",
    page_icon="🔮",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Initialize spaCy for advanced NLP
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # spacy.load signals a missing model package with OSError; anything else
    # is a genuine failure and should surface instead of being swallowed.
    st.warning("Installing spaCy model. This might take a minute...")
    import subprocess
    import sys
    # Run the download with the interpreter executing this app so the model
    # is installed into the same environment ("python" on PATH may differ).
    subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        capture_output=True,
    )
    # If the download failed this raises OSError again, which is the
    # appropriate hard failure: the app cannot work without the model.
    nlp = spacy.load("en_core_web_sm")

# Ensure necessary NLTK data is available
# NOTE(review): 'punkt_tab' is what newer NLTK releases look up for sentence
# tokenization; with quiet=True an id unknown to the index is skipped without
# raising, so requesting it alongside 'punkt' is safe on older releases.
for _resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet', 'vader_lexicon'):
    nltk.download(_resource, quiet=True)
55
+
56
# Apply custom CSS for modern look
# Injects one <style> element into the page; unsafe_allow_html=True is what
# lets the raw HTML/CSS through st.markdown.
# NOTE(review): selectors such as `.reportview-container`,
# `.sidebar .sidebar-content`, `#MainMenu` and the `.stTabs [...]` rules
# target markup generated by Streamlit itself — confirm they still match the
# Streamlit version this app is deployed on.
# NOTE(review): `.card`, `.metric-value`, `.metric-label`, `.divider` and
# `.tooltip` are custom classes; presumably they are used by HTML snippets
# emitted elsewhere in the app — verify against the rest of the file.
st.markdown("""
<style>
/* Main theme colors */
:root {
--primary: #7B68EE;
--secondary: #00BFFF;
--background: #F8F9FA;
--text: #333333;
--accent: #FF69B4;
}

/* Base Styles */
.reportview-container {
background-color: var(--background);
color: var(--text);
}

.sidebar .sidebar-content {
background-image: linear-gradient(to bottom, var(--primary), var(--secondary));
color: white;
}

/* Card-like containers */
.card {
background-color: white;
border-radius: 10px;
padding: 20px;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
margin-bottom: 20px;
}

/* Header styling */
h1, h2, h3 {
color: var(--primary);
font-weight: 700;
}

/* Button styling */
.stButton>button {
background-color: var(--primary);
color: white;
border-radius: 8px;
border: none;
transition: all 0.3s;
}
.stButton>button:hover {
background-color: var(--secondary);
transform: translateY(-2px);
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15);
}

/* Metric styling */
.metric-value {
font-size: 32px;
font-weight: 700;
color: var(--primary);
}

.metric-label {
font-size: 14px;
color: var(--text);
opacity: 0.7;
}

/* Divider */
.divider {
height: 3px;
background-image: linear-gradient(to right, var(--primary), var(--secondary));
margin: 20px 0;
border-radius: 3px;
}

/* Hide hamburger menu and footer */
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}

/* Custom tab styling */
.stTabs [data-baseweb="tab-list"] {
gap: 8px;
}

.stTabs [data-baseweb="tab"] {
background-color: transparent;
border-radius: 4px 4px 0px 0px;
border: none;
color: var(--text);
padding: 10px 16px;
}

.stTabs [aria-selected="true"] {
background-color: white !important;
color: var(--primary) !important;
font-weight: bold;
border-top: 2px solid var(--primary);
}

/* Tooltip */
.tooltip {
position: relative;
display: inline-block;
border-bottom: 1px dotted black;
}

.tooltip .tooltiptext {
visibility: hidden;
width: 200px;
background-color: #555;
color: #fff;
text-align: center;
border-radius: 6px;
padding: 5px;
position: absolute;
z-index: 1;
bottom: 125%;
left: 50%;
margin-left: -100px;
opacity: 0;
transition: opacity 0.3s;
}

.tooltip:hover .tooltiptext {
visibility: visible;
opacity: 1;
}
</style>
""", unsafe_allow_html=True)
183
+
184
+ # ===== UTILITY FUNCTIONS =====
185
def clean_text(text):
    """Normalise raw text ahead of analysis.

    The input is lower-cased, then URLs, @mentions, #hashtags and
    punctuation are removed and whitespace runs are collapsed to a single
    space. Anything that is not a ``str`` yields an empty string.
    """
    if not isinstance(text, str):
        return ""

    # Applied in order: URLs and @/# tokens have to be removed before the
    # punctuation pass strips the characters that identify them.
    substitutions = (
        (r'https?://\S+|www\.\S+', ''),   # URLs
        (r'@\w+|#\w+', ''),               # mentions and hashtags
        (r'[^\w\s]', ''),                 # punctuation / special characters
        (r'\s+', ' '),                    # extra whitespace
    )

    cleaned = text.lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()
206
+
207
def extract_hashtags(text):
    """Return the hashtag names in ``text`` without the leading ``#``.

    Non-string input yields an empty list.
    """
    return re.findall(r'#(\w+)', text) if isinstance(text, str) else []
212
+
213
def extract_mentions(text):
    """Return the mentioned names in ``text`` without the leading ``@``.

    Non-string input yields an empty list.
    """
    return re.findall(r'@(\w+)', text) if isinstance(text, str) else []
218
+
219
def count_emojis(text):
    """Count the emojis in ``text``.

    Counting is delegated to ``emoji.emoji_count`` so that an emoji made of
    several code points (flags, ZWJ sequences, skin-tone variants) counts
    once; testing each character against ``emoji.EMOJI_DATA`` miscounts
    those.

    Args:
        text: The text to inspect.

    Returns:
        The number of emojis found, or 0 when ``text`` is not a string.
    """
    if not isinstance(text, str):
        return 0
    return emoji.emoji_count(text)
224
+
225
def get_emoji_sentiment(text):
    """Sum a per-emoji sentiment weight over every character of ``text``.

    Each known positive emoji adds 1 and each known negative emoji
    subtracts 1; all other characters contribute nothing. Non-string input
    scores 0.
    """
    if not isinstance(text, str):
        return 0

    # Simple lookup of emoji sentiment (expand as needed)
    weights = {
        '😊': 1, '😃': 1, '😄': 1, '😁': 1, '😍': 1,
        '😢': -1, '😭': -1, '😡': -1, '😠': -1, '😞': -1
    }

    return sum(weights.get(symbol, 0) for symbol in text)
242
+
243
def generate_wordcloud(text, mask=None, background_color='white'):
    """Generate a word cloud from text.

    Args:
        text: Source text for the cloud.
        mask: Optional mask array forwarded to ``WordCloud`` to shape the
            cloud; ``None`` produces the default rectangular cloud.
        background_color: Background colour forwarded to ``WordCloud``.

    Returns:
        The generated ``WordCloud`` object, or ``None`` when ``text`` is
        empty, not a string, or has no words left once stopwords are
        removed.
    """
    if not text or not isinstance(text, str):
        return None

    stopwords = set(STOPWORDS)
    # Add custom stopwords
    custom_stopwords = {'the', 'and', 'to', 'of', 'a', 'in', 'is', 'that', 'it', 'was'}
    stopwords.update(custom_stopwords)

    cloud = WordCloud(
        width=800,
        height=400,
        background_color=background_color,
        stopwords=stopwords,
        max_words=150,
        colormap='viridis',
        mask=mask,  # previously accepted by this function but never forwarded
        contour_width=3,
        contour_color='steelblue',
        collocations=False
    )

    # generate() is process_text() followed by generate_from_frequencies();
    # doing the two steps explicitly lets us return None for text with no
    # usable words instead of letting WordCloud raise ValueError.
    frequencies = cloud.process_text(text)
    if not frequencies:
        return None

    return cloud.generate_from_frequencies(frequencies)
266
+
267
def get_entity_analysis(text):
    """Group the named entities spaCy finds in ``text`` by entity label.

    Returns a dict mapping each entity label to the list of entity texts
    carrying that label, in document order. Empty or non-string input gives
    an empty dict.
    """
    if not (text and isinstance(text, str)):
        return {}

    grouped = {}
    for entity in nlp(text).ents:
        grouped.setdefault(entity.label_, []).append(entity.text)
    return grouped
281
+
282
def extract_keywords(text, top_n=10):
    """Return the ``top_n`` most frequent keywords in ``text``.

    A keyword is a lower-cased spaCy token that is a noun, proper noun or
    adjective, is neither a stop word nor punctuation, and is longer than
    one character. The result is a list of ``(keyword, count)`` pairs as
    produced by ``Counter.most_common``; empty or non-string input gives an
    empty list.
    """
    if not (text and isinstance(text, str)):
        return []

    def _is_keyword(token):
        # Stop words and punctuation are never keywords.
        if token.is_stop or token.is_punct:
            return False
        return token.pos_ in ('NOUN', 'PROPN', 'ADJ') and len(token.text) > 1

    tally = Counter(
        token.text.lower() for token in nlp(text) if _is_keyword(token)
    )
    return tally.most_common(top_n)
300
+
301
def analyze_tone(text):
    """Classify the tone of ``text`` from TextBlob polarity and subjectivity.

    Polarity selects one of five bands; within a band, subjectivity above
    0.7 picks the more emotive label. Empty or non-string input is reported
    as "Neutral".
    """
    if not (text and isinstance(text, str)):
        return "Neutral"

    # Use TextBlob for sentiment
    sentiment = TextBlob(text).sentiment
    polarity = sentiment.polarity
    subjectivity = sentiment.subjectivity

    # Each band is (label when highly subjective, label otherwise)
    if polarity > 0.5:
        band = ("Enthusiastic", "Positive")
    elif polarity > 0.1:
        band = ("Interested", "Somewhat Positive")
    elif polarity < -0.5:
        band = ("Angry", "Negative")
    elif polarity < -0.1:
        band = ("Frustrated", "Somewhat Negative")
    else:
        band = ("Uncertain", "Neutral")

    emotive_label, measured_label = band
    return emotive_label if subjectivity > 0.7 else measured_label
337
+
338
def analyze_readability(text):
    """Compute simple readability metrics for ``text``.

    Returns a dict with ``word_count``, ``avg_word_length``,
    ``avg_sentence_length``, ``readability_score`` (clamped to 0-100) and a
    ``complexity`` label. Empty or non-string input returns ``{}``; text
    with no whitespace-separated words returns the zeroed metrics with
    complexity ``"N/A"``.
    """
    if not (text and isinstance(text, str)):
        return {}

    # Word count
    tokens = text.split()
    total_words = len(tokens)

    if not total_words:
        return {
            "word_count": 0,
            "avg_word_length": 0,
            "avg_sentence_length": 0,
            "readability_score": 0,
            "complexity": "N/A"
        }

    # Sentence count
    total_sentences = len(nltk.sent_tokenize(text))

    # Averages (total_words is known to be positive here)
    mean_word_length = sum(map(len, tokens)) / total_words
    mean_sentence_length = total_words / total_sentences if total_sentences > 0 else 0

    # Simplified readability score (based on avg word & sentence length)
    raw_score = 206.835 - (1.015 * mean_sentence_length) - (84.6 * mean_word_length / 5)
    score = max(0, min(100, raw_score))

    # Complexity bands, checked from easiest to hardest; a score must be
    # strictly above a band's floor to earn its label.
    bands = (
        (90, "Very Easy"),
        (80, "Easy"),
        (70, "Fairly Easy"),
        (60, "Standard"),
        (50, "Fairly Difficult"),
        (30, "Difficult"),
    )
    complexity = next(
        (label for floor, label in bands if score > floor),
        "Very Difficult",
    )

    return {
        "word_count": total_words,
        "avg_word_length": round(mean_word_length, 2),
        "avg_sentence_length": round(mean_sentence_length, 2),
        "readability_score": round(score, 2),
        "complexity": complexity
    }
393
+
394
def get_sentiment_color(score):
    """Return the hex colour used to display a sentiment ``score``.

    Exactly zero is neutral grey; otherwise the score must be strictly
    above a band's floor to receive that band's colour, and anything at or
    below -0.5 is crimson.
    """
    if score == 0:
        return "#D3D3D3"  # Neutral: Light Gray

    bands = (
        (0.5, "#2E8B57"),   # Strong positive: Sea Green
        (0, "#90EE90"),     # Positive: Light Green
        (-0.5, "#FFA07A"),  # Negative: Light Salmon
    )
    for floor, colour in bands:
        if score > floor:
            return colour
    return "#DC143C"  # Strong negative: Crimson
406
+
407
def map_sentiment_to_emoji(score):
    """Return the emoji that represents a sentiment ``score``.

    Exactly zero maps to the neutral face; otherwise the score must be
    strictly above a band's floor to receive that band's emoji, and
    anything at or below -0.75 maps to the angry face.
    """
    if score == 0:
        return "😐"

    # Floors from most positive to most negative
    bands = (
        (0.75, "😍"),
        (0.5, "😁"),
        (0.25, "🙂"),
        (0, "😊"),
        (-0.25, "😕"),
        (-0.5, "😟"),
        (-0.75, "😞"),
    )
    for floor, face in bands:
        if score > floor:
            return face
    return "😡"
427
+
428
+ def download_as_file(object_to_download, download_filename, button_text, pickle_it=False):
429
+ """
430
+ Generates a link to download the given object_to_download.
431
+
432
+ Args:
433
+ object_to_download: The object to be downloaded.
434
+ download_filename: Filename that the object will be saved as.
435
+ button_text: Text to display on the download button.
436
+ pickle_it: If True, pickle file.
437
+ """
438
+ if pickle_it:
439
+ try:
440
+ object_to_download = pickle.dumps(object_to_download)
441
+ except pickle.PicklingError:
442
+ return None
443
+
444
+ # Convert to bytes
445
+ if isinstance(object_to_download, bytes):
446
+ pass
447
+ elif isinstance(object_to_download, pd.DataFrame):
448
+ object_to_download = object_to_download.to_csv(index=False).encode()
449
+ # Add other data types as needed
450
+ else:
451
+ object_to_download = str(object_to_download).encode()
452
+
453
+ # Generate download button
454
+ b64 = base64.b64encode(object_to_download).decode()
455
+ button_uuid = str(hash(button_text))