zaid002 committed on
Commit
b9fbeb2
·
verified ·
1 Parent(s): ce5f8c0

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +414 -0
app.py ADDED
@@ -0,0 +1,414 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import re
5
+ import nltk
6
+ from nltk.corpus import stopwords
7
+ from nltk.stem import WordNetLemmatizer
8
+ from nltk.tokenize import word_tokenize
9
+ import matplotlib.pyplot as plt
10
+ from wordcloud import WordCloud
11
+ import pickle
12
+ import plotly.express as px
13
+ import os
14
+
15
# Download NLTK data
@st.cache_resource(show_spinner=False)
def download_nltk_data():
    """Fetch the NLTK resources used by the text preprocessing code.

    The function is wrapped in ``st.cache_resource`` so the downloads are
    attempted once per server process instead of on every script rerun.

    ``show_spinner=False`` is deliberate: this function is called at module
    level, before ``st.set_page_config()`` runs, and the cache spinner is
    itself a UI element. On Streamlit versions that require
    ``set_page_config`` to be the first Streamlit command, a spinner here
    would make that later call fail.
    """
    # 'punkt_tab' is what newer NLTK releases load for word_tokenize();
    # 'punkt' is kept so older releases keep working.
    for resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet'):
        nltk.download(resource, quiet=True)


download_nltk_data()
23
+
24
class DataPreprocessor:
    """Normalise raw review text and reduce it to lemmatised tokens.

    Constructing an instance builds a ``WordNetLemmatizer`` and loads the
    English stop-word list, so the NLTK ``stopwords`` and ``wordnet``
    resources must already be available.
    """

    def __init__(self):
        self.lemmatizer = WordNetLemmatizer()
        self.stop_words = set(stopwords.words('english'))

    def clean_text(self, text):
        """Return ``text`` lower-cased with only ASCII letters and single spaces.

        Args:
            text: Any value; it is converted with ``str()`` before cleaning.

        Returns:
            The cleaned string, or ``""`` when ``text`` is a missing value
            (``None``, float NaN, ``pd.NA``, ``pd.NaT``).
        """
        # pd.isna() recognises every pandas/NumPy missing-value marker.  The
        # previous `text != text` probe raised TypeError for pd.NA because the
        # comparison result has no truth value.  The is_scalar() guard keeps
        # non-scalar inputs on the str() path instead of producing an array.
        if text is None or (pd.api.types.is_scalar(text) and pd.isna(text)):
            return ""

        # Convert to lowercase
        text = str(text).lower()

        # Remove special characters and digits (characters are dropped, not
        # replaced by a space, so "well-designed" becomes "welldesigned")
        text = re.sub(r'[^a-zA-Z\s]', '', text)

        # Remove extra whitespace
        text = re.sub(r'\s+', ' ', text).strip()

        return text

    def tokenize_and_lemmatize(self, text):
        """Tokenise ``text``, drop noise tokens and lemmatise the rest.

        Tokens that are in the stop-word set or have two characters or fewer
        are discarded; the remaining tokens are lemmatised and joined with
        single spaces.

        Args:
            text: A string, normally the output of :meth:`clean_text`.

        Returns:
            A space-separated string of lemmatised tokens.
        """
        tokens = word_tokenize(text)
        tokens = [
            self.lemmatizer.lemmatize(token)
            for token in tokens
            if token not in self.stop_words and len(token) > 2
        ]
        return ' '.join(tokens)
49
+
50
class SentimentAnalyzerApp:
    """Streamlit dashboard for exploring review sentiment.

    The app shows four pages (overview, model demo, analysis, insights) over
    a small built-in sample data set.  If ``models/sentiment_model.pkl``
    exists it is used for predictions; otherwise a keyword heuristic
    simulates them.

    Streamlit re-executes the script, and therefore builds a new instance of
    this class, on every user interaction.  Anything that must survive a
    rerun (the data frame, the model, the demo text) is kept in
    ``st.session_state`` and copied back onto the instance in :meth:`run`.
    """

    # Keyword lists for the simulated (model-free) classifier.
    _POSITIVE_WORDS = ('good', 'great', 'excellent', 'amazing', 'love', 'awesome',
                       'perfect', 'fantastic', 'wonderful', 'outstanding')
    _NEGATIVE_WORDS = ('bad', 'terrible', 'awful', 'hate', 'worst', 'disappointed',
                       'poor', 'horrible', 'waste', 'useless')

    # Text shown in the demo box before the user types or picks an example.
    _DEFAULT_DEMO_TEXT = "I love this application! It's incredibly useful and well-designed."

    def __init__(self):
        self.preprocessor = DataPreprocessor()
        self.model = None
        self.vectorizer = None
        self.df = None

    def load_sample_data(self):
        """Create sample data for demo purposes.

        Builds a five-row data frame, parses the ``date`` column, derives a
        ``sentiment`` column from ``rating`` (>= 4 Positive, 3 Neutral,
        otherwise Negative) and stores the frame on ``self.df``.

        Returns:
            ``True`` once the frame has been built.
        """
        sample_data = {
            'date': ['2024-01-01', '2024-01-02', '2024-01-03', '2024-01-04', '2024-01-05'],
            'review': [
                'This app is absolutely amazing and very helpful!',
                'The application works okay but could be better.',
                'I am very disappointed with the performance.',
                'Excellent features and great user interface.',
                'Not what I expected, needs improvement.'
            ],
            'rating': [5, 3, 1, 5, 2],
            'platform': ['Web', 'Mobile', 'Web', 'Mobile', 'Web'],
            'language': ['en', 'en', 'en', 'en', 'en'],
            'location': ['USA', 'UK', 'Canada', 'Australia', 'India'],
            'verified_purchase': ['Yes', 'No', 'Yes', 'Yes', 'No'],
            'helpful_votes': [10, 2, 5, 8, 1]
        }
        self.df = pd.DataFrame(sample_data)
        self.df['date'] = pd.to_datetime(self.df['date'])

        # Create sentiment labels
        def get_sentiment(rating):
            if rating >= 4:
                return 'Positive'
            elif rating == 3:
                return 'Neutral'
            else:
                return 'Negative'

        self.df['sentiment'] = self.df['rating'].apply(get_sentiment)
        return True

    def load_model(self):
        """Try to load model, but use simulated predictions if not available.

        Reads ``models/sentiment_model.pkl`` and expects a mapping with the
        keys ``'model'`` and ``'vectorizer'``.  A missing file produces an
        informational message; any failure while loading produces a warning.

        Returns:
            ``True`` when the model and vectorizer were loaded, else ``False``.
        """
        try:
            model_path = 'models/sentiment_model.pkl'
            if os.path.exists(model_path):
                # SECURITY: unpickling executes arbitrary code from the file.
                # Only deploy model files that come from a trusted source.
                with open(model_path, 'rb') as f:
                    model_data = pickle.load(f)
                self.model = model_data['model']
                self.vectorizer = model_data['vectorizer']
                return True
            else:
                st.info("πŸ€– Using simulated sentiment analysis for demo. Upload a trained model for accurate predictions.")
                return False
        except Exception as e:
            # Deliberately broad: a broken model file must not take the demo
            # down, the app falls back to the simulated classifier instead.
            st.warning(f"Model loading failed: {e}. Using simulated mode.")
            return False

    @staticmethod
    def _count_keyword_hits(text_lower, keywords):
        """Count how many distinct ``keywords`` occur in ``text_lower`` as words.

        A keyword matches on word boundaries, optionally followed by one of
        the suffixes ``s``, ``d``, ``ed`` or ``ly`` (so "loved" and "badly"
        still count).  Plain substring matching is avoided because it yields
        false hits such as "hate" inside "whatever".
        """
        return sum(
            1
            for word in keywords
            if re.search(rf"\b{re.escape(word)}(?:s|d|ed|ly)?\b", text_lower)
        )

    def predict_sentiment(self, text):
        """Predict sentiment for new text.

        Uses the loaded model and vectorizer when both are set; otherwise a
        keyword-count heuristic produces a simulated result.

        Args:
            text: The review text.  ``None`` is treated as empty text.

        Returns:
            A ``(prediction, probabilities)`` tuple, where ``probabilities``
            maps each class label to its score.
        """
        if self.model is not None and self.vectorizer is not None:
            # Use actual model
            cleaned_text = self.preprocessor.clean_text(text)
            processed_text = self.preprocessor.tokenize_and_lemmatize(cleaned_text)
            text_vector = self.vectorizer.transform([processed_text])
            prediction = self.model.predict(text_vector)[0]
            probability = self.model.predict_proba(text_vector)[0]
            return prediction, dict(zip(self.model.classes_, probability))

        # Simulate prediction
        text_lower = str(text if text is not None else "").lower()
        positive_count = self._count_keyword_hits(text_lower, self._POSITIVE_WORDS)
        negative_count = self._count_keyword_hits(text_lower, self._NEGATIVE_WORDS)

        if positive_count > negative_count:
            prediction = "Positive"
            confidence = min(0.8 + (positive_count * 0.05), 0.95)
        elif negative_count > positive_count:
            prediction = "Negative"
            confidence = min(0.8 + (negative_count * 0.05), 0.95)
        else:
            prediction = "Neutral"
            confidence = 0.6

        # Simulate probabilities: the winner gets `confidence`, the two other
        # classes share the remainder equally.
        if prediction == "Positive":
            probabilities = {'Positive': confidence, 'Neutral': (1-confidence)/2, 'Negative': (1-confidence)/2}
        elif prediction == "Negative":
            probabilities = {'Positive': (1-confidence)/2, 'Neutral': (1-confidence)/2, 'Negative': confidence}
        else:
            probabilities = {'Positive': 0.2, 'Neutral': confidence, 'Negative': 0.2}

        return prediction, probabilities

    def run(self):
        """Main application.

        Configures the page, restores data and model from the session,
        renders the sidebar and dispatches to the selected page.
        """
        st.set_page_config(
            page_title="AI Echo - Sentiment Analysis",
            page_icon="πŸ€–",
            layout="wide",
            initial_sidebar_state="expanded"
        )

        # Custom CSS
        st.markdown("""
        <style>
        .main-header {
        font-size: 2.5rem;
        color: #1f77b4;
        text-align: center;
        margin-bottom: 2rem;
        }
        .metric-card {
        background-color: #f0f2f6;
        padding: 1rem;
        border-radius: 10px;
        border-left: 4px solid #1f77b4;
        }
        </style>
        """, unsafe_allow_html=True)

        st.markdown('<h1 class="main-header">πŸ€– AI Echo: Sentiment Analysis</h1>', unsafe_allow_html=True)
        st.markdown("### Customer Review Sentiment Analysis Dashboard")

        # Initialize and load data.  The frame is kept in session_state
        # because this instance is recreated on every rerun; relying on the
        # 'data_loaded' flag alone left self.df as None from the second run on.
        if 'data_loaded' not in st.session_state or 'df' not in st.session_state:
            self.load_sample_data()
            st.session_state.df = self.df
            st.session_state.data_loaded = True
        self.df = st.session_state.df

        # Same for the model: load once per session, then reuse it.
        if 'model_loaded' not in st.session_state:
            st.session_state.model_loaded = self.load_model()
            st.session_state.model = self.model
            st.session_state.vectorizer = self.vectorizer
        self.model = st.session_state.get('model')
        self.vectorizer = st.session_state.get('vectorizer')

        # Sidebar
        # NOTE(review): the emoji in these literals look mis-encoded in the
        # copy under review - confirm the file is saved as UTF-8.
        st.sidebar.title("Navigation")
        page = st.sidebar.selectbox(
            "Choose a page:",
            ["πŸ“Š Overview", "πŸ€– Model Demo", "πŸ“ˆ Analysis", "πŸ’‘ Insights"]
        )

        # Page routing
        if page == "πŸ“Š Overview":
            self.show_overview()
        elif page == "πŸ€– Model Demo":
            self.show_model_demo()
        elif page == "πŸ“ˆ Analysis":
            self.show_analysis()
        else:
            self.show_insights()

    def show_overview(self):
        """Overview page: headline metrics plus rating and sentiment charts."""
        st.header("πŸ“Š Project Overview")

        # Key metrics
        col1, col2, col3, col4 = st.columns(4)

        with col1:
            total_reviews = len(self.df)
            st.metric("Total Reviews", total_reviews)

        with col2:
            avg_rating = self.df['rating'].mean()
            st.metric("Average Rating", f"{avg_rating:.2f} ⭐")

        with col3:
            positive_pct = (self.df['sentiment'] == 'Positive').mean() * 100
            st.metric("Positive Reviews", f"{positive_pct:.1f}%")

        with col4:
            helpful_reviews = self.df['helpful_votes'].sum()
            st.metric("Total Helpful Votes", helpful_reviews)

        st.markdown("---")

        # Visualizations
        col1, col2 = st.columns(2)

        with col1:
            st.subheader("Review Rating Distribution")
            rating_counts = self.df['rating'].value_counts().sort_index()
            fig = px.bar(rating_counts, x=rating_counts.index, y=rating_counts.values,
                         labels={'x': 'Rating', 'y': 'Count'},
                         title='Distribution of Ratings')
            st.plotly_chart(fig, use_container_width=True)

        with col2:
            st.subheader("Sentiment Distribution")
            sentiment_counts = self.df['sentiment'].value_counts()
            fig = px.pie(values=sentiment_counts.values, names=sentiment_counts.index,
                         title='Sentiment Distribution')
            st.plotly_chart(fig, use_container_width=True)

        st.info("πŸ’‘ This is a demo with sample data. Upload your dataset to the 'data' folder for real analysis.")

    @staticmethod
    def _set_demo_text(text):
        """Button callback: put ``text`` into the demo text box.

        Runs before the script reruns, which is the point at which Streamlit
        allows the state of a keyed widget to be changed.
        """
        st.session_state.demo_text = text

    def show_model_demo(self):
        """Interactive model demo: classify free text and show class scores."""
        st.header("πŸ€– Sentiment Analysis Demo")

        st.markdown("""
        Enter your own review text below to analyze its sentiment.
        The model will predict whether the sentiment is **Positive**, **Neutral**, or **Negative**.
        """)

        # The text area is bound to session_state['demo_text'] so the example
        # buttons below can fill it; previously the buttons wrote that key
        # but the text area never read it, so clicking them had no effect.
        if 'demo_text' not in st.session_state:
            st.session_state.demo_text = self._DEFAULT_DEMO_TEXT

        # Text input
        user_text = st.text_area(
            "Enter your review text:",
            height=150,
            placeholder="Type your review here... Example: 'This app is amazing and very helpful!'",
            key='demo_text'
        )

        if user_text:
            with st.spinner("Analyzing sentiment..."):
                prediction, probabilities = self.predict_sentiment(user_text)

            # Display results
            st.subheader("🎯 Prediction Results")

            col1, col2 = st.columns([1, 2])

            with col1:
                sentiment_colors = {
                    'Positive': '🟒',
                    'Neutral': '🟑',
                    'Negative': 'πŸ”΄'
                }

                st.metric(
                    "Predicted Sentiment",
                    f"{sentiment_colors.get(prediction, 'βšͺ')} {prediction}"
                )

            with col2:
                st.subheader("Confidence Scores")

                for sentiment, prob in probabilities.items():
                    # Model scores may be NumPy scalars and can drift a hair
                    # outside [0, 1]; st.progress needs a plain float in range.
                    score = min(max(float(prob), 0.0), 1.0)
                    st.write(f"**{sentiment}**: {score:.1%}")
                    st.progress(score)

            if self.model is None:
                st.info("πŸ”¬ Currently using simulated analysis. Upload a trained model file for more accurate predictions.")

        # Example reviews
        st.markdown("---")
        st.subheader("πŸ’‘ Try these examples:")

        examples = [
            "This app is absolutely fantastic! It helps me so much with my work.",
            "The application is okay, but it could use some improvements.",
            "I'm very disappointed with the performance and customer service.",
            "Outstanding features and excellent user experience!",
            "It's mediocre, nothing special about it."
        ]

        cols = st.columns(3)
        for i, example in enumerate(examples):
            with cols[i % 3]:
                # The click itself triggers the rerun, so no st.rerun() call
                # is needed after the callback has stored the example.
                st.button(
                    f"'{example[:30]}...'",
                    use_container_width=True,
                    on_click=self._set_demo_text,
                    args=(example,)
                )

    @staticmethod
    def _render_wordcloud(heading, text, empty_message):
        """Draw one word cloud for ``text`` under ``heading``.

        Shows ``empty_message`` instead when there is nothing to plot.
        """
        st.markdown(heading)
        if not text.strip():
            st.info(empty_message)
            return

        try:
            wordcloud = WordCloud(width=400, height=300, background_color='white').generate(text)
        except ValueError:
            # NOTE(review): WordCloud appears to raise ValueError when no
            # words remain after its own stop-word filtering - confirm.
            st.info(empty_message)
            return

        fig, ax = plt.subplots(figsize=(10, 6))
        ax.imshow(wordcloud, interpolation='bilinear')
        ax.axis('off')
        st.pyplot(fig)
        # pyplot keeps every figure alive until it is closed; without this
        # each rerun of the page would leave two more figures in memory.
        plt.close(fig)

    def show_analysis(self):
        """Analysis page: platform breakdowns and per-sentiment word clouds."""
        st.header("πŸ“ˆ Data Analysis")

        # Platform analysis
        st.subheader("Platform Comparison")
        platform_counts = self.df['platform'].value_counts()
        fig = px.bar(platform_counts, x=platform_counts.index, y=platform_counts.values,
                     labels={'x': 'Platform', 'y': 'Number of Reviews'},
                     title='Reviews by Platform')
        st.plotly_chart(fig, use_container_width=True)

        # Sentiment by platform
        platform_sentiment = pd.crosstab(self.df['platform'], self.df['sentiment'], normalize='index') * 100
        fig = px.bar(platform_sentiment, barmode='stack',
                     title='Sentiment Distribution by Platform (%)')
        st.plotly_chart(fig, use_container_width=True)

        # Word clouds
        st.subheader("πŸ“ Word Clouds")

        # dropna/astype(str) keep str.join from failing on missing reviews.
        reviews = self.df['review']
        positive_text = ' '.join(reviews[self.df['sentiment'] == 'Positive'].dropna().astype(str))
        negative_text = ' '.join(reviews[self.df['sentiment'] == 'Negative'].dropna().astype(str))

        col1, col2 = st.columns(2)

        with col1:
            self._render_wordcloud("**Positive Reviews**", positive_text,
                                   "No positive reviews available")

        with col2:
            self._render_wordcloud("**Negative Reviews**", negative_text,
                                   "No negative reviews available")

    def show_insights(self):
        """Insights page: summary metrics, recommendations and setup notes."""
        st.header("πŸ’‘ Business Insights & Recommendations")

        # Key metrics
        positive_pct = (self.df['sentiment'] == 'Positive').mean() * 100
        avg_rating = self.df['rating'].mean()

        col1, col2, col3 = st.columns(3)

        with col1:
            st.metric("Overall Satisfaction", f"{positive_pct:.1f}%")

        with col2:
            st.metric("Average Rating", f"{avg_rating:.2f} ⭐")

        with col3:
            verified_ratio = (self.df['verified_purchase'] == 'Yes').mean() * 100
            st.metric("Verified Reviews", f"{verified_ratio:.1f}%")

        st.markdown("---")

        # Recommendations
        st.subheader("🎯 Actionable Recommendations")

        recommendations = [
            "**Monitor Negative Reviews**: Regularly analyze 1-2 star reviews for common issues and pain points",
            "**Platform Optimization**: Ensure consistent user experience across all platforms (Web, Mobile, etc.)",
            "**Feature Development**: Prioritize features frequently mentioned in positive reviews",
            "**Customer Support**: Implement sentiment-based routing for support tickets",
            "**Regional Strategy**: Analyze location-based sentiment for market-specific improvements",
            "**Version Tracking**: Monitor sentiment changes across different application versions"
        ]

        for i, recommendation in enumerate(recommendations, 1):
            st.markdown(f"{i}. {recommendation}")

        st.markdown("---")

        # Technical setup
        st.subheader("πŸ”§ Technical Setup")
        # Report the mode that is actually active instead of always claiming
        # the simulated classifier is in use.
        if self.model is not None:
            analysis_mode = "a trained sentiment model"
        else:
            analysis_mode = "simulated sentiment analysis"
        st.info(f"""
        **To use with your own data:**
        1. Upload your CSV file to the `data/` folder
        2. Train and save your model as `models/sentiment_model.pkl`
        3. The app will automatically detect and use your files

        **Current mode:** Using sample data with {analysis_mode}
        """)
410
+
411
# Script entry point: build the dashboard object and render it.
if __name__ == "__main__":
    SentimentAnalyzerApp().run()