FarhinSadia committed on
Commit
bd85768
·
1 Parent(s): 7b8f7c1

Add requirements.txt for deployment

Browse files
Files changed (2) hide show
  1. src/data_processor.py +145 -80
  2. src/insights_generator.py +270 -0
src/data_processor.py CHANGED
@@ -10,7 +10,6 @@ try:
10
  OPENAI_AVAILABLE = True
11
  except ImportError:
12
  OPENAI_AVAILABLE = False
13
- print("OpenAI not installed. GPT features will be disabled.")
14
 
15
  try:
16
  import nltk
@@ -19,7 +18,6 @@ try:
19
  NLTK_AVAILABLE = True
20
  except ImportError:
21
  NLTK_AVAILABLE = False
22
- print("NLTK not installed. Using TextBlob only.")
23
 
24
  class DataProcessor:
25
  def __init__(self, openai_api_key=None):
@@ -40,43 +38,87 @@ class DataProcessor:
40
  openai.api_key = openai_api_key
41
  self.use_gpt = True
42
 
43
- # Banking-specific patterns
44
- self.banking_keywords = {
45
- 'service_quality': ['customer service', 'staff', 'support', 'help', 'assistance'],
46
- 'transaction': ['transfer', 'deposit', 'withdraw', 'payment', 'transaction'],
47
- 'account': ['account', 'savings', 'checking', 'balance'],
48
- 'loan': ['loan', 'mortgage', 'credit', 'interest rate'],
49
- 'digital': ['app', 'online banking', 'mobile', 'website', 'digital'],
50
- 'branch': ['branch', 'atm', 'location', 'queue', 'waiting']
51
  }
52
 
53
- def process_csv_files(self, uploaded_files):
54
- """Process multiple CSV files"""
55
- all_dataframes = []
56
 
57
- for uploaded_file in uploaded_files:
58
- try:
59
- df = pd.read_csv(uploaded_file)
60
- df['source_file'] = uploaded_file.name
61
- all_dataframes.append(df)
62
- except Exception as e:
63
- print(f"Error reading {uploaded_file.name}: {e}")
64
-
65
- if all_dataframes:
66
- combined_df = pd.concat(all_dataframes, ignore_index=True)
67
- return combined_df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  return pd.DataFrame()
69
 
70
- def process_txt_file(self, txt_file):
71
- """Process text file with reviews"""
72
- content = txt_file.read().decode('utf-8')
73
- reviews = content.split('\n')
74
-
75
- df = pd.DataFrame({
76
- 'text': [review.strip() for review in reviews if review.strip()],
77
- 'source_file': txt_file.name
78
- })
79
- return df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
  def analyze_sentiment(self, text):
82
  """Analyze sentiment - use VADER if available, else TextBlob"""
@@ -112,67 +154,71 @@ class DataProcessor:
112
  return 'Neutral', 0
113
 
114
  def detect_emotion(self, text):
115
- """Detect emotion in text"""
116
  if pd.isna(text):
117
- return 'Neutral'
118
 
119
  text_lower = str(text).lower()
120
 
121
- # Emotion keywords
122
  emotions = {
123
- 'Joy': ['happy', 'excellent', 'amazing', 'great', 'wonderful', 'fantastic', 'love', 'best', 'thank you'],
124
- 'Frustration': ['frustrated', 'angry', 'terrible', 'horrible', 'worst', 'hate', 'annoyed', 'disappointed'],
125
- 'Confusion': ['confused', 'unclear', "don't understand", 'what', 'how', 'why', '?', 'help me']
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  }
127
 
128
  emotion_scores = {}
129
- for emotion, keywords in emotions.items():
130
- score = sum(keyword in text_lower for keyword in keywords)
 
 
 
131
  emotion_scores[emotion] = score
 
 
132
 
133
  if max(emotion_scores.values()) > 0:
134
- return max(emotion_scores, key=emotion_scores.get)
135
- return 'Neutral'
 
 
136
 
137
  def categorize_post(self, text):
138
- """Categorize post type"""
139
  if pd.isna(text):
140
- return 'Other'
141
 
142
  text_lower = str(text).lower()
143
 
144
- if '?' in text_lower or any(word in text_lower for word in ['how', 'what', 'when', 'where']):
145
- return 'Inquiry'
146
- elif any(word in text_lower for word in ['complaint', 'problem', 'issue', 'bad', 'terrible']):
147
- return 'Complaint'
148
- elif any(word in text_lower for word in ['thank', 'great', 'excellent', 'love', 'best']):
149
- return 'Praise'
 
 
 
150
  else:
151
- return 'Other'
152
-
153
- def count_prime_mentions(self, text):
154
- """Count Prime Bank mentions"""
155
- if pd.isna(text):
156
- return 0
157
-
158
- text_lower = str(text).lower()
159
- patterns = [
160
- r'prime\s*bank',
161
- r'primebank',
162
- r'@primebank'
163
- ]
164
-
165
- total_mentions = 0
166
- for pattern in patterns:
167
- mentions = len(re.findall(pattern, text_lower))
168
- total_mentions += mentions
169
-
170
- return total_mentions
171
 
172
  def process_all_data(self, df):
173
  """Apply all processing to dataframe"""
174
  # Find text column
175
- text_columns = ['text', 'content', 'message', 'review', 'comment', 'post']
176
  text_col = None
177
 
178
  for col in text_columns:
@@ -186,20 +232,39 @@ class DataProcessor:
186
  if 'text' not in df.columns:
187
  return df
188
 
189
- # Apply all analyses
 
 
 
 
 
 
 
 
190
  df[['sentiment', 'polarity']] = df['text'].apply(
191
  lambda x: pd.Series(self.analyze_sentiment(x))
192
  )
193
 
194
- df['emotion'] = df['text'].apply(self.detect_emotion)
195
- df['category'] = df['text'].apply(self.categorize_post)
196
- df['prime_mentions'] = df['text'].apply(self.count_prime_mentions)
 
197
 
198
- # Calculate viral score
199
- df['viral_score'] = df['prime_mentions'] * 10
 
 
 
 
 
200
  if 'likes' in df.columns:
201
  df['viral_score'] += df['likes'].fillna(0)
202
  if 'shares' in df.columns:
203
  df['viral_score'] += df['shares'].fillna(0) * 2
204
-
 
 
 
 
 
205
  return df
 
10
  OPENAI_AVAILABLE = True
11
  except ImportError:
12
  OPENAI_AVAILABLE = False
 
13
 
14
  try:
15
  import nltk
 
18
  NLTK_AVAILABLE = True
19
  except ImportError:
20
  NLTK_AVAILABLE = False
 
21
 
22
  class DataProcessor:
23
  def __init__(self, openai_api_key=None):
 
38
  openai.api_key = openai_api_key
39
  self.use_gpt = True
40
 
41
+ # Banking patterns - INCLUDING OTHER BANKS
42
+ self.bank_patterns = {
43
+ 'prime_bank': [r'prime\s*bank', r'primebank', r'@primebank', r'prime\s*b\.?'],
44
+ 'eastern_bank': [r'eastern\s*bank', r'ebl', r'@easternbank'],
45
+ 'brac_bank': [r'brac\s*bank', r'@bracbank'],
46
+ 'city_bank': [r'city\s*bank', r'@citybank'],
47
+ 'dutch_bangla': [r'dutch\s*bangla', r'dbbl', r'@dutchbangla']
 
48
  }
49
 
50
+ def load_data_from_files(self, csv_files=None, txt_files=None):
51
+ """Load data from CSV and TXT files"""
52
+ all_data = []
53
 
54
+ # Load CSV files
55
+ if csv_files:
56
+ for file_path in csv_files:
57
+ try:
58
+ df = pd.read_csv(file_path)
59
+ df['source_file'] = file_path.split('/')[-1]
60
+ all_data.append(df)
61
+ except Exception as e:
62
+ print(f"Error loading {file_path}: {e}")
63
+
64
+ # Load TXT files
65
+ if txt_files:
66
+ for file_path in txt_files:
67
+ try:
68
+ with open(file_path, 'r', encoding='utf-8') as f:
69
+ content = f.read()
70
+
71
+ # Split by double newlines to separate posts
72
+ posts = content.split('\n\n')
73
+
74
+ # Create dataframe
75
+ df = pd.DataFrame({
76
+ 'text': [post.strip() for post in posts if post.strip()],
77
+ 'source_file': file_path.split('/')[-1]
78
+ })
79
+ all_data.append(df)
80
+ except Exception as e:
81
+ print(f"Error loading {file_path}: {e}")
82
+
83
+ if all_data:
84
+ return pd.concat(all_data, ignore_index=True)
85
  return pd.DataFrame()
86
 
87
+ def identify_bank(self, text):
88
+ """Identify which bank is mentioned in the text"""
89
+ if pd.isna(text):
90
+ return 'none', []
91
+
92
+ text_lower = str(text).lower()
93
+ mentioned_banks = []
94
+
95
+ for bank, patterns in self.bank_patterns.items():
96
+ for pattern in patterns:
97
+ if re.search(pattern, text_lower):
98
+ mentioned_banks.append(bank)
99
+ break
100
+
101
+ if not mentioned_banks:
102
+ return 'none', []
103
+ elif len(mentioned_banks) == 1:
104
+ return mentioned_banks[0], mentioned_banks
105
+ else:
106
+ return 'multiple', mentioned_banks
107
+
108
+ def count_bank_mentions(self, text, bank='prime_bank'):
109
+ """Count mentions of specific bank"""
110
+ if pd.isna(text):
111
+ return 0
112
+
113
+ text_lower = str(text).lower()
114
+ total_mentions = 0
115
+
116
+ if bank in self.bank_patterns:
117
+ for pattern in self.bank_patterns[bank]:
118
+ mentions = len(re.findall(pattern, text_lower))
119
+ total_mentions += mentions
120
+
121
+ return total_mentions
122
 
123
  def analyze_sentiment(self, text):
124
  """Analyze sentiment - use VADER if available, else TextBlob"""
 
154
  return 'Neutral', 0
155
 
156
  def detect_emotion(self, text):
157
+ """Detect emotion in text with context"""
158
  if pd.isna(text):
159
+ return 'Neutral', []
160
 
161
  text_lower = str(text).lower()
162
 
163
+ # Emotion keywords with context
164
  emotions = {
165
+ 'Joy': {
166
+ 'keywords': ['happy', 'excellent', 'amazing', 'great', 'wonderful', 'fantastic', 'love', 'best', 'thank you', 'appreciate'],
167
+ 'context': 'expressing satisfaction and happiness'
168
+ },
169
+ 'Frustration': {
170
+ 'keywords': ['frustrated', 'angry', 'terrible', 'horrible', 'worst', 'hate', 'annoyed', 'disappointed', 'pathetic'],
171
+ 'context': 'expressing anger and dissatisfaction'
172
+ },
173
+ 'Confusion': {
174
+ 'keywords': ['confused', 'unclear', "don't understand", 'what', 'how', 'why', '?', 'help me', 'lost'],
175
+ 'context': 'seeking clarification or expressing confusion'
176
+ },
177
+ 'Anxiety': {
178
+ 'keywords': ['worried', 'concern', 'anxious', 'nervous', 'scared', 'fear', 'panic', 'urgent'],
179
+ 'context': 'expressing worry or urgency'
180
+ }
181
  }
182
 
183
  emotion_scores = {}
184
+ detected_keywords = {}
185
+
186
+ for emotion, data in emotions.items():
187
+ keywords_found = [kw for kw in data['keywords'] if kw in text_lower]
188
+ score = len(keywords_found)
189
  emotion_scores[emotion] = score
190
+ if keywords_found:
191
+ detected_keywords[emotion] = keywords_found
192
 
193
  if max(emotion_scores.values()) > 0:
194
+ primary_emotion = max(emotion_scores, key=emotion_scores.get)
195
+ return primary_emotion, detected_keywords.get(primary_emotion, [])
196
+
197
+ return 'Neutral', []
198
 
199
  def categorize_post(self, text):
200
+ """Categorize post type with reason"""
201
  if pd.isna(text):
202
+ return 'Other', 'No text content'
203
 
204
  text_lower = str(text).lower()
205
 
206
+ # Categories with detection logic
207
+ if '?' in text_lower or any(phrase in text_lower for phrase in ['how do', 'what is', 'when', 'where', 'can i', 'could you']):
208
+ return 'Inquiry', 'Contains questions or information seeking'
209
+ elif any(word in text_lower for word in ['complaint', 'problem', 'issue', 'error', 'failed', 'not working', 'terrible', 'worst']):
210
+ return 'Complaint', 'Contains complaint or problem description'
211
+ elif any(word in text_lower for word in ['thank', 'great', 'excellent', 'love', 'best', 'appreciate', 'amazing']):
212
+ return 'Praise', 'Contains positive feedback or appreciation'
213
+ elif any(word in text_lower for word in ['suggest', 'should', 'could', 'recommend', 'request', 'please add']):
214
+ return 'Suggestion', 'Contains suggestions or feature requests'
215
  else:
216
+ return 'Other', 'General discussion or observation'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
 
218
  def process_all_data(self, df):
219
  """Apply all processing to dataframe"""
220
  # Find text column
221
+ text_columns = ['text', 'content', 'message', 'review', 'comment', 'post', 'Text', 'Content']
222
  text_col = None
223
 
224
  for col in text_columns:
 
232
  if 'text' not in df.columns:
233
  return df
234
 
235
+ # Identify which bank each post is about
236
+ df[['primary_bank', 'all_banks_mentioned']] = df['text'].apply(
237
+ lambda x: pd.Series(self.identify_bank(x))
238
+ )
239
+
240
+ # Count mentions for each bank
241
+ df['prime_mentions'] = df['text'].apply(lambda x: self.count_bank_mentions(x, 'prime_bank'))
242
+
243
+ # Apply sentiment analysis
244
  df[['sentiment', 'polarity']] = df['text'].apply(
245
  lambda x: pd.Series(self.analyze_sentiment(x))
246
  )
247
 
248
+ # Apply emotion detection with keywords
249
+ df[['emotion', 'emotion_keywords']] = df['text'].apply(
250
+ lambda x: pd.Series(self.detect_emotion(x))
251
+ )
252
 
253
+ # Categorize posts with reasons
254
+ df[['category', 'category_reason']] = df['text'].apply(
255
+ lambda x: pd.Series(self.categorize_post(x))
256
+ )
257
+
258
+ # Calculate viral score (only for posts with engagement metrics)
259
+ df['viral_score'] = 0
260
  if 'likes' in df.columns:
261
  df['viral_score'] += df['likes'].fillna(0)
262
  if 'shares' in df.columns:
263
  df['viral_score'] += df['shares'].fillna(0) * 2
264
+ if 'comments' in df.columns:
265
+ df['viral_score'] += df['comments'].fillna(0) * 1.5
266
+
267
+ # Add Prime Bank specific viral score boost
268
+ df.loc[df['prime_mentions'] > 0, 'viral_score'] *= 1.2
269
+
270
  return df
src/insights_generator.py ADDED
@@ -0,0 +1,270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pandas as pd
from collections import Counter


class InsightsGenerator:
    """Build human-readable insight summaries from processed post data.

    Expects DataFrames shaped like the output of DataProcessor.process_all_data:
    columns 'text', 'sentiment', 'emotion', 'emotion_keywords', 'category' and
    (for comparisons) 'primary_bank' — TODO confirm against callers.
    """

    def __init__(self):
        # Cache of the most recently generated insight sections, keyed by name.
        self.insights = {}

    def generate_all_insights(self, df, prime_df):
        """Generate comprehensive insights for all analyses.

        Args:
            df: all posts (any bank), used for the overview and comparison.
            prime_df: the subset of posts mentioning Prime Bank.

        Returns:
            dict of sections: overview, sentiment, emotion, category, topics,
            comparison, actions.
        """
        total_posts = len(df)
        prime_posts = len(prime_df)
        # Guard against division by zero when no data was loaded.
        prime_percentage = (prime_posts / total_posts * 100) if total_posts > 0 else 0

        self.insights['overview'] = {
            'summary': f"Analyzed {total_posts:,} total posts, of which {prime_posts:,} ({prime_percentage:.1f}%) specifically mention Prime Bank.",
            'context': f"The remaining {total_posts - prime_posts:,} posts mention other banks or general banking topics."
        }

        self.insights['sentiment'] = self._generate_sentiment_insights(prime_df)
        self.insights['emotion'] = self._generate_emotion_insights(prime_df)
        self.insights['category'] = self._generate_category_insights(prime_df)
        self.insights['topics'] = self._generate_topic_insights(prime_df)
        self.insights['comparison'] = self._generate_comparison_insights(df)
        self.insights['actions'] = self._generate_action_insights(prime_df)

        return self.insights

    def _generate_sentiment_insights(self, df):
        """Summarize the sentiment distribution and recurring negative themes."""
        if len(df) == 0:
            return {'summary': 'No Prime Bank posts found for sentiment analysis.'}

        sentiment_dist = df['sentiment'].value_counts(normalize=True) * 100

        # Keep up to two example posts per sentiment for display.
        sentiment_examples = {}
        for sentiment in ['Positive', 'Negative', 'Neutral']:
            examples = df[df['sentiment'] == sentiment]['text'].head(2).tolist()
            sentiment_examples[sentiment] = examples

        # Scan all negative text for a few known complaint themes (substring match).
        negative_posts = df[df['sentiment'] == 'Negative']['text']
        negative_themes = []
        if len(negative_posts) > 0:
            all_negative_text = ' '.join(negative_posts.astype(str).tolist()).lower()
            if 'wait' in all_negative_text or 'queue' in all_negative_text:
                negative_themes.append('long wait times')
            if 'fee' in all_negative_text or 'charge' in all_negative_text:
                negative_themes.append('fees and charges')
            if 'app' in all_negative_text or 'online' in all_negative_text:
                negative_themes.append('digital banking issues')
            if 'staff' in all_negative_text or 'service' in all_negative_text:
                negative_themes.append('customer service')

        insights = {
            'summary': f"Sentiment breakdown: {sentiment_dist.get('Positive', 0):.1f}% positive, {sentiment_dist.get('Negative', 0):.1f}% negative, {sentiment_dist.get('Neutral', 0):.1f}% neutral.",
            'positive_context': f"Positive posts ({sentiment_dist.get('Positive', 0):.1f}%) primarily praise customer service, digital banking features, and efficient processes.",
            'negative_context': f"Negative posts ({sentiment_dist.get('Negative', 0):.1f}%) mainly complain about: {', '.join(negative_themes) if negative_themes else 'various service issues'}.",
            'neutral_context': f"Neutral posts ({sentiment_dist.get('Neutral', 0):.1f}%) are mostly inquiries about services and general discussions.",
            'examples': sentiment_examples,
            'concern_areas': negative_themes
        }

        return insights

    def _generate_emotion_insights(self, df):
        """Summarize emotion distribution, top keywords, and a recommendation."""
        if len(df) == 0:
            return {'summary': 'No Prime Bank posts found for emotion analysis.'}

        emotion_dist = df['emotion'].value_counts()
        total_emotional = len(df[df['emotion'] != 'Neutral'])

        # Canned interpretive context per emotion (presentation text only).
        emotion_contexts = {
            'Joy': 'Customers expressing joy are satisfied with services, particularly praising staff helpfulness and quick problem resolution.',
            'Frustration': 'Frustrated customers mainly face issues with wait times, technical problems, and unresolved complaints.',
            'Confusion': 'Confused customers need better information about products, fees, and online banking procedures.',
            'Anxiety': 'Anxious customers are worried about account security, loan applications, and urgent transaction issues.'
        }

        # Aggregate the per-post keyword lists into top-3 keywords per emotion.
        emotion_keywords = {}
        for emotion in ['Joy', 'Frustration', 'Confusion', 'Anxiety']:
            emotion_posts = df[df['emotion'] == emotion]
            if len(emotion_posts) > 0:
                all_keywords = []
                for keywords in emotion_posts['emotion_keywords']:
                    if isinstance(keywords, list):
                        all_keywords.extend(keywords)
                if all_keywords:
                    emotion_keywords[emotion] = Counter(all_keywords).most_common(3)

        insights = {
            'summary': f"{total_emotional} out of {len(df)} Prime Bank posts ({total_emotional/len(df)*100:.1f}%) express clear emotions.",
            'distribution': {emotion: count for emotion, count in emotion_dist.items()},
            'contexts': emotion_contexts,
            # NOTE: the top value_counts entry may be 'Neutral' when it dominates.
            'top_emotion': emotion_dist.index[0] if len(emotion_dist) > 0 else 'None',
            'keywords': emotion_keywords,
            'recommendation': self._get_emotion_recommendation(emotion_dist)
        }

        return insights

    def _generate_category_insights(self, df):
        """Summarize post-category counts with canned topics and actions."""
        if len(df) == 0:
            return {'summary': 'No Prime Bank posts found for category analysis.'}

        category_dist = df['category'].value_counts()

        # Static per-category guidance shown alongside the counts.
        category_insights = {
            'Inquiry': {
                'common_topics': ['account opening', 'loan applications', 'online banking setup', 'branch locations'],
                'action': 'Improve FAQ section and provide clearer information channels'
            },
            'Complaint': {
                'common_topics': ['service delays', 'technical issues', 'hidden fees', 'staff behavior'],
                'action': 'Establish rapid response team for complaint resolution'
            },
            'Praise': {
                'common_topics': ['helpful staff', 'quick service', 'user-friendly app', 'problem resolution'],
                'action': 'Recognize and reward mentioned staff members'
            },
            'Suggestion': {
                'common_topics': ['new features', 'branch expansion', 'service improvements', 'digital enhancements'],
                'action': 'Review suggestions for product development roadmap'
            }
        }

        insights = {
            'summary': f"Post categories: {', '.join([f'{cat} ({count})' for cat, count in category_dist.items()])}",
            'details': category_insights,
            'urgent_attention': f"{category_dist.get('Complaint', 0)} complaints require immediate attention",
            'opportunities': f"{category_dist.get('Suggestion', 0)} suggestions for improvement"
        }

        return insights

    def _generate_topic_insights(self, df):
        """Identify trending topics by keyword presence in the combined text.

        Counts how many of a topic's keywords occur at least once (not total
        mention frequency).
        """
        if len(df) == 0:
            return {'summary': 'No Prime Bank posts found for topic analysis.'}

        all_text = ' '.join(df['text'].astype(str).tolist()).lower()

        topics = {
            'Digital Banking': ['app', 'online', 'mobile', 'website', 'internet banking'],
            'Customer Service': ['staff', 'service', 'help', 'support', 'employee'],
            'Fees & Charges': ['fee', 'charge', 'cost', 'expensive', 'price'],
            'Loans': ['loan', 'credit', 'mortgage', 'interest', 'emi'],
            'ATM & Branch': ['atm', 'branch', 'location', 'machine', 'cash'],
            'Account Services': ['account', 'savings', 'current', 'balance', 'statement']
        }

        topic_counts = {}
        for topic, keywords in topics.items():
            count = sum(1 for keyword in keywords if keyword in all_text)
            if count > 0:
                topic_counts[topic] = count

        sorted_topics = sorted(topic_counts.items(), key=lambda x: x[1], reverse=True)

        insights = {
            'summary': f"Top discussed topics: {', '.join([f'{topic} ({count} mentions)' for topic, count in sorted_topics[:3]])}",
            'all_topics': dict(sorted_topics),
            'trending': sorted_topics[0][0] if sorted_topics else 'None',
            'recommendation': f"Focus on improving {sorted_topics[0][0].lower()} based on high discussion volume" if sorted_topics else "No clear topic trends"
        }

        return insights

    def _generate_comparison_insights(self, df):
        """Compare Prime Bank's positive-sentiment rate against other banks."""
        bank_sentiment = {}

        # Only banks recognized by DataProcessor.bank_patterns are compared.
        for bank in ['prime_bank', 'eastern_bank', 'brac_bank', 'city_bank', 'dutch_bangla']:
            bank_posts = df[df['primary_bank'] == bank]
            if len(bank_posts) > 0:
                positive_rate = (bank_posts['sentiment'] == 'Positive').sum() / len(bank_posts) * 100
                bank_sentiment[bank] = {
                    'posts': len(bank_posts),
                    'positive_rate': positive_rate
                }

        if 'prime_bank' in bank_sentiment:
            prime_positive = bank_sentiment['prime_bank']['positive_rate']
            # 50% is used as a fixed sector benchmark here.
            comparison = "above average" if prime_positive > 50 else "below average"

            insights = {
                'summary': f"Prime Bank has {prime_positive:.1f}% positive sentiment, which is {comparison} in the banking sector.",
                'comparison': bank_sentiment,
                'recommendation': "Focus on maintaining positive momentum" if prime_positive > 50 else "Urgent improvement needed to match competitor satisfaction levels"
            }
        else:
            insights = {'summary': 'No comparative data available.'}

        return insights

    def _generate_action_insights(self, df):
        """Derive actionable buckets: immediate responses, quick wins, strategy."""
        if len(df) == 0:
            return {'summary': 'No Prime Bank posts found for action analysis.'}

        # Negative, emotionally charged complaints need a fast response.
        high_priority = df[
            (df['sentiment'] == 'Negative') &
            (df['emotion'].isin(['Frustration', 'Anxiety'])) &
            (df['category'] == 'Complaint')
        ]

        # Positive praise posts that can be amplified for marketing.
        quick_wins = df[
            (df['sentiment'] == 'Positive') &
            (df['category'] == 'Praise')
        ]

        actions = {
            'immediate': {
                'count': len(high_priority),
                'description': 'High-priority complaints requiring immediate response',
                'action': 'Contact these customers within 24 hours'
            },
            'quick_wins': {
                'count': len(quick_wins),
                'description': 'Positive testimonials for marketing use',
                'action': 'Share success stories and thank customers publicly'
            },
            'strategic': {
                'description': 'Long-term improvements based on feedback patterns',
                'actions': [
                    'Enhance digital banking infrastructure',
                    'Implement customer service training program',
                    'Review and simplify fee structure'
                ]
            }
        }

        return actions

    def _get_emotion_recommendation(self, emotion_dist):
        """Return a recommendation string for the most frequent emotion.

        Args:
            emotion_dist: a value_counts-style Series of emotion frequencies.
        """
        if len(emotion_dist) == 0:
            return "No emotional data to analyze"

        top_emotion = emotion_dist.index[0]

        recommendations = {
            'Joy': "Leverage positive emotions by encouraging happy customers to share testimonials",
            'Frustration': "Implement rapid response protocol for frustrated customers to prevent escalation",
            'Confusion': "Create clearer communication materials and improve customer education",
            'Anxiety': "Provide reassurance through proactive communication about security and processes",
            # BUGFIX: this string was truncated mid-sentence ("...to create").
            'Neutral': "Engage neutral customers with targeted campaigns to create stronger engagement"
        }
        return recommendations.get(top_emotion, "Monitor customer emotions closely")