Spaces:
Sleeping
Sleeping
# insights_generator.py
import os
import re
from collections import Counter

import pandas as pd

# The OpenAI client is an optional dependency: when the package is missing the
# module still imports and AI-backed features are simply disabled.
try:
    from openai import OpenAI, AsyncOpenAI
    OPENAI_AVAILABLE = True
except ImportError:
    OPENAI_AVAILABLE = False
class InsightsGenerator:
    """
    Analyzes processed data to generate rich, qualitative insights for the dashboard.

    The input DataFrames are expected to carry the columns this class reads:
    ``prime_mentions``, ``text``, ``sentiment``, ``emotion`` and ``category``.
    An OpenAI client is created only when an API key is supplied and the
    ``openai`` package could be imported; otherwise ``self.client`` stays
    ``None`` and AI recommendations return an explanatory message.
    """

    # Words ignored when extracting themes. Built once instead of per call.
    _STOP_WORDS = frozenset({
        'the', 'a', 'an', 'is', 'i', 'to', 'for', 'in', 'it', 'and', 'my',
        'of', 'prime', 'bank', 'banker', 'was', 'do', 'with', 'that', 'this',
        'have', 'has', 'are', 'not', 'er', 'ta', 'ki', 'ami', 'amar', 'kore',
        'hocche', 'bhalo', 'asholei', 'onek', 'apnar', 'sir', 'bro', 'please',
        'help', 'need', 'know', 'want',
    })

    # Theme candidates are lowercase ASCII words of at least four letters.
    _WORD_RE = re.compile(r'\b[a-z]{4,}\b')

    # One prompt template per category; ``{}`` receives the bulleted snippets.
    _CATEGORY_PROMPTS = {
        'Complaint': "Based on these customer complaints, identify the main theme and suggest one concrete action Prime Bank could take to resolve these issues for future customer satisfaction. Complaints:\n\n{}",
        'Suggestion': "Based on these customer suggestions, what is the most impactful feature or service improvement Prime Bank should prioritize? Briefly explain why. Suggestions:\n\n{}",
        'Praise': "Based on this positive feedback, what is Prime Bank doing right that they should double-down on or use in their marketing? Praise:\n\n{}",
        'Inquiry': "These are common questions from customers. What is the most frequent topic of confusion? Suggest how Prime Bank could clarify this through their website FAQ or app. Inquiries:\n\n{}",
    }

    def __init__(self, openai_api_key=None):
        """Create the generator.

        Args:
            openai_api_key: Optional OpenAI API key. When falsy, or when the
                ``openai`` package is unavailable, no client is created.
        """
        self.insights = {}
        self.client = None
        if openai_api_key and OPENAI_AVAILABLE:
            # Hand the key straight to the client rather than exporting it via
            # os.environ, which would leak it to the whole process.
            self.client = OpenAI(api_key=openai_api_key)

    @staticmethod
    def _prime_rows(df):
        """Return the rows of *df* with ``prime_mentions > 0``.

        An empty DataFrame is returned when the column is absent.
        """
        if 'prime_mentions' not in df.columns:
            return pd.DataFrame()
        return df[df['prime_mentions'] > 0]

    def generate_all_insights(self, posts_df, all_text_df):
        """Compute sentiment, emotion and category insights.

        Args:
            posts_df: Posts; used for the sentiment insight.
            all_text_df: All text rows; used for emotion and category insights.

        Returns:
            ``self.insights``, updated with the keys ``'sentiment'``,
            ``'emotion'`` and ``'category'``.
        """
        prime_posts_df = self._prime_rows(posts_df)
        prime_all_text_df = self._prime_rows(all_text_df)
        self.insights['sentiment'] = self._generate_sentiment_insights(prime_posts_df)
        self.insights['emotion'] = self._generate_emotion_insights(prime_all_text_df)
        self.insights['category'] = self._generate_category_insights(prime_all_text_df)
        return self.insights

    def _get_common_words(self, text_series, top_n=5):
        """Return the *top_n* most frequent non-stop-words as a comma-separated string.

        Returns ``"No data"`` for an empty series and ``"general topics"`` when
        no qualifying word is found.
        """
        if text_series.empty:
            return "No data"
        # Drop missing values first: str(None) would otherwise be counted as
        # the word "none".
        all_text = ' '.join(text_series.dropna().astype(str)).lower()
        counts = Counter(
            word for word in self._WORD_RE.findall(all_text)
            if word not in self._STOP_WORDS
        )
        if not counts:
            return "general topics"
        return ', '.join(word for word, _ in counts.most_common(top_n))

    def _generate_sentiment_insights(self, df):
        """Summarise the sentiment distribution and themes of *df*.

        Returns a dict with ``summary``, ``positive_themes``,
        ``negative_themes`` and up to three ``negative_examples``.
        """
        if df.empty:
            return {
                'summary': 'No posts found for sentiment analysis.',
                'positive_themes': 'N/A',
                'negative_themes': 'N/A',
                'negative_examples': [],
            }
        dist = df['sentiment'].value_counts(normalize=True) * 100
        positive_df = df[df['sentiment'] == 'Positive']
        negative_df = df[df['sentiment'] == 'Negative']
        positive_themes = self._get_common_words(positive_df['text'])
        negative_themes = self._get_common_words(negative_df['text'])
        return {
            'summary': f"Positive: {dist.get('Positive', 0):.1f}%, Negative: {dist.get('Negative', 0):.1f}%, Neutral: {dist.get('Neutral', 0):.1f}%",
            'positive_themes': f"Customers are happy about: {positive_themes}.",
            'negative_themes': f"Customers are unhappy about: {negative_themes}.",
            'negative_examples': negative_df['text'].head(3).tolist(),
        }

    def _generate_emotion_insights(self, df):
        """Report the most common emotion plus themes and an example per emotion.

        Only the emotions Joy, Frustration, Confusion and Anxiety are detailed.
        """
        if df.empty:
            return {'summary': 'No text found for emotion analysis.', 'details': {}}
        emotion_dist = df['emotion'].value_counts()
        top_emotion = emotion_dist.index[0] if not emotion_dist.empty else "N/A"
        insight_details = {}
        for emotion in ('Joy', 'Frustration', 'Confusion', 'Anxiety'):
            emotion_df = df[df['emotion'] == emotion]
            if emotion_df.empty:
                continue
            insight_details[emotion] = {
                'themes': self._get_common_words(emotion_df['text'], 3),
                'example': emotion_df['text'].iloc[0],
            }
        return {'summary': f"The most common emotion is '{top_emotion}'.", 'details': insight_details}

    def _generate_category_insights(self, df):
        """Report the most frequent category and the themes of each category.

        Only Complaint, Inquiry, Praise and Suggestion are detailed.
        """
        if df.empty:
            return {'summary': 'No text found for category analysis.', 'details': {}}
        insight_details = {}
        for category in ('Complaint', 'Inquiry', 'Praise', 'Suggestion'):
            category_df = df[df['category'] == category]
            if category_df.empty:
                continue
            insight_details[category] = {'themes': self._get_common_words(category_df['text'], 4)}
        # mode() is empty when every category value is missing; avoid indexing
        # into an empty Series.
        modes = df['category'].mode()
        top_category = modes.iloc[0] if not modes.empty else "N/A"
        return {
            'summary': f"The most frequent category is '{top_category}'. Complaints and Inquiries are key areas to watch.",
            'details': insight_details,
        }

    def _call_gpt_for_summary(self, prompt, max_tokens=150):
        """Send *prompt* to the chat model and return its reply as text.

        Never raises: a missing client or any API failure is reported in the
        returned string so the dashboard can display it.
        """
        if not self.client:
            return "OpenAI API key not configured. Cannot generate the AI recommendations."
        try:
            response = self.client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "You are a sharp, concise banking strategy analyst. Your goal is to provide actionable advice based on customer feedback."},
                    {"role": "user", "content": prompt},
                ],
                temperature=0.5,
                max_tokens=max_tokens,
                n=1,
                stop=None,
            )
            # The message content may be None; treat that as an empty reply.
            return (response.choices[0].message.content or "").strip()
        except Exception as e:
            # Deliberate best-effort boundary: surface the failure as text
            # rather than crashing the caller.
            return f"Error calling OpenAI API: {e}"

    def generate_ai_recommendations(self, df):
        """Ask the model for one recommendation per feedback category.

        Args:
            df: DataFrame with ``category`` and ``text`` columns.

        Returns:
            A dict mapping each of Complaint, Suggestion, Praise and Inquiry to
            the model's answer, or to a "no data" message when the category has
            no usable text. An empty dict is returned for an empty *df*.
        """
        if df.empty:
            return {}
        recommendations = {}
        for category, prompt_template in self._CATEGORY_PROMPTS.items():
            # Missing text cannot be joined into a prompt, so drop it before
            # taking the first 20 snippets.
            texts = df.loc[df['category'] == category, 'text'].dropna().astype(str).head(20)
            if texts.empty:
                recommendations[category] = "No data found for this new category."
                continue
            # Bullet every snippet, including the first one.
            snippets = "\n".join(f"- {text}" for text in texts)
            recommendations[category] = self._call_gpt_for_summary(prompt_template.format(snippets))
        return recommendations