Spaces:
Sleeping
Sleeping
"""
Stage 4: Batch Analysis & Aggregation
- Aggregate insights across all processed reviews
- Identify patterns, trends, critical issues
- Generate actionable recommendations
"""
import json
import math
from collections import Counter
from typing import Dict, Any, List
class Stage4BatchAnalysis:
    """
    Stage 4: Batch-level intelligence and recommendations.

    Takes review dicts that carry ``stage1_*``, ``stage2_*`` and ``stage3_*``
    annotation keys and rolls them up into a single insights dict: count
    distributions, a list of critical issues, a list of quick wins, a
    churn-risk percentage and human-readable recommendations.
    """

    def __init__(self):
        print(" 📊 Stage 4: Batch Analysis initialized")

    def analyze_batch(self, reviews: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Analyze a batch of processed reviews.

        Args:
            reviews: Review dicts. Missing annotation keys fall back to
                defaults: 'NEUTRAL' for the final sentiment and 'unknown' for
                priority, department, emotion and review type.

        Returns:
            The batch insights dict. An empty batch returns the same set of
            keys with zeroed / empty values (see ``_empty_insights``).
        """
        if not reviews:
            print(" ⚠️ No reviews to analyze")
            return self._empty_insights()

        total = len(reviews)
        print(f"\n 📊 Analyzing batch of {total} reviews...")

        # Sentiment distribution
        sentiment_counts = self._count_field(
            reviews, 'stage3_final_sentiment', 'NEUTRAL'
        )
        print(f" 📈 Sentiment: "
              f"POS={sentiment_counts.get('POSITIVE', 0)}, "
              f"NEU={sentiment_counts.get('NEUTRAL', 0)}, "
              f"NEG={sentiment_counts.get('NEGATIVE', 0)}")

        # Priority distribution
        priority_counts = self._count_field(
            reviews, 'stage1_llm1_priority', 'unknown'
        )
        print(f" 🎯 Priority: "
              f"Critical={priority_counts.get('critical', 0)}, "
              f"High={priority_counts.get('high', 0)}, "
              f"Medium={priority_counts.get('medium', 0)}, "
              f"Low={priority_counts.get('low', 0)}")

        # Department routing
        dept_counts = self._count_field(
            reviews, 'stage1_llm1_department', 'unknown'
        )
        print(f" 🏢 Departments: "
              f"Eng={dept_counts.get('engineering', 0)}, "
              f"UX={dept_counts.get('ux', 0)}, "
              f"Support={dept_counts.get('support', 0)}, "
              f"Business={dept_counts.get('business', 0)}")

        # Emotion and review-type distributions (reported, not printed)
        emotion_counts = self._count_field(
            reviews, 'stage1_llm2_emotion', 'unknown'
        )
        type_counts = self._count_field(reviews, 'stage1_llm1_type', 'unknown')

        critical_issues = self._identify_critical_issues(reviews)
        print(f" 🚨 Critical Issues: {len(critical_issues)}")

        quick_wins = self._identify_quick_wins(reviews)
        print(f" ⚡ Quick Wins: {len(quick_wins)}")

        churn_risk = self._calculate_churn_risk(reviews)
        print(f" ⚠️ Churn Risk: {churn_risk:.1f}%")

        # Share of reviews whose 'stage2_agreement' flag is truthy.
        # total is non-zero here because the empty batch returned early.
        agreement_count = sum(
            1 for review in reviews if review.get('stage2_agreement', False)
        )
        agreement_rate = agreement_count / total * 100
        print(f" 🤝 Model Agreement: {agreement_rate:.1f}%")

        recommendations = self._generate_recommendations(
            sentiment_counts, priority_counts, dept_counts,
            critical_issues, quick_wins, churn_risk
        )

        return {
            'total_reviews': total,
            # Sentiment
            'sentiment_positive': sentiment_counts.get('POSITIVE', 0),
            'sentiment_neutral': sentiment_counts.get('NEUTRAL', 0),
            'sentiment_negative': sentiment_counts.get('NEGATIVE', 0),
            'sentiment_distribution': dict(sentiment_counts),
            # Priority
            'priority_critical': priority_counts.get('critical', 0),
            'priority_high': priority_counts.get('high', 0),
            'priority_medium': priority_counts.get('medium', 0),
            'priority_low': priority_counts.get('low', 0),
            'priority_distribution': dict(priority_counts),
            # Department
            'dept_engineering': dept_counts.get('engineering', 0),
            'dept_ux': dept_counts.get('ux', 0),
            'dept_support': dept_counts.get('support', 0),
            'dept_business': dept_counts.get('business', 0),
            'department_distribution': dict(dept_counts),
            # Additional insights
            'emotion_distribution': dict(emotion_counts),
            'type_distribution': dict(type_counts),
            'model_agreement_rate': agreement_rate,
            'churn_risk': churn_risk,
            # Actionable lists
            'critical_issues': critical_issues,
            'quick_wins': quick_wins,
            'recommendations': recommendations,
        }

    @staticmethod
    def _count_field(reviews: List[Dict[str, Any]], key: str,
                     default: str) -> Counter:
        """Count the values stored under ``key``, using ``default`` when absent."""
        return Counter(review.get(key, default) for review in reviews)

    @staticmethod
    def _coerce_rating(value: Any, default: float) -> float:
        """
        Return ``value`` as a finite number, or ``default`` when unusable.

        ``None``, non-numeric values, NaN and infinities all map to
        ``default`` so that sorting and threshold comparisons never raise.
        """
        try:
            number = float(value)
        except (TypeError, ValueError, OverflowError):
            return default
        return number if math.isfinite(number) else default

    def _identify_critical_issues(self, reviews: List[Dict]) -> List[Dict]:
        """
        Identify critical issues requiring immediate attention.

        A review qualifies when its priority is 'critical', or when its final
        sentiment is 'NEGATIVE' and it is flagged for human review.

        Returns:
            At most 10 issue dicts, ordered by rating ascending. The reported
            'rating' is the raw value from the review (0 when the key is
            absent); a present-but-unusable rating sorts as 0.
        """
        critical = []
        for review in reviews:
            priority = review.get('stage1_llm1_priority', '')
            sentiment = review.get('stage3_final_sentiment', '')
            needs_review = review.get('stage3_needs_human_review', False)
            if priority == 'critical' or (sentiment == 'NEGATIVE' and needs_review):
                critical.append({
                    'review_id': review.get('review_id', 'unknown'),
                    'type': review.get('stage1_llm1_type', 'unknown'),
                    'department': review.get('stage1_llm1_department', 'unknown'),
                    'reasoning': review.get('stage3_reasoning', ''),
                    'action': review.get('stage3_action_recommendation', ''),
                    'rating': review.get('rating', 0),
                })

        # Sort on a coerced key: a None rating would otherwise make the
        # comparison raise TypeError. The sort is stable, so ties keep
        # their input order.
        critical.sort(key=lambda issue: self._coerce_rating(issue['rating'], 0))
        return critical[:10]  # Top 10 critical issues

    def _identify_quick_wins(self, reviews: List[Dict]) -> List[Dict]:
        """
        Identify easy-to-fix issues for quick wins.

        A quick win is a review of type 'suggestion' whose priority is 'low'
        or 'medium'.

        Returns:
            At most 10 dicts in input order; 'suggestion' holds the first 100
            characters of the review text ('' when the text is missing).
        """
        quick_wins = []
        for review in reviews:
            review_type = review.get('stage1_llm1_type', '')
            priority = review.get('stage1_llm1_priority', '')
            if review_type == 'suggestion' and priority in ('low', 'medium'):
                quick_wins.append({
                    'review_id': review.get('review_id', 'unknown'),
                    # `or ''` guards against an explicit None review_text,
                    # which cannot be sliced.
                    'suggestion': str(review.get('review_text') or '')[:100],
                    'department': review.get('stage1_llm1_department', 'unknown'),
                    'action': review.get('stage3_action_recommendation', ''),
                    'rating': review.get('rating', 0),
                })
        return quick_wins[:10]  # Top 10 quick wins

    def _calculate_churn_risk(self, reviews: List[Dict]) -> float:
        """
        Calculate overall churn risk percentage.

        Each review scores 2 points when its user type is 'churning_user';
        otherwise 1 point when it is NEGATIVE with rating <= 2, or when its
        rating is exactly 1. The score is divided by the maximum possible
        (2 points per review) and expressed as a percentage.

        Returns:
            A value in [0.0, 100.0]; 0.0 for an empty batch.
        """
        if not reviews:
            return 0.0

        churn_indicators = 0
        for review in reviews:
            user_type = review.get('stage1_llm2_user_type', '')
            sentiment = review.get('stage3_final_sentiment', '')
            # Missing or unusable ratings count as a neutral 3.
            rating = self._coerce_rating(review.get('rating'), 3)

            if user_type == 'churning_user':
                churn_indicators += 2
            elif sentiment == 'NEGATIVE' and rating <= 2:
                churn_indicators += 1
            elif rating == 1:
                churn_indicators += 1

        max_possible = len(reviews) * 2
        churn_risk = churn_indicators / max_possible * 100
        return min(churn_risk, 100.0)

    def _generate_recommendations(self, sentiment_counts, priority_counts,
                                  dept_counts, critical_issues, quick_wins,
                                  churn_risk) -> List[str]:
        """
        Generate actionable recommendations.

        Args:
            sentiment_counts: Counts keyed by sentiment label.
            priority_counts: Counts keyed by priority label.
            dept_counts: Counts keyed by department name.
            critical_issues: Accepted for interface compatibility; not read.
            quick_wins: Quick-win dicts; only their number is reported.
            churn_risk: Churn risk percentage.

        Returns:
            Recommendation strings in a fixed order: sentiment, priority,
            department, churn, quick wins. Sections whose trigger is not met
            are omitted.
        """
        recommendations = []

        # Sentiment-based: above 40% negative is HIGH, above 25% is MEDIUM.
        total = sum(sentiment_counts.values())
        if total > 0:
            neg_pct = sentiment_counts.get('NEGATIVE', 0) / total * 100
            if neg_pct > 40:
                recommendations.append(
                    f"🚨 HIGH: {neg_pct:.0f}% negative sentiment. Immediate investigation needed."
                )
            elif neg_pct > 25:
                recommendations.append(
                    f"⚠️ MEDIUM: {neg_pct:.0f}% negative sentiment. Monitor closely."
                )

        # Priority-based
        critical_count = priority_counts.get('critical', 0)
        if critical_count > 0:
            recommendations.append(
                f"🔥 URGENT: {critical_count} critical issues require immediate attention."
            )

        # Department-based: on a tie, the department seen first wins.
        if dept_counts:
            top_dept = max(dept_counts, key=dept_counts.get)
            top_count = dept_counts[top_dept]
            recommendations.append(
                f"🎯 FOCUS: {top_count} issues routed to {top_dept} department."
            )

        # Churn risk
        if churn_risk > 30:
            recommendations.append(
                f"⚠️ CHURN: {churn_risk:.0f}% churn risk detected. Implement retention strategy."
            )

        # Quick wins
        if quick_wins:
            recommendations.append(
                f"⚡ OPPORTUNITY: {len(quick_wins)} quick wins available for easy improvements."
            )

        return recommendations

    def _empty_insights(self) -> Dict[str, Any]:
        """
        Return the insights structure for an empty batch.

        Carries the same keys as the dict built by ``analyze_batch`` for a
        non-empty batch, so callers can read any field without first checking
        whether the batch was empty.
        """
        return {
            'total_reviews': 0,
            # Sentiment
            'sentiment_positive': 0,
            'sentiment_neutral': 0,
            'sentiment_negative': 0,
            'sentiment_distribution': {},
            # Priority
            'priority_critical': 0,
            'priority_high': 0,
            'priority_medium': 0,
            'priority_low': 0,
            'priority_distribution': {},
            # Department
            'dept_engineering': 0,
            'dept_ux': 0,
            'dept_support': 0,
            'dept_business': 0,
            'department_distribution': {},
            # Additional insights
            'emotion_distribution': {},
            'type_distribution': {},
            'model_agreement_rate': 0.0,
            'churn_risk': 0.0,
            # Actionable lists
            'critical_issues': [],
            'quick_wins': [],
            'recommendations': [],
        }
if __name__ == "__main__":
    # Manual smoke test: run Stage 4 on two hand-written reviews and dump
    # the resulting insights as JSON.
    banner = "=" * 60
    print("\n" + banner)
    print("🧪 TESTING STAGE 4 BATCH ANALYSIS")
    print(banner)

    # A critical, negative bug report flagged for human review.
    crash_report = {
        'review_id': '001',
        'review_text': 'App crashes!',
        'rating': 1,
        'stage1_llm1_type': 'bug_report',
        'stage1_llm1_department': 'engineering',
        'stage1_llm1_priority': 'critical',
        'stage1_llm2_user_type': 'power_user',
        'stage1_llm2_emotion': 'frustration',
        'stage2_agreement': True,
        'stage3_final_sentiment': 'NEGATIVE',
        'stage3_needs_human_review': True,
        'stage3_reasoning': 'Critical bug',
        'stage3_action_recommendation': 'Fix immediately',
    }

    # A low-priority, positive praise review with no stage 3 reasoning/action.
    praise = {
        'review_id': '002',
        'review_text': 'Great app!',
        'rating': 5,
        'stage1_llm1_type': 'praise',
        'stage1_llm1_department': 'ux',
        'stage1_llm1_priority': 'low',
        'stage1_llm2_user_type': 'regular_user',
        'stage1_llm2_emotion': 'joy',
        'stage2_agreement': True,
        'stage3_final_sentiment': 'POSITIVE',
        'stage3_needs_human_review': False,
    }

    sample_reviews = [crash_report, praise]

    stage4 = Stage4BatchAnalysis()
    insights = stage4.analyze_batch(sample_reviews)

    print("\n📊 BATCH INSIGHTS:")
    rendered = json.dumps(insights, indent=2)
    print(rendered)
    print("\n✅ Stage 4 test complete!")