"""
Metrics calculation for sentiment analysis dashboard.

Provides key performance indicators and statistical metrics.
"""
from typing import Dict, List, Tuple

import numpy as np
import pandas as pd


class SentimentMetrics:
    """
    Calculates various metrics for sentiment analysis.

    All methods are static; the class is only a namespace. The dataframes
    passed in are read, never modified.
    """

    # Numeric weight of each polarity label. Labels that are not listed here
    # map to NaN and are therefore ignored when a mean score is computed.
    SENTIMENT_WEIGHTS = {
        'very_negative': -2,
        'negative': -1,
        'neutral': 0,
        'positive': 1,
        'very_positive': 2
    }
    NEGATIVE_SENTIMENTS = ['negative', 'very_negative']
    POSITIVE_SENTIMENTS = ['positive', 'very_positive']

    @staticmethod
    def _mean_sentiment_score(polarity: pd.Series) -> float:
        """
        Average the numeric weights of a polarity column.

        Args:
            polarity: Series of sentiment polarity labels

        Returns:
            float: Mean weight, or NaN when the series is empty or contains
                no label listed in SENTIMENT_WEIGHTS
        """
        if polarity.empty:
            return float('nan')
        return polarity.map(SentimentMetrics.SENTIMENT_WEIGHTS).mean()

    @staticmethod
    def _metrics_by_group(df: pd.DataFrame, column: str) -> Dict[object, Dict[str, object]]:
        """
        Compute the overall metrics separately for every value of a column.

        Args:
            df: Sentiment dataframe
            column: Name of the column to split on

        Returns:
            dict: Column value -> overall metrics of the rows with that value
        """
        # Missing values are dropped: `df[column] == NaN` never matches a row,
        # so they could only yield a meaningless entry for an empty frame.
        return {
            value: SentimentMetrics.calculate_overall_metrics(df[df[column] == value])
            for value in df[column].dropna().unique()
        }

    @staticmethod
    def calculate_overall_metrics(df: pd.DataFrame) -> Dict[str, object]:
        """
        Calculate overall summary metrics

        Args:
            df: Sentiment dataframe. Must have a 'sentiment_polarity' column;
                'requires_reply' is optional and counts as 0 when absent.

        Returns:
            dict: Overall metrics with the keys 'total_comments',
                'total_reply_required', 'reply_required_pct',
                'avg_sentiment_score', 'negative_pct', 'positive_pct' and
                'sentiment_distribution'. Percentages and the average score
                are 0 when they cannot be computed.
        """
        total_comments = len(df)
        polarity = df['sentiment_polarity']

        total_reply_required = df['requires_reply'].sum() if 'requires_reply' in df.columns else 0

        # Share of each label in percent
        sentiment_dist = polarity.value_counts(normalize=True) * 100

        # Fall back to 0 like every other metric here instead of leaking NaN
        avg_sentiment_score = SentimentMetrics._mean_sentiment_score(polarity)
        if pd.isna(avg_sentiment_score):
            avg_sentiment_score = 0.0

        if total_comments > 0:
            negative_count = polarity.isin(SentimentMetrics.NEGATIVE_SENTIMENTS).sum()
            positive_count = polarity.isin(SentimentMetrics.POSITIVE_SENTIMENTS).sum()
            negative_pct = negative_count / total_comments * 100
            positive_pct = positive_count / total_comments * 100
            reply_required_pct = total_reply_required / total_comments * 100
        else:
            negative_pct = 0
            positive_pct = 0
            reply_required_pct = 0

        return {
            'total_comments': total_comments,
            'total_reply_required': int(total_reply_required),
            'reply_required_pct': reply_required_pct,
            'avg_sentiment_score': avg_sentiment_score,
            'negative_pct': negative_pct,
            'positive_pct': positive_pct,
            'sentiment_distribution': sentiment_dist.to_dict()
        }

    @staticmethod
    def calculate_brand_metrics(df: pd.DataFrame) -> Dict[object, Dict[str, object]]:
        """
        Calculate metrics by brand

        Args:
            df: Sentiment dataframe with a 'brand' column

        Returns:
            dict: Metrics by brand (rows without a brand are skipped)
        """
        return SentimentMetrics._metrics_by_group(df, 'brand')

    @staticmethod
    def calculate_platform_metrics(df: pd.DataFrame) -> Dict[object, Dict[str, object]]:
        """
        Calculate metrics by platform

        Args:
            df: Sentiment dataframe with a 'platform' column

        Returns:
            dict: Metrics by platform (rows without a platform are skipped)
        """
        return SentimentMetrics._metrics_by_group(df, 'platform')

    @staticmethod
    def calculate_content_engagement_score(content_df: pd.DataFrame) -> float:
        """
        Calculate engagement score for a content piece

        The score is the sum of four parts:
            1. Number of comments (max 30 points, reached at 100 comments)
            2. Sentiment positivity (max 40 points)
            3. Intent diversity (max 20 points, reached at 8 distinct intents)
            4. Reply requirement rate (max 10 points)

        Args:
            content_df: DataFrame for a single content. Must have a
                'sentiment_polarity' column; 'intent' and 'requires_reply'
                are optional and contribute 0 points when absent.

        Returns:
            float: Engagement score (0-100), rounded to 2 decimals;
                0 for an empty dataframe
        """
        comment_count = len(content_df)
        if comment_count == 0:
            return 0

        comment_score = min(comment_count / 100 * 30, 30)

        # Map the average weight from [-2, 2] onto [0, 40]. When no label is
        # recognised the mean is NaN; treat that as neutral so the total
        # score stays a number.
        avg_sentiment = SentimentMetrics._mean_sentiment_score(content_df['sentiment_polarity'])
        if pd.isna(avg_sentiment):
            avg_sentiment = 0.0
        sentiment_score = ((avg_sentiment + 2) / 4) * 40

        # 'intent' holds comma separated labels; count the distinct ones.
        if 'intent' in content_df.columns:
            intents = (
                content_df['intent']
                .dropna()
                .astype(str)
                .str.split(',')
                .explode()
                .str.strip()
            )
            # A trailing or doubled comma yields '' which is not an intent
            unique_intents = intents[intents != ''].nunique()
        else:
            unique_intents = 0
        intent_score = min(unique_intents / 8 * 20, 20)

        if 'requires_reply' in content_df.columns:
            reply_rate = content_df['requires_reply'].sum() / comment_count
        else:
            reply_rate = 0
        interaction_score = reply_rate * 10

        total_score = comment_score + sentiment_score + intent_score + interaction_score
        return round(total_score, 2)

    @staticmethod
    def get_sentiment_health_status(negative_pct: float) -> Tuple[str, str]:
        """
        Determine health status based on negative sentiment percentage

        Args:
            negative_pct: Percentage of negative sentiments

        Returns:
            tuple: (status, color)
        """
        # (exclusive upper bound, status, color), checked in ascending order
        bands = (
            (10, "Excellent", "green"),
            (20, "Good", "lightgreen"),
            (30, "Fair", "orange"),
            (50, "Poor", "darkorange"),
        )
        for upper_bound, status, color in bands:
            if negative_pct < upper_bound:
                return (status, color)
        return ("Critical", "red")

    @staticmethod
    def calculate_intent_priority_score(intent_counts: Dict[str, int]) -> Dict[str, int]:
        """
        Calculate priority score for different intents

        Args:
            intent_counts: Dictionary of intent counts

        Returns:
            dict: Priority scores for each intent (count * weight); intents
                without a configured weight use weight 1
        """
        # Priority weights (higher = more urgent)
        priority_weights = {
            'feedback_negative': 5,
            'request': 4,
            'question': 4,
            'suggestion': 3,
            'praise': 2,
            'humor_sarcasm': 1,
            'off_topic': 1,
            'spam_selfpromo': 0
        }
        return {
            intent: count * priority_weights.get(intent, 1)
            for intent, count in intent_counts.items()
        }

    @staticmethod
    def calculate_response_urgency(df: pd.DataFrame) -> Dict[str, int]:
        """
        Calculate response urgency metrics

        Only rows whose 'requires_reply' equals True are considered:
            - urgent: negative or very negative sentiment
            - high priority: neutral sentiment whose intent mentions
              'feedback_negative' or 'request'
            - medium priority: positive sentiment
            - low priority: very positive sentiment
        Neutral rows with any other intent are not counted in any bucket.

        Args:
            df: Sentiment dataframe

        Returns:
            dict: Urgency metrics with the keys 'urgent_count',
                'high_priority_count', 'medium_priority_count' and
                'low_priority_count'
        """
        no_replies = {
            'urgent_count': 0,
            'high_priority_count': 0,
            'medium_priority_count': 0,
            'low_priority_count': 0
        }
        # Same tolerance as calculate_overall_metrics: no column, no replies
        if 'requires_reply' not in df.columns:
            return no_replies

        # `== True` (not plain truthiness) so missing values count as False
        pending = df[df['requires_reply'] == True]  # noqa: E712
        if len(pending) == 0:
            return no_replies

        polarity = pending['sentiment_polarity']
        is_urgent = polarity.isin(['very_negative', 'negative'])
        is_high = (polarity == 'neutral') & (
            pending['intent'].str.contains('feedback_negative|request', na=False)
        )
        is_medium = polarity == 'positive'
        is_low = polarity == 'very_positive'

        return {
            'urgent_count': int(is_urgent.sum()),
            'high_priority_count': int(is_high.sum()),
            'medium_priority_count': int(is_medium.sum()),
            'low_priority_count': int(is_low.sum())
        }

    @staticmethod
    def _rows_in_period(df: pd.DataFrame, period) -> pd.DataFrame:
        """
        Select the rows whose 'comment_timestamp' lies inside a period.

        Args:
            df: Sentiment dataframe with a 'comment_timestamp' column
            period: Tuple of (start_date, end_date), both bounds inclusive

        Returns:
            DataFrame: The matching rows
        """
        start = pd.Timestamp(period[0])
        end = pd.Timestamp(period[1])
        stamps = df['comment_timestamp']
        return df[(stamps >= start) & (stamps <= end)]

    @staticmethod
    def calculate_trend_indicator(df, current_period, previous_period, metric='sentiment_score'):
        """
        Calculate trend indicator comparing two periods

        Args:
            df: Sentiment dataframe
            current_period: Tuple of (start_date, end_date) for current period
            previous_period: Tuple of (start_date, end_date) for previous period
            metric: Metric to compare. 'sentiment_score' compares the mean
                sentiment weight; any other value compares the comment count.

        Returns:
            dict: Trend information. 'trend' is 'stable' when the change is
                below 5 percent in either direction, otherwise 'improving' /
                'declining' for the sentiment score and 'increasing' /
                'decreasing' for the count. 'change' is the percentage
                change relative to the previous period (0 when the previous
                value is 0). When the comparison cannot be made (no
                'comment_timestamp' column, an empty period, or no scorable
                sentiment label) only {'trend': 'stable', 'change': 0} is
                returned, without 'current_value' / 'previous_value'.
        """
        if 'comment_timestamp' not in df.columns:
            return {'trend': 'stable', 'change': 0}

        current_df = SentimentMetrics._rows_in_period(df, current_period)
        previous_df = SentimentMetrics._rows_in_period(df, previous_period)
        if len(current_df) == 0 or len(previous_df) == 0:
            return {'trend': 'stable', 'change': 0}

        is_sentiment = metric == 'sentiment_score'
        if is_sentiment:
            current_value = SentimentMetrics._mean_sentiment_score(current_df['sentiment_polarity'])
            previous_value = SentimentMetrics._mean_sentiment_score(previous_df['sentiment_polarity'])
            # Without a usable score on both sides there is nothing to compare
            if pd.isna(current_value) or pd.isna(previous_value):
                return {'trend': 'stable', 'change': 0}
        else:
            current_value = len(current_df)
            previous_value = len(previous_df)

        # Divide by the magnitude of the baseline: sentiment scores can be
        # negative, and dividing by a negative value would flip the sign of
        # the change (an improvement from -1.0 to -0.5 must be +50, not -50).
        if previous_value != 0:
            change = (current_value - previous_value) / abs(previous_value) * 100
        else:
            change = 0

        if abs(change) < 5:
            trend = 'stable'
        elif change > 0:
            trend = 'improving' if is_sentiment else 'increasing'
        else:
            trend = 'declining' if is_sentiment else 'decreasing'

        return {
            'trend': trend,
            'change': round(change, 2),
            'current_value': round(current_value, 2),
            'previous_value': round(previous_value, 2)
        }