import json
import logging
from typing import Any, Dict, List, Optional

import numpy as np
import pandas as pd

# Fix import paths
try:
    from app.utils.logging_utils import setup_logger
except ImportError:
    try:
        # Try the package-qualified path for running from the project root
        from behavior_backend.app.utils.logging_utils import setup_logger
    except ImportError:
        # Nothing in this module calls setup_logger, so a missing project
        # logging helper should not make these data helpers unimportable.
        setup_logger = None

# Configure logging
logger = logging.getLogger(__name__)

# Emotion categories used for the grouped percentages.
_POSITIVE_EMOTIONS = ('happy', 'surprise')
_NEGATIVE_EMOTIONS = ('angry', 'disgust', 'fear', 'sad')
_NEUTRAL_EMOTIONS = ('neutral',)
_ALL_EMOTIONS = _POSITIVE_EMOTIONS + _NEGATIVE_EMOTIONS + _NEUTRAL_EMOTIONS


def _make_row(
    backend_name: str,
    frame_index: Any,
    face_index: int,
    dominant_emotion: Any,
    emotion_confidence: Any,
    emotion_stable: Any,
    emotion_scores: Any,
    face_box: Any,
) -> Dict[str, Any]:
    """Build one DataFrame row; single place that defines the column set."""
    return {
        'backend': backend_name,
        'frame_index': frame_index,
        'face_index': face_index,
        'dominant_emotion': dominant_emotion,
        'emotion_confidence': emotion_confidence,
        'emotion_stable': emotion_stable,
        'emotion_scores': emotion_scores,
        'face_box': face_box,
    }


def _row_without_faces(
    backend_name: str, frame_index: Any, result: Dict[str, Any]
) -> Dict[str, Any]:
    """Build the single row for a result whose 'faces' list is missing/empty.

    Falls back to 'main_face', then 'main_emotion', then an 'unknown' row.
    """
    main_face = result.get('main_face')
    if main_face:
        return _make_row(
            backend_name,
            frame_index,
            0,
            main_face.get('dominant_emotion', 'unknown'),
            main_face.get('emotion_confidence', 0),
            main_face.get('emotion_stable', False),
            main_face.get('emotion', {}),
            main_face.get('face_box'),
        )

    main_emotion = result.get('main_emotion')
    if main_emotion:
        dominant_emotion = main_emotion.get('emotion', 'unknown')
        emotion_confidence = main_emotion.get('confidence', 0)
        # Only the dominant emotion is known here, so the scores dict
        # contains just that one entry (or nothing when it is 'unknown').
        if dominant_emotion != 'unknown':
            emotion_scores = {dominant_emotion: emotion_confidence}
        else:
            emotion_scores = {}
        return _make_row(
            backend_name,
            frame_index,
            0,
            dominant_emotion,
            emotion_confidence,
            main_emotion.get('stable', False),
            emotion_scores,
            None,
        )

    return _make_row(backend_name, frame_index, 0, 'unknown', 0, False, {}, None)


def _row_from_face(
    backend_name: str, frame_index: Any, face_index: int, face_data: Dict[str, Any]
) -> Dict[str, Any]:
    """Build the row for one entry of a result's 'faces' list."""
    emotion_scores = face_data.get('emotion', {})
    dominant_emotion = face_data.get('dominant_emotion', 'unknown')
    emotion_confidence = face_data.get('emotion_confidence', 0)

    # If the dominant emotion was not provided, derive it (and its
    # confidence) from the highest score; ties keep the first entry.
    if dominant_emotion == 'unknown' and emotion_scores:
        dominant_emotion, emotion_confidence = max(
            emotion_scores.items(), key=lambda item: item[1]
        )

    return _make_row(
        backend_name,
        frame_index,
        face_index,
        dominant_emotion,
        emotion_confidence,
        face_data.get('emotion_stable', False),
        emotion_scores,
        face_data.get('face_box'),
    )


def json_to_dataframe(data: Dict[str, List[Dict[str, Any]]]) -> pd.DataFrame:
    """
    Convert JSON emotion data to a pandas DataFrame.

    Each result contributes one row per entry in its 'faces' list. A result
    without faces contributes exactly one row, taken from 'main_face', then
    'main_emotion', and otherwise filled with 'unknown' defaults. Falsy
    results (None, {}) are skipped.

    Args:
        data: Dictionary with backend name as key and list of results as value.
            A None value (or None for a backend's list) is treated as empty.

    Returns:
        DataFrame with columns backend, frame_index, face_index,
        dominant_emotion, emotion_confidence, emotion_stable, emotion_scores
        and face_box; an empty DataFrame when there is nothing to convert.
    """
    all_rows = []

    for backend_name, results in (data or {}).items():
        for result in results or []:
            if not result:
                continue

            frame_index = result.get('frame_index', 0)
            faces = result.get('faces')

            # Handle case where no faces were detected
            if not faces:
                all_rows.append(_row_without_faces(backend_name, frame_index, result))
                continue

            # Process each face in the frame
            all_rows.extend(
                _row_from_face(backend_name, frame_index, face_index, face_data)
                for face_index, face_data in enumerate(faces)
            )

    if not all_rows:
        logger.warning("No data to convert to DataFrame")
        return pd.DataFrame()

    return pd.DataFrame(all_rows)


def _empty_emotion_percentages() -> Dict[str, Any]:
    """Return the full result schema with every value set to 0."""
    percentages: Dict[str, Any] = {emotion: 0 for emotion in _ALL_EMOTIONS}
    percentages.update({
        "positive": 0,
        "negative": 0,
        "neutral_group": 0,
        "average_confidence": 0,
        "confidence_by_emotion": {emotion: 0 for emotion in _ALL_EMOTIONS},
    })
    return percentages


def _rounded_mean(values: pd.Series) -> float:
    """Return the mean of *values* rounded to 2 places, or 0 if undefined.

    A mean of NaN (no rows, or only missing values) is mapped to 0 because
    NaN cannot be represented in strict JSON.
    """
    mean = values.mean()
    if pd.isna(mean):
        return 0
    return round(float(mean), 2)


def calculate_emotion_percentages(df: pd.DataFrame) -> Dict[str, Any]:
    """
    Calculate percentages of different emotion categories.

    Percentages are relative to the number of rows in ``df``. Rows whose
    dominant emotion is not one of the seven known emotions (for example
    'unknown') count towards the total but towards no emotion.

    Args:
        df: DataFrame with emotion data; must contain 'dominant_emotion'
            and may contain 'emotion_confidence'.

    Returns:
        Dictionary with one percentage per emotion, the grouped
        'positive' / 'negative' / 'neutral_group' percentages and, when the
        'emotion_confidence' column exists, 'average_confidence' and
        'confidence_by_emotion'. For an empty (or None) DataFrame every key
        is present with value 0.
    """
    if df is None or df.empty:
        return _empty_emotion_percentages()

    total_rows = len(df)
    observed = df['dominant_emotion'].value_counts()
    emotion_counts = {
        emotion: int(observed.get(emotion, 0)) for emotion in _ALL_EMOTIONS
    }

    def percent(count: int) -> float:
        return round((count / total_rows) * 100, 2)

    # Individual emotion percentages
    emotion_percentages: Dict[str, Any] = {
        emotion: percent(count) for emotion, count in emotion_counts.items()
    }

    # Grouped percentages
    emotion_percentages.update({
        "positive": percent(sum(emotion_counts[e] for e in _POSITIVE_EMOTIONS)),
        "negative": percent(sum(emotion_counts[e] for e in _NEGATIVE_EMOTIONS)),
        "neutral_group": percent(sum(emotion_counts[e] for e in _NEUTRAL_EMOTIONS)),
    })

    # Confidence values, if available
    if 'emotion_confidence' in df.columns:
        # Coerce so that None / non-numeric entries become NaN and are
        # ignored by the mean instead of breaking it.
        confidence = pd.to_numeric(df['emotion_confidence'], errors='coerce')
        emotion_percentages["average_confidence"] = _rounded_mean(confidence)
        emotion_percentages["confidence_by_emotion"] = {
            emotion: _rounded_mean(confidence[df['dominant_emotion'] == emotion])
            for emotion in _ALL_EMOTIONS
        }

    return emotion_percentages


def format_results_for_api(
    emotion_df: Optional[pd.DataFrame],
    transcript: str,
    analysis: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Format results for API response.

    Args:
        emotion_df: DataFrame with emotion data (may be None or empty)
        transcript: Transcript text
        analysis: Analysis data

    Returns:
        Dictionary with 'transcript', 'emotion_percentages' and 'analysis'.
        'emotion_percentages' always carries 'average_confidence' and
        'confidence_by_emotion', defaulting to zeros when they could not be
        computed.
    """
    # Start from the all-zero schema so that keys the calculation did not
    # produce (e.g. no confidence column) are still present in the response.
    emotion_percentages = _empty_emotion_percentages()
    if emotion_df is not None and not emotion_df.empty:
        emotion_percentages.update(calculate_emotion_percentages(emotion_df))

    return {
        "transcript": transcript,
        "emotion_percentages": emotion_percentages,
        "analysis": analysis
    }