# Spaces: urjob/test (Sleeping)
# File size: 10,078 Bytes
# 8ae78b0
import json
import pandas as pd
import numpy as np
import logging
from typing import Dict, Any, List, Optional

# Fix import paths
try:
    from app.utils.logging_utils import setup_logger
except ImportError:
    # Try relative imports for running from project root
    from behavior_backend.app.utils.logging_utils import setup_logger

# Module-level logger named after this module. Note that this only obtains the
# logger; `setup_logger` (imported above) is not called anywhere in the code
# shown here, so handlers/levels come from whatever the application configures.
logger = logging.getLogger(__name__)

def _make_emotion_row(backend_name, frame_index, face_index, dominant_emotion,
                      emotion_confidence, emotion_stable, emotion_scores, face_box):
    """Assemble one DataFrame row; keeps the column set identical on every path."""
    return {
        'backend': backend_name,
        'frame_index': frame_index,
        'face_index': face_index,
        'dominant_emotion': dominant_emotion,
        'emotion_confidence': emotion_confidence,
        'emotion_stable': emotion_stable,
        'emotion_scores': emotion_scores,
        'face_box': face_box
    }


def _row_from_face(backend_name, frame_index, face_index, face_data):
    """Build a row from a per-face dict (an entry of ``faces`` or ``main_face``)."""
    emotion_scores = face_data.get('emotion', {})
    dominant_emotion = face_data.get('dominant_emotion', 'unknown')
    emotion_confidence = face_data.get('emotion_confidence', 0)

    # When the backend did not name a dominant emotion, derive it from the raw
    # scores. max() keeps the first entry on ties and, unlike a fixed "-1"
    # starting value, also works when every score is <= -1.
    if dominant_emotion == 'unknown' and emotion_scores:
        dominant_emotion, emotion_confidence = max(
            emotion_scores.items(), key=lambda item: item[1]
        )

    return _make_emotion_row(
        backend_name,
        frame_index,
        face_index,
        dominant_emotion,
        emotion_confidence,
        face_data.get('emotion_stable', False),
        emotion_scores,
        face_data.get('face_box'),
    )


def json_to_dataframe(data: Dict[str, List[Dict[str, Any]]]) -> pd.DataFrame:
    """
    Convert JSON emotion data to a pandas DataFrame.

    One row is produced per detected face. A frame without a non-empty
    ``faces`` list still yields exactly one row (``face_index`` 0), taken from,
    in order of preference: ``main_face``, ``main_emotion``, or an
    ``'unknown'`` placeholder. Falsy results (``None`` / ``{}``) are skipped.

    Args:
        data: Dictionary with backend name as key and list of results as value.
            ``None`` (for the mapping or for a backend's list) is treated as
            empty.

    Returns:
        DataFrame with columns ``backend``, ``frame_index``, ``face_index``,
        ``dominant_emotion``, ``emotion_confidence``, ``emotion_stable``,
        ``emotion_scores`` and ``face_box``; an empty, column-less DataFrame
        when no rows were produced.
    """
    all_rows = []

    for backend_name, results in (data or {}).items():
        for result in results or []:
            if not result:
                continue

            frame_index = result.get('frame_index', 0)

            # Normal case: one row per detected face.
            faces = result.get('faces')
            if faces:
                all_rows.extend(
                    _row_from_face(backend_name, frame_index, face_index, face_data)
                    for face_index, face_data in enumerate(faces)
                )
                continue

            # No faces listed: fall back to the summary fields, if any.
            main_face = result.get('main_face')
            main_emotion = result.get('main_emotion')

            if main_face:
                # Same treatment as a regular face entry, including deriving
                # the dominant emotion from the scores when it is missing.
                row = _row_from_face(backend_name, frame_index, 0, main_face)
            elif main_emotion:
                dominant_emotion = main_emotion.get('emotion', 'unknown')
                emotion_confidence = main_emotion.get('confidence', 0)
                # Only the dominant emotion is known here, so the score dict
                # holds just that single entry (or nothing if unknown).
                if dominant_emotion != 'unknown':
                    emotion_scores = {dominant_emotion: emotion_confidence}
                else:
                    emotion_scores = {}
                row = _make_emotion_row(
                    backend_name,
                    frame_index,
                    0,
                    dominant_emotion,
                    emotion_confidence,
                    main_emotion.get('stable', False),
                    emotion_scores,
                    None,
                )
            else:
                # Placeholder so the frame is still represented in the output.
                row = _make_emotion_row(
                    backend_name, frame_index, 0, 'unknown', 0, False, {}, None
                )

            all_rows.append(row)

    # Create DataFrame
    if not all_rows:
        logger.warning("No data to convert to DataFrame")
        return pd.DataFrame()

    return pd.DataFrame(all_rows)

def _rounded_mean_or_zero(values) -> float:
    """Return the mean of a pandas Series rounded to 2 places, or 0 if undefined."""
    if values.empty:
        return 0
    mean_value = values.mean()
    # An all-NaN selection has no mean; report 0 rather than NaN, which is not
    # valid JSON.
    if pd.isna(mean_value):
        return 0
    return round(float(mean_value), 2)


def calculate_emotion_percentages(df: pd.DataFrame) -> Dict[str, Any]:
    """
    Calculate percentages of different emotion categories.

    Percentages are relative to the number of rows in ``df``. Rows whose
    ``dominant_emotion`` is not one of the seven known emotions (for example
    ``'unknown'``) count toward the total but toward no category, so the
    percentages need not sum to 100.

    Args:
        df: DataFrame with a ``dominant_emotion`` column and, optionally, an
            ``emotion_confidence`` column. ``None`` is treated like an empty
            DataFrame.

    Returns:
        Dictionary with one percentage per emotion, the grouped ``positive``,
        ``negative`` and ``neutral_group`` percentages, and - when confidence
        data is available or the input is empty - ``average_confidence`` plus
        a ``confidence_by_emotion`` mapping. The empty-input result is all
        zeros and carries the same keys as a full result.
    """
    if df is None or df.empty:
        zero_emotions = ["angry", "disgust", "fear", "happy", "sad", "surprise", "neutral"]
        empty_result: Dict[str, Any] = {emotion: 0 for emotion in zero_emotions}
        empty_result.update({
            "positive": 0,
            "negative": 0,
            # The keys below used to be missing for empty input, giving the
            # result a different shape than the non-empty case.
            "neutral_group": 0,
            "average_confidence": 0,
            "confidence_by_emotion": {emotion: 0 for emotion in zero_emotions},
        })
        return empty_result

    # Define emotion categories
    positive_emotions = ['happy', 'surprise']
    negative_emotions = ['angry', 'disgust', 'fear', 'sad']
    neutral_emotions = ['neutral']
    all_emotions = positive_emotions + negative_emotions + neutral_emotions

    # df is non-empty here, so total_rows >= 1 and the divisions are safe.
    total_rows = len(df)

    def as_percentage(count: int) -> float:
        return round((count / total_rows) * 100, 2)

    # Count rows per known emotion; labels outside all_emotions are ignored.
    observed = df['dominant_emotion'].value_counts()
    emotion_counts = {emotion: int(observed.get(emotion, 0)) for emotion in all_emotions}

    # Individual emotion percentages
    emotion_percentages: Dict[str, Any] = {
        emotion: as_percentage(count) for emotion, count in emotion_counts.items()
    }

    # Grouped percentages
    emotion_percentages.update({
        "positive": as_percentage(sum(emotion_counts[e] for e in positive_emotions)),
        "negative": as_percentage(sum(emotion_counts[e] for e in negative_emotions)),
        "neutral_group": as_percentage(sum(emotion_counts[e] for e in neutral_emotions)),
    })

    # Confidence values, if available
    if 'emotion_confidence' in df.columns:
        confidence = df['emotion_confidence']
        confidence_by_emotion = {
            emotion: _rounded_mean_or_zero(confidence[df['dominant_emotion'] == emotion])
            for emotion in all_emotions
        }

        # Average over every row, including rows with unrecognised emotions.
        emotion_percentages["average_confidence"] = _rounded_mean_or_zero(confidence)
        emotion_percentages["confidence_by_emotion"] = confidence_by_emotion

    return emotion_percentages

def format_results_for_api(
    emotion_df: Optional[pd.DataFrame], 
    transcript: str, 
    analysis: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Bundle transcript, emotion statistics and analysis into the API payload.

    Args:
        emotion_df: DataFrame with emotion data; ``None`` or an empty frame
            produces an all-zero statistics block
        transcript: Transcript text, passed through unchanged
        analysis: Analysis data, passed through unchanged

    Returns:
        Dictionary with the keys ``transcript``, ``emotion_percentages`` and
        ``analysis``
    """
    emotion_names = ("angry", "disgust", "fear", "happy", "sad", "surprise", "neutral")
    has_emotion_data = emotion_df is not None and not emotion_df.empty

    if has_emotion_data:
        stats = calculate_emotion_percentages(emotion_df)

        # The frontend expects both confidence entries to exist, so fill in
        # zeroed defaults whenever the calculation did not provide them.
        stats.setdefault("confidence_by_emotion", {name: 0 for name in emotion_names})
        stats.setdefault("average_confidence", 0)
    else:
        # Nothing to analyse: every statistic is reported as zero.
        stats = {name: 0 for name in emotion_names}
        for summary_key in ("positive", "negative", "neutral_group", "average_confidence"):
            stats[summary_key] = 0
        stats["confidence_by_emotion"] = {name: 0 for name in emotion_names}

    response = {
        "transcript": transcript,
        "emotion_percentages": stats,
        "analysis": analysis
    }
    return response