File size: 10,078 Bytes
8ae78b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
import json
import pandas as pd
import numpy as np
import logging
from typing import Dict, Any, List, Optional

# Fix import paths
try:
    from app.utils.logging_utils import setup_logger
except ImportError:
    # Fall back to the fully-qualified package path (used when running from the project root)
    from behavior_backend.app.utils.logging_utils import setup_logger

# Module-level logger named after this module. No handlers or levels are set
# here; getLogger only looks up (or creates) the named logger.
# NOTE(review): setup_logger is imported above but is not called in the code
# visible here - confirm whether it is still needed.
logger = logging.getLogger(__name__)

def _fallback_face(result: Dict[str, Any]) -> Dict[str, Any]:
    """Return a face-like dict for a frame that has no usable 'faces' list.

    Preference order: the frame's 'main_face' dict, then a face synthesised
    from 'main_emotion', then an empty dict (which yields an all-default row).
    """
    main_face = result.get('main_face')
    if main_face:
        return main_face

    main_emotion = result.get('main_emotion')
    if main_emotion:
        dominant_emotion = main_emotion.get('emotion', 'unknown')
        emotion_confidence = main_emotion.get('confidence', 0)
        return {
            # Only the dominant emotion is known here, so the score dict has
            # at most that single entry.
            'emotion': (
                {dominant_emotion: emotion_confidence}
                if dominant_emotion != 'unknown' else {}
            ),
            'dominant_emotion': dominant_emotion,
            'emotion_confidence': emotion_confidence,
            'emotion_stable': main_emotion.get('stable', False),
            'face_box': None,
        }

    return {}


def _emotion_row(
    backend_name: str,
    frame_index: Any,
    face_index: int,
    face_data: Dict[str, Any],
) -> Dict[str, Any]:
    """Build one DataFrame row from a face dict.

    When the face has no 'dominant_emotion' but does carry emotion scores, the
    highest-scoring emotion (first one on ties) becomes the dominant emotion
    and its score becomes the confidence.
    """
    emotion_scores = face_data.get('emotion', {})
    dominant_emotion = face_data.get('dominant_emotion', 'unknown')
    emotion_confidence = face_data.get('emotion_confidence', 0)

    if dominant_emotion == 'unknown' and emotion_scores:
        dominant_emotion = max(emotion_scores, key=emotion_scores.get)
        emotion_confidence = emotion_scores[dominant_emotion]

    return {
        'backend': backend_name,
        'frame_index': frame_index,
        'face_index': face_index,
        'dominant_emotion': dominant_emotion,
        'emotion_confidence': emotion_confidence,
        'emotion_stable': face_data.get('emotion_stable', False),
        'emotion_scores': emotion_scores,
        'face_box': face_data.get('face_box'),
    }


def json_to_dataframe(data: Dict[str, List[Dict[str, Any]]]) -> pd.DataFrame:
    """
    Convert JSON emotion data to a pandas DataFrame.

    Each non-empty frame result contributes one row per entry in its 'faces'
    list. A frame without faces contributes exactly one row (face_index 0)
    built from 'main_face', else from 'main_emotion', else from defaults
    ('unknown' emotion, confidence 0).

    Args:
        data: Dictionary with backend name as key and list of results as value.
            A backend whose value is None or empty contributes no rows.

    Returns:
        DataFrame with columns backend, frame_index, face_index,
        dominant_emotion, emotion_confidence, emotion_stable, emotion_scores
        and face_box; an empty DataFrame (after logging a warning) when no
        rows were produced.
    """
    all_rows = []

    for backend_name, results in data.items():
        for result in results or ():
            # Empty / None frame results carry no information at all.
            if not result:
                continue

            frame_index = result.get('frame_index', 0)
            faces = result.get('faces')

            if faces:
                all_rows.extend(
                    _emotion_row(backend_name, frame_index, face_index, face_data)
                    for face_index, face_data in enumerate(faces)
                )
            else:
                # No detected faces: still emit one row so the frame is counted.
                all_rows.append(
                    _emotion_row(backend_name, frame_index, 0, _fallback_face(result))
                )

    if not all_rows:
        logger.warning("No data to convert to DataFrame")
        return pd.DataFrame()

    return pd.DataFrame(all_rows)

def calculate_emotion_percentages(df: pd.DataFrame) -> Dict[str, Any]:
    """
    Calculate percentages of different emotion categories.

    Percentages are taken over the number of rows in ``df``; rows whose
    'dominant_emotion' is not one of the seven known emotions still count
    towards the total but towards no emotion.

    Args:
        df: DataFrame with a 'dominant_emotion' column and, optionally, an
            'emotion_confidence' column.

    Returns:
        Dictionary with one percentage (rounded to 2 decimals) per emotion,
        plus the grouped 'positive', 'negative' and 'neutral_group'
        percentages. When 'emotion_confidence' is present it also holds
        'average_confidence' (float) and 'confidence_by_emotion' (dict of
        per-emotion mean confidence). Means that cannot be computed (no rows,
        or only missing / non-numeric values) are reported as 0.
    """
    if df.empty:
        # Same percentage keys as the non-empty result, all zero.
        return {
            "angry": 0,
            "disgust": 0,
            "fear": 0,
            "happy": 0,
            "sad": 0,
            "surprise": 0,
            "neutral": 0,
            "positive": 0,
            "negative": 0,
            "neutral_group": 0
        }

    # Define emotion categories
    positive_emotions = ['happy', 'surprise']
    negative_emotions = ['angry', 'disgust', 'fear', 'sad']
    neutral_emotions = ['neutral']
    all_emotions = positive_emotions + negative_emotions + neutral_emotions

    total_rows = len(df)  # > 0 here because df is not empty

    def percent(count: int) -> float:
        return round((count / total_rows) * 100, 2)

    # Count rows per known emotion; labels outside all_emotions are ignored.
    observed = df['dominant_emotion'].value_counts()
    emotion_counts = {
        emotion: int(observed.get(emotion, 0)) for emotion in all_emotions
    }

    emotion_percentages: Dict[str, Any] = {
        emotion: percent(count) for emotion, count in emotion_counts.items()
    }
    emotion_percentages.update({
        "positive": percent(sum(emotion_counts[e] for e in positive_emotions)),
        "negative": percent(sum(emotion_counts[e] for e in negative_emotions)),
        "neutral_group": percent(sum(emotion_counts[e] for e in neutral_emotions)),
    })

    # Calculate confidence values if available
    if 'emotion_confidence' in df.columns:
        # Coerce so that None / non-numeric entries become NaN and are skipped
        # by mean() instead of breaking it.
        confidence = pd.to_numeric(df['emotion_confidence'], errors='coerce')

        def mean_or_zero(values: pd.Series) -> float:
            mean = values.mean()
            # NaN means "nothing to average"; report 0 rather than leak NaN
            # into the result.
            return 0 if pd.isna(mean) else round(float(mean), 2)

        emotion_percentages["average_confidence"] = mean_or_zero(confidence)
        emotion_percentages["confidence_by_emotion"] = {
            emotion: mean_or_zero(confidence[df['dominant_emotion'] == emotion])
            for emotion in all_emotions
        }

    return emotion_percentages

def format_results_for_api(
    emotion_df: Optional[pd.DataFrame],
    transcript: str,
    analysis: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Assemble the API response payload.

    Args:
        emotion_df: DataFrame with emotion data, or None
        transcript: Transcript text, passed through unchanged
        analysis: Analysis data, passed through unchanged

    Returns:
        Dictionary with the keys "transcript", "emotion_percentages" and
        "analysis". For a missing or empty DataFrame every emotion value is
        zero; otherwise the values come from calculate_emotion_percentages,
        with zero defaults filled in for any missing confidence entries.
    """
    emotion_names = (
        "angry", "disgust", "fear", "happy", "sad", "surprise", "neutral"
    )

    def zeroed() -> Dict[str, Any]:
        # Fresh dict on every call so the results never share state.
        return {name: 0 for name in emotion_names}

    has_data = emotion_df is not None and not emotion_df.empty

    if has_data:
        emotion_percentages = calculate_emotion_percentages(emotion_df)
        # The frontend expects both confidence entries to be present.
        emotion_percentages.setdefault("confidence_by_emotion", zeroed())
        emotion_percentages.setdefault("average_confidence", 0)
    else:
        emotion_percentages = zeroed()
        for group_key in ("positive", "negative", "neutral_group", "average_confidence"):
            emotion_percentages[group_key] = 0
        emotion_percentages["confidence_by_emotion"] = zeroed()

    response = {"transcript": transcript}
    response["emotion_percentages"] = emotion_percentages
    response["analysis"] = analysis
    return response