# File: test/behavior_backend/app/utils/data_utils.py
# Author: hibatorrahmen
# Commit: "Add backend application and Dockerfile"
# Revision: 8ae78b0
import json
import pandas as pd
import numpy as np
import logging
from typing import Dict, Any, List, Optional
# Fix import paths
try:
from app.utils.logging_utils import setup_logger
except ImportError:
# Try relative imports for running from project root
from behavior_backend.app.utils.logging_utils import setup_logger
# Configure logging
logger = logging.getLogger(__name__)
def _value_or_default(mapping: Dict[str, Any], key: str, default: Any) -> Any:
    """Return ``mapping[key]``, or *default* when the key is missing or None."""
    value = mapping.get(key)
    return default if value is None else value


def _face_from_main_emotion(main_emotion: Dict[str, Any]) -> Dict[str, Any]:
    """Translate a ``main_emotion`` summary into the per-face dictionary layout."""
    dominant_emotion = _value_or_default(main_emotion, 'emotion', 'unknown')
    emotion_confidence = _value_or_default(main_emotion, 'confidence', 0)
    return {
        # Only the dominant emotion is known here, so it is the single score.
        'emotion': (
            {dominant_emotion: emotion_confidence}
            if dominant_emotion != 'unknown'
            else {}
        ),
        'dominant_emotion': dominant_emotion,
        'emotion_confidence': emotion_confidence,
        'emotion_stable': _value_or_default(main_emotion, 'stable', False),
    }


def _build_emotion_row(
    backend_name: str,
    frame_index: Any,
    face_index: int,
    face: Optional[Dict[str, Any]] = None,
    infer_dominant: bool = False,
) -> Dict[str, Any]:
    """Build one output row from a face-like dictionary.

    With ``face=None`` a placeholder row ('unknown', confidence 0) is built.
    When *infer_dominant* is true and the face carries scores but no dominant
    emotion, the highest-scoring emotion and its score are used instead.
    """
    face = face or {}
    emotion_scores = _value_or_default(face, 'emotion', {})
    dominant_emotion = _value_or_default(face, 'dominant_emotion', 'unknown')
    emotion_confidence = _value_or_default(face, 'emotion_confidence', 0)

    if infer_dominant and dominant_emotion == 'unknown' and emotion_scores:
        # max() returns the first key on ties, like a strict ">" scan would.
        dominant_emotion = max(emotion_scores, key=emotion_scores.get)
        emotion_confidence = emotion_scores[dominant_emotion]

    return {
        'backend': backend_name,
        'frame_index': frame_index,
        'face_index': face_index,
        'dominant_emotion': dominant_emotion,
        'emotion_confidence': emotion_confidence,
        'emotion_stable': _value_or_default(face, 'emotion_stable', False),
        'emotion_scores': emotion_scores,
        'face_box': face.get('face_box'),
    }


def json_to_dataframe(data: Dict[str, List[Dict[str, Any]]]) -> pd.DataFrame:
    """
    Convert JSON emotion data to a pandas DataFrame.

    Each frame result contributes one row per entry in its ``faces`` list.
    When a frame has no faces, a single row (``face_index`` 0) is built from
    ``main_face``, else from ``main_emotion``, else as an 'unknown' placeholder.
    Empty/None frame results, backends mapped to None, and non-dict entries
    are skipped instead of raising.

    Args:
        data: Dictionary with backend name as key and list of results as value

    Returns:
        DataFrame with columns backend, frame_index, face_index,
        dominant_emotion, emotion_confidence, emotion_stable, emotion_scores
        and face_box; an empty DataFrame when no rows could be built.
    """
    all_rows: List[Dict[str, Any]] = []

    for backend_name, results in (data or {}).items():
        # A backend that produced nothing may map to None rather than [].
        for result in results or []:
            if not result or not isinstance(result, dict):
                continue

            frame_index = result.get('frame_index', 0)
            faces = result.get('faces')

            if faces:
                # enumerate() before filtering keeps face_index aligned with
                # the position in the original list.
                all_rows.extend(
                    _build_emotion_row(
                        backend_name,
                        frame_index,
                        face_index,
                        face_data,
                        infer_dominant=True,
                    )
                    for face_index, face_data in enumerate(faces)
                    if isinstance(face_data, dict)
                )
                continue

            # No faces detected: fall back to the frame-level summaries.
            main_face = result.get('main_face')
            main_emotion = result.get('main_emotion')
            if main_face and isinstance(main_face, dict):
                # main_face values are taken as reported (no inference).
                row = _build_emotion_row(backend_name, frame_index, 0, main_face)
            elif main_emotion and isinstance(main_emotion, dict):
                row = _build_emotion_row(
                    backend_name,
                    frame_index,
                    0,
                    _face_from_main_emotion(main_emotion),
                )
            else:
                row = _build_emotion_row(backend_name, frame_index, 0)
            all_rows.append(row)

    if not all_rows:
        logger.warning("No data to convert to DataFrame")
        return pd.DataFrame()

    return pd.DataFrame(all_rows)
def calculate_emotion_percentages(df: pd.DataFrame) -> Dict[str, Any]:
    """
    Calculate percentages of different emotion categories.

    Percentages are relative to the total number of rows in *df*; rows whose
    ``dominant_emotion`` is not one of the seven known emotions still count
    towards that total, so the percentages need not sum to 100.

    The returned dictionary always has the same keys, whether or not *df* is
    empty and whether or not it has an ``emotion_confidence`` column.

    Args:
        df: DataFrame with emotion data (``dominant_emotion`` column and,
            optionally, ``emotion_confidence``)

    Returns:
        Dictionary with one percentage per emotion, the grouped percentages
        ``positive``, ``negative`` and ``neutral_group``, the overall
        ``average_confidence`` and a nested ``confidence_by_emotion`` dict.

    Raises:
        KeyError: If a non-empty *df* has no ``dominant_emotion`` column.
    """
    # Define emotion categories
    positive_emotions = ['happy', 'surprise']
    negative_emotions = ['angry', 'disgust', 'fear', 'sad']
    neutral_emotions = ['neutral']
    all_emotions = positive_emotions + negative_emotions + neutral_emotions

    def _rounded_mean(values: pd.Series) -> float:
        """Return the mean of *values* rounded to 2 places, or 0 if undefined."""
        if values.empty:
            return 0
        mean_value = values.mean()
        # An all-NaN column has a NaN mean, which is not valid JSON output.
        return 0 if pd.isna(mean_value) else round(mean_value, 2)

    if df is None or df.empty:
        # Same key set as the non-empty result so callers see one schema.
        empty_result: Dict[str, Any] = {emotion: 0 for emotion in all_emotions}
        empty_result.update({
            "positive": 0,
            "negative": 0,
            "neutral_group": 0,
            "average_confidence": 0,
            "confidence_by_emotion": {emotion: 0 for emotion in all_emotions},
        })
        return empty_result

    total_frames = len(df)  # > 0 here because df is not empty
    dominant = df['dominant_emotion']

    # Count frames by emotion (labels outside all_emotions are ignored)
    observed = dominant.value_counts()
    emotion_counts = {
        emotion: int(observed.get(emotion, 0)) for emotion in all_emotions
    }

    def _percentage(count: int) -> float:
        return round((count / total_frames) * 100, 2)

    # Individual emotion percentages
    emotion_percentages: Dict[str, Any] = {
        emotion: _percentage(count) for emotion, count in emotion_counts.items()
    }

    # Grouped percentages
    emotion_percentages.update({
        "positive": _percentage(sum(emotion_counts[e] for e in positive_emotions)),
        "negative": _percentage(sum(emotion_counts[e] for e in negative_emotions)),
        "neutral_group": _percentage(sum(emotion_counts[e] for e in neutral_emotions)),
    })

    # Confidence values, zero-filled when the column is unavailable
    if 'emotion_confidence' in df.columns:
        confidence = df['emotion_confidence']
        confidence_by_emotion = {
            emotion: _rounded_mean(confidence[dominant == emotion])
            for emotion in all_emotions
        }
        average_confidence = _rounded_mean(confidence)
    else:
        confidence_by_emotion = {emotion: 0 for emotion in all_emotions}
        average_confidence = 0

    emotion_percentages["average_confidence"] = average_confidence
    emotion_percentages["confidence_by_emotion"] = confidence_by_emotion
    return emotion_percentages
def format_results_for_api(
    emotion_df: Optional[pd.DataFrame],
    transcript: str,
    analysis: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Assemble the API response payload.

    The emotion statistics are computed from ``emotion_df`` when it holds
    data; a missing or empty frame yields an all-zero statistics block.
    In both cases the confidence entries are guaranteed to be present.

    Args:
        emotion_df: DataFrame with emotion data (may be None or empty)
        transcript: Transcript text
        analysis: Analysis data

    Returns:
        Dictionary with the keys "transcript", "emotion_percentages"
        and "analysis"
    """
    emotion_names = (
        "angry", "disgust", "fear", "happy", "sad", "surprise", "neutral",
    )
    zero_confidences = {name: 0 for name in emotion_names}

    has_emotion_data = emotion_df is not None and not emotion_df.empty
    if has_emotion_data:
        stats = calculate_emotion_percentages(emotion_df)
        # The frontend expects the confidence entries to exist, so fill in
        # zeroed defaults for any that the calculation did not provide.
        stats.setdefault("confidence_by_emotion", zero_confidences)
        stats.setdefault("average_confidence", 0)
    else:
        # Nothing to analyse: every statistic is reported as zero.
        stats = {name: 0 for name in emotion_names}
        for summary_key in (
            "positive", "negative", "neutral_group", "average_confidence",
        ):
            stats[summary_key] = 0
        stats["confidence_by_emotion"] = zero_confidences

    response = {
        "transcript": transcript,
        "emotion_percentages": stats,
        "analysis": analysis,
    }
    return response