Spaces:

urjob
/

test

Sleeping

App Files Files Community

test / behavior_backend /app /utils /data_utils.py

hibatorrahmen

Add backend application and Dockerfile

8ae78b0 10 months ago

raw

history blame contribute delete

10.1 kB

	import json
	import pandas as pd
	import numpy as np
	import logging
	from typing import Dict, Any, List, Optional

	# Fix import paths
	try:
	from app.utils.logging_utils import setup_logger
	except ImportError:
	# Try relative imports for running from project root
	from behavior_backend.app.utils.logging_utils import setup_logger

	# Configure logging
	logger = logging.getLogger(__name__)

	def _value_or(mapping: Dict[str, Any], key: str, default: Any) -> Any:
	    """Return ``mapping[key]``, or ``default`` when the key is missing or None."""
	    value = mapping.get(key)
	    return default if value is None else value


	def _emotion_row(
	    backend_name: str,
	    frame_index: Any,
	    face_index: int,
	    dominant_emotion: Any = 'unknown',
	    emotion_confidence: Any = 0,
	    emotion_stable: Any = False,
	    emotion_scores: Optional[Dict[str, Any]] = None,
	    face_box: Any = None,
	) -> Dict[str, Any]:
	    """Build one output row; the defaults describe a frame with no usable emotion."""
	    return {
	        'backend': backend_name,
	        'frame_index': frame_index,
	        'face_index': face_index,
	        'dominant_emotion': dominant_emotion,
	        'emotion_confidence': emotion_confidence,
	        'emotion_stable': emotion_stable,
	        'emotion_scores': {} if emotion_scores is None else emotion_scores,
	        'face_box': face_box,
	    }


	def _face_row(
	    backend_name: str,
	    frame_index: Any,
	    face_index: int,
	    face: Dict[str, Any],
	) -> Dict[str, Any]:
	    """Flatten one face record, deriving the dominant emotion from its scores if needed."""
	    emotion_scores = _value_or(face, 'emotion', {})
	    dominant_emotion = _value_or(face, 'dominant_emotion', 'unknown')
	    emotion_confidence = _value_or(face, 'emotion_confidence', 0)
	    emotion_stable = _value_or(face, 'emotion_stable', False)

	    # No explicit label: fall back to the highest-scoring emotion. On ties
	    # max() keeps the first entry, matching plain left-to-right scanning.
	    if dominant_emotion == 'unknown' and emotion_scores:
	        dominant_emotion, emotion_confidence = max(
	            emotion_scores.items(), key=lambda item: item[1]
	        )

	    return _emotion_row(
	        backend_name,
	        frame_index,
	        face_index,
	        dominant_emotion=dominant_emotion,
	        emotion_confidence=emotion_confidence,
	        emotion_stable=emotion_stable,
	        emotion_scores=emotion_scores,
	        face_box=face.get('face_box'),
	    )


	def json_to_dataframe(data: Dict[str, List[Dict[str, Any]]]) -> pd.DataFrame:
	    """
	    Convert JSON emotion data to a pandas DataFrame.

	    One row is produced per detected face. A frame without a non-empty
	    'faces' list still produces exactly one row (face_index 0), taken from,
	    in order of preference: its 'main_face' record, its 'main_emotion'
	    summary, or an 'unknown' placeholder. Falsy results (None, {}) are skipped.

	    Args:
	        data: Dictionary with backend name as key and list of results as value.
	            A missing (None) mapping or result list is treated as empty.

	    Returns:
	        DataFrame with the columns 'backend', 'frame_index', 'face_index',
	        'dominant_emotion', 'emotion_confidence', 'emotion_stable',
	        'emotion_scores' and 'face_box'; an empty, column-less DataFrame
	        when there is nothing to convert.
	    """
	    all_rows = []

	    for backend_name, results in (data or {}).items():
	        for result in results or []:
	            if not result:
	                continue

	            frame_index = result.get('frame_index', 0)
	            faces = result.get('faces')

	            if faces:
	                # Regular case: one row per detected face.
	                for face_index, face_data in enumerate(faces):
	                    all_rows.append(
	                        _face_row(backend_name, frame_index, face_index, face_data)
	                    )
	            elif result.get('main_face'):
	                # No face list, but the frame carries a main face record.
	                all_rows.append(
	                    _face_row(backend_name, frame_index, 0, result['main_face'])
	                )
	            elif result.get('main_emotion'):
	                main_emotion = result['main_emotion']
	                dominant_emotion = _value_or(main_emotion, 'emotion', 'unknown')
	                emotion_confidence = _value_or(main_emotion, 'confidence', 0)

	                # Only the dominant emotion is known here, so the score map
	                # holds just that single entry.
	                if dominant_emotion != 'unknown':
	                    emotion_scores = {dominant_emotion: emotion_confidence}
	                else:
	                    emotion_scores = {}

	                all_rows.append(_emotion_row(
	                    backend_name,
	                    frame_index,
	                    0,
	                    dominant_emotion=dominant_emotion,
	                    emotion_confidence=emotion_confidence,
	                    emotion_stable=_value_or(main_emotion, 'stable', False),
	                    emotion_scores=emotion_scores,
	                ))
	            else:
	                # Nothing usable in this frame: keep a placeholder row so the
	                # frame is still represented in the output.
	                all_rows.append(_emotion_row(backend_name, frame_index, 0))

	    if not all_rows:
	        logger.warning("No data to convert to DataFrame")
	        return pd.DataFrame()

	    return pd.DataFrame(all_rows)

	def calculate_emotion_percentages(df: pd.DataFrame) -> Dict[str, Any]:
	    """
	    Calculate percentages of different emotion categories.

	    Percentages are computed over all rows of ``df`` (rows whose dominant
	    emotion is not one of the tracked emotions still count in the total).
	    The result always has the same set of keys, whether or not ``df``
	    contains data or an 'emotion_confidence' column.

	    Args:
	        df: DataFrame with a 'dominant_emotion' column and, optionally, an
	            'emotion_confidence' column. None or an empty frame is allowed.

	    Returns:
	        Dictionary with:
	          - one percentage (0-100, two decimals) per tracked emotion
	            ('happy', 'surprise', 'angry', 'disgust', 'fear', 'sad', 'neutral'),
	          - grouped percentages 'positive', 'negative' and 'neutral_group',
	          - 'average_confidence': mean confidence over all rows,
	          - 'confidence_by_emotion': mean confidence per tracked emotion.
	        Confidence values that cannot be computed are reported as 0.

	    Raises:
	        KeyError: if a non-empty ``df`` has no 'dominant_emotion' column.
	    """
	    # Define emotion categories
	    positive_emotions = ['happy', 'surprise']
	    negative_emotions = ['angry', 'disgust', 'fear', 'sad']
	    neutral_emotions = ['neutral']
	    all_emotions = positive_emotions + negative_emotions + neutral_emotions

	    def rounded_mean(value: Any) -> float:
	        # NaN (empty or all-missing group) is reported as 0 so the result
	        # stays JSON-serialisable; float() drops the NumPy scalar type.
	        if value is None or pd.isna(value):
	            return 0
	        return round(float(value), 2)

	    if df is None or df.empty:
	        # Same schema as the populated result, with every value zeroed.
	        empty_result: Dict[str, Any] = {emotion: 0 for emotion in all_emotions}
	        empty_result.update({
	            "positive": 0,
	            "negative": 0,
	            "neutral_group": 0,
	            "average_confidence": 0,
	            "confidence_by_emotion": {emotion: 0 for emotion in all_emotions},
	        })
	        return empty_result

	    total_rows = len(df)  # > 0: the empty case returned above
	    dominant = df['dominant_emotion']

	    # Count rows per tracked emotion; labels outside all_emotions are ignored.
	    observed = dominant.value_counts()
	    emotion_counts = {
	        emotion: int(observed.get(emotion, 0)) for emotion in all_emotions
	    }

	    def percentage(count: int) -> float:
	        return round((count / total_rows) * 100, 2)

	    # Individual emotion percentages
	    emotion_percentages: Dict[str, Any] = {
	        emotion: percentage(count) for emotion, count in emotion_counts.items()
	    }

	    # Grouped percentages
	    emotion_percentages.update({
	        "positive": percentage(sum(emotion_counts[e] for e in positive_emotions)),
	        "negative": percentage(sum(emotion_counts[e] for e in negative_emotions)),
	        "neutral_group": percentage(sum(emotion_counts[e] for e in neutral_emotions)),
	    })

	    # Confidence values, zeroed when the column is not available
	    if 'emotion_confidence' in df.columns:
	        # Coerce so that None / non-numeric entries become NaN and are
	        # skipped by mean() instead of breaking it.
	        confidence = pd.to_numeric(df['emotion_confidence'], errors='coerce')
	        confidence_by_emotion = {
	            emotion: rounded_mean(confidence[dominant == emotion].mean())
	            for emotion in all_emotions
	        }
	        average_confidence = rounded_mean(confidence.mean())
	    else:
	        confidence_by_emotion = {emotion: 0 for emotion in all_emotions}
	        average_confidence = 0

	    emotion_percentages["average_confidence"] = average_confidence
	    emotion_percentages["confidence_by_emotion"] = confidence_by_emotion

	    return emotion_percentages

	def format_results_for_api(
	    emotion_df: Optional[pd.DataFrame],
	    transcript: str,
	    analysis: Dict[str, Any]
	) -> Dict[str, Any]:
	    """
	    Assemble the API response payload.

	    Args:
	        emotion_df: Per-face emotion rows; None or an empty frame means
	            that no emotion data is available.
	        transcript: Transcript text, passed through unchanged.
	        analysis: Analysis data, passed through unchanged.

	    Returns:
	        Dictionary with the keys 'transcript', 'emotion_percentages' and
	        'analysis'. 'emotion_percentages' always contains
	        'average_confidence' and 'confidence_by_emotion'; without emotion
	        data every value in it is zero.
	    """
	    tracked_emotions = (
	        "angry", "disgust", "fear", "happy", "sad", "surprise", "neutral"
	    )

	    def zero_per_emotion() -> Dict[str, Any]:
	        # A fresh mapping on every call, so results never share state.
	        return dict.fromkeys(tracked_emotions, 0)

	    has_rows = emotion_df is not None and not emotion_df.empty

	    if has_rows:
	        summary = calculate_emotion_percentages(emotion_df)
	        # The frontend expects the confidence entries to be present; fill
	        # them in only if the calculation did not provide them.
	        summary.setdefault("confidence_by_emotion", zero_per_emotion())
	        summary.setdefault("average_confidence", 0)
	    else:
	        summary = zero_per_emotion()
	        summary["positive"] = 0
	        summary["negative"] = 0
	        summary["neutral_group"] = 0
	        summary["average_confidence"] = 0
	        summary["confidence_by_emotion"] = zero_per_emotion()

	    response = {
	        "transcript": transcript,
	        "emotion_percentages": summary,
	        "analysis": analysis,
	    }
	    return response