# Source imported from commit 8ae78b0 ("Add backend application and Dockerfile",
# by hibatorrahmen).
import os
import json
import logging
import pandas as pd
import openai
from typing import Dict, Any, List, Optional
# Fix import paths: prefer the packaged absolute imports; fall back to the
# project-root layout used when running from outside the package.
try:
    from app.utils.logging_utils import time_it, setup_logger
    from app.core.config import settings
except ImportError:
    # Try relative imports for running from project root
    from behavior_backend.app.utils.logging_utils import time_it, setup_logger

    # Mock settings for testing: only OPENAI_API_KEY is provided.
    class Settings:
        # NOTE(review): evaluated once at class-definition (import) time — a key
        # exported later will not be picked up here. Confirm this is acceptable.
        OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
    settings = Settings()

# Configure logging
logger = setup_logger(__name__)
class AIAnalysisService:
    """Service for AI analysis operations."""

    def __init__(self):
        """Initialize the AI analysis service.

        Creates an OpenAI client whose API key is read from the
        ``OPENAI_API_KEY`` environment variable (empty string when unset).
        """
        # NOTE(review): reads the environment directly rather than using the
        # imported `settings.OPENAI_API_KEY` — confirm this is intentional.
        self.client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY", ""))
@time_it
def analyze_emotions_and_transcript(
self,
emotion_df: pd.DataFrame,
transcript: str,
language: str = 'en',
interview_assessment: Optional[Dict[str, Any]] = None,
eye_contact_data: Optional[Dict[str, Any]] = None,
body_language_data: Optional[Dict[str, Any]] = None,
face_analysis_data: Optional[Dict[str, Any]] = None,
model_name: str = "gpt-4o"
) -> Dict[str, Any]:
"""
Analyze emotions and transcript using OpenAI.
Args:
emotion_df: DataFrame with emotion data
transcript: Transcript text
language: Language of the transcript
interview_assessment: Optional interview assessment
eye_contact_data: Optional eye contact analysis data
body_language_data: Optional body language analysis data
face_analysis_data: Optional face analysis data
model_name: The name of the model to use for AI analysis
Returns:
Dictionary with analysis results
"""
print("*******************************I AM INSIDE AI ANALYSER *******************************************************")
logger.info(f"Received interview assessment: {interview_assessment}")
logger.info(f"Received transcript: {transcript}")
logger.info(f"Received language: {language}")
logger.info(f"Received emotion_df: {emotion_df}")
logger.info(f"Received eye contact data: {eye_contact_data is not None}")
logger.info(f"Received body language data: {body_language_data is not None}")
logger.info(f"Received face analysis data: {face_analysis_data is not None}")
logger.info(f"Using AI model: {model_name}")
# Check if emotion_df is empty or None
if emotion_df is None or emotion_df.empty:
logger.warning("No emotion data available for analysis")
return self._generate_empty_analysis()
try:
# Extract raw emotion scores from the DataFrame
raw_emotions = {}
confidence_by_emotion = {}
average_confidence = 0
confidence_data = {}
# Get primary emotion data from the first row of the DataFrame
if not emotion_df.empty and 'raw_emotion_data' in emotion_df.columns:
first_row = emotion_df.iloc[0]
if isinstance(first_row['raw_emotion_data'], dict) and first_row['raw_emotion_data']:
raw_emotions = first_row['raw_emotion_data']
logger.info(f"Using raw_emotion_data from DataFrame: {raw_emotions}")
# Check if confidence data is available in the first row (this would be the "confidence_data" field)
if 'confidence_data' in emotion_df.columns and isinstance(first_row.get('confidence_data'), dict):
confidence_data = first_row['confidence_data']
confidence_by_emotion = confidence_data.get('confidence_by_emotion', {})
average_confidence = confidence_data.get('average_confidence', 0)
# Round confidence values to 2 decimal places
confidence_by_emotion = {emotion: round(value, 2) for emotion, value in confidence_by_emotion.items()}
average_confidence = round(average_confidence, 2)
logger.info(f"Using rounded confidence_data - confidence_by_emotion: {confidence_by_emotion}")
logger.info(f"Using rounded confidence_data - average_confidence: {average_confidence}")
# Store rounded values back to confidence_data for consistency
confidence_data['confidence_by_emotion'] = confidence_by_emotion
confidence_data['average_confidence'] = average_confidence
# If no raw_emotion_data found, fall back to other methods
if not raw_emotions:
logger.info("No raw_emotion_data found, trying alternative sources")
# First check if we have a main_face column
if 'main_face' in emotion_df.columns and not emotion_df.empty:
first_row = emotion_df.iloc[0]
main_face = first_row.get('main_face', {})
if isinstance(main_face, dict) and main_face and 'emotion' in main_face:
raw_emotions = main_face['emotion']
logger.info(f"Using emotion from main_face: {raw_emotions}")
# If still no raw emotions, try emotion_scores from first row
if not raw_emotions and 'emotion_scores' in emotion_df.columns and not emotion_df.empty:
first_row = emotion_df.iloc[0]
emotion_scores = first_row.get('emotion_scores', {})
if isinstance(emotion_scores, dict) and emotion_scores:
raw_emotions = emotion_scores
logger.info(f"Using emotion_scores from first row: {raw_emotions}")
# If still no raw emotions found, log this issue
if not raw_emotions:
logger.warning("No emotion data found in the DataFrame")
# Use empty dict with zero values for all emotions
raw_emotions = {
"angry": 0, "disgust": 0, "fear": 0, "happy": 0,
"sad": 0, "surprise": 0, "neutral": 0
}
# Extract confidence values if available
average_confidence = 0
# If we have a 'confidence_by_emotion' stat available in any fashion, use it
if 'main_face' in emotion_df.columns and not emotion_df.empty:
# Calculate confidence values from dominant emotions in the data
confidence_values = []
emotion_confidence_counts = {}
for index, row in emotion_df.iterrows():
if 'main_face' in row and row['main_face'] and 'emotion_confidence' in row['main_face']:
confidence = row['main_face']['emotion_confidence']
emotion = row['main_face'].get('dominant_emotion', 'neutral')
# Add to average confidence
confidence_values.append(confidence)
# Track by emotion
if emotion not in emotion_confidence_counts:
emotion_confidence_counts[emotion] = []
emotion_confidence_counts[emotion].append(confidence)
# Calculate average confidence
if confidence_values:
average_confidence = sum(confidence_values) / len(confidence_values)
# Calculate average confidence by emotion
for emotion, confidences in emotion_confidence_counts.items():
if confidences:
confidence_by_emotion[emotion] = sum(confidences) / len(confidences)
# If we don't have confidence values, check if we have any in first face
if not confidence_by_emotion and 'faces' in emotion_df.columns and not emotion_df.empty:
for index, row in emotion_df.iterrows():
if 'faces' in row and row['faces'] and len(row['faces']) > 0 and 'emotion_confidence' in row['faces'][0]:
confidence = row['faces'][0]['emotion_confidence']
emotion = row['faces'][0].get('dominant_emotion', 'neutral')
# Add to average confidence
if 'confidence_values' not in locals():
confidence_values = []
confidence_values.append(confidence)
# Track by emotion
if emotion not in emotion_confidence_counts:
emotion_confidence_counts = {}
emotion_confidence_counts[emotion] = []
emotion_confidence_counts[emotion].append(confidence)
# Calculate average confidence
if 'confidence_values' in locals() and confidence_values:
average_confidence = sum(confidence_values) / len(confidence_values)
# Calculate average confidence by emotion
for emotion, confidences in emotion_confidence_counts.items():
if confidences:
confidence_by_emotion[emotion] = sum(confidences) / len(confidences)
# If we still don't have confidence values, use the raw emotions as proxy for confidence
if not confidence_by_emotion and raw_emotions:
# Use the raw emotion values as proxy for confidence
# This ensures we at least have something
confidence_by_emotion = {k: round(v, 2) for k, v in raw_emotions.items()}
dominant_emotion, max_value = max(raw_emotions.items(), key=lambda x: x[1], default=("neutral", 0))
average_confidence = max_value
# Format the confidence values for display
for emotion in confidence_by_emotion:
# Do not round the values to preserve the exact data
pass
# Add debug logging for average_confidence
logger.info(f"Final average_confidence value to be used in result: {average_confidence}")
# Get the original average_confidence from the confidence_data for the database
db_average_confidence = confidence_data.get("average_confidence", average_confidence)
logger.info(f"Using average_confidence from confidence_data for database: {db_average_confidence}")
# Determine overall sentiment based on the dominant emotion
if 'overall_sentiment' in first_row and first_row['overall_sentiment']:
# Use the exact sentiment from the DataFrame if available
sentiment = first_row['overall_sentiment']
logger.info(f"Using overall_sentiment from DataFrame: {sentiment}")
elif raw_emotions:
# Find the dominant emotion only if we don't have a sentiment already
dominant_emotion, _ = max(raw_emotions.items(), key=lambda x: x[1], default=("neutral", 0))
sentiment = dominant_emotion.capitalize()
logger.info(f"Calculated sentiment from raw_emotions: {sentiment}")
else:
# Use the standard method if no raw emotions
sentiment = self._determine_sentiment(raw_emotions)
logger.info(f"Determined sentiment via standard method: {sentiment}")
# Prepare prompt for OpenAI
prompt = self._generate_prompt(
sentiment=sentiment,
raw_emotions=raw_emotions,
confidence_by_emotion=confidence_by_emotion,
average_confidence=average_confidence,
transcript=transcript,
language=language,
interview_assessment=interview_assessment,
eye_contact_data=eye_contact_data,
body_language_data=body_language_data,
)
logger.info(f"Generated prompt: {prompt}")
# Call OpenAI API
try:
system_prompt = """
You are an expert in analyzing emotions and speech for job interviews and professional presentations.
You are given a transcript of a video, a summary of the emotions expressed in the video, and detailed interview assessment data when available.
You are also given the overall sentiment of the video.
You may also be provided with face analysis, eye contact analysis, and body language analysis.
You are to analyze all provided data and provide a comprehensive analysis in JSON format.
Your evaluation must be based on the transcript, emotions expressed, interview assessment data, face analysis, eye contact analysis, and body language analysis (when provided).
You are to provide a detailed analysis, including:
- Key points from the transcript
- Language quality assessment
- Confidence indicators
- Overall assessment of the performance including body language, eye contact, and professional appearance
- Recommendations for improving emotional expression, communication, body language, and professional appearance
Please provide a comprehensive analysis in JSON format with the following structure:
{
"Transcript Analysis": {
"Key Points": List of key points as bullet points <ul>...</ul> in HTML format from the transcript with critical insight for an HR manager. Use bold <b>...</b> tags to highlight important points.
"Language Quality": Bullet points <ul>...</ul> in HTML format of assessment of language use, vocabulary,grammar mistakes, clarity, professionalism, and other language-related metrics. Use bold <b>...</b> tags to highlight important points.
"Confidence Indicators": Bullet points <ul>...</ul> in HTML format of analysis of confidence based on language.
},
"Body Language Analysis": {
"Eye Contact": Analysis of eye contact patterns in HTML format based on the interview assessment data.
"Posture and Movement": Analysis of posture, movement, and other body language indicators in HTML format.
"Overall Body Language": Summary assessment of body language in HTML format.
},
"Overall Summary": overall assessment of the candidate interview performance with critical insight for an HR manager. Use a chain of thought approach to analyze all available data and provide a comprehensive analysis. Write in HTML and highlight important points with bold <b>...</b> tags.
"Recommendations": {
"Emotional Expression": bullet points <ul>...</ul> in HTML format of recommendations for improving emotional expression using bold <b>...</b> tags.
"Communication": bullet points <ul>...</ul> in HTML format of recommendations for improving communication using bold <b>...</b> tags.
"Body Language": bullet points <ul>...</ul> in HTML format of specific recommendations for improving body language based on the assessment data using bold <b>...</b> tags.
"Professional Appearance": bullet points <ul>...</ul> in HTML format of specific recommendations for improving professional appearance using bold <b>...</b> tags.
}
}
"""
response = self.client.chat.completions.create(
model=model_name,
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": prompt}
],
temperature=0.7,
max_tokens=2500,
frequency_penalty=0,
presence_penalty=0.2
)
analysis_text = response.choices[0].message.content.strip()
# Parse the JSON response
try:
analysis = json.loads(analysis_text)
logger.info("Successfully parsed the OpenAI response")
except Exception as parse_error:
logger.error(f"Failed to parse OpenAI response as JSON: {str(parse_error)}")
logger.info(f"Response content: {analysis_text}")
analysis = self._extract_json_from_text(analysis_text)
if not analysis:
logger.warning("Returning standard analysis structure with error message")
analysis = self._generate_empty_analysis()
analysis["Error"] = "Failed to parse OpenAI response"
# Add raw emotion data to the analysis for consistency with database storage
analysis["Emotion Analysis"] = {
"Dominant Emotions": raw_emotions,
"Confidence By Emotion": confidence_by_emotion,
"Overall Sentiment": sentiment,
"Average Confidence": db_average_confidence
}
# Add eye contact and body language data directly to the analysis
# to ensure it's preserved in the returned JSON, using the same keys
# as in the video_processor.py when it creates comprehensive_results
if eye_contact_data:
# Use lowercase key to match video_processor.py
key = "eye_contact_analysis"
analysis[key] = eye_contact_data
logger.info(f"Added {key} to results with {len(str(eye_contact_data))} characters")
if body_language_data:
# Use lowercase key to match video_processor.py
key = "body_language_analysis"
analysis[key] = body_language_data
logger.info(f"Added {key} to results with {len(str(body_language_data))} characters")
if face_analysis_data:
# Use lowercase key to match video_processor.py
key = "face_analysis"
analysis[key] = face_analysis_data
logger.info(f"Added {key} to results with {len(str(face_analysis_data))} characters")
# Log the exact emotion analysis that will be stored in the database
logger.info(f"Emotion Analysis to be stored in database: {analysis['Emotion Analysis']}")
logger.info(f"Added eye_contact_analysis to results: {bool(eye_contact_data)}")
logger.info(f"Added body_language_analysis to results: {bool(body_language_data)}")
logger.info(f"Added face_analysis to results: {bool(face_analysis_data)}")
return analysis
except Exception as api_error:
logger.error(f"Error during OpenAI API call: {str(api_error)}")
analysis = self._generate_empty_analysis()
analysis["Error"] = f"OpenAI API error: {str(api_error)}"
# Still include the emotion data for consistency
analysis["Emotion Analysis"] = {
"Dominant Emotions": raw_emotions,
"Confidence By Emotion": confidence_by_emotion,
"Overall Sentiment": sentiment,
"Average Confidence": db_average_confidence
}
# Also include eye contact and body language data in error cases
if eye_contact_data:
key = "eye_contact_analysis"
analysis[key] = eye_contact_data
logger.info(f"Preserved {key} in error case with {len(str(eye_contact_data))} characters")
if body_language_data:
key = "body_language_analysis"
analysis[key] = body_language_data
logger.info(f"Preserved {key} in error case with {len(str(body_language_data))} characters")
if face_analysis_data:
key = "face_analysis"
analysis[key] = face_analysis_data
logger.info(f"Preserved {key} in error case with {len(str(face_analysis_data))} characters")
return analysis
except Exception as e:
logger.error(f"Error during analysis: {str(e)}")
analysis = self._generate_empty_analysis()
analysis["Error"] = f"Analysis error: {str(e)}"
# Also include eye contact and body language data in error cases
if eye_contact_data:
key = "eye_contact_analysis"
analysis[key] = eye_contact_data
logger.info(f"Preserved {key} in error case with {len(str(eye_contact_data))} characters")
if body_language_data:
key = "body_language_analysis"
analysis[key] = body_language_data
logger.info(f"Preserved {key} in error case with {len(str(body_language_data))} characters")
if face_analysis_data:
key = "face_analysis"
analysis[key] = face_analysis_data
logger.info(f"Preserved {key} in error case with {len(str(face_analysis_data))} characters")
return analysis
def _calculate_emotion_percentages(self, emotion_df: pd.DataFrame) -> Dict[str, float]:
"""
Calculate percentages of different emotion categories based on raw emotion scores.
Args:
emotion_df: DataFrame with emotion data
Returns:
Dictionary with emotion percentages for each emotion and grouped categories
"""
# Early return for empty DataFrame
if emotion_df is None or emotion_df.empty:
return {
"angry": 0, "disgust": 0, "fear": 0, "happy": 0,
"sad": 0, "surprise": 0, "neutral": 0,
"positive": 0, "negative": 0
}
# Define emotion categories
all_emotions = {'angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral'}
positive_emotions = {'happy', 'surprise'}
negative_emotions = {'angry', 'disgust', 'fear', 'sad'}
neutral_emotions = {'neutral'}
# Initialize counters for raw emotion scores
emotion_totals = {emotion: 0 for emotion in all_emotions}
total_score = 0
# Process each row's emotion scores
for _, row in emotion_df.iterrows():
# Try to get emotion scores from the row
emotion_scores = {}
# First check if we have raw emotion scores in the DataFrame
if 'emotion_scores' in row and row['emotion_scores']:
emotion_scores = row['emotion_scores']
# If no scores found, try to use the dominant emotion and confidence
if not emotion_scores and 'dominant_emotion' in row and 'emotion_confidence' in row:
emotion = row['dominant_emotion']
confidence = row['emotion_confidence']
if emotion != 'unknown' and confidence > 0:
emotion_scores = {emotion: confidence}
# Skip if no emotion data
if not emotion_scores:
continue
# Sum up scores by emotion
for emotion, score in emotion_scores.items():
total_score += score
if emotion in emotion_totals:
emotion_totals[emotion] += score
# Calculate percentages for each emotion
emotion_percentages = {}
if total_score > 0:
for emotion, total in emotion_totals.items():
emotion_percentages[emotion] = round((total / total_score) * 100, 2)
# Add grouped percentages
positive_total = sum(emotion_totals.get(emotion, 0) for emotion in positive_emotions)
negative_total = sum(emotion_totals.get(emotion, 0) for emotion in negative_emotions)
neutral_total = sum(emotion_totals.get(emotion, 0) for emotion in neutral_emotions)
emotion_percentages.update({
"positive": round((positive_total / total_score) * 100, 2),
"negative": round((negative_total / total_score) * 100, 2)
})
else:
# Return zeros if no data
emotion_percentages = {
"angry": 0, "disgust": 0, "fear": 0, "happy": 0,
"sad": 0, "surprise": 0, "neutral": 0,
"positive": 0, "negative": 0
}
return emotion_percentages
def _determine_sentiment(self, emotion_percentages: Dict[str, float]) -> str:
"""
Determine overall sentiment based on emotion percentages.
Args:
emotion_percentages: Dictionary with emotion percentages
Returns:
Sentiment assessment string
"""
# First try to determine sentiment from individual emotions
individual_emotions = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']
# Find the dominant individual emotion
max_emotion = None
max_score = -1
for emotion in individual_emotions:
if emotion in emotion_percentages and emotion_percentages[emotion] > max_score:
max_score = emotion_percentages[emotion]
max_emotion = emotion
# If we found a dominant individual emotion with significant percentage, use it
if max_emotion and max_score > 30:
return max_emotion.capitalize()
# Otherwise, fall back to category-based sentiment
positive = emotion_percentages.get("positive", 0)
negative = emotion_percentages.get("negative", 0)
neutral = emotion_percentages.get("neutral", 0)
# Use lookup table for thresholds
if positive > 60:
return "Very Positive"
if positive > 40:
return "Positive"
if negative > 60:
return "Very Negative"
if negative > 40:
return "Negative"
if neutral > 60:
return "Very Neutral"
if neutral > 40:
return "Neutral"
# Find dominant category
max_category = max(
("positive", positive),
("negative", negative),
("neutral", neutral),
key=lambda x: x[1]
)
# Map dominant category to sentiment
sentiment_map = {
"positive": "Slightly Positive",
"negative": "Slightly Negative",
"neutral": "Mixed" # Default case
}
return sentiment_map.get(max_category[0], "Mixed")
    def _generate_prompt(
        self,
        sentiment: str,
        raw_emotions: Dict[str, float],
        confidence_by_emotion: Dict[str, float],
        average_confidence: float,
        transcript: str,
        language: str = 'en',
        interview_assessment: Optional[Dict[str, Any]] = None,
        eye_contact_data: Optional[Dict[str, Any]] = None,
        body_language_data: Optional[Dict[str, Any]] = None,
        face_analysis_data: Optional[Dict[str, Any]] = None
    ) -> str:
        """
        Generate a prompt for the AI model.

        Builds the full English prompt (instructions plus a JSON response
        schema) or a simplified prompt for any other language. Optional data
        sections are rendered only when the corresponding payload is supplied.

        Args:
            sentiment: Dominant sentiment
            raw_emotions: Raw emotion scores
            confidence_by_emotion: Confidence scores by emotion
            average_confidence: Average confidence
            transcript: Transcript text
            language: Language of the transcript
            interview_assessment: Optional interview assessment
            eye_contact_data: Optional eye contact analysis data
            body_language_data: Optional body language analysis data
            face_analysis_data: Optional face analysis data

        Returns:
            Prompt for the AI model
        """
        # Format the emotion data as one-line summaries.
        emotions_str = ", ".join([f"{emotion}: {value:.1f}%" for emotion, value in raw_emotions.items()])
        confidence_str = ", ".join([f"{emotion}: {value:.2f}" for emotion, value in confidence_by_emotion.items()])
        # Include eye contact analysis if available — requires BOTH the stats
        # and the assessment sub-dicts to be present.
        eye_contact_str = ""
        if eye_contact_data:
            ec_stats = eye_contact_data.get("eye_contact_stats", {})
            ec_assessment = eye_contact_data.get("assessment", {})
            if ec_stats and ec_assessment:
                eye_contact_str = f"""
Eye Contact Analysis:
- Eye contact percentage: {ec_stats.get('eye_contact_percentage', 0):.1f}%
- Eye contact duration: {ec_stats.get('eye_contact_duration_seconds', 0):.1f} seconds
- Longest eye contact: {ec_stats.get('longest_eye_contact_seconds', 0):.1f} seconds
- Average contact duration: {ec_stats.get('average_contact_duration_seconds', 0):.1f} seconds
- Contact episodes: {ec_stats.get('contact_episodes', 0)}
- Assessment score: {ec_assessment.get('score', 0)}/10
- Key patterns: {', '.join(ec_assessment.get('patterns', []))}
"""
        # Include body language analysis if available (same both-present rule).
        body_language_str = ""
        if body_language_data:
            bl_stats = body_language_data.get("body_language_stats", {})
            bl_assessment = body_language_data.get("assessment", {})
            if bl_stats and bl_assessment:
                body_language_str = f"""
Body Language Analysis:
- Shoulder misalignment percentage: {bl_stats.get('shoulder_misalignment_percentage', 0):.1f}%
- Leaning forward percentage: {bl_stats.get('leaning_forward_percentage', 0):.1f}%
- Head tilt percentage: {bl_stats.get('head_tilt_percentage', 0):.1f}%
- Arms crossed percentage: {bl_stats.get('arms_crossed_percentage', 0):.1f}%
- Self-touch percentage: {bl_stats.get('self_touch_percentage', 0):.1f}%
- Fidgeting percentage: {bl_stats.get('fidgeting_percentage', 0):.1f}%
- Pose shifts per minute: {bl_stats.get('pose_shifts_per_minute', 0):.1f}
- Confidence score: {bl_assessment.get('confidence_score', 0)}/10
- Engagement score: {bl_assessment.get('engagement_score', 0)}/10
- Comfort score: {bl_assessment.get('comfort_score', 0)}/10
- Overall score: {bl_assessment.get('overall_score', 0)}/10
"""
        # Include face analysis if available; the payload is rendered field by
        # field with "No data" placeholders for missing entries.
        face_analysis_str = ""
        if face_analysis_data:
            face_analysis_str = f"""
Face Analysis:
- Professional Impression: {face_analysis_data.get('professionalImpression', 'No data')}
- Attire Assessment: {face_analysis_data.get('attireAssessment', 'No data')}
- Facial Expression: {face_analysis_data.get('facialExpressionAnalysis', 'No data')}
- Background Assessment: {face_analysis_data.get('backgroundAssessment', 'No data')}
- Personality Indicators: {face_analysis_data.get('personalityIndicators', 'No data')}
- Recommendations: {face_analysis_data.get('recommendationsForImprovement', 'No data')}
- Overall Score: {face_analysis_data.get('overallScore', 0)}/10
"""
        # Format the interview assessment if available (embedded as raw JSON).
        interview_str = ""
        if interview_assessment:
            interview_str = f"""
Interview Assessment:
{json.dumps(interview_assessment, indent=2)}
"""
        # Create the prompt with different instructions based on language
        if language.lower() in ['en', 'eng', 'english']:
            prompt = f"""
You are an expert in analyzing human emotions, body language, and eye contact in video interviews. Based on the transcript and emotional data provided, provide a comprehensive analysis of the interview.
Emotion Analysis:
Dominant emotion: {sentiment}
Emotion breakdown: {emotions_str}
Confidence by emotion: {confidence_str}
Average confidence: {average_confidence:.2f}
{eye_contact_str}
{body_language_str}
{face_analysis_str}
{interview_str}
Transcript:
{transcript}
Provide a comprehensive analysis with the following sections:
1. Emotion Analysis: Analyze the emotions detected in the video.
2. Transcript Analysis: Analyze the content of the transcript, key themes, and topics discussed.
3. Body Language Analysis: If body language data is available, analyze the body language observed.
4. Eye Contact Analysis: If eye contact data is available, analyze the eye contact patterns.
5. Face Analysis: If face analysis data is available, analyze the professional appearance, attire, and background.
6. Overall Summary: Provide a holistic view of the interview performance.
7. Recommendations: Suggest improvements for future interviews.
Format your response as a structured JSON with the following keys:
{{
"Emotion Analysis": {{ detailed analysis }},
"Transcript Analysis": {{ detailed analysis }},
"Body Language Analysis": {{ detailed analysis, if data is available }},
"Eye Contact Analysis": {{ detailed analysis, if data is available }},
"Face Analysis": {{ detailed analysis, if data is available }},
"Overall Summary": "summary text",
"Recommendations": {{ recommendations }}
}}
"""
        else:
            # Simplified prompt for other languages
            prompt = f"""
Analyze the following transcript and emotion data.
Emotion data: {sentiment}, {emotions_str}
{eye_contact_str}
{body_language_str}
{face_analysis_str}
{interview_str}
Transcript: {transcript}
Provide a summary of the content and emotional state, formatted as JSON.
"""
        return prompt
def _generate_empty_analysis(self) -> Dict[str, Any]:
"""
Generate empty analysis when no data is available.
Returns:
Empty analysis dictionary
"""
return {
"Emotion Analysis": {
"Dominant Emotions": {
"angry": 0,
"disgust": 0,
"fear": 0,
"happy": 0,
"sad": 0,
"surprise": 0,
"neutral": 0
},
"Confidence By Emotion": {
"angry": 0,
"disgust": 0,
"fear": 0,
"happy": 0,
"sad": 0,
"surprise": 0,
"neutral": 0
},
"Overall Sentiment": "No emotions detected",
"Average Confidence": 0
},
"Transcript Analysis": {
"Key Points": [],
"Language Quality": "No transcript available",
"Confidence Indicators": []
},
"Body Language Analysis": {
"Eye Contact": "No data available",
"Posture and Movement": "No data available",
"Overall Body Language": "No data available"
},
"Overall Summary": "No data available for analysis",
"Recommendations": {
"Emotional Expression": "No recommendations available",
"Communication": "No recommendations available",
"Body Language": "No recommendations available",
"Professional Appearance": "No recommendations available"
}
}
def _extract_json_from_text(self, text: str) -> Dict[str, Any]:
"""
Extract JSON from a text string that might contain other content.
Args:
text: The text to extract JSON from
Returns:
Extracted JSON as dict, or empty dict if extraction fails
"""
try:
# First try to parse the entire text as JSON
return json.loads(text)
except json.JSONDecodeError:
# If that fails, try to find JSON-like content
try:
# Check if text starts with markdown code block
if text.strip().startswith("```json"):
# Extract content between the markdown delimiters
parts = text.split("```")
if len(parts) >= 3: # At least opening and closing backticks with content between
# Get the content after the first ``` and before the next ```
json_str = parts[1]
# Remove "json" language identifier if present
json_str = json_str.replace("json", "", 1).strip()
# Try to parse the extracted JSON
return json.loads(json_str)
elif text.strip().startswith("```"):
# Similar handling for code blocks without language specification
parts = text.split("```")
if len(parts) >= 3:
json_str = parts[1].strip()
return json.loads(json_str)
# Find the first opening brace and the last closing brace
json_start = text.find('{')
json_end = text.rfind('}') + 1
if json_start >= 0 and json_end > json_start:
json_str = text[json_start:json_end]
# Try to parse the extracted JSON
return json.loads(json_str)
# If no braces found, look for markdown code blocks elsewhere in the text
if "```json" in text or "```" in text:
# Try to extract from code blocks
lines = text.split("\n")
start_line = -1
end_line = -1
for i, line in enumerate(lines):
if "```json" in line or line.strip() == "```":
if start_line == -1:
start_line = i
else:
end_line = i
break
if start_line != -1 and end_line != -1:
# Extract content between markdown delimiters
json_content = "\n".join(lines[start_line+1:end_line])
# Clean up and parse
json_content = json_content.replace("json", "", 1).strip()
return json.loads(json_content)
except Exception as e:
logger.error(f"Error extracting JSON from text: {str(e)}")
# If all extraction attempts fail, return empty dict
return {}
def _format_confidence_values(self, raw_emotions: Dict[str, float], confidence_by_emotion: Dict[str, float]) -> Dict[str, float]:
"""
Format the confidence values to match what's expected in the database.
Args:
raw_emotions: Raw emotion data
confidence_by_emotion: Confidence values by emotion
Returns:
Formatted confidence values
"""
# First check if we have proper confidence values from confidence_by_emotion
if confidence_by_emotion and any(val > 0 for val in confidence_by_emotion.values()):
logger.info(f"Using provided confidence values: {confidence_by_emotion}")
# Ensure values are properly formatted
return {emotion: round(confidence, 2) for emotion, confidence in confidence_by_emotion.items()}
else:
# No valid confidence values found, log this fact
logger.warning("No valid confidence values found, using raw emotions as proxy for confidence")
# Use the raw emotions as proxy for confidence (this was the source of the issue)
return {emotion: round(value, 2) for emotion, value in raw_emotions.items()}
def _get_dominant_confidence(self, raw_emotions: Dict[str, float], average_confidence: float) -> float:
"""
Get the confidence value of the dominant emotion.
Args:
raw_emotions: Raw emotion data
average_confidence: Average confidence value from the data
Returns:
Dominant emotion confidence
"""
# Simply return the provided average_confidence
# This method is maintained for backward compatibility
logger.info(f"Using average confidence: {average_confidence}")
return round(average_confidence, 2)