File size: 42,582 Bytes
8ae78b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
import os
import json
import logging
import pandas as pd
import openai
from typing import Dict, Any, List, Optional

# Fix import paths: prefer the package-absolute imports used when running
# inside the app; fall back to the repo-root layout (behavior_backend.*)
# used when the module is executed from the project root.
try:
    from app.utils.logging_utils import time_it, setup_logger
    from app.core.config import settings
except ImportError:
    # Try relative imports for running from project root
    # NOTE(review): if this import also fails, the ImportError propagates —
    # only the settings object is stubbed below, not the logging utilities.
    from behavior_backend.app.utils.logging_utils import time_it, setup_logger
    # Mock settings for testing
    class Settings:
        # Mirrors app.core.config.settings for the single field this module reads.
        OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
        
    settings = Settings()

# Configure logging
logger = setup_logger(__name__)

class AIAnalysisService:
    """Service for AI analysis operations."""
    
    def __init__(self):
        """Initialize the AI analysis service.

        Creates the OpenAI client once; requests are issued through
        ``self.client`` by the analysis methods.
        """
        # Prefer the configured key so the import fallback above is honored;
        # keep the environment variable as a last resort (original behavior
        # read only the environment and ignored `settings` entirely).
        api_key = getattr(settings, "OPENAI_API_KEY", "") or os.environ.get("OPENAI_API_KEY", "")
        self.client = openai.OpenAI(api_key=api_key)
    
    @time_it
    def analyze_emotions_and_transcript(
        self, 
        emotion_df: pd.DataFrame, 
        transcript: str,
        language: str = 'en',
        interview_assessment: Optional[Dict[str, Any]] = None,
        eye_contact_data: Optional[Dict[str, Any]] = None,
        body_language_data: Optional[Dict[str, Any]] = None,
        face_analysis_data: Optional[Dict[str, Any]] = None,
        model_name: str = "gpt-4o"
    ) -> Dict[str, Any]:
        """
        Analyze emotions and transcript using OpenAI.
        
        Args:
            emotion_df: DataFrame with emotion data
            transcript: Transcript text
            language: Language of the transcript
            interview_assessment: Optional interview assessment
            eye_contact_data: Optional eye contact analysis data
            body_language_data: Optional body language analysis data
            face_analysis_data: Optional face analysis data
            model_name: The name of the model to use for AI analysis
        Returns:
            Dictionary with analysis results
        """
        print("*******************************I AM INSIDE AI ANALYSER *******************************************************")
        logger.info(f"Received interview assessment: {interview_assessment}")
        logger.info(f"Received transcript: {transcript}")
        logger.info(f"Received language: {language}")
        logger.info(f"Received emotion_df: {emotion_df}")
        logger.info(f"Received eye contact data: {eye_contact_data is not None}")
        logger.info(f"Received body language data: {body_language_data is not None}")
        logger.info(f"Received face analysis data: {face_analysis_data is not None}")
        logger.info(f"Using AI model: {model_name}")
        
        # Check if emotion_df is empty or None
        if emotion_df is None or emotion_df.empty:
            logger.warning("No emotion data available for analysis")
            return self._generate_empty_analysis()
        
        try:
            # Extract raw emotion scores from the DataFrame
            raw_emotions = {}
            confidence_by_emotion = {}
            average_confidence = 0
            confidence_data = {}
            
            # Get primary emotion data from the first row of the DataFrame
            if not emotion_df.empty and 'raw_emotion_data' in emotion_df.columns:
                first_row = emotion_df.iloc[0]
                if isinstance(first_row['raw_emotion_data'], dict) and first_row['raw_emotion_data']:
                    raw_emotions = first_row['raw_emotion_data']
                    logger.info(f"Using raw_emotion_data from DataFrame: {raw_emotions}")
                    
                    # Check if confidence data is available in the first row (this would be the "confidence_data" field)
                    if 'confidence_data' in emotion_df.columns and isinstance(first_row.get('confidence_data'), dict):
                        confidence_data = first_row['confidence_data']
                        confidence_by_emotion = confidence_data.get('confidence_by_emotion', {})
                        average_confidence = confidence_data.get('average_confidence', 0)
                        
                        # Round confidence values to 2 decimal places
                        confidence_by_emotion = {emotion: round(value, 2) for emotion, value in confidence_by_emotion.items()}
                        average_confidence = round(average_confidence, 2)
                        
                        logger.info(f"Using rounded confidence_data - confidence_by_emotion: {confidence_by_emotion}")
                        logger.info(f"Using rounded confidence_data - average_confidence: {average_confidence}")
                        
                        # Store rounded values back to confidence_data for consistency
                        confidence_data['confidence_by_emotion'] = confidence_by_emotion
                        confidence_data['average_confidence'] = average_confidence
            
            # If no raw_emotion_data found, fall back to other methods
            if not raw_emotions:
                logger.info("No raw_emotion_data found, trying alternative sources")
                # First check if we have a main_face column
                if 'main_face' in emotion_df.columns and not emotion_df.empty:
                    first_row = emotion_df.iloc[0]
                    main_face = first_row.get('main_face', {})
                    if isinstance(main_face, dict) and main_face and 'emotion' in main_face:
                        raw_emotions = main_face['emotion']
                        logger.info(f"Using emotion from main_face: {raw_emotions}")
                
                # If still no raw emotions, try emotion_scores from first row
                if not raw_emotions and 'emotion_scores' in emotion_df.columns and not emotion_df.empty:
                    first_row = emotion_df.iloc[0]
                    emotion_scores = first_row.get('emotion_scores', {})
                    if isinstance(emotion_scores, dict) and emotion_scores:
                        raw_emotions = emotion_scores
                        logger.info(f"Using emotion_scores from first row: {raw_emotions}")
            
            # If still no raw emotions found, log this issue
            if not raw_emotions:
                logger.warning("No emotion data found in the DataFrame")
                # Use empty dict with zero values for all emotions
                raw_emotions = {
                    "angry": 0, "disgust": 0, "fear": 0, "happy": 0,
                    "sad": 0, "surprise": 0, "neutral": 0
                }
            
            # Extract confidence values if available
            average_confidence = 0
            
            # If we have a 'confidence_by_emotion' stat available in any fashion, use it
            if 'main_face' in emotion_df.columns and not emotion_df.empty:
                # Calculate confidence values from dominant emotions in the data
                confidence_values = []
                emotion_confidence_counts = {}
                
                for index, row in emotion_df.iterrows():
                    if 'main_face' in row and row['main_face'] and 'emotion_confidence' in row['main_face']:
                        confidence = row['main_face']['emotion_confidence']
                        emotion = row['main_face'].get('dominant_emotion', 'neutral')
                        
                        # Add to average confidence
                        confidence_values.append(confidence)
                        
                        # Track by emotion
                        if emotion not in emotion_confidence_counts:
                            emotion_confidence_counts[emotion] = []
                        emotion_confidence_counts[emotion].append(confidence)
                
                # Calculate average confidence
                if confidence_values:
                    average_confidence = sum(confidence_values) / len(confidence_values)
                    
                    # Calculate average confidence by emotion
                    for emotion, confidences in emotion_confidence_counts.items():
                        if confidences:
                            confidence_by_emotion[emotion] = sum(confidences) / len(confidences)
            
            # If we don't have confidence values, check if we have any in first face
            if not confidence_by_emotion and 'faces' in emotion_df.columns and not emotion_df.empty:
                for index, row in emotion_df.iterrows():
                    if 'faces' in row and row['faces'] and len(row['faces']) > 0 and 'emotion_confidence' in row['faces'][0]:
                        confidence = row['faces'][0]['emotion_confidence']
                        emotion = row['faces'][0].get('dominant_emotion', 'neutral')
                        
                        # Add to average confidence
                        if 'confidence_values' not in locals():
                            confidence_values = []
                        confidence_values.append(confidence)
                        
                        # Track by emotion
                        if emotion not in emotion_confidence_counts:
                            emotion_confidence_counts = {}
                            emotion_confidence_counts[emotion] = []
                        emotion_confidence_counts[emotion].append(confidence)
                
                # Calculate average confidence
                if 'confidence_values' in locals() and confidence_values:
                    average_confidence = sum(confidence_values) / len(confidence_values)
                    
                    # Calculate average confidence by emotion
                    for emotion, confidences in emotion_confidence_counts.items():
                        if confidences:
                            confidence_by_emotion[emotion] = sum(confidences) / len(confidences)
            
            # If we still don't have confidence values, use the raw emotions as proxy for confidence
            if not confidence_by_emotion and raw_emotions:
                # Use the raw emotion values as proxy for confidence
                # This ensures we at least have something
                confidence_by_emotion = {k: round(v, 2) for k, v in raw_emotions.items()}
                dominant_emotion, max_value = max(raw_emotions.items(), key=lambda x: x[1], default=("neutral", 0))
                average_confidence = max_value
            
            # Format the confidence values for display
            for emotion in confidence_by_emotion:
                # Do not round the values to preserve the exact data
                pass
            
            # Add debug logging for average_confidence
            logger.info(f"Final average_confidence value to be used in result: {average_confidence}")
            
            # Get the original average_confidence from the confidence_data for the database
            db_average_confidence = confidence_data.get("average_confidence", average_confidence)
            logger.info(f"Using average_confidence from confidence_data for database: {db_average_confidence}")
            
            # Determine overall sentiment based on the dominant emotion
            if 'overall_sentiment' in first_row and first_row['overall_sentiment']:
                # Use the exact sentiment from the DataFrame if available
                sentiment = first_row['overall_sentiment']
                logger.info(f"Using overall_sentiment from DataFrame: {sentiment}")
            elif raw_emotions:
                # Find the dominant emotion only if we don't have a sentiment already
                dominant_emotion, _ = max(raw_emotions.items(), key=lambda x: x[1], default=("neutral", 0))
                sentiment = dominant_emotion.capitalize()
                logger.info(f"Calculated sentiment from raw_emotions: {sentiment}")
            else:
                # Use the standard method if no raw emotions
                sentiment = self._determine_sentiment(raw_emotions)
                logger.info(f"Determined sentiment via standard method: {sentiment}")
            
            # Prepare prompt for OpenAI
            prompt = self._generate_prompt(
                sentiment=sentiment,
                raw_emotions=raw_emotions,
                confidence_by_emotion=confidence_by_emotion,
                average_confidence=average_confidence,
                transcript=transcript,
                language=language,
                interview_assessment=interview_assessment,
                eye_contact_data=eye_contact_data,
                body_language_data=body_language_data,
            )
            logger.info(f"Generated prompt: {prompt}")
            # Call OpenAI API
            try:
                system_prompt = """
You are an expert in analyzing emotions and speech for job interviews and professional presentations.
You are given a transcript of a video, a summary of the emotions expressed in the video, and detailed interview assessment data when available.
You are also given the overall sentiment of the video.
You may also be provided with face analysis, eye contact analysis, and body language analysis.
You are to analyze all provided data and provide a comprehensive analysis in JSON format.
Your evaluation must be based on the transcript, emotions expressed, interview assessment data, face analysis, eye contact analysis, and body language analysis (when provided).
You are to provide a detailed analysis, including:
- Key points from the transcript
- Language quality assessment
- Confidence indicators
- Overall assessment of the performance including body language, eye contact, and professional appearance
- Recommendations for improving emotional expression, communication, body language, and professional appearance

Please provide a comprehensive analysis in JSON format with the following structure:
{
    "Transcript Analysis": {
        "Key Points": List of key points as bullet points <ul>...</ul> in HTML format from the transcript with critical insight for an HR manager. Use bold <b>...</b> tags to highlight important points.
        "Language Quality": Bullet points  <ul>...</ul> in HTML format of assessment of language use, vocabulary,grammar mistakes, clarity, professionalism, and other language-related metrics. Use bold <b>...</b> tags to highlight important points.
        "Confidence Indicators":  Bullet points  <ul>...</ul> in HTML format of analysis of confidence based on language.
    },
    "Body Language Analysis": {
        "Eye Contact": Analysis of eye contact patterns in HTML format based on the interview assessment data.
        "Posture and Movement": Analysis of posture, movement, and other body language indicators in HTML format.
        "Overall Body Language": Summary assessment of body language in HTML format.
    },
    "Overall Summary": overall assessment of the candidate interview performance with critical insight for an HR manager. Use a chain of thought approach to analyze all available data and provide a comprehensive analysis. Write in HTML and highlight important points with bold <b>...</b> tags.    
    "Recommendations": {
        "Emotional Expression": bullet points <ul>...</ul> in HTML format of recommendations for improving emotional expression using bold <b>...</b> tags.
        "Communication": bullet points <ul>...</ul> in HTML format of recommendations for improving communication using bold <b>...</b> tags.
        "Body Language": bullet points <ul>...</ul> in HTML format of specific recommendations for improving body language based on the assessment data using bold <b>...</b> tags.
        "Professional Appearance": bullet points <ul>...</ul> in HTML format of specific recommendations for improving professional appearance using bold <b>...</b> tags.
    }
}
"""
                
                response = self.client.chat.completions.create(
                    model=model_name,
                    messages=[
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": prompt}
                    ],
                    temperature=0.7,
                    max_tokens=2500,
                    frequency_penalty=0,
                    presence_penalty=0.2
                )
                
                analysis_text = response.choices[0].message.content.strip()
                
                # Parse the JSON response
                try:
                    analysis = json.loads(analysis_text)
                    logger.info("Successfully parsed the OpenAI response")
                except Exception as parse_error:
                    logger.error(f"Failed to parse OpenAI response as JSON: {str(parse_error)}")
                    logger.info(f"Response content: {analysis_text}")
                    analysis = self._extract_json_from_text(analysis_text)
                    
                    if not analysis:
                        logger.warning("Returning standard analysis structure with error message")
                        analysis = self._generate_empty_analysis()
                        analysis["Error"] = "Failed to parse OpenAI response"
                
                # Add raw emotion data to the analysis for consistency with database storage
                analysis["Emotion Analysis"] = {
                    "Dominant Emotions": raw_emotions,
                    "Confidence By Emotion": confidence_by_emotion,
                    "Overall Sentiment": sentiment,
                    "Average Confidence": db_average_confidence
                }
                
                # Add eye contact and body language data directly to the analysis 
                # to ensure it's preserved in the returned JSON, using the same keys
                # as in the video_processor.py when it creates comprehensive_results
                if eye_contact_data:
                    # Use lowercase key to match video_processor.py
                    key = "eye_contact_analysis"
                    analysis[key] = eye_contact_data
                    logger.info(f"Added {key} to results with {len(str(eye_contact_data))} characters")
                
                if body_language_data:
                    # Use lowercase key to match video_processor.py
                    key = "body_language_analysis"
                    analysis[key] = body_language_data
                    logger.info(f"Added {key} to results with {len(str(body_language_data))} characters")
                
                if face_analysis_data:
                    # Use lowercase key to match video_processor.py
                    key = "face_analysis"
                    analysis[key] = face_analysis_data
                    logger.info(f"Added {key} to results with {len(str(face_analysis_data))} characters")
                
                # Log the exact emotion analysis that will be stored in the database
                logger.info(f"Emotion Analysis to be stored in database: {analysis['Emotion Analysis']}")
                logger.info(f"Added eye_contact_analysis to results: {bool(eye_contact_data)}")
                logger.info(f"Added body_language_analysis to results: {bool(body_language_data)}")
                logger.info(f"Added face_analysis to results: {bool(face_analysis_data)}")
                
                return analysis
                
            except Exception as api_error:
                logger.error(f"Error during OpenAI API call: {str(api_error)}")
                analysis = self._generate_empty_analysis()
                analysis["Error"] = f"OpenAI API error: {str(api_error)}"
                
                # Still include the emotion data for consistency
                analysis["Emotion Analysis"] = {
                    "Dominant Emotions": raw_emotions,
                    "Confidence By Emotion": confidence_by_emotion,
                    "Overall Sentiment": sentiment,
                    "Average Confidence": db_average_confidence
                }
                
                # Also include eye contact and body language data in error cases
                if eye_contact_data:
                    key = "eye_contact_analysis"
                    analysis[key] = eye_contact_data
                    logger.info(f"Preserved {key} in error case with {len(str(eye_contact_data))} characters")
                
                if body_language_data:
                    key = "body_language_analysis"
                    analysis[key] = body_language_data
                    logger.info(f"Preserved {key} in error case with {len(str(body_language_data))} characters")
                
                if face_analysis_data:
                    key = "face_analysis"
                    analysis[key] = face_analysis_data
                    logger.info(f"Preserved {key} in error case with {len(str(face_analysis_data))} characters")
                
                return analysis
                
        except Exception as e:
            logger.error(f"Error during analysis: {str(e)}")
            analysis = self._generate_empty_analysis()
            analysis["Error"] = f"Analysis error: {str(e)}"
            
            # Also include eye contact and body language data in error cases
            if eye_contact_data:
                key = "eye_contact_analysis"
                analysis[key] = eye_contact_data
                logger.info(f"Preserved {key} in error case with {len(str(eye_contact_data))} characters")
            
            if body_language_data:
                key = "body_language_analysis"
                analysis[key] = body_language_data
                logger.info(f"Preserved {key} in error case with {len(str(body_language_data))} characters")
            
            if face_analysis_data:
                key = "face_analysis"
                analysis[key] = face_analysis_data
                logger.info(f"Preserved {key} in error case with {len(str(face_analysis_data))} characters")
            
            return analysis
    
    def _calculate_emotion_percentages(self, emotion_df: pd.DataFrame) -> Dict[str, float]:
        """
        Calculate percentages of different emotion categories based on raw emotion scores.
        
        Args:
            emotion_df: DataFrame with emotion data
            
        Returns:
            Dictionary with a percentage (0-100, rounded to 2 decimals) for each
            of the seven base emotions plus grouped "positive" and "negative"
            categories.  All values are 0 when no usable emotion data exists.
        """
        zero_percentages = {
            "angry": 0, "disgust": 0, "fear": 0, "happy": 0, 
            "sad": 0, "surprise": 0, "neutral": 0,
            "positive": 0, "negative": 0
        }
        
        # Early return for empty DataFrame
        if emotion_df is None or emotion_df.empty:
            return dict(zero_percentages)
        
        # Emotion groupings used for the aggregate categories.
        all_emotions = {'angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral'}
        positive_emotions = {'happy', 'surprise'}
        negative_emotions = {'angry', 'disgust', 'fear', 'sad'}
        
        emotion_totals = {emotion: 0 for emotion in all_emotions}
        total_score = 0
        
        for _, row in emotion_df.iterrows():
            # Prefer explicit per-row emotion scores...
            emotion_scores = {}
            if 'emotion_scores' in row and row['emotion_scores']:
                emotion_scores = row['emotion_scores']
            
            # ...otherwise synthesize a single-entry score from the row's
            # dominant emotion and its confidence.
            if not emotion_scores and 'dominant_emotion' in row and 'emotion_confidence' in row:
                emotion = row['dominant_emotion']
                confidence = row['emotion_confidence']
                if emotion != 'unknown' and confidence > 0:
                    emotion_scores = {emotion: confidence}
            
            # Skip rows with no emotion data.
            if not emotion_scores:
                continue
            
            # total_score counts every score (including labels outside the
            # seven known emotions), so percentages are normalized against
            # everything observed.
            for emotion, score in emotion_scores.items():
                total_score += score
                if emotion in emotion_totals:
                    emotion_totals[emotion] += score
        
        if total_score <= 0:
            return dict(zero_percentages)
        
        emotion_percentages = {
            emotion: round((total / total_score) * 100, 2)
            for emotion, total in emotion_totals.items()
        }
        
        # Grouped category percentages.
        positive_total = sum(emotion_totals[emotion] for emotion in positive_emotions)
        negative_total = sum(emotion_totals[emotion] for emotion in negative_emotions)
        emotion_percentages["positive"] = round((positive_total / total_score) * 100, 2)
        emotion_percentages["negative"] = round((negative_total / total_score) * 100, 2)
        
        return emotion_percentages
    
    def _determine_sentiment(self, emotion_percentages: Dict[str, float]) -> str:
        """
        Map emotion percentages to a human-readable sentiment label.
        
        A single emotion above 30% wins outright; otherwise the grouped
        positive/negative/neutral percentages are checked against fixed
        thresholds, and finally the strongest category decides.
        
        Args:
            emotion_percentages: Dictionary with emotion percentages
            
        Returns:
            Sentiment assessment string
        """
        base_emotions = ('angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral')
        
        # Pass 1: a dominant individual emotion (first-wins on ties).
        leader = None
        leader_score = -1
        for name in base_emotions:
            if name in emotion_percentages and emotion_percentages[name] > leader_score:
                leader_score = emotion_percentages[name]
                leader = name
        
        if leader and leader_score > 30:
            return leader.capitalize()
        
        # Pass 2: fixed thresholds on the grouped categories, checked in order.
        positive = emotion_percentages.get("positive", 0)
        negative = emotion_percentages.get("negative", 0)
        neutral = emotion_percentages.get("neutral", 0)
        
        for score, cutoff, label in (
            (positive, 60, "Very Positive"),
            (positive, 40, "Positive"),
            (negative, 60, "Very Negative"),
            (negative, 40, "Negative"),
            (neutral, 60, "Very Neutral"),
            (neutral, 40, "Neutral"),
        ):
            if score > cutoff:
                return label
        
        # Pass 3: whichever category leads (ties favor positive, then negative).
        winner = max(
            ("positive", positive),
            ("negative", negative),
            ("neutral", neutral),
            key=lambda pair: pair[1]
        )
        
        labels = {
            "positive": "Slightly Positive",
            "negative": "Slightly Negative",
        }
        return labels.get(winner[0], "Mixed")
    
    def _generate_prompt(
        self, 
        sentiment: str, 
        raw_emotions: Dict[str, float],
        confidence_by_emotion: Dict[str, float],
        average_confidence: float,
        transcript: str,
        language: str = 'en',
        interview_assessment: Optional[Dict[str, Any]] = None,
        eye_contact_data: Optional[Dict[str, Any]] = None,
        body_language_data: Optional[Dict[str, Any]] = None,
        face_analysis_data: Optional[Dict[str, Any]] = None
    ) -> str:
        """
        Generate a prompt for the AI model.
        
        Assembles the emotion statistics, any available eye-contact /
        body-language / face-analysis sections, the optional interview
        assessment, and the transcript into a single text prompt.
        English transcripts get a detailed prompt requesting a structured
        JSON response; all other languages get a simplified prompt.
        
        Args:
            sentiment: Dominant sentiment
            raw_emotions: Raw emotion scores (rendered as percentages)
            confidence_by_emotion: Confidence scores by emotion
            average_confidence: Average confidence
            transcript: Transcript text
            language: Language of the transcript
            interview_assessment: Optional interview assessment
            eye_contact_data: Optional eye contact analysis data
            body_language_data: Optional body language analysis data
            face_analysis_data: Optional face analysis data
        
        Returns:
            Prompt for the AI model
        """
        # Format the emotion data as comma-separated "name: value" lists.
        emotions_str = ", ".join([f"{emotion}: {value:.1f}%" for emotion, value in raw_emotions.items()])
        confidence_str = ", ".join([f"{emotion}: {value:.2f}" for emotion, value in confidence_by_emotion.items()])
        
        # Include eye contact analysis if available.
        # The section renders only when BOTH "eye_contact_stats" and
        # "assessment" sub-dicts are present and non-empty.
        eye_contact_str = ""
        if eye_contact_data:
            ec_stats = eye_contact_data.get("eye_contact_stats", {})
            ec_assessment = eye_contact_data.get("assessment", {})
            if ec_stats and ec_assessment:
                eye_contact_str = f"""
                Eye Contact Analysis:
                - Eye contact percentage: {ec_stats.get('eye_contact_percentage', 0):.1f}%
                - Eye contact duration: {ec_stats.get('eye_contact_duration_seconds', 0):.1f} seconds
                - Longest eye contact: {ec_stats.get('longest_eye_contact_seconds', 0):.1f} seconds
                - Average contact duration: {ec_stats.get('average_contact_duration_seconds', 0):.1f} seconds
                - Contact episodes: {ec_stats.get('contact_episodes', 0)}
                - Assessment score: {ec_assessment.get('score', 0)}/10
                - Key patterns: {', '.join(ec_assessment.get('patterns', []))}
                """
        
        # Include body language analysis if available (same both-present
        # requirement as the eye-contact section above).
        body_language_str = ""
        if body_language_data:
            bl_stats = body_language_data.get("body_language_stats", {})
            bl_assessment = body_language_data.get("assessment", {})
            if bl_stats and bl_assessment:
                body_language_str = f"""
                Body Language Analysis:
                - Shoulder misalignment percentage: {bl_stats.get('shoulder_misalignment_percentage', 0):.1f}%
                - Leaning forward percentage: {bl_stats.get('leaning_forward_percentage', 0):.1f}%
                - Head tilt percentage: {bl_stats.get('head_tilt_percentage', 0):.1f}%
                - Arms crossed percentage: {bl_stats.get('arms_crossed_percentage', 0):.1f}%
                - Self-touch percentage: {bl_stats.get('self_touch_percentage', 0):.1f}%
                - Fidgeting percentage: {bl_stats.get('fidgeting_percentage', 0):.1f}%
                - Pose shifts per minute: {bl_stats.get('pose_shifts_per_minute', 0):.1f}
                - Confidence score: {bl_assessment.get('confidence_score', 0)}/10
                - Engagement score: {bl_assessment.get('engagement_score', 0)}/10
                - Comfort score: {bl_assessment.get('comfort_score', 0)}/10
                - Overall score: {bl_assessment.get('overall_score', 0)}/10
                """
        
        # Include face analysis if available (flat dict, camelCase keys —
        # presumably produced by an upstream vision model; verify against caller).
        face_analysis_str = ""
        if face_analysis_data:
            face_analysis_str = f"""
            Face Analysis:
            - Professional Impression: {face_analysis_data.get('professionalImpression', 'No data')}
            - Attire Assessment: {face_analysis_data.get('attireAssessment', 'No data')}
            - Facial Expression: {face_analysis_data.get('facialExpressionAnalysis', 'No data')}
            - Background Assessment: {face_analysis_data.get('backgroundAssessment', 'No data')}
            - Personality Indicators: {face_analysis_data.get('personalityIndicators', 'No data')}
            - Recommendations: {face_analysis_data.get('recommendationsForImprovement', 'No data')}
            - Overall Score: {face_analysis_data.get('overallScore', 0)}/10
            """
        
        # Format the interview assessment if available (embedded verbatim as JSON).
        interview_str = ""
        if interview_assessment:
            interview_str = f"""
            Interview Assessment:
            {json.dumps(interview_assessment, indent=2)}
            """
        
        # Create the prompt with different instructions based on language.
        # NOTE: doubled braces {{ }} in the template below are literal braces
        # in the emitted prompt (JSON response skeleton for the model).
        if language.lower() in ['en', 'eng', 'english']:
            prompt = f"""
            You are an expert in analyzing human emotions, body language, and eye contact in video interviews. Based on the transcript and emotional data provided, provide a comprehensive analysis of the interview.

            Emotion Analysis:
            Dominant emotion: {sentiment}
            Emotion breakdown: {emotions_str}
            Confidence by emotion: {confidence_str}
            Average confidence: {average_confidence:.2f}
            
            {eye_contact_str}
            
            {body_language_str}
            
            {face_analysis_str}
            
            {interview_str}
            
            Transcript:
            {transcript}

            Provide a comprehensive analysis with the following sections:
            1. Emotion Analysis: Analyze the emotions detected in the video.
            2. Transcript Analysis: Analyze the content of the transcript, key themes, and topics discussed.
            3. Body Language Analysis: If body language data is available, analyze the body language observed.
            4. Eye Contact Analysis: If eye contact data is available, analyze the eye contact patterns.
            5. Face Analysis: If face analysis data is available, analyze the professional appearance, attire, and background.
            6. Overall Summary: Provide a holistic view of the interview performance.
            7. Recommendations: Suggest improvements for future interviews.

            Format your response as a structured JSON with the following keys:
            {{
                "Emotion Analysis": {{ detailed analysis }},
                "Transcript Analysis": {{ detailed analysis }},
                "Body Language Analysis": {{ detailed analysis, if data is available }},
                "Eye Contact Analysis": {{ detailed analysis, if data is available }},
                "Face Analysis": {{ detailed analysis, if data is available }},
                "Overall Summary": "summary text",
                "Recommendations": {{ recommendations }}
            }}
            """
        else:
            # Simplified prompt for other languages
            prompt = f"""
            Analyze the following transcript and emotion data.
            
            Emotion data: {sentiment}, {emotions_str}
            
            {eye_contact_str}
            
            {body_language_str}
            
            {face_analysis_str}
            
            {interview_str}
            
            Transcript: {transcript}
            
            Provide a summary of the content and emotional state, formatted as JSON.
            """
        
        return prompt
    
    def _generate_empty_analysis(self) -> Dict[str, Any]:
        """
        Generate empty analysis when no data is available.
        
        Returns:
            Empty analysis dictionary
        """
        return {
            "Emotion Analysis": {
                "Dominant Emotions": {
                    "angry": 0,
                    "disgust": 0,
                    "fear": 0,
                    "happy": 0,
                    "sad": 0,
                    "surprise": 0,
                    "neutral": 0
                },
                "Confidence By Emotion": {
                    "angry": 0,
                    "disgust": 0,
                    "fear": 0,
                    "happy": 0,
                    "sad": 0,
                    "surprise": 0,
                    "neutral": 0
                },
                "Overall Sentiment": "No emotions detected",
                "Average Confidence": 0
            },
            "Transcript Analysis": {
                "Key Points": [],
                "Language Quality": "No transcript available",
                "Confidence Indicators": []
            },
            "Body Language Analysis": {
                "Eye Contact": "No data available",
                "Posture and Movement": "No data available",
                "Overall Body Language": "No data available"
            },
            "Overall Summary": "No data available for analysis",
            "Recommendations": {
                "Emotional Expression": "No recommendations available",
                "Communication": "No recommendations available",
                "Body Language": "No recommendations available",
                "Professional Appearance": "No recommendations available"
            }
        }
    
    def _extract_json_from_text(self, text: str) -> Dict[str, Any]:
        """
        Extract JSON from a text string that might contain other content.
        
        Args:
            text: The text to extract JSON from
            
        Returns:
            Extracted JSON as dict, or empty dict if extraction fails
        """
        try:
            # First try to parse the entire text as JSON
            return json.loads(text)
        except json.JSONDecodeError:
            # If that fails, try to find JSON-like content
            try:
                # Check if text starts with markdown code block
                if text.strip().startswith("```json"):
                    # Extract content between the markdown delimiters
                    parts = text.split("```")
                    if len(parts) >= 3:  # At least opening and closing backticks with content between
                        # Get the content after the first ``` and before the next ```
                        json_str = parts[1]
                        # Remove "json" language identifier if present
                        json_str = json_str.replace("json", "", 1).strip()
                        # Try to parse the extracted JSON
                        return json.loads(json_str)
                elif text.strip().startswith("```"):
                    # Similar handling for code blocks without language specification
                    parts = text.split("```")
                    if len(parts) >= 3:
                        json_str = parts[1].strip()
                        return json.loads(json_str)
                
                # Find the first opening brace and the last closing brace
                json_start = text.find('{')
                json_end = text.rfind('}') + 1
                
                if json_start >= 0 and json_end > json_start:
                    json_str = text[json_start:json_end]
                    # Try to parse the extracted JSON
                    return json.loads(json_str)
                
                # If no braces found, look for markdown code blocks elsewhere in the text
                if "```json" in text or "```" in text:
                    # Try to extract from code blocks
                    lines = text.split("\n")
                    start_line = -1
                    end_line = -1
                    
                    for i, line in enumerate(lines):
                        if "```json" in line or line.strip() == "```":
                            if start_line == -1:
                                start_line = i
                            else:
                                end_line = i
                                break
                    
                    if start_line != -1 and end_line != -1:
                        # Extract content between markdown delimiters
                        json_content = "\n".join(lines[start_line+1:end_line])
                        # Clean up and parse
                        json_content = json_content.replace("json", "", 1).strip()
                        return json.loads(json_content)
            except Exception as e:
                logger.error(f"Error extracting JSON from text: {str(e)}")
            
            # If all extraction attempts fail, return empty dict
            return {}
    
    def _format_confidence_values(self, raw_emotions: Dict[str, float], confidence_by_emotion: Dict[str, float]) -> Dict[str, float]:
        """
        Format the confidence values to match what's expected in the database.

        Prefers the explicit per-emotion confidence values; when none are
        positive (or the mapping is empty), falls back to the raw emotion
        scores as a stand-in.

        Args:
            raw_emotions: Raw emotion data
            confidence_by_emotion: Confidence values by emotion

        Returns:
            Formatted confidence values, rounded to two decimals
        """
        has_real_confidence = bool(confidence_by_emotion) and any(
            score > 0 for score in confidence_by_emotion.values()
        )

        if has_real_confidence:
            logger.info(f"Using provided confidence values: {confidence_by_emotion}")
            return {name: round(score, 2) for name, score in confidence_by_emotion.items()}

        # Fall back to raw emotion scores as a confidence proxy; log so the
        # substitution is visible (this substitution was once a bug source).
        logger.warning("No valid confidence values found, using raw emotions as proxy for confidence")
        return {name: round(score, 2) for name, score in raw_emotions.items()}
    
    def _get_dominant_confidence(self, raw_emotions: Dict[str, float], average_confidence: float) -> float:
        """
        Get the confidence value of the dominant emotion.

        Kept for backward compatibility: despite the name, it simply
        echoes the supplied average confidence (raw_emotions is unused).

        Args:
            raw_emotions: Raw emotion data (unused)
            average_confidence: Average confidence value from the data

        Returns:
            Dominant emotion confidence, rounded to two decimals
        """
        result = round(average_confidence, 2)
        logger.info(f"Using average confidence: {average_confidence}")
        return result