import os import json import logging import pandas as pd import openai from typing import Dict, Any, List, Optional # Fix import paths try: from app.utils.logging_utils import time_it, setup_logger from app.core.config import settings except ImportError: # Try relative imports for running from project root from behavior_backend.app.utils.logging_utils import time_it, setup_logger # Mock settings for testing class Settings: OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "") settings = Settings() # Configure logging logger = setup_logger(__name__) class AIAnalysisService: """Service for AI analysis operations.""" def __init__(self): """Initialize the AI analysis service.""" self.client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "")) @time_it def analyze_emotions_and_transcript( self, emotion_df: pd.DataFrame, transcript: str, language: str = 'en', interview_assessment: Optional[Dict[str, Any]] = None, eye_contact_data: Optional[Dict[str, Any]] = None, body_language_data: Optional[Dict[str, Any]] = None, face_analysis_data: Optional[Dict[str, Any]] = None, model_name: str = "gpt-4o" ) -> Dict[str, Any]: """ Analyze emotions and transcript using OpenAI. Args: emotion_df: DataFrame with emotion data transcript: Transcript text language: Language of the transcript interview_assessment: Optional interview assessment eye_contact_data: Optional eye contact analysis data body_language_data: Optional body language analysis data face_analysis_data: Optional face analysis data model_name: The name of the model to use for AI analysis Returns: Dictionary with analysis results """ print("*******************************I AM INSIDE AI ANALYSER *******************************************************") logger.info(f"Received interview assessment: {interview_assessment}") logger.info(f"Received transcript: {transcript}") logger.info(f"Received language: {language}") logger.info(f"Received emotion_df: {emotion_df}") logger.info(f"Received eye contact data: {eye_contact_data is not None}") logger.info(f"Received body language data: {body_language_data is not None}") logger.info(f"Received face analysis data: {face_analysis_data is not None}") logger.info(f"Using AI model: {model_name}") # Check if emotion_df is empty or None if emotion_df is None or emotion_df.empty: logger.warning("No emotion data available for analysis") return self._generate_empty_analysis() try: # Extract raw emotion scores from the DataFrame raw_emotions = {} confidence_by_emotion = {} average_confidence = 0 confidence_data = {} # Get primary emotion data from the first row of the DataFrame if not emotion_df.empty and 'raw_emotion_data' in emotion_df.columns: first_row = emotion_df.iloc[0] if isinstance(first_row['raw_emotion_data'], dict) and first_row['raw_emotion_data']: raw_emotions = first_row['raw_emotion_data'] logger.info(f"Using raw_emotion_data from DataFrame: {raw_emotions}") # Check if confidence data is available in the first row (this would be the "confidence_data" field) if 'confidence_data' in emotion_df.columns and isinstance(first_row.get('confidence_data'), dict): confidence_data = first_row['confidence_data'] confidence_by_emotion = confidence_data.get('confidence_by_emotion', {}) average_confidence = confidence_data.get('average_confidence', 0) # Round confidence values to 2 decimal places confidence_by_emotion = {emotion: round(value, 2) for emotion, value in confidence_by_emotion.items()} average_confidence = round(average_confidence, 2) logger.info(f"Using rounded confidence_data - confidence_by_emotion: {confidence_by_emotion}") logger.info(f"Using rounded confidence_data - average_confidence: {average_confidence}") # Store rounded values back to confidence_data for consistency confidence_data['confidence_by_emotion'] = confidence_by_emotion confidence_data['average_confidence'] = average_confidence # If no raw_emotion_data found, fall back to other methods if not raw_emotions: logger.info("No raw_emotion_data found, trying alternative sources") # First check if we have a main_face column if 'main_face' in emotion_df.columns and not emotion_df.empty: first_row = emotion_df.iloc[0] main_face = first_row.get('main_face', {}) if isinstance(main_face, dict) and main_face and 'emotion' in main_face: raw_emotions = main_face['emotion'] logger.info(f"Using emotion from main_face: {raw_emotions}") # If still no raw emotions, try emotion_scores from first row if not raw_emotions and 'emotion_scores' in emotion_df.columns and not emotion_df.empty: first_row = emotion_df.iloc[0] emotion_scores = first_row.get('emotion_scores', {}) if isinstance(emotion_scores, dict) and emotion_scores: raw_emotions = emotion_scores logger.info(f"Using emotion_scores from first row: {raw_emotions}") # If still no raw emotions found, log this issue if not raw_emotions: logger.warning("No emotion data found in the DataFrame") # Use empty dict with zero values for all emotions raw_emotions = { "angry": 0, "disgust": 0, "fear": 0, "happy": 0, "sad": 0, "surprise": 0, "neutral": 0 } # Extract confidence values if available average_confidence = 0 # If we have a 'confidence_by_emotion' stat available in any fashion, use it if 'main_face' in emotion_df.columns and not emotion_df.empty: # Calculate confidence values from dominant emotions in the data confidence_values = [] emotion_confidence_counts = {} for index, row in emotion_df.iterrows(): if 'main_face' in row and row['main_face'] and 'emotion_confidence' in row['main_face']: confidence = row['main_face']['emotion_confidence'] emotion = row['main_face'].get('dominant_emotion', 'neutral') # Add to average confidence confidence_values.append(confidence) # Track by emotion if emotion not in emotion_confidence_counts: emotion_confidence_counts[emotion] = [] emotion_confidence_counts[emotion].append(confidence) # Calculate average confidence if confidence_values: average_confidence = sum(confidence_values) / len(confidence_values) # Calculate average confidence by emotion for emotion, confidences in emotion_confidence_counts.items(): if confidences: confidence_by_emotion[emotion] = sum(confidences) / len(confidences) # If we don't have confidence values, check if we have any in first face if not confidence_by_emotion and 'faces' in emotion_df.columns and not emotion_df.empty: for index, row in emotion_df.iterrows(): if 'faces' in row and row['faces'] and len(row['faces']) > 0 and 'emotion_confidence' in row['faces'][0]: confidence = row['faces'][0]['emotion_confidence'] emotion = row['faces'][0].get('dominant_emotion', 'neutral') # Add to average confidence if 'confidence_values' not in locals(): confidence_values = [] confidence_values.append(confidence) # Track by emotion if emotion not in emotion_confidence_counts: emotion_confidence_counts = {} emotion_confidence_counts[emotion] = [] emotion_confidence_counts[emotion].append(confidence) # Calculate average confidence if 'confidence_values' in locals() and confidence_values: average_confidence = sum(confidence_values) / len(confidence_values) # Calculate average confidence by emotion for emotion, confidences in emotion_confidence_counts.items(): if confidences: confidence_by_emotion[emotion] = sum(confidences) / len(confidences) # If we still don't have confidence values, use the raw emotions as proxy for confidence if not confidence_by_emotion and raw_emotions: # Use the raw emotion values as proxy for confidence # This ensures we at least have something confidence_by_emotion = {k: round(v, 2) for k, v in raw_emotions.items()} dominant_emotion, max_value = max(raw_emotions.items(), key=lambda x: x[1], default=("neutral", 0)) average_confidence = max_value # Format the confidence values for display for emotion in confidence_by_emotion: # Do not round the values to preserve the exact data pass # Add debug logging for average_confidence logger.info(f"Final average_confidence value to be used in result: {average_confidence}") # Get the original average_confidence from the confidence_data for the database db_average_confidence = confidence_data.get("average_confidence", average_confidence) logger.info(f"Using average_confidence from confidence_data for database: {db_average_confidence}") # Determine overall sentiment based on the dominant emotion if 'overall_sentiment' in first_row and first_row['overall_sentiment']: # Use the exact sentiment from the DataFrame if available sentiment = first_row['overall_sentiment'] logger.info(f"Using overall_sentiment from DataFrame: {sentiment}") elif raw_emotions: # Find the dominant emotion only if we don't have a sentiment already dominant_emotion, _ = max(raw_emotions.items(), key=lambda x: x[1], default=("neutral", 0)) sentiment = dominant_emotion.capitalize() logger.info(f"Calculated sentiment from raw_emotions: {sentiment}") else: # Use the standard method if no raw emotions sentiment = self._determine_sentiment(raw_emotions) logger.info(f"Determined sentiment via standard method: {sentiment}") # Prepare prompt for OpenAI prompt = self._generate_prompt( sentiment=sentiment, raw_emotions=raw_emotions, confidence_by_emotion=confidence_by_emotion, average_confidence=average_confidence, transcript=transcript, language=language, interview_assessment=interview_assessment, eye_contact_data=eye_contact_data, body_language_data=body_language_data, ) logger.info(f"Generated prompt: {prompt}") # Call OpenAI API try: system_prompt = """ You are an expert in analyzing emotions and speech for job interviews and professional presentations. You are given a transcript of a video, a summary of the emotions expressed in the video, and detailed interview assessment data when available. You are also given the overall sentiment of the video. You may also be provided with face analysis, eye contact analysis, and body language analysis. You are to analyze all provided data and provide a comprehensive analysis in JSON format. Your evaluation must be based on the transcript, emotions expressed, interview assessment data, face analysis, eye contact analysis, and body language analysis (when provided). You are to provide a detailed analysis, including: - Key points from the transcript - Language quality assessment - Confidence indicators - Overall assessment of the performance including body language, eye contact, and professional appearance - Recommendations for improving emotional expression, communication, body language, and professional appearance Please provide a comprehensive analysis in JSON format with the following structure: { "Transcript Analysis": { "Key Points": List of key points as bullet points in HTML format from the transcript with critical insight for an HR manager. Use bold ... tags to highlight important points. "Language Quality": Bullet points in HTML format of assessment of language use, vocabulary,grammar mistakes, clarity, professionalism, and other language-related metrics. Use bold ... tags to highlight important points. "Confidence Indicators": Bullet points in HTML format of analysis of confidence based on language. }, "Body Language Analysis": { "Eye Contact": Analysis of eye contact patterns in HTML format based on the interview assessment data. "Posture and Movement": Analysis of posture, movement, and other body language indicators in HTML format. "Overall Body Language": Summary assessment of body language in HTML format. }, "Overall Summary": overall assessment of the candidate interview performance with critical insight for an HR manager. Use a chain of thought approach to analyze all available data and provide a comprehensive analysis. Write in HTML and highlight important points with bold ... tags. "Recommendations": { "Emotional Expression": bullet points in HTML format of recommendations for improving emotional expression using bold ... tags. "Communication": bullet points in HTML format of recommendations for improving communication using bold ... tags. "Body Language": bullet points in HTML format of specific recommendations for improving body language based on the assessment data using bold ... tags. "Professional Appearance": bullet points in HTML format of specific recommendations for improving professional appearance using bold ... tags. } } """ response = self.client.chat.completions.create( model=model_name, messages=[ {"role": "system", "content": system_prompt}, {"role": "user", "content": prompt} ], temperature=0.7, max_tokens=2500, frequency_penalty=0, presence_penalty=0.2 ) analysis_text = response.choices[0].message.content.strip() # Parse the JSON response try: analysis = json.loads(analysis_text) logger.info("Successfully parsed the OpenAI response") except Exception as parse_error: logger.error(f"Failed to parse OpenAI response as JSON: {str(parse_error)}") logger.info(f"Response content: {analysis_text}") analysis = self._extract_json_from_text(analysis_text) if not analysis: logger.warning("Returning standard analysis structure with error message") analysis = self._generate_empty_analysis() analysis["Error"] = "Failed to parse OpenAI response" # Add raw emotion data to the analysis for consistency with database storage analysis["Emotion Analysis"] = { "Dominant Emotions": raw_emotions, "Confidence By Emotion": confidence_by_emotion, "Overall Sentiment": sentiment, "Average Confidence": db_average_confidence } # Add eye contact and body language data directly to the analysis # to ensure it's preserved in the returned JSON, using the same keys # as in the video_processor.py when it creates comprehensive_results if eye_contact_data: # Use lowercase key to match video_processor.py key = "eye_contact_analysis" analysis[key] = eye_contact_data logger.info(f"Added {key} to results with {len(str(eye_contact_data))} characters") if body_language_data: # Use lowercase key to match video_processor.py key = "body_language_analysis" analysis[key] = body_language_data logger.info(f"Added {key} to results with {len(str(body_language_data))} characters") if face_analysis_data: # Use lowercase key to match video_processor.py key = "face_analysis" analysis[key] = face_analysis_data logger.info(f"Added {key} to results with {len(str(face_analysis_data))} characters") # Log the exact emotion analysis that will be stored in the database logger.info(f"Emotion Analysis to be stored in database: {analysis['Emotion Analysis']}") logger.info(f"Added eye_contact_analysis to results: {bool(eye_contact_data)}") logger.info(f"Added body_language_analysis to results: {bool(body_language_data)}") logger.info(f"Added face_analysis to results: {bool(face_analysis_data)}") return analysis except Exception as api_error: logger.error(f"Error during OpenAI API call: {str(api_error)}") analysis = self._generate_empty_analysis() analysis["Error"] = f"OpenAI API error: {str(api_error)}" # Still include the emotion data for consistency analysis["Emotion Analysis"] = { "Dominant Emotions": raw_emotions, "Confidence By Emotion": confidence_by_emotion, "Overall Sentiment": sentiment, "Average Confidence": db_average_confidence } # Also include eye contact and body language data in error cases if eye_contact_data: key = "eye_contact_analysis" analysis[key] = eye_contact_data logger.info(f"Preserved {key} in error case with {len(str(eye_contact_data))} characters") if body_language_data: key = "body_language_analysis" analysis[key] = body_language_data logger.info(f"Preserved {key} in error case with {len(str(body_language_data))} characters") if face_analysis_data: key = "face_analysis" analysis[key] = face_analysis_data logger.info(f"Preserved {key} in error case with {len(str(face_analysis_data))} characters") return analysis except Exception as e: logger.error(f"Error during analysis: {str(e)}") analysis = self._generate_empty_analysis() analysis["Error"] = f"Analysis error: {str(e)}" # Also include eye contact and body language data in error cases if eye_contact_data: key = "eye_contact_analysis" analysis[key] = eye_contact_data logger.info(f"Preserved {key} in error case with {len(str(eye_contact_data))} characters") if body_language_data: key = "body_language_analysis" analysis[key] = body_language_data logger.info(f"Preserved {key} in error case with {len(str(body_language_data))} characters") if face_analysis_data: key = "face_analysis" analysis[key] = face_analysis_data logger.info(f"Preserved {key} in error case with {len(str(face_analysis_data))} characters") return analysis def _calculate_emotion_percentages(self, emotion_df: pd.DataFrame) -> Dict[str, float]: """ Calculate percentages of different emotion categories based on raw emotion scores. Args: emotion_df: DataFrame with emotion data Returns: Dictionary with emotion percentages for each emotion and grouped categories """ # Early return for empty DataFrame if emotion_df is None or emotion_df.empty: return { "angry": 0, "disgust": 0, "fear": 0, "happy": 0, "sad": 0, "surprise": 0, "neutral": 0, "positive": 0, "negative": 0 } # Define emotion categories all_emotions = {'angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral'} positive_emotions = {'happy', 'surprise'} negative_emotions = {'angry', 'disgust', 'fear', 'sad'} neutral_emotions = {'neutral'} # Initialize counters for raw emotion scores emotion_totals = {emotion: 0 for emotion in all_emotions} total_score = 0 # Process each row's emotion scores for _, row in emotion_df.iterrows(): # Try to get emotion scores from the row emotion_scores = {} # First check if we have raw emotion scores in the DataFrame if 'emotion_scores' in row and row['emotion_scores']: emotion_scores = row['emotion_scores'] # If no scores found, try to use the dominant emotion and confidence if not emotion_scores and 'dominant_emotion' in row and 'emotion_confidence' in row: emotion = row['dominant_emotion'] confidence = row['emotion_confidence'] if emotion != 'unknown' and confidence > 0: emotion_scores = {emotion: confidence} # Skip if no emotion data if not emotion_scores: continue # Sum up scores by emotion for emotion, score in emotion_scores.items(): total_score += score if emotion in emotion_totals: emotion_totals[emotion] += score # Calculate percentages for each emotion emotion_percentages = {} if total_score > 0: for emotion, total in emotion_totals.items(): emotion_percentages[emotion] = round((total / total_score) * 100, 2) # Add grouped percentages positive_total = sum(emotion_totals.get(emotion, 0) for emotion in positive_emotions) negative_total = sum(emotion_totals.get(emotion, 0) for emotion in negative_emotions) neutral_total = sum(emotion_totals.get(emotion, 0) for emotion in neutral_emotions) emotion_percentages.update({ "positive": round((positive_total / total_score) * 100, 2), "negative": round((negative_total / total_score) * 100, 2) }) else: # Return zeros if no data emotion_percentages = { "angry": 0, "disgust": 0, "fear": 0, "happy": 0, "sad": 0, "surprise": 0, "neutral": 0, "positive": 0, "negative": 0 } return emotion_percentages def _determine_sentiment(self, emotion_percentages: Dict[str, float]) -> str: """ Determine overall sentiment based on emotion percentages. Args: emotion_percentages: Dictionary with emotion percentages Returns: Sentiment assessment string """ # First try to determine sentiment from individual emotions individual_emotions = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral'] # Find the dominant individual emotion max_emotion = None max_score = -1 for emotion in individual_emotions: if emotion in emotion_percentages and emotion_percentages[emotion] > max_score: max_score = emotion_percentages[emotion] max_emotion = emotion # If we found a dominant individual emotion with significant percentage, use it if max_emotion and max_score > 30: return max_emotion.capitalize() # Otherwise, fall back to category-based sentiment positive = emotion_percentages.get("positive", 0) negative = emotion_percentages.get("negative", 0) neutral = emotion_percentages.get("neutral", 0) # Use lookup table for thresholds if positive > 60: return "Very Positive" if positive > 40: return "Positive" if negative > 60: return "Very Negative" if negative > 40: return "Negative" if neutral > 60: return "Very Neutral" if neutral > 40: return "Neutral" # Find dominant category max_category = max( ("positive", positive), ("negative", negative), ("neutral", neutral), key=lambda x: x[1] ) # Map dominant category to sentiment sentiment_map = { "positive": "Slightly Positive", "negative": "Slightly Negative", "neutral": "Mixed" # Default case } return sentiment_map.get(max_category[0], "Mixed") def _generate_prompt( self, sentiment: str, raw_emotions: Dict[str, float], confidence_by_emotion: Dict[str, float], average_confidence: float, transcript: str, language: str = 'en', interview_assessment: Optional[Dict[str, Any]] = None, eye_contact_data: Optional[Dict[str, Any]] = None, body_language_data: Optional[Dict[str, Any]] = None, face_analysis_data: Optional[Dict[str, Any]] = None ) -> str: """ Generate a prompt for the AI model. Args: sentiment: Dominant sentiment raw_emotions: Raw emotion scores confidence_by_emotion: Confidence scores by emotion average_confidence: Average confidence transcript: Transcript text language: Language of the transcript interview_assessment: Optional interview assessment eye_contact_data: Optional eye contact analysis data body_language_data: Optional body language analysis data face_analysis_data: Optional face analysis data Returns: Prompt for the AI model """ # Format the emotion data emotions_str = ", ".join([f"{emotion}: {value:.1f}%" for emotion, value in raw_emotions.items()]) confidence_str = ", ".join([f"{emotion}: {value:.2f}" for emotion, value in confidence_by_emotion.items()]) # Include eye contact analysis if available eye_contact_str = "" if eye_contact_data: ec_stats = eye_contact_data.get("eye_contact_stats", {}) ec_assessment = eye_contact_data.get("assessment", {}) if ec_stats and ec_assessment: eye_contact_str = f""" Eye Contact Analysis: - Eye contact percentage: {ec_stats.get('eye_contact_percentage', 0):.1f}% - Eye contact duration: {ec_stats.get('eye_contact_duration_seconds', 0):.1f} seconds - Longest eye contact: {ec_stats.get('longest_eye_contact_seconds', 0):.1f} seconds - Average contact duration: {ec_stats.get('average_contact_duration_seconds', 0):.1f} seconds - Contact episodes: {ec_stats.get('contact_episodes', 0)} - Assessment score: {ec_assessment.get('score', 0)}/10 - Key patterns: {', '.join(ec_assessment.get('patterns', []))} """ # Include body language analysis if available body_language_str = "" if body_language_data: bl_stats = body_language_data.get("body_language_stats", {}) bl_assessment = body_language_data.get("assessment", {}) if bl_stats and bl_assessment: body_language_str = f""" Body Language Analysis: - Shoulder misalignment percentage: {bl_stats.get('shoulder_misalignment_percentage', 0):.1f}% - Leaning forward percentage: {bl_stats.get('leaning_forward_percentage', 0):.1f}% - Head tilt percentage: {bl_stats.get('head_tilt_percentage', 0):.1f}% - Arms crossed percentage: {bl_stats.get('arms_crossed_percentage', 0):.1f}% - Self-touch percentage: {bl_stats.get('self_touch_percentage', 0):.1f}% - Fidgeting percentage: {bl_stats.get('fidgeting_percentage', 0):.1f}% - Pose shifts per minute: {bl_stats.get('pose_shifts_per_minute', 0):.1f} - Confidence score: {bl_assessment.get('confidence_score', 0)}/10 - Engagement score: {bl_assessment.get('engagement_score', 0)}/10 - Comfort score: {bl_assessment.get('comfort_score', 0)}/10 - Overall score: {bl_assessment.get('overall_score', 0)}/10 """ # Include face analysis if available face_analysis_str = "" if face_analysis_data: face_analysis_str = f""" Face Analysis: - Professional Impression: {face_analysis_data.get('professionalImpression', 'No data')} - Attire Assessment: {face_analysis_data.get('attireAssessment', 'No data')} - Facial Expression: {face_analysis_data.get('facialExpressionAnalysis', 'No data')} - Background Assessment: {face_analysis_data.get('backgroundAssessment', 'No data')} - Personality Indicators: {face_analysis_data.get('personalityIndicators', 'No data')} - Recommendations: {face_analysis_data.get('recommendationsForImprovement', 'No data')} - Overall Score: {face_analysis_data.get('overallScore', 0)}/10 """ # Format the interview assessment if available interview_str = "" if interview_assessment: interview_str = f""" Interview Assessment: {json.dumps(interview_assessment, indent=2)} """ # Create the prompt with different instructions based on language if language.lower() in ['en', 'eng', 'english']: prompt = f""" You are an expert in analyzing human emotions, body language, and eye contact in video interviews. Based on the transcript and emotional data provided, provide a comprehensive analysis of the interview. Emotion Analysis: Dominant emotion: {sentiment} Emotion breakdown: {emotions_str} Confidence by emotion: {confidence_str} Average confidence: {average_confidence:.2f} {eye_contact_str} {body_language_str} {face_analysis_str} {interview_str} Transcript: {transcript} Provide a comprehensive analysis with the following sections: 1. Emotion Analysis: Analyze the emotions detected in the video. 2. Transcript Analysis: Analyze the content of the transcript, key themes, and topics discussed. 3. Body Language Analysis: If body language data is available, analyze the body language observed. 4. Eye Contact Analysis: If eye contact data is available, analyze the eye contact patterns. 5. Face Analysis: If face analysis data is available, analyze the professional appearance, attire, and background. 6. Overall Summary: Provide a holistic view of the interview performance. 7. Recommendations: Suggest improvements for future interviews. Format your response as a structured JSON with the following keys: {{ "Emotion Analysis": {{ detailed analysis }}, "Transcript Analysis": {{ detailed analysis }}, "Body Language Analysis": {{ detailed analysis, if data is available }}, "Eye Contact Analysis": {{ detailed analysis, if data is available }}, "Face Analysis": {{ detailed analysis, if data is available }}, "Overall Summary": "summary text", "Recommendations": {{ recommendations }} }} """ else: # Simplified prompt for other languages prompt = f""" Analyze the following transcript and emotion data. Emotion data: {sentiment}, {emotions_str} {eye_contact_str} {body_language_str} {face_analysis_str} {interview_str} Transcript: {transcript} Provide a summary of the content and emotional state, formatted as JSON. """ return prompt def _generate_empty_analysis(self) -> Dict[str, Any]: """ Generate empty analysis when no data is available. Returns: Empty analysis dictionary """ return { "Emotion Analysis": { "Dominant Emotions": { "angry": 0, "disgust": 0, "fear": 0, "happy": 0, "sad": 0, "surprise": 0, "neutral": 0 }, "Confidence By Emotion": { "angry": 0, "disgust": 0, "fear": 0, "happy": 0, "sad": 0, "surprise": 0, "neutral": 0 }, "Overall Sentiment": "No emotions detected", "Average Confidence": 0 }, "Transcript Analysis": { "Key Points": [], "Language Quality": "No transcript available", "Confidence Indicators": [] }, "Body Language Analysis": { "Eye Contact": "No data available", "Posture and Movement": "No data available", "Overall Body Language": "No data available" }, "Overall Summary": "No data available for analysis", "Recommendations": { "Emotional Expression": "No recommendations available", "Communication": "No recommendations available", "Body Language": "No recommendations available", "Professional Appearance": "No recommendations available" } } def _extract_json_from_text(self, text: str) -> Dict[str, Any]: """ Extract JSON from a text string that might contain other content. Args: text: The text to extract JSON from Returns: Extracted JSON as dict, or empty dict if extraction fails """ try: # First try to parse the entire text as JSON return json.loads(text) except json.JSONDecodeError: # If that fails, try to find JSON-like content try: # Check if text starts with markdown code block if text.strip().startswith("```json"): # Extract content between the markdown delimiters parts = text.split("```") if len(parts) >= 3: # At least opening and closing backticks with content between # Get the content after the first ``` and before the next ``` json_str = parts[1] # Remove "json" language identifier if present json_str = json_str.replace("json", "", 1).strip() # Try to parse the extracted JSON return json.loads(json_str) elif text.strip().startswith("```"): # Similar handling for code blocks without language specification parts = text.split("```") if len(parts) >= 3: json_str = parts[1].strip() return json.loads(json_str) # Find the first opening brace and the last closing brace json_start = text.find('{') json_end = text.rfind('}') + 1 if json_start >= 0 and json_end > json_start: json_str = text[json_start:json_end] # Try to parse the extracted JSON return json.loads(json_str) # If no braces found, look for markdown code blocks elsewhere in the text if "```json" in text or "```" in text: # Try to extract from code blocks lines = text.split("\n") start_line = -1 end_line = -1 for i, line in enumerate(lines): if "```json" in line or line.strip() == "```": if start_line == -1: start_line = i else: end_line = i break if start_line != -1 and end_line != -1: # Extract content between markdown delimiters json_content = "\n".join(lines[start_line+1:end_line]) # Clean up and parse json_content = json_content.replace("json", "", 1).strip() return json.loads(json_content) except Exception as e: logger.error(f"Error extracting JSON from text: {str(e)}") # If all extraction attempts fail, return empty dict return {} def _format_confidence_values(self, raw_emotions: Dict[str, float], confidence_by_emotion: Dict[str, float]) -> Dict[str, float]: """ Format the confidence values to match what's expected in the database. Args: raw_emotions: Raw emotion data confidence_by_emotion: Confidence values by emotion Returns: Formatted confidence values """ # First check if we have proper confidence values from confidence_by_emotion if confidence_by_emotion and any(val > 0 for val in confidence_by_emotion.values()): logger.info(f"Using provided confidence values: {confidence_by_emotion}") # Ensure values are properly formatted return {emotion: round(confidence, 2) for emotion, confidence in confidence_by_emotion.items()} else: # No valid confidence values found, log this fact logger.warning("No valid confidence values found, using raw emotions as proxy for confidence") # Use the raw emotions as proxy for confidence (this was the source of the issue) return {emotion: round(value, 2) for emotion, value in raw_emotions.items()} def _get_dominant_confidence(self, raw_emotions: Dict[str, float], average_confidence: float) -> float: """ Get the confidence value of the dominant emotion. Args: raw_emotions: Raw emotion data average_confidence: Average confidence value from the data Returns: Dominant emotion confidence """ # Simply return the provided average_confidence # This method is maintained for backward compatibility logger.info(f"Using average confidence: {average_confidence}") return round(average_confidence, 2)