File size: 42,582 Bytes
8ae78b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
import os
import json
import logging
import pandas as pd
import openai
from typing import Dict, Any, List, Optional

# Fix import paths: prefer the package-absolute imports used when running
# inside the app; fall back to the repo-root layout (behavior_backend.*)
# used when the module is executed from the project root.
try:
    from app.utils.logging_utils import time_it, setup_logger
    from app.core.config import settings
except ImportError:
    # Try relative imports for running from project root
    # NOTE(review): if this import also fails, the ImportError propagates —
    # only the settings object is stubbed below, not the logging utilities.
    from behavior_backend.app.utils.logging_utils import time_it, setup_logger
    # Mock settings for testing
    class Settings:
        # Mirrors app.core.config.settings for the single field this module reads.
        OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
        
    settings = Settings()

# Configure logging
logger = setup_logger(__name__)

class AIAnalysisService:
    """Service for AI analysis operations."""
    
    def __init__(self):
        """Initialize the AI analysis service.

        Creates the OpenAI client once; requests are issued through
        ``self.client`` by the analysis methods.
        """
        # Prefer the configured key so the import fallback above is honored;
        # keep the environment variable as a last resort (original behavior
        # read only the environment and ignored `settings` entirely).
        api_key = getattr(settings, "OPENAI_API_KEY", "") or os.environ.get("OPENAI_API_KEY", "")
        self.client = openai.OpenAI(api_key=api_key)
    
    @time_it
    def analyze_emotions_and_transcript(
        self, 
        emotion_df: pd.DataFrame, 
        transcript: str,
        language: str = 'en',
        interview_assessment: Optional[Dict[str, Any]] = None,
        eye_contact_data: Optional[Dict[str, Any]] = None,
        body_language_data: Optional[Dict[str, Any]] = None,
        face_analysis_data: Optional[Dict[str, Any]] = None,
        model_name: str = "gpt-4o"
    ) -> Dict[str, Any]:
        """
        Analyze emotions and transcript using OpenAI.
        
        Args:
            emotion_df: DataFrame with emotion data
            transcript: Transcript text
            language: Language of the transcript
            interview_assessment: Optional interview assessment
            eye_contact_data: Optional eye contact analysis data
            body_language_data: Optional body language analysis data
            face_analysis_data: Optional face analysis data
            model_name: The name of the model to use for AI analysis
        Returns:
            Dictionary with analysis results
        """
        print("*******************************I AM INSIDE AI ANALYSER *******************************************************")
        logger.info(f"Received interview assessment: {interview_assessment}")
        logger.info(f"Received transcript: {transcript}")
        logger.info(f"Received language: {language}")
        logger.info(f"Received emotion_df: {emotion_df}")
        logger.info(f"Received eye contact data: {eye_contact_data is not None}")
        logger.info(f"Received body language data: {body_language_data is not None}")
        logger.info(f"Received face analysis data: {face_analysis_data is not None}")
        logger.info(f"Using AI model: {model_name}")
        
        # Check if emotion_df is empty or None
        if emotion_df is None or emotion_df.empty:
            logger.warning("No emotion data available for analysis")
            return self._generate_empty_analysis()
        
        try:
            # Extract raw emotion scores from the DataFrame
            raw_emotions = {}
            confidence_by_emotion = {}
            average_confidence = 0
            confidence_data = {}
            
            # Get primary emotion data from the first row of the DataFrame
            if not emotion_df.empty and 'raw_emotion_data' in emotion_df.columns:
                first_row = emotion_df.iloc[0]
                if isinstance(first_row['raw_emotion_data'], dict) and first_row['raw_emotion_data']:
                    raw_emotions = first_row['raw_emotion_data']
                    logger.info(f"Using raw_emotion_data from DataFrame: {raw_emotions}")
                    
                    # Check if confidence data is available in the first row (this would be the "confidence_data" field)
                    if 'confidence_data' in emotion_df.columns and isinstance(first_row.get('confidence_data'), dict):
                        confidence_data = first_row['confidence_data']
                        confidence_by_emotion = confidence_data.get('confidence_by_emotion', {})
                        average_confidence = confidence_data.get('average_confidence', 0)
                        
                        # Round confidence values to 2 decimal places
                        confidence_by_emotion = {emotion: round(value, 2) for emotion, value in confidence_by_emotion.items()}
                        average_confidence = round(average_confidence, 2)
                        
                        logger.info(f"Using rounded confidence_data - confidence_by_emotion: {confidence_by_emotion}")
                        logger.info(f"Using rounded confidence_data - average_confidence: {average_confidence}")
                        
                        # Store rounded values back to confidence_data for consistency
                        confidence_data['confidence_by_emotion'] = confidence_by_emotion
                        confidence_data['average_confidence'] = average_confidence
            
            # If no raw_emotion_data found, fall back to other methods
            if not raw_emotions:
                logger.info("No raw_emotion_data found, trying alternative sources")
                # First check if we have a main_face column
                if 'main_face' in emotion_df.columns and not emotion_df.empty:
                    first_row = emotion_df.iloc[0]
                    main_face = first_row.get('main_face', {})
                    if isinstance(main_face, dict) and main_face and 'emotion' in main_face:
                        raw_emotions = main_face['emotion']
                        logger.info(f"Using emotion from main_face: {raw_emotions}")
                
                # If still no raw emotions, try emotion_scores from first row
                if not raw_emotions and 'emotion_scores' in emotion_df.columns and not emotion_df.empty:
                    first_row = emotion_df.iloc[0]
                    emotion_scores = first_row.get('emotion_scores', {})
                    if isinstance(emotion_scores, dict) and emotion_scores:
                        raw_emotions = emotion_scores
                        logger.info(f"Using emotion_scores from first row: {raw_emotions}")
            
            # If still no raw emotions found, log this issue
            if not raw_emotions:
                logger.warning("No emotion data found in the DataFrame")
                # Use empty dict with zero values for all emotions
                raw_emotions = {
                    "angry": 0, "disgust": 0, "fear": 0, "happy": 0,
                    "sad": 0, "surprise": 0, "neutral": 0
                }
            
            # Extract confidence values if available
            average_confidence = 0
            
            # If we have a 'confidence_by_emotion' stat available in any fashion, use it
            if 'main_face' in emotion_df.columns and not emotion_df.empty:
                # Calculate confidence values from dominant emotions in the data
                confidence_values = []
                emotion_confidence_counts = {}
                
                for index, row in emotion_df.iterrows():
                    if 'main_face' in row and row['main_face'] and 'emotion_confidence' in row['main_face']:
                        confidence = row['main_face']['emotion_confidence']
                        emotion = row['main_face'].get('dominant_emotion', 'neutral')
                        
                        # Add to average confidence
                        confidence_values.append(confidence)
                        
                        # Track by emotion
                        if emotion not in emotion_confidence_counts:
                            emotion_confidence_counts[emotion] = []
                        emotion_confidence_counts[emotion].append(confidence)
                
                # Calculate average confidence
                if confidence_values:
                    average_confidence = sum(confidence_values) / len(confidence_values)
                    
                    # Calculate average confidence by emotion
                    for emotion, confidences in emotion_confidence_counts.items():
                        if confidences:
                            confidence_by_emotion[emotion] = sum(confidences) / len(confidences)
            
            # If we don't have confidence values, check if we have any in first face
            if not confidence_by_emotion and 'faces' in emotion_df.columns and not emotion_df.empty:
                for index, row in emotion_df.iterrows():
                    if 'faces' in row and row['faces'] and len(row['faces']) > 0 and 'emotion_confidence' in row['faces'][0]:
                        confidence = row['faces'][0]['emotion_confidence']
                        emotion = row['faces'][0].get('dominant_emotion', 'neutral')
                        
                        # Add to average confidence
                        if 'confidence_values' not in locals():
                            confidence_values = []
                        confidence_values.append(confidence)
                        
                        # Track by emotion
                        if emotion not in emotion_confidence_counts:
                            emotion_confidence_counts = {}
                            emotion_confidence_counts[emotion] = []
                        emotion_confidence_counts[emotion].append(confidence)
                
                # Calculate average confidence
                if 'confidence_values' in locals() and confidence_values:
                    average_confidence = sum(confidence_values) / len(confidence_values)
                    
                    # Calculate average confidence by emotion
                    for emotion, confidences in emotion_confidence_counts.items():
                        if confidences:
                            confidence_by_emotion[emotion] = sum(confidences) / len(confidences)
            
            # If we still don't have confidence values, use the raw emotions as proxy for confidence
            if not confidence_by_emotion and raw_emotions:
                # Use the raw emotion values as proxy for confidence
                # This ensures we at least have something
                confidence_by_emotion = {k: round(v, 2) for k, v in raw_emotions.items()}
                dominant_emotion, max_value = max(raw_emotions.items(), key=lambda x: x[1], default=("neutral", 0))
                average_confidence = max_value
            
            # Format the confidence values for display
            for emotion in confidence_by_emotion:
                # Do not round the values to preserve the exact data
                pass
            
            # Add debug logging for average_confidence
            logger.info(f"Final average_confidence value to be used in result: {average_confidence}")
            
            # Get the original average_confidence from the confidence_data for the database
            db_average_confidence = confidence_data.get("average_confidence", average_confidence)
            logger.info(f"Using average_confidence from confidence_data for database: {db_average_confidence}")
            
            # Determine overall sentiment based on the dominant emotion
            if 'overall_sentiment' in first_row and first_row['overall_sentiment']:
                # Use the exact sentiment from the DataFrame if available
                sentiment = first_row['overall_sentiment']
                logger.info(f"Using overall_sentiment from DataFrame: {sentiment}")
            elif raw_emotions:
                # Find the dominant emotion only if we don't have a sentiment already
                dominant_emotion, _ = max(raw_emotions.items(), key=lambda x: x[1], default=("neutral", 0))
                sentiment = dominant_emotion.capitalize()
                logger.info(f"Calculated sentiment from raw_emotions: {sentiment}")
            else:
                # Use the standard method if no raw emotions
                sentiment = self._determine_sentiment(raw_emotions)
                logger.info(f"Determined sentiment via standard method: {sentiment}")
            
            # Prepare prompt for OpenAI
            prompt = self._generate_prompt(
                sentiment=sentiment,
                raw_emotions=raw_emotions,
                confidence_by_emotion=confidence_by_emotion,
                average_confidence=average_confidence,
                transcript=transcript,
                language=language,
                interview_assessment=interview_assessment,
                eye_contact_data=eye_contact_data,
                body_language_data=body_language_data,
            )
            logger.info(f"Generated prompt: {prompt}")
            # Call OpenAI API
            try:
                system_prompt = """
You are an expert in analyzing emotions and speech for job interviews and professional presentations.
You are given a transcript of a video, a summary of the emotions expressed in the video, and detailed interview assessment data when available.
You are also given the overall sentiment of the video.
You may also be provided with face analysis, eye contact analysis, and body language analysis.
You are to analyze all provided data and provide a comprehensive analysis in JSON format.
Your evaluation must be based on the transcript, emotions expressed, interview assessment data, face analysis, eye contact analysis, and body language analysis (when provided).
You are to provide a detailed analysis, including:
- Key points from the transcript
- Language quality assessment
- Confidence indicators
- Overall assessment of the performance including body language, eye contact, and professional appearance
- Recommendations for improving emotional expression, communication, body language, and professional appearance

Please provide a comprehensive analysis in JSON format with the following structure:
{
    "Transcript Analysis": {
        "Key Points": List of key points as bullet points <ul>...</ul> in HTML format from the transcript with critical insight for an HR manager. Use bold <b>...</b> tags to highlight important points.
        "Language Quality": Bullet points  <ul>...</ul> in HTML format of assessment of language use, vocabulary,grammar mistakes, clarity, professionalism, and other language-related metrics. Use bold <b>...</b> tags to highlight important points.
        "Confidence Indicators":  Bullet points  <ul>...</ul> in HTML format of analysis of confidence based on language.
    },
    "Body Language Analysis": {
        "Eye Contact": Analysis of eye contact patterns in HTML format based on the interview assessment data.
        "Posture and Movement": Analysis of posture, movement, and other body language indicators in HTML format.
        "Overall Body Language": Summary assessment of body language in HTML format.
    },
    "Overall Summary": overall assessment of the candidate interview performance with critical insight for an HR manager. Use a chain of thought approach to analyze all available data and provide a comprehensive analysis. Write in HTML and highlight important points with bold <b>...</b> tags.    
    "Recommendations": {
        "Emotional Expression": bullet points <ul>...</ul> in HTML format of recommendations for improving emotional expression using bold <b>...</b> tags.
        "Communication": bullet points <ul>...</ul> in HTML format of recommendations for improving communication using bold <b>...</b> tags.
        "Body Language": bullet points <ul>...</ul> in HTML format of specific recommendations for improving body language based on the assessment data using bold <b>...</b> tags.
        "Professional Appearance": bullet points <ul>...</ul> in HTML format of specific recommendations for improving professional appearance using bold <b>...</b> tags.
    }
}
"""
                
                response = self.client.chat.completions.create(
                    model=model_name,
                    messages=[
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": prompt}
                    ],
                    temperature=0.7,
                    max_tokens=2500,
                    frequency_penalty=0,
                    presence_penalty=0.2
                )
                
                analysis_text = response.choices[0].message.content.strip()
                
                # Parse the JSON response
                try:
                    analysis = json.loads(analysis_text)
                    logger.info("Successfully parsed the OpenAI response")
                except Exception as parse_error:
                    logger.error(f"Failed to parse OpenAI response as JSON: {str(parse_error)}")
                    logger.info(f"Response content: {analysis_text}")
                    analysis = self._extract_json_from_text(analysis_text)
                    
                    if not analysis:
                        logger.warning("Returning standard analysis structure with error message")
                        analysis = self._generate_empty_analysis()
                        analysis["Error"] = "Failed to parse OpenAI response"
                
                # Add raw emotion data to the analysis for consistency with database storage
                analysis["Emotion Analysis"] = {
                    "Dominant Emotions": raw_emotions,
                    "Confidence By Emotion": confidence_by_emotion,
                    "Overall Sentiment": sentiment,
                    "Average Confidence": db_average_confidence
                }
                
                # Add eye contact and body language data directly to the analysis 
                # to ensure it's preserved in the returned JSON, using the same keys
                # as in the video_processor.py when it creates comprehensive_results
                if eye_contact_data:
                    # Use lowercase key to match video_processor.py
                    key = "eye_contact_analysis"
                    analysis[key] = eye_contact_data
                    logger.info(f"Added {key} to results with {len(str(eye_contact_data))} characters")
                
                if body_language_data:
                    # Use lowercase key to match video_processor.py
                    key = "body_language_analysis"
                    analysis[key] = body_language_data
                    logger.info(f"Added {key} to results with {len(str(body_language_data))} characters")
                
                if face_analysis_data:
                    # Use lowercase key to match video_processor.py
                    key = "face_analysis"
                    analysis[key] = face_analysis_data
                    logger.info(f"Added {key} to results with {len(str(face_analysis_data))} characters")
                
                # Log the exact emotion analysis that will be stored in the database
                logger.info(f"Emotion Analysis to be stored in database: {analysis['Emotion Analysis']}")
                logger.info(f"Added eye_contact_analysis to results: {bool(eye_contact_data)}")
                logger.info(f"Added body_language_analysis to results: {bool(body_language_data)}")
                logger.info(f"Added face_analysis to results: {bool(face_analysis_data)}")
                
                return analysis
                
            except Exception as api_error:
                logger.error(f"Error during OpenAI API call: {str(api_error)}")
                analysis = self._generate_empty_analysis()
                analysis["Error"] = f"OpenAI API error: {str(api_error)}"
                
                # Still include the emotion data for consistency
                analysis["Emotion Analysis"] = {
                    "Dominant Emotions": raw_emotions,
                    "Confidence By Emotion": confidence_by_emotion,
                    "Overall Sentiment": sentiment,
                    "Average Confidence": db_average_confidence
                }
                
                # Also include eye contact and body language data in error cases
                if eye_contact_data:
                    key = "eye_contact_analysis"
                    analysis[key] = eye_contact_data
                    logger.info(f"Preserved {key} in error case with {len(str(eye_contact_data))} characters")
                
                if body_language_data:
                    key = "body_language_analysis"
                    analysis[key] = body_language_data
                    logger.info(f"Preserved {key} in error case with {len(str(body_language_data))} characters")
                
                if face_analysis_data:
                    key = "face_analysis"
                    analysis[key] = face_analysis_data
                    logger.info(f"Preserved {key} in error case with {len(str(face_analysis_data))} characters")
                
                return analysis
                
        except Exception as e:
            logger.error(f"Error during analysis: {str(e)}")
            analysis = self._generate_empty_analysis()
            analysis["Error"] = f"Analysis error: {str(e)}"
            
            # Also include eye contact and body language data in error cases
            if eye_contact_data:
                key = "eye_contact_analysis"
                analysis[key] = eye_contact_data
                logger.info(f"Preserved {key} in error case with {len(str(eye_contact_data))} characters")
            
            if body_language_data:
                key = "body_language_analysis"
                analysis[key] = body_language_data
                logger.info(f"Preserved {key} in error case with {len(str(body_language_data))} characters")
            
            if face_analysis_data:
                key = "face_analysis"
                analysis[key] = face_analysis_data
                logger.info(f"Preserved {key} in error case with {len(str(face_analysis_data))} characters")
            
            return analysis
    
    def _calculate_emotion_percentages(self, emotion_df: pd.DataFrame) -> Dict[str, float]:
        """
        Calculate percentages of different emotion categories based on raw emotion scores.
        
        Args:
            emotion_df: DataFrame with emotion data
            
        Returns:
            Dictionary with a percentage (0-100, rounded to 2 decimals) for each
            of the seven base emotions plus grouped "positive" and "negative"
            categories.  All values are 0 when no usable emotion data exists.
        """
        zero_percentages = {
            "angry": 0, "disgust": 0, "fear": 0, "happy": 0, 
            "sad": 0, "surprise": 0, "neutral": 0,
            "positive": 0, "negative": 0
        }
        
        # Early return for empty DataFrame
        if emotion_df is None or emotion_df.empty:
            return dict(zero_percentages)
        
        # Emotion groupings used for the aggregate categories.
        all_emotions = {'angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral'}
        positive_emotions = {'happy', 'surprise'}
        negative_emotions = {'angry', 'disgust', 'fear', 'sad'}
        
        emotion_totals = {emotion: 0 for emotion in all_emotions}
        total_score = 0
        
        for _, row in emotion_df.iterrows():
            # Prefer explicit per-row emotion scores...
            emotion_scores = {}
            if 'emotion_scores' in row and row['emotion_scores']:
                emotion_scores = row['emotion_scores']
            
            # ...otherwise synthesize a single-entry score from the row's
            # dominant emotion and its confidence.
            if not emotion_scores and 'dominant_emotion' in row and 'emotion_confidence' in row:
                emotion = row['dominant_emotion']
                confidence = row['emotion_confidence']
                if emotion != 'unknown' and confidence > 0:
                    emotion_scores = {emotion: confidence}
            
            # Skip rows with no emotion data.
            if not emotion_scores:
                continue
            
            # total_score counts every score (including labels outside the
            # seven known emotions), so percentages are normalized against
            # everything observed.
            for emotion, score in emotion_scores.items():
                total_score += score
                if emotion in emotion_totals:
                    emotion_totals[emotion] += score
        
        if total_score <= 0:
            return dict(zero_percentages)
        
        emotion_percentages = {
            emotion: round((total / total_score) * 100, 2)
            for emotion, total in emotion_totals.items()
        }
        
        # Grouped category percentages.
        positive_total = sum(emotion_totals[emotion] for emotion in positive_emotions)
        negative_total = sum(emotion_totals[emotion] for emotion in negative_emotions)
        emotion_percentages["positive"] = round((positive_total / total_score) * 100, 2)
        emotion_percentages["negative"] = round((negative_total / total_score) * 100, 2)
        
        return emotion_percentages
    
    def _determine_sentiment(self, emotion_percentages: Dict[str, float]) -> str:
        """
        Map emotion percentages to a human-readable sentiment label.
        
        A single emotion above 30% wins outright; otherwise the grouped
        positive/negative/neutral percentages are checked against fixed
        thresholds, and finally the strongest category decides.
        
        Args:
            emotion_percentages: Dictionary with emotion percentages
            
        Returns:
            Sentiment assessment string
        """
        base_emotions = ('angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral')
        
        # Pass 1: a dominant individual emotion (first-wins on ties).
        leader = None
        leader_score = -1
        for name in base_emotions:
            if name in emotion_percentages and emotion_percentages[name] > leader_score:
                leader_score = emotion_percentages[name]
                leader = name
        
        if leader and leader_score > 30:
            return leader.capitalize()
        
        # Pass 2: fixed thresholds on the grouped categories, checked in order.
        positive = emotion_percentages.get("positive", 0)
        negative = emotion_percentages.get("negative", 0)
        neutral = emotion_percentages.get("neutral", 0)
        
        for score, cutoff, label in (
            (positive, 60, "Very Positive"),
            (positive, 40, "Positive"),
            (negative, 60, "Very Negative"),
            (negative, 40, "Negative"),
            (neutral, 60, "Very Neutral"),
            (neutral, 40, "Neutral"),
        ):
            if score > cutoff:
                return label
        
        # Pass 3: whichever category leads (ties favor positive, then negative).
        winner = max(
            ("positive", positive),
            ("negative", negative),
            ("neutral", neutral),
            key=lambda pair: pair[1]
        )
        
        labels = {
            "positive": "Slightly Positive",
            "negative": "Slightly Negative",
        }
        return labels.get(winner[0], "Mixed")
    
    def _generate_prompt(
        self, 
        sentiment: str, 
        raw_emotions: Dict[str, float],
        confidence_by_emotion: Dict[str, float],
        average_confidence: float,
        transcript: str,
        language: str = 'en',
        interview_assessment: Optional[Dict[str, Any]] = None,
        eye_contact_data: Optional[Dict[str, Any]] = None,
        body_language_data: Optional[Dict[str, Any]] = None,
        face_analysis_data: Optional[Dict[str, Any]] = None
    ) -> str:
        """
        Generate a prompt for the AI model.
        
        Assembles the emotion statistics, any available eye-contact /
        body-language / face-analysis sections, the optional interview
        assessment, and the transcript into a single text prompt.
        English transcripts get a detailed prompt requesting a structured
        JSON response; all other languages get a simplified prompt.
        
        Args:
            sentiment: Dominant sentiment
            raw_emotions: Raw emotion scores (rendered as percentages)
            confidence_by_emotion: Confidence scores by emotion
            average_confidence: Average confidence
            transcript: Transcript text
            language: Language of the transcript
            interview_assessment: Optional interview assessment
            eye_contact_data: Optional eye contact analysis data
            body_language_data: Optional body language analysis data
            face_analysis_data: Optional face analysis data
        
        Returns:
            Prompt for the AI model
        """
        # Format the emotion data as comma-separated "name: value" lists.
        emotions_str = ", ".join([f"{emotion}: {value:.1f}%" for emotion, value in raw_emotions.items()])
        confidence_str = ", ".join([f"{emotion}: {value:.2f}" for emotion, value in confidence_by_emotion.items()])
        
        # Include eye contact analysis if available.
        # The section renders only when BOTH "eye_contact_stats" and
        # "assessment" sub-dicts are present and non-empty.
        eye_contact_str = ""
        if eye_contact_data:
            ec_stats = eye_contact_data.get("eye_contact_stats", {})
            ec_assessment = eye_contact_data.get("assessment", {})
            if ec_stats and ec_assessment:
                eye_contact_str = f"""
                Eye Contact Analysis:
                - Eye contact percentage: {ec_stats.get('eye_contact_percentage', 0):.1f}%
                - Eye contact duration: {ec_stats.get('eye_contact_duration_seconds', 0):.1f} seconds
                - Longest eye contact: {ec_stats.get('longest_eye_contact_seconds', 0):.1f} seconds
                - Average contact duration: {ec_stats.get('average_contact_duration_seconds', 0):.1f} seconds
                - Contact episodes: {ec_stats.get('contact_episodes', 0)}
                - Assessment score: {ec_assessment.get('score', 0)}/10
                - Key patterns: {', '.join(ec_assessment.get('patterns', []))}
                """
        
        # Include body language analysis if available (same both-present
        # requirement as the eye-contact section above).
        body_language_str = ""
        if body_language_data:
            bl_stats = body_language_data.get("body_language_stats", {})
            bl_assessment = body_language_data.get("assessment", {})
            if bl_stats and bl_assessment:
                body_language_str = f"""
                Body Language Analysis:
                - Shoulder misalignment percentage: {bl_stats.get('shoulder_misalignment_percentage', 0):.1f}%
                - Leaning forward percentage: {bl_stats.get('leaning_forward_percentage', 0):.1f}%
                - Head tilt percentage: {bl_stats.get('head_tilt_percentage', 0):.1f}%
                - Arms crossed percentage: {bl_stats.get('arms_crossed_percentage', 0):.1f}%
                - Self-touch percentage: {bl_stats.get('self_touch_percentage', 0):.1f}%
                - Fidgeting percentage: {bl_stats.get('fidgeting_percentage', 0):.1f}%
                - Pose shifts per minute: {bl_stats.get('pose_shifts_per_minute', 0):.1f}
                - Confidence score: {bl_assessment.get('confidence_score', 0)}/10
                - Engagement score: {bl_assessment.get('engagement_score', 0)}/10
                - Comfort score: {bl_assessment.get('comfort_score', 0)}/10
                - Overall score: {bl_assessment.get('overall_score', 0)}/10
                """
        
        # Include face analysis if available (flat dict, camelCase keys —
        # presumably produced by an upstream vision model; verify against caller).
        face_analysis_str = ""
        if face_analysis_data:
            face_analysis_str = f"""
            Face Analysis:
            - Professional Impression: {face_analysis_data.get('professionalImpression', 'No data')}
            - Attire Assessment: {face_analysis_data.get('attireAssessment', 'No data')}
            - Facial Expression: {face_analysis_data.get('facialExpressionAnalysis', 'No data')}
            - Background Assessment: {face_analysis_data.get('backgroundAssessment', 'No data')}
            - Personality Indicators: {face_analysis_data.get('personalityIndicators', 'No data')}
            - Recommendations: {face_analysis_data.get('recommendationsForImprovement', 'No data')}
            - Overall Score: {face_analysis_data.get('overallScore', 0)}/10
            """
        
        # Format the interview assessment if available (embedded verbatim as JSON).
        interview_str = ""
        if interview_assessment:
            interview_str = f"""
            Interview Assessment:
            {json.dumps(interview_assessment, indent=2)}
            """
        
        # Create the prompt with different instructions based on language.
        # NOTE: doubled braces {{ }} in the template below are literal braces
        # in the emitted prompt (JSON response skeleton for the model).
        if language.lower() in ['en', 'eng', 'english']:
            prompt = f"""
            You are an expert in analyzing human emotions, body language, and eye contact in video interviews. Based on the transcript and emotional data provided, provide a comprehensive analysis of the interview.

            Emotion Analysis:
            Dominant emotion: {sentiment}
            Emotion breakdown: {emotions_str}
            Confidence by emotion: {confidence_str}
            Average confidence: {average_confidence:.2f}
            
            {eye_contact_str}
            
            {body_language_str}
            
            {face_analysis_str}
            
            {interview_str}
            
            Transcript:
            {transcript}

            Provide a comprehensive analysis with the following sections:
            1. Emotion Analysis: Analyze the emotions detected in the video.
            2. Transcript Analysis: Analyze the content of the transcript, key themes, and topics discussed.
            3. Body Language Analysis: If body language data is available, analyze the body language observed.
            4. Eye Contact Analysis: If eye contact data is available, analyze the eye contact patterns.
            5. Face Analysis: If face analysis data is available, analyze the professional appearance, attire, and background.
            6. Overall Summary: Provide a holistic view of the interview performance.
            7. Recommendations: Suggest improvements for future interviews.

            Format your response as a structured JSON with the following keys:
            {{
                "Emotion Analysis": {{ detailed analysis }},
                "Transcript Analysis": {{ detailed analysis }},
                "Body Language Analysis": {{ detailed analysis, if data is available }},
                "Eye Contact Analysis": {{ detailed analysis, if data is available }},
                "Face Analysis": {{ detailed analysis, if data is available }},
                "Overall Summary": "summary text",
                "Recommendations": {{ recommendations }}
            }}
            """
        else:
            # Simplified prompt for other languages
            prompt = f"""
            Analyze the following transcript and emotion data.
            
            Emotion data: {sentiment}, {emotions_str}
            
            {eye_contact_str}
            
            {body_language_str}
            
            {face_analysis_str}
            
            {interview_str}
            
            Transcript: {transcript}
            
            Provide a summary of the content and emotional state, formatted as JSON.
            """
        
        return prompt
    
    def _generate_empty_analysis(self) -> Dict[str, Any]:
        """
        Generate empty analysis when no data is available.
        
        Returns:
            Empty analysis dictionary
        """
        return {
            "Emotion Analysis": {
                "Dominant Emotions": {
                    "angry": 0,
                    "disgust": 0,
                    "fear": 0,
                    "happy": 0,
                    "sad": 0,
                    "surprise": 0,
                    "neutral": 0
                },
                "Confidence By Emotion": {
                    "angry": 0,
                    "disgust": 0,
                    "fear": 0,
                    "happy": 0,
                    "sad": 0,
                    "surprise": 0,
                    "neutral": 0
                },
                "Overall Sentiment": "No emotions detected",
                "Average Confidence": 0
            },
            "Transcript Analysis": {
                "Key Points": [],
                "Language Quality": "No transcript available",
                "Confidence Indicators": []
            },
            "Body Language Analysis": {
                "Eye Contact": "No data available",
                "Posture and Movement": "No data available",
                "Overall Body Language": "No data available"
            },
            "Overall Summary": "No data available for analysis",
            "Recommendations": {
                "Emotional Expression": "No recommendations available",
                "Communication": "No recommendations available",
                "Body Language": "No recommendations available",
                "Professional Appearance": "No recommendations available"
            }
        }
    
    def _extract_json_from_text(self, text: str) -> Dict[str, Any]:
        """
        Extract JSON from a text string that might contain other content.
        
        Args:
            text: The text to extract JSON from
            
        Returns:
            Extracted JSON as dict, or empty dict if extraction fails
        """
        try:
            # First try to parse the entire text as JSON
            return json.loads(text)
        except json.JSONDecodeError:
            # If that fails, try to find JSON-like content
            try:
                # Check if text starts with markdown code block
                if text.strip().startswith("```json"):
                    # Extract content between the markdown delimiters
                    parts = text.split("```")
                    if len(parts) >= 3:  # At least opening and closing backticks with content between
                        # Get the content after the first ``` and before the next ```
                        json_str = parts[1]
                        # Remove "json" language identifier if present
                        json_str = json_str.replace("json", "", 1).strip()
                        # Try to parse the extracted JSON
                        return json.loads(json_str)
                elif text.strip().startswith("```"):
                    # Similar handling for code blocks without language specification
                    parts = text.split("```")
                    if len(parts) >= 3:
                        json_str = parts[1].strip()
                        return json.loads(json_str)
                
                # Find the first opening brace and the last closing brace
                json_start = text.find('{')
                json_end = text.rfind('}') + 1
                
                if json_start >= 0 and json_end > json_start:
                    json_str = text[json_start:json_end]
                    # Try to parse the extracted JSON
                    return json.loads(json_str)
                
                # If no braces found, look for markdown code blocks elsewhere in the text
                if "```json" in text or "```" in text:
                    # Try to extract from code blocks
                    lines = text.split("\n")
                    start_line = -1
                    end_line = -1
                    
                    for i, line in enumerate(lines):
                        if "```json" in line or line.strip() == "```":
                            if start_line == -1:
                                start_line = i
                            else:
                                end_line = i
                                break
                    
                    if start_line != -1 and end_line != -1:
                        # Extract content between markdown delimiters
                        json_content = "\n".join(lines[start_line+1:end_line])
                        # Clean up and parse
                        json_content = json_content.replace("json", "", 1).strip()
                        return json.loads(json_content)
            except Exception as e:
                logger.error(f"Error extracting JSON from text: {str(e)}")
            
            # If all extraction attempts fail, return empty dict
            return {}
    
    def _format_confidence_values(self, raw_emotions: Dict[str, float], confidence_by_emotion: Dict[str, float]) -> Dict[str, float]:
        """
        Format the confidence values to match what's expected in the database.

        Prefers the explicit per-emotion confidence values; when none are
        positive (or the mapping is empty), falls back to the raw emotion
        scores as a stand-in.

        Args:
            raw_emotions: Raw emotion data
            confidence_by_emotion: Confidence values by emotion

        Returns:
            Formatted confidence values, rounded to two decimals
        """
        has_real_confidence = bool(confidence_by_emotion) and any(
            score > 0 for score in confidence_by_emotion.values()
        )

        if has_real_confidence:
            logger.info(f"Using provided confidence values: {confidence_by_emotion}")
            return {name: round(score, 2) for name, score in confidence_by_emotion.items()}

        # Fall back to raw emotion scores as a confidence proxy; log so the
        # substitution is visible (this substitution was once a bug source).
        logger.warning("No valid confidence values found, using raw emotions as proxy for confidence")
        return {name: round(score, 2) for name, score in raw_emotions.items()}
    
    def _get_dominant_confidence(self, raw_emotions: Dict[str, float], average_confidence: float) -> float:
        """
        Get the confidence value of the dominant emotion.

        Kept for backward compatibility: despite the name, it simply
        echoes the supplied average confidence (raw_emotions is unused).

        Args:
            raw_emotions: Raw emotion data (unused)
            average_confidence: Average confidence value from the data

        Returns:
            Dominant emotion confidence, rounded to two decimals
        """
        result = round(average_confidence, 2)
        logger.info(f"Using average confidence: {average_confidence}")
        return result