norhan12 committed on
Commit
35740d6
·
verified ·
1 Parent(s): 2ecb60f

Update process_interview.py

Browse files
Files changed (1) hide show
  1. process_interview.py +110 -50
process_interview.py CHANGED
@@ -368,18 +368,18 @@ def analyze_interviewee_voice(audio_path: str, utterances: List[Dict]) -> Dict:
368
  intensity_std = np.std(intensities) if intensities else 0
369
  shimmer = np.mean(np.abs(np.diff(intensities))) / intensity_mean if len(intensities) > 1 and intensity_mean > 0 else 0
370
  anxiety_score = 0.6 * (pitch_std / pitch_mean) + 0.4 * (jitter + shimmer) if pitch_mean > 0 else 0
371
- confidence_score = 0.7 * (1 / (1 + intensity_std)) + 0.3 * (1 / (1 + filler_ratio))
372
  hesitation_score = filler_ratio + repetition_score
373
  anxiety_level = 'High' if anxiety_score > 0.15 else 'Moderate' if anxiety_score > 0.07 else 'Low'
374
- confidence_level = 'High' if confidence_score > 0.7 else 'Moderate' if confidence_score > 0.5 else 'Low'
375
  fluency_level = 'Fluent' if (filler_ratio < 0.05 and repetition_score < 0.1) else 'Moderate' if (filler_ratio < 0.1 and repetition_score < 0.2) else 'Disfluent'
376
  return {
377
  'speaking_rate': float(round(speaking_rate, 2)),
378
- 'filler_ratio': float(round(filler_ratio, 4)),
379
- 'repetition_score': float(round(repetition_score, 4)),
380
  'pitch_analysis': {'mean': float(round(pitch_mean, 2)), 'std_dev': float(round(pitch_std, 2)), 'jitter': float(round(jitter, 4))},
381
- 'intensity_analysis': {'mean': float(round(intensity_mean, 2)), 'std_dev': float(round(intensity_std, 2)), 'shimmer': float(round(shimmer, 4))},
382
- 'composite_scores': {'anxiety': float(round(anxiety_score, 4)), 'confidence': float(round(confidence_score, 4)), 'hesitation': float(round(hesitation_score, 4))},
383
  'interpretation': {'anxiety_level': anxiety_level, 'confidence_level': confidence_level, 'fluency_level': fluency_level}
384
  }
385
  except Exception as e:
@@ -390,15 +390,15 @@ def generate_voice_interpretation(analysis: Dict) -> str:
390
  if 'error' in analysis:
391
  return f"Voice analysis unavailable: {analysis['error']}"
392
  interpretation_lines = [
393
- f"- Speaking Rate: {analysis['speaking_rate']} words/sec (Benchmark: 2.0-3.0 wps; affects clarity)",
394
- f"- Filler Words: {analysis['filler_ratio'] * 100:.1f}% (High usage reduces credibility)",
395
  f"- Anxiety: {analysis['interpretation']['anxiety_level']} (Score: {analysis['composite_scores']['anxiety']:.3f}; stress response)",
396
  f"- Confidence: {analysis['interpretation']['confidence_level']} (Score: {analysis['composite_scores']['confidence']:.3f}; vocal strength)",
397
  f"- Fluency: {analysis['interpretation']['fluency_level']} (Drives engagement)",
398
  "",
399
  "HR Insights:",
400
  "- Rapid speech (>3.0 wps) may reduce clarity; slower pacing enhances professionalism.",
401
- "- High filler word usage undermines perceived confidence.",
402
  "- Elevated anxiety suggests pressure; training can improve resilience.",
403
  "- Strong confidence supports leadership presence.",
404
  "- Fluent speech enhances engagement in team settings."
@@ -413,11 +413,11 @@ def generate_anxiety_confidence_chart(composite_scores: Dict, chart_buffer):
413
  bars = ax.bar(labels, scores, color=['#FF5252', '#26A69A'], edgecolor='black', width=0.45)
414
  ax.set_ylabel('Score', fontsize=12)
415
  ax.set_title('Vocal Dynamics: Anxiety vs. Confidence', fontsize=14, pad=15)
416
- ax.set_ylim(0, 1.3)
417
  for bar in bars:
418
  height = bar.get_height()
419
  ax.text(bar.get_x() + bar.get_width()/2, height + 0.05, f"{height:.2f}",
420
- ha='center', color='black', fontweight='bold', fontsize=10)
421
  ax.grid(True, axis='y', linestyle='--', alpha=0.7)
422
  plt.tight_layout()
423
  plt.savefig(chart_buffer, format='png', bbox_inches='tight', dpi=300)
@@ -454,7 +454,25 @@ def generate_report(analysis_data: Dict) -> str:
454
  try:
455
  voice = analysis_data.get('voice_analysis', {})
456
  voice_interpretation = generate_voice_interpretation(voice)
457
- interviewee_responses = [f"- {u['text']}" for u in analysis_data['transcript'] if u['role'] == 'Interviewee'][:3]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
458
  acceptance_prob = analysis_data.get('acceptance_probability', 50.0)
459
  acceptance_line = f"\n**Suitability Score: {acceptance_prob:.2f}%**\n"
460
  if acceptance_prob >= 80:
@@ -465,33 +483,66 @@ def generate_report(analysis_data: Dict) -> str:
465
  acceptance_line += "HR Verdict: Moderate potential, needs additional assessment."
466
  else:
467
  acceptance_line += "HR Verdict: Limited fit, significant improvement required."
 
468
  prompt = f"""
469
- You are EvalBot, a senior HR consultant delivering a concise, professional interview analysis report. Use clear headings, bullet points ('-'), complete sentences, and formal language. Avoid redundancy, vague terms, and special characters that could break formatting. Ensure each section is unique and actionable.
470
- {acceptance_line}
471
- **1. Executive Summary**
472
- - Provide a narrative overview of the candidate’s performance, highlighting key strengths and fit.
473
- - Duration: {analysis_data['text_analysis']['total_duration']:.2f} seconds
474
- - Speaker Turns: {analysis_data['text_analysis']['speaker_turns']}
475
- - Participants: {', '.join(sorted(set(u['speaker'] for u in analysis_data['transcript'])))}
476
- **2. Communication and Vocal Dynamics**
477
- - Evaluate vocal delivery (rate, fluency, confidence) with specific insights.
478
- {voice_interpretation}
479
- **3. Competency and Content**
480
- - Assess leadership, problem-solving, communication, and adaptability with clear examples.
481
- - List strengths and growth areas separately, using quantifiable achievements where possible.
482
- - Sample responses:
483
- {chr(10).join(interviewee_responses)}
484
- **4. Role Fit and Potential**
485
- - Analyze cultural fit, role readiness, and long-term growth potential with specific alignment to role requirements.
486
- **5. Recommendations**
487
- - Provide prioritized development strategies (e.g., communication training, technical assessments).
488
- - Suggest specific next steps for hiring managers (e.g., advance, schedule tests).
489
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
490
  response = gemini_model.generate_content(prompt)
491
- return re.sub(r'[^\x00-\x7F]+|[()]+', '', response.text) # Sanitize non-ASCII and parentheses
 
 
492
  except Exception as e:
493
- logger.error(f"Report generation failed: {str(e)}")
494
- return f"Error generating report: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
495
 
496
  def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text: str) -> bool:
497
  try:
@@ -539,13 +590,14 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text:
539
  story.append(Paragraph("<b>HR Verdict:</b> Limited fit, significant improvement required.", body_text))
540
  story.append(Spacer(1, 0.2*inch))
541
  participants = sorted([p for p in set(u['speaker'] for u in analysis_data['transcript']) if p != 'Unknown'])
 
542
  table_data = [
543
  ['Metric', 'Value'],
544
  ['Interview Duration', f"{analysis_data['text_analysis']['total_duration']:.2f} seconds"],
545
  ['Speaker Turns', f"{analysis_data['text_analysis']['speaker_turns']}"],
546
- ['Participants', ', '.join(participants)],
547
  ]
548
- table = Table(table_data, colWidths=[2.2*inch, 3.8*inch])
549
  table.setStyle(TableStyle([
550
  ('BACKGROUND', (0,0), (-1,0), colors.HexColor('#0050BC')),
551
  ('TEXTCOLOR', (0,0), (-1,0), colors.white),
@@ -557,6 +609,8 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text:
557
  ('TOPPADDING', (0,0), (-1,0), 6),
558
  ('BACKGROUND', (0,1), (-1,-1), colors.HexColor('#F5F6FA')),
559
  ('GRID', (0,0), (-1,-1), 0.4, colors.HexColor('#DDE4EB')),
 
 
560
  ]))
561
  story.append(table)
562
  story.append(Spacer(1, 0.3*inch))
@@ -641,23 +695,26 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text:
641
  clean_line = line.lstrip('-').strip()
642
  if not clean_line:
643
  continue
644
- clean_line = re.sub(r'[()]+', '', clean_line)
645
  if current_section == 'Competency':
646
- if any(k in clean_line.lower() for k in ['leader', 'problem', 'commun', 'adapt', 'strength']):
647
  current_subsection = 'Strengths'
648
- elif any(k in clean_line.lower() for k in ['improv', 'grow', 'depth']):
649
  current_subsection = 'Growth Areas'
650
  if current_subsection:
651
  sections[current_section][current_subsection].append(clean_line)
652
  elif current_section == 'Recommendations':
653
- if any(k in clean_line.lower() for k in ['commun', 'tech', 'depth', 'pres']):
654
  current_subsection = 'Development'
655
- elif any(k in clean_line.lower() for k in ['adv', 'train', 'assess', 'next', 'mentor']):
656
  current_subsection = 'Next Steps'
657
  if current_subsection:
658
  sections[current_section][current_subsection].append(clean_line)
659
  else:
660
  sections[current_section].append(clean_line)
 
 
 
661
 
662
  # Executive Summary
663
  story.append(Paragraph("2. Executive Summary", h2))
@@ -665,7 +722,8 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text:
665
  for line in sections['Executive Summary']:
666
  story.append(Paragraph(line, bullet_style))
667
  else:
668
- story.append(Paragraph("No summary provided.", body_text))
 
669
  story.append(Spacer(1, 0.15*inch))
670
 
671
  # Competency and Content
@@ -675,14 +733,14 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text:
675
  for line in sections['Competency']['Strengths']:
676
  story.append(Paragraph(line, bullet_style))
677
  else:
678
- story.append(Paragraph("No strengths identified.", body_text))
679
  story.append(Spacer(1, 0.1*inch))
680
  story.append(Paragraph("Growth Areas", h3))
681
  if sections['Competency']['Growth Areas']:
682
  for line in sections['Competency']['Growth Areas']:
683
  story.append(Paragraph(line, bullet_style))
684
  else:
685
- story.append(Paragraph("No growth areas identified; maintain current strengths.", body_text))
686
  story.append(Spacer(1, 0.15*inch))
687
 
688
  # Role Fit
@@ -691,7 +749,7 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text:
691
  for line in sections['Role Fit']:
692
  story.append(Paragraph(line, bullet_style))
693
  else:
694
- story.append(Paragraph("No fit analysis provided.", body_text))
695
  story.append(Spacer(1, 0.15*inch))
696
 
697
  # Recommendations
@@ -701,14 +759,14 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text:
701
  for line in sections['Recommendations']['Development']:
702
  story.append(Paragraph(line, bullet_style))
703
  else:
704
- story.append(Paragraph("No development priorities specified.", body_text))
705
  story.append(Spacer(1, 0.1*inch))
706
  story.append(Paragraph("Next Steps for Hiring Managers", h3))
707
  if sections['Recommendations']['Next Steps']:
708
  for line in sections['Recommendations']['Next Steps']:
709
  story.append(Paragraph(line, bullet_style))
710
  else:
711
- story.append(Paragraph("No next steps provided.", body_text))
712
  story.append(Spacer(1, 0.15*inch))
713
  story.append(Paragraph("This report provides actionable insights to support hiring and candidate development.", body_text))
714
 
@@ -751,6 +809,8 @@ def process_interview(audio_url: str) -> Dict:
751
  for utterance in transcript['utterances']:
752
  utterance['prosodic_features'] = extract_prosodic_features(wav_file, utterance['start'], utterance['end'])
753
  utterances_with_speakers = identify_speakers(transcript, wav_file)
 
 
754
  clf, vectorizer, scaler = None, None, None
755
  if os.path.exists(os.path.join(OUTPUT_DIR, 'role_classifier.pkl')):
756
  clf = joblib.load(os.path.join(OUTPUT_DIR, 'role_classifier.pkl'))
@@ -762,7 +822,7 @@ def process_interview(audio_url: str) -> Dict:
762
  voice_analysis = analyze_interviewee_voice(wav_file, classified_utterances)
763
  analysis_data = {
764
  'transcript': classified_utterances,
765
- 'speakers': list(set(u['speaker'] for u in classified_utterances)),
766
  'voice_analysis': voice_analysis,
767
  'text_analysis': {
768
  'total_duration': sum(u['prosodic_features']['duration'] for u in classified_utterances),
 
368
  intensity_std = np.std(intensities) if intensities else 0
369
  shimmer = np.mean(np.abs(np.diff(intensities))) / intensity_mean if len(intensities) > 1 and intensity_mean > 0 else 0
370
  anxiety_score = 0.6 * (pitch_std / pitch_mean) + 0.4 * (jitter + shimmer) if pitch_mean > 0 else 0
371
+ confidence_score = 0.7 * (1 / (1 + intensity_std)) + 0.3 * (1 - filler_ratio)
372
  hesitation_score = filler_ratio + repetition_score
373
  anxiety_level = 'High' if anxiety_score > 0.15 else 'Moderate' if anxiety_score > 0.07 else 'Low'
374
+ confidence_level = 'High' if confidence_score > 0.75 else 'Moderate' if confidence_score > 0.5 else 'Low'
375
  fluency_level = 'Fluent' if (filler_ratio < 0.05 and repetition_score < 0.1) else 'Moderate' if (filler_ratio < 0.1 and repetition_score < 0.2) else 'Disfluent'
376
  return {
377
  'speaking_rate': float(round(speaking_rate, 2)),
378
+ 'filler_ratio': float(round(filler_ratio, 3)),
379
+ 'repetition_score': float(round(repetition_score, 3)),
380
  'pitch_analysis': {'mean': float(round(pitch_mean, 2)), 'std_dev': float(round(pitch_std, 2)), 'jitter': float(round(jitter, 4))},
381
+ 'intensity_analysis': {'mean': float(round(intensity_mean, 2)), 'std_dev': float(round(float(intensity_std), 2)), 'shimmer': float(round(shimmer, 4))},
382
+ 'composite_scores': {'anxiety': float(round(anxiety_score, 3)), 'confidence': float(round(confidence_score, 3)), 'hesitation': float(round(hesitation_score, 3))},
383
  'interpretation': {'anxiety_level': anxiety_level, 'confidence_level': confidence_level, 'fluency_level': fluency_level}
384
  }
385
  except Exception as e:
 
390
  if 'error' in analysis:
391
  return f"Voice analysis unavailable: {analysis['error']}"
392
  interpretation_lines = [
393
+ f"- Speaking rate: {analysis['speaking_rate', 0):.2f} words/sec (Benchmark: 2.0}-{3.0}; affects clarity)",
394
+ f"- Filler words: {analysis['filler_ratio'] * 100:.1f}% (High usage reduces credibility)",
395
  f"- Anxiety: {analysis['interpretation']['anxiety_level']} (Score: {analysis['composite_scores']['anxiety']:.3f}; stress response)",
396
  f"- Confidence: {analysis['interpretation']['confidence_level']} (Score: {analysis['composite_scores']['confidence']:.3f}; vocal strength)",
397
  f"- Fluency: {analysis['interpretation']['fluency_level']} (Drives engagement)",
398
  "",
399
  "HR Insights:",
400
  "- Rapid speech (>3.0 wps) may reduce clarity; slower pacing enhances professionalism.",
401
+ "- High filler word usage undermines perceived credibility.",
402
  "- Elevated anxiety suggests pressure; training can improve resilience.",
403
  "- Strong confidence supports leadership presence.",
404
  "- Fluent speech enhances engagement in team settings."
 
413
  bars = ax.bar(labels, scores, color=['#FF5252', '#26A69A'], edgecolor='black', width=0.45)
414
  ax.set_ylabel('Score', fontsize=12)
415
  ax.set_title('Vocal Dynamics: Anxiety vs. Confidence', fontsize=14, pad=15)
416
+ ax.set_ylim(0, 1.2)
417
  for bar in bars:
418
  height = bar.get_height()
419
  ax.text(bar.get_x() + bar.get_width()/2, height + 0.05, f"{height:.2f}",
420
+ ha='center', va='bottom', color='black', fontweight='bold', fontsize=10)
421
  ax.grid(True, axis='y', linestyle='--', alpha=0.7)
422
  plt.tight_layout()
423
  plt.savefig(chart_buffer, format='png', bbox_inches='tight', dpi=300)
 
454
  try:
455
  voice = analysis_data.get('voice_analysis', {})
456
  voice_interpretation = generate_voice_interpretation(voice)
457
+ interviewee_responses = [u['text'] for u in analysis_data['transcript'] if u['role'] == 'Interviewee']
458
+ if not interviewee_responses:
459
+ logger.warning("No interviewee responses found for report generation")
460
+ return """**1. Executive Summary**
461
+ - Insufficient interviewee content to generate a summary.
462
+
463
+ **2. Communication and Vocal Dynamics**
464
+ {voice_interpretation}
465
+
466
+ **3. Competency and Content**
467
+ - Strengths: Unable to identify strengths due to limited content.
468
+ - Growth Areas: Recommend further interview to assess competencies.
469
+
470
+ **4. Role Fit and Potential**
471
+ - Unable to assess role fit due to insufficient content.
472
+
473
+ **5. Recommendations**
474
+ - Development: Schedule additional interview to gather more data.
475
+ - Next Steps: Conduct a follow-up interview with targeted questions."""
476
  acceptance_prob = analysis_data.get('acceptance_probability', 50.0)
477
  acceptance_line = f"\n**Suitability Score: {acceptance_prob:.2f}%**\n"
478
  if acceptance_prob >= 80:
 
483
  acceptance_line += "HR Verdict: Moderate potential, needs additional assessment."
484
  else:
485
  acceptance_line += "HR Verdict: Limited fit, significant improvement required."
486
+ transcript_text = "\n".join([f"- {u['speaker']}: {u['text']}" for u in analysis_data['transcript']])
487
  prompt = f"""
488
+ You are EvalBot, a senior HR consultant delivering a professional interview analysis report. Use clear headings with '**', bullet points ('-'), complete sentences, and formal language. Avoid redundancy, vague terms, and special characters that could break formatting (e.g., parentheses). Ensure each section is unique, actionable, and at least 2-3 bullet points long. If content is limited, provide reasonable inferences based on available data.
489
+
490
+ **Input Data**
491
+ - Suitability Score: {acceptance_prob:.2f}%
492
+ - Interview Duration: {analysis_data['text_analysis']['total_duration']:.2f} seconds
493
+ - Speaker Turns: {analysis_data['text_analysis']['speaker_turns']}
494
+ - Participants: {', '.join(sorted(set(u['speaker'] for u in analysis_data['transcript'])))}
495
+ - Voice Analysis: {voice_interpretation}
496
+ - Transcript Sample:
497
+ {transcript_text[:1000]}...
498
+
499
+ **Report Structure**
500
+ {acceptance_line}
501
+
502
+ **1. Executive Summary**
503
+ - Provide a narrative overview of the candidate’s performance, focusing on key strengths and role fit.
504
+ - Highlight communication style and engagement based on voice analysis and transcript.
505
+ - Note interview duration and participant dynamics.
506
+
507
+ **2. Communication and Vocal Dynamics**
508
+ - Evaluate vocal delivery (rate, fluency, confidence) with specific insights.
509
+ {voice_interpretation}
510
+
511
+ **3. Competency and Content**
512
+ - Assess leadership, problem-solving, communication, and adaptability with examples from the transcript.
513
+ - List strengths with quantifiable achievements where possible.
514
+ - Identify growth areas with constructive feedback.
515
+
516
+ **4. Role Fit and Potential**
517
+ - Analyze cultural fit, role readiness, and long-term growth potential.
518
+ - Align findings with typical role requirements (e.g., teamwork, technical skills).
519
+
520
+ **5. Recommendations**
521
+ - Provide prioritized development strategies (e.g., communication training, technical assessments).
522
+ - Suggest specific next steps for hiring managers (e.g., advance to next round, schedule tests).
523
+ """
524
  response = gemini_model.generate_content(prompt)
525
+ report_text = re.sub(r'[^\x00-\x7F]+|[()]+', '', response.text)
526
+ logger.info(f"Generated Gemini report: {report_text[:500]}...") # Log first 500 chars for debugging
527
+ return report_text
528
  except Exception as e:
529
+ logger.error(f"Report generation failed: {str(e)}", exc_info=True)
530
+ return f"""**1. Executive Summary**
531
+ - Report generation failed due to processing error.
532
+
533
+ **2. Communication and Vocal Dynamics**
534
+ {generate_voice_interpretation(analysis_data.get('voice_analysis', {}))}
535
+
536
+ **3. Competency and Content**
537
+ - Strengths: Unable to assess due to error.
538
+ - Growth Areas: Recommend reprocessing the audio.
539
+
540
+ **4. Role Fit and Potential**
541
+ - Unable to assess due to error.
542
+
543
+ **5. Recommendations**
544
+ - Development: Investigate processing error.
545
+ - Next Steps: Retry analysis with corrected audio."""
546
 
547
  def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text: str) -> bool:
548
  try:
 
590
  story.append(Paragraph("<b>HR Verdict:</b> Limited fit, significant improvement required.", body_text))
591
  story.append(Spacer(1, 0.2*inch))
592
  participants = sorted([p for p in set(u['speaker'] for u in analysis_data['transcript']) if p != 'Unknown'])
593
+ participants_str = ', '.join(participants)
594
  table_data = [
595
  ['Metric', 'Value'],
596
  ['Interview Duration', f"{analysis_data['text_analysis']['total_duration']:.2f} seconds"],
597
  ['Speaker Turns', f"{analysis_data['text_analysis']['speaker_turns']}"],
598
+ ['Participants', participants_str],
599
  ]
600
+ table = Table(table_data, colWidths=[2.0*inch, 4.0*inch])
601
  table.setStyle(TableStyle([
602
  ('BACKGROUND', (0,0), (-1,0), colors.HexColor('#0050BC')),
603
  ('TEXTCOLOR', (0,0), (-1,0), colors.white),
 
609
  ('TOPPADDING', (0,0), (-1,0), 6),
610
  ('BACKGROUND', (0,1), (-1,-1), colors.HexColor('#F5F6FA')),
611
  ('GRID', (0,0), (-1,-1), 0.4, colors.HexColor('#DDE4EB')),
612
+ ('LEFTPADDING', (1,3), (1,3), 10), # Add padding for Participants
613
+ ('WORDWRAP', (1,3), (1,3), 'CJK'), # Enable word wrapping
614
  ]))
615
  story.append(table)
616
  story.append(Spacer(1, 0.3*inch))
 
695
  clean_line = line.lstrip('-').strip()
696
  if not clean_line:
697
  continue
698
+ clean_line = re.sub(r'[^\w\s.,;:-]', '', clean_line) # Enhanced sanitization
699
  if current_section == 'Competency':
700
+ if any(k in clean_line.lower() for k in ['leader', 'problem', 'commun', 'adapt', 'achieve', 'skill', 'success']):
701
  current_subsection = 'Strengths'
702
+ elif any(k in clean_line.lower() for k in ['improv', 'grow', 'develop', 'weak', 'area']):
703
  current_subsection = 'Growth Areas'
704
  if current_subsection:
705
  sections[current_section][current_subsection].append(clean_line)
706
  elif current_section == 'Recommendations':
707
+ if any(k in clean_line.lower() for k in ['commun', 'tech', 'train', 'skill', 'pres']):
708
  current_subsection = 'Development'
709
+ elif any(k in clean_line.lower() for k in ['adv', 'assess', 'next', 'schedule', 'mentor']):
710
  current_subsection = 'Next Steps'
711
  if current_subsection:
712
  sections[current_section][current_subsection].append(clean_line)
713
  else:
714
  sections[current_section].append(clean_line)
715
+ elif current_section and line: # Handle non-bulleted lines
716
+ clean_line = re.sub(r'[^\w\s.,;:-]', '', line)
717
+ sections[current_section].append(clean_line)
718
 
719
  # Executive Summary
720
  story.append(Paragraph("2. Executive Summary", h2))
 
722
  for line in sections['Executive Summary']:
723
  story.append(Paragraph(line, bullet_style))
724
  else:
725
+ story.append(Paragraph("Candidate showed moderate engagement; further data needed for full assessment.", bullet_style))
726
+ story.append(Paragraph(f"Interview lasted {analysis_data['text_analysis']['total_duration']:.2f} seconds with {analysis_data['text_analysis']['speaker_turns']} turns.", bullet_style))
727
  story.append(Spacer(1, 0.15*inch))
728
 
729
  # Competency and Content
 
733
  for line in sections['Competency']['Strengths']:
734
  story.append(Paragraph(line, bullet_style))
735
  else:
736
+ story.append(Paragraph("Strengths not fully assessed; candidate demonstrated consistent communication.", bullet_style))
737
  story.append(Spacer(1, 0.1*inch))
738
  story.append(Paragraph("Growth Areas", h3))
739
  if sections['Competency']['Growth Areas']:
740
  for line in sections['Competency']['Growth Areas']:
741
  story.append(Paragraph(line, bullet_style))
742
  else:
743
+ story.append(Paragraph("Consider enhancing specificity in responses to highlight expertise.", bullet_style))
744
  story.append(Spacer(1, 0.15*inch))
745
 
746
  # Role Fit
 
749
  for line in sections['Role Fit']:
750
  story.append(Paragraph(line, bullet_style))
751
  else:
752
+ story.append(Paragraph("Potential for role fit exists; further evaluation needed to confirm alignment.", bullet_style))
753
  story.append(Spacer(1, 0.15*inch))
754
 
755
  # Recommendations
 
759
  for line in sections['Recommendations']['Development']:
760
  story.append(Paragraph(line, bullet_style))
761
  else:
762
+ story.append(Paragraph("Enroll in communication training to reduce filler words.", bullet_style))
763
  story.append(Spacer(1, 0.1*inch))
764
  story.append(Paragraph("Next Steps for Hiring Managers", h3))
765
  if sections['Recommendations']['Next Steps']:
766
  for line in sections['Recommendations']['Next Steps']:
767
  story.append(Paragraph(line, bullet_style))
768
  else:
769
+ story.append(Paragraph("Schedule a technical assessment to evaluate role-specific skills.", bullet_style))
770
  story.append(Spacer(1, 0.15*inch))
771
  story.append(Paragraph("This report provides actionable insights to support hiring and candidate development.", body_text))
772
 
 
809
  for utterance in transcript['utterances']:
810
  utterance['prosodic_features'] = extract_prosodic_features(wav_file, utterance['start'], utterance['end'])
811
  utterances_with_speakers = identify_speakers(transcript, wav_file)
812
+ if not utterances_with_speakers:
813
+ raise ValueError("No utterances identified in the audio")
814
  clf, vectorizer, scaler = None, None, None
815
  if os.path.exists(os.path.join(OUTPUT_DIR, 'role_classifier.pkl')):
816
  clf = joblib.load(os.path.join(OUTPUT_DIR, 'role_classifier.pkl'))
 
822
  voice_analysis = analyze_interviewee_voice(wav_file, classified_utterances)
823
  analysis_data = {
824
  'transcript': classified_utterances,
825
+ 'speakers': list(set(u['speaker'] for u in classified_utterances if u['speaker'] != 'Unknown')),
826
  'voice_analysis': voice_analysis,
827
  'text_analysis': {
828
  'total_duration': sum(u['prosodic_features']['duration'] for u in classified_utterances),