Spaces:

EvalBot
/

Audio

Sleeping

App Files Files Community

norhan12 committed on Jun 10, 2025

Commit

35740d6

verified ·

1 Parent(s): 2ecb60f

Update process_interview.py

Browse files

Files changed (1) hide show

process_interview.py +110 -50

process_interview.py CHANGED Viewed

@@ -368,18 +368,18 @@ def analyze_interviewee_voice(audio_path: str, utterances: List[Dict]) -> Dict:
         intensity_std = np.std(intensities) if intensities else 0
         shimmer = np.mean(np.abs(np.diff(intensities))) / intensity_mean if len(intensities) > 1 and intensity_mean > 0 else 0
         anxiety_score = 0.6 * (pitch_std / pitch_mean) + 0.4 * (jitter + shimmer) if pitch_mean > 0 else 0
-        confidence_score = 0.7 * (1 / (1 + intensity_std)) + 0.3 * (1 / (1 + filler_ratio))
         hesitation_score = filler_ratio + repetition_score
         anxiety_level = 'High' if anxiety_score > 0.15 else 'Moderate' if anxiety_score > 0.07 else 'Low'
-        confidence_level = 'High' if confidence_score > 0.7 else 'Moderate' if confidence_score > 0.5 else 'Low'
         fluency_level = 'Fluent' if (filler_ratio < 0.05 and repetition_score < 0.1) else 'Moderate' if (filler_ratio < 0.1 and repetition_score < 0.2) else 'Disfluent'
         return {
             'speaking_rate': float(round(speaking_rate, 2)),
-            'filler_ratio': float(round(filler_ratio, 4)),
-            'repetition_score': float(round(repetition_score, 4)),
             'pitch_analysis': {'mean': float(round(pitch_mean, 2)), 'std_dev': float(round(pitch_std, 2)), 'jitter': float(round(jitter, 4))},
-            'intensity_analysis': {'mean': float(round(intensity_mean, 2)), 'std_dev': float(round(intensity_std, 2)), 'shimmer': float(round(shimmer, 4))},
-            'composite_scores': {'anxiety': float(round(anxiety_score, 4)), 'confidence': float(round(confidence_score, 4)), 'hesitation': float(round(hesitation_score, 4))},
             'interpretation': {'anxiety_level': anxiety_level, 'confidence_level': confidence_level, 'fluency_level': fluency_level}
         }
     except Exception as e:
@@ -390,15 +390,15 @@ def generate_voice_interpretation(analysis: Dict) -> str:
     if 'error' in analysis:
         return f"Voice analysis unavailable: {analysis['error']}"
     interpretation_lines = [
-        f"- Speaking Rate: {analysis['speaking_rate']} words/sec (Benchmark: 2.0-3.0 wps; affects clarity)",
-        f"- Filler Words: {analysis['filler_ratio'] * 100:.1f}% (High usage reduces credibility)",
         f"- Anxiety: {analysis['interpretation']['anxiety_level']} (Score: {analysis['composite_scores']['anxiety']:.3f}; stress response)",
         f"- Confidence: {analysis['interpretation']['confidence_level']} (Score: {analysis['composite_scores']['confidence']:.3f}; vocal strength)",
         f"- Fluency: {analysis['interpretation']['fluency_level']} (Drives engagement)",
         "",
         "HR Insights:",
         "- Rapid speech (>3.0 wps) may reduce clarity; slower pacing enhances professionalism.",
-        "- High filler word usage undermines perceived confidence.",
         "- Elevated anxiety suggests pressure; training can improve resilience.",
         "- Strong confidence supports leadership presence.",
         "- Fluent speech enhances engagement in team settings."
@@ -413,11 +413,11 @@ def generate_anxiety_confidence_chart(composite_scores: Dict, chart_buffer):
         bars = ax.bar(labels, scores, color=['#FF5252', '#26A69A'], edgecolor='black', width=0.45)
         ax.set_ylabel('Score', fontsize=12)
         ax.set_title('Vocal Dynamics: Anxiety vs. Confidence', fontsize=14, pad=15)
-        ax.set_ylim(0, 1.3)
         for bar in bars:
             height = bar.get_height()
             ax.text(bar.get_x() + bar.get_width()/2, height + 0.05, f"{height:.2f}",
-                    ha='center', color='black', fontweight='bold', fontsize=10)
         ax.grid(True, axis='y', linestyle='--', alpha=0.7)
         plt.tight_layout()
         plt.savefig(chart_buffer, format='png', bbox_inches='tight', dpi=300)
@@ -454,7 +454,25 @@ def generate_report(analysis_data: Dict) -> str:
     try:
         voice = analysis_data.get('voice_analysis', {})
         voice_interpretation = generate_voice_interpretation(voice)
-        interviewee_responses = [f"- {u['text']}" for u in analysis_data['transcript'] if u['role'] == 'Interviewee'][:3]
         acceptance_prob = analysis_data.get('acceptance_probability', 50.0)
         acceptance_line = f"\n**Suitability Score: {acceptance_prob:.2f}%**\n"
         if acceptance_prob >= 80:
@@ -465,33 +483,66 @@ def generate_report(analysis_data: Dict) -> str:
             acceptance_line += "HR Verdict: Moderate potential, needs additional assessment."
         else:
             acceptance_line += "HR Verdict: Limited fit, significant improvement required."
         prompt = f"""
-        You are EvalBot, a senior HR consultant delivering a concise, professional interview analysis report. Use clear headings, bullet points ('-'), complete sentences, and formal language. Avoid redundancy, vague terms, and special characters that could break formatting. Ensure each section is unique and actionable.
-        {acceptance_line}
-        **1. Executive Summary**
-        - Provide a narrative overview of the candidate’s performance, highlighting key strengths and fit.
-        - Duration: {analysis_data['text_analysis']['total_duration']:.2f} seconds
-        - Speaker Turns: {analysis_data['text_analysis']['speaker_turns']}
-        - Participants: {', '.join(sorted(set(u['speaker'] for u in analysis_data['transcript'])))}
-        **2. Communication and Vocal Dynamics**
-        - Evaluate vocal delivery (rate, fluency, confidence) with specific insights.
-        {voice_interpretation}
-        **3. Competency and Content**
-        - Assess leadership, problem-solving, communication, and adaptability with clear examples.
-        - List strengths and growth areas separately, using quantifiable achievements where possible.
-        - Sample responses:
-        {chr(10).join(interviewee_responses)}
-        **4. Role Fit and Potential**
-        - Analyze cultural fit, role readiness, and long-term growth potential with specific alignment to role requirements.
-        **5. Recommendations**
-        - Provide prioritized development strategies (e.g., communication training, technical assessments).
-        - Suggest specific next steps for hiring managers (e.g., advance, schedule tests).
-        """
         response = gemini_model.generate_content(prompt)
-        return re.sub(r'[^\x00-\x7F]+|[()]+', '', response.text)  # Sanitize non-ASCII and parentheses
     except Exception as e:
-        logger.error(f"Report generation failed: {str(e)}")
-        return f"Error generating report: {str(e)}"
 def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text: str) -> bool:
     try:
@@ -539,13 +590,14 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text:
             story.append(Paragraph("<b>HR Verdict:</b> Limited fit, significant improvement required.", body_text))
         story.append(Spacer(1, 0.2*inch))
         participants = sorted([p for p in set(u['speaker'] for u in analysis_data['transcript']) if p != 'Unknown'])
         table_data = [
             ['Metric', 'Value'],
             ['Interview Duration', f"{analysis_data['text_analysis']['total_duration']:.2f} seconds"],
             ['Speaker Turns', f"{analysis_data['text_analysis']['speaker_turns']}"],
-            ['Participants', ', '.join(participants)],
         ]
-        table = Table(table_data, colWidths=[2.2*inch, 3.8*inch])
         table.setStyle(TableStyle([
             ('BACKGROUND', (0,0), (-1,0), colors.HexColor('#0050BC')),
             ('TEXTCOLOR', (0,0), (-1,0), colors.white),
@@ -557,6 +609,8 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text:
             ('TOPPADDING', (0,0), (-1,0), 6),
             ('BACKGROUND', (0,1), (-1,-1), colors.HexColor('#F5F6FA')),
             ('GRID', (0,0), (-1,-1), 0.4, colors.HexColor('#DDE4EB')),
         ]))
         story.append(table)
         story.append(Spacer(1, 0.3*inch))
@@ -641,23 +695,26 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text:
                 clean_line = line.lstrip('-').strip()
                 if not clean_line:
                     continue
-                clean_line = re.sub(r'[()]+', '', clean_line)
                 if current_section == 'Competency':
-                    if any(k in clean_line.lower() for k in ['leader', 'problem', 'commun', 'adapt', 'strength']):
                         current_subsection = 'Strengths'
-                    elif any(k in clean_line.lower() for k in ['improv', 'grow', 'depth']):
                         current_subsection = 'Growth Areas'
                     if current_subsection:
                         sections[current_section][current_subsection].append(clean_line)
                 elif current_section == 'Recommendations':
-                    if any(k in clean_line.lower() for k in ['commun', 'tech', 'depth', 'pres']):
                         current_subsection = 'Development'
-                    elif any(k in clean_line.lower() for k in ['adv', 'train', 'assess', 'next', 'mentor']):
                         current_subsection = 'Next Steps'
                     if current_subsection:
                         sections[current_section][current_subsection].append(clean_line)
                 else:
                     sections[current_section].append(clean_line)
         # Executive Summary
         story.append(Paragraph("2. Executive Summary", h2))
@@ -665,7 +722,8 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text:
             for line in sections['Executive Summary']:
                 story.append(Paragraph(line, bullet_style))
         else:
-            story.append(Paragraph("No summary provided.", body_text))
         story.append(Spacer(1, 0.15*inch))
         # Competency and Content
@@ -675,14 +733,14 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text:
             for line in sections['Competency']['Strengths']:
                 story.append(Paragraph(line, bullet_style))
         else:
-            story.append(Paragraph("No strengths identified.", body_text))
         story.append(Spacer(1, 0.1*inch))
         story.append(Paragraph("Growth Areas", h3))
         if sections['Competency']['Growth Areas']:
             for line in sections['Competency']['Growth Areas']:
                 story.append(Paragraph(line, bullet_style))
         else:
-            story.append(Paragraph("No growth areas identified; maintain current strengths.", body_text))
         story.append(Spacer(1, 0.15*inch))
         # Role Fit
@@ -691,7 +749,7 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text:
             for line in sections['Role Fit']:
                 story.append(Paragraph(line, bullet_style))
         else:
-            story.append(Paragraph("No fit analysis provided.", body_text))
         story.append(Spacer(1, 0.15*inch))
         # Recommendations
@@ -701,14 +759,14 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text:
             for line in sections['Recommendations']['Development']:
                 story.append(Paragraph(line, bullet_style))
         else:
-            story.append(Paragraph("No development priorities specified.", body_text))
         story.append(Spacer(1, 0.1*inch))
         story.append(Paragraph("Next Steps for Hiring Managers", h3))
         if sections['Recommendations']['Next Steps']:
             for line in sections['Recommendations']['Next Steps']:
                 story.append(Paragraph(line, bullet_style))
         else:
-            story.append(Paragraph("No next steps provided.", body_text))
         story.append(Spacer(1, 0.15*inch))
         story.append(Paragraph("This report provides actionable insights to support hiring and candidate development.", body_text))
@@ -751,6 +809,8 @@ def process_interview(audio_url: str) -> Dict:
         for utterance in transcript['utterances']:
             utterance['prosodic_features'] = extract_prosodic_features(wav_file, utterance['start'], utterance['end'])
         utterances_with_speakers = identify_speakers(transcript, wav_file)
         clf, vectorizer, scaler = None, None, None
         if os.path.exists(os.path.join(OUTPUT_DIR, 'role_classifier.pkl')):
             clf = joblib.load(os.path.join(OUTPUT_DIR, 'role_classifier.pkl'))
@@ -762,7 +822,7 @@ def process_interview(audio_url: str) -> Dict:
         voice_analysis = analyze_interviewee_voice(wav_file, classified_utterances)
         analysis_data = {
             'transcript': classified_utterances,
-            'speakers': list(set(u['speaker'] for u in classified_utterances)),
             'voice_analysis': voice_analysis,
             'text_analysis': {
                 'total_duration': sum(u['prosodic_features']['duration'] for u in classified_utterances),

         intensity_std = np.std(intensities) if intensities else 0
         shimmer = np.mean(np.abs(np.diff(intensities))) / intensity_mean if len(intensities) > 1 and intensity_mean > 0 else 0
         anxiety_score = 0.6 * (pitch_std / pitch_mean) + 0.4 * (jitter + shimmer) if pitch_mean > 0 else 0
+        confidence_score = 0.7 * (1 / (1 + intensity_std)) + 0.3 * (1 - filler_ratio)
         hesitation_score = filler_ratio + repetition_score
         anxiety_level = 'High' if anxiety_score > 0.15 else 'Moderate' if anxiety_score > 0.07 else 'Low'
+        confidence_level = 'High' if confidence_score > 0.75 else 'Moderate' if confidence_score > 0.5 else 'Low'
         fluency_level = 'Fluent' if (filler_ratio < 0.05 and repetition_score < 0.1) else 'Moderate' if (filler_ratio < 0.1 and repetition_score < 0.2) else 'Disfluent'
         return {
             'speaking_rate': float(round(speaking_rate, 2)),
+            'filler_ratio': float(round(filler_ratio, 3)),
+            'repetition_score': float(round(repetition_score, 3)),
             'pitch_analysis': {'mean': float(round(pitch_mean, 2)), 'std_dev': float(round(pitch_std, 2)), 'jitter': float(round(jitter, 4))},
+            'intensity_analysis': {'mean': float(round(intensity_mean, 2)), 'std_dev': float(round(float(intensity_std), 2)), 'shimmer': float(round(shimmer, 4))},
+            'composite_scores': {'anxiety': float(round(anxiety_score, 3)), 'confidence': float(round(confidence_score, 3)), 'hesitation': float(round(hesitation_score, 3))},
             'interpretation': {'anxiety_level': anxiety_level, 'confidence_level': confidence_level, 'fluency_level': fluency_level}
         }
     except Exception as e:
     if 'error' in analysis:
         return f"Voice analysis unavailable: {analysis['error']}"
     interpretation_lines = [
+        # .get() with a default keeps this line renderable when speaking_rate is absent; the benchmark range is plain text.
+        f"- Speaking rate: {analysis.get('speaking_rate', 0):.2f} words/sec (Benchmark: 2.0-3.0; affects clarity)",
+        f"- Filler words: {analysis['filler_ratio'] * 100:.1f}% (High usage reduces credibility)",
         f"- Anxiety: {analysis['interpretation']['anxiety_level']} (Score: {analysis['composite_scores']['anxiety']:.3f}; stress response)",
         f"- Confidence: {analysis['interpretation']['confidence_level']} (Score: {analysis['composite_scores']['confidence']:.3f}; vocal strength)",
         f"- Fluency: {analysis['interpretation']['fluency_level']} (Drives engagement)",
         "",
         "HR Insights:",
         "- Rapid speech (>3.0 wps) may reduce clarity; slower pacing enhances professionalism.",
+        "- High filler word usage undermines perceived credibility.",
         "- Elevated anxiety suggests pressure; training can improve resilience.",
         "- Strong confidence supports leadership presence.",
         "- Fluent speech enhances engagement in team settings."
         bars = ax.bar(labels, scores, color=['#FF5252', '#26A69A'], edgecolor='black', width=0.45)
         ax.set_ylabel('Score', fontsize=12)
         ax.set_title('Vocal Dynamics: Anxiety vs. Confidence', fontsize=14, pad=15)
+        ax.set_ylim(0, 1.2)
         for bar in bars:
             height = bar.get_height()
             ax.text(bar.get_x() + bar.get_width()/2, height + 0.05, f"{height:.2f}",
+                    ha='center', va='bottom', color='black', fontweight='bold', fontsize=10)
         ax.grid(True, axis='y', linestyle='--', alpha=0.7)
         plt.tight_layout()
         plt.savefig(chart_buffer, format='png', bbox_inches='tight', dpi=300)
     try:
         voice = analysis_data.get('voice_analysis', {})
         voice_interpretation = generate_voice_interpretation(voice)
+        interviewee_responses = [u['text'] for u in analysis_data['transcript'] if u['role'] == 'Interviewee']
+        if not interviewee_responses:
+            logger.warning("No interviewee responses found for report generation")
+            # Must be an f-string: otherwise the literal text "{voice_interpretation}" ends up in the report.
+            return f"""**1. Executive Summary**
+- Insufficient interviewee content to generate a summary.
+**2. Communication and Vocal Dynamics**
+{voice_interpretation}
+**3. Competency and Content**
+- Strengths: Unable to identify strengths due to limited content.
+- Growth Areas: Recommend further interview to assess competencies.
+**4. Role Fit and Potential**
+- Unable to assess role fit due to insufficient content.
+**5. Recommendations**
+- Development: Schedule additional interview to gather more data.
+- Next Steps: Conduct a follow-up interview with targeted questions."""
         acceptance_prob = analysis_data.get('acceptance_probability', 50.0)
         acceptance_line = f"\n**Suitability Score: {acceptance_prob:.2f}%**\n"
         if acceptance_prob >= 80:
             acceptance_line += "HR Verdict: Moderate potential, needs additional assessment."
         else:
             acceptance_line += "HR Verdict: Limited fit, significant improvement required."
+        transcript_text = "\n".join([f"- {u['speaker']}: {u['text']}" for u in analysis_data['transcript']])
         prompt = f"""
+You are EvalBot, a senior HR consultant delivering a professional interview analysis report. Use clear headings with '**', bullet points ('-'), complete sentences, and formal language. Avoid redundancy, vague terms, and special characters that could break formatting (e.g., parentheses). Ensure each section is unique, actionable, and at least 2-3 bullet points long. If content is limited, provide reasonable inferences based on available data.
+**Input Data**
+- Suitability Score: {acceptance_prob:.2f}%
+- Interview Duration: {analysis_data['text_analysis']['total_duration']:.2f} seconds
+- Speaker Turns: {analysis_data['text_analysis']['speaker_turns']}
+- Participants: {', '.join(sorted(set(u['speaker'] for u in analysis_data['transcript'])))}
+- Voice Analysis: {voice_interpretation}
+- Transcript Sample:
+{transcript_text[:1000]}...
+**Report Structure**
+{acceptance_line}
+**1. Executive Summary**
+- Provide a narrative overview of the candidate’s performance, focusing on key strengths and role fit.
+- Highlight communication style and engagement based on voice analysis and transcript.
+- Note interview duration and participant dynamics.
+**2. Communication and Vocal Dynamics**
+- Evaluate vocal delivery (rate, fluency, confidence) with specific insights.
+{voice_interpretation}
+**3. Competency and Content**
+- Assess leadership, problem-solving, communication, and adaptability with examples from the transcript.
+- List strengths with quantifiable achievements where possible.
+- Identify growth areas with constructive feedback.
+**4. Role Fit and Potential**
+- Analyze cultural fit, role readiness, and long-term growth potential.
+- Align findings with typical role requirements (e.g., teamwork, technical skills).
+**5. Recommendations**
+- Provide prioritized development strategies (e.g., communication training, technical assessments).
+- Suggest specific next steps for hiring managers (e.g., advance to next round, schedule tests).
+"""
         response = gemini_model.generate_content(prompt)
+        report_text = re.sub(r'[^\x00-\x7F]+|[()]+', '', response.text)
+        logger.info(f"Generated Gemini report: {report_text[:500]}...")  # Log first 500 chars for debugging
+        return report_text
     except Exception as e:
+        logger.error(f"Report generation failed: {str(e)}", exc_info=True)
+        # The original failure may have come from generate_voice_interpretation itself
+        # (it is also called inside the try body), so guard the retry to keep this
+        # fallback from raising out of the handler.
+        try:
+            voice_section = generate_voice_interpretation(analysis_data.get('voice_analysis', {}))
+        except Exception:
+            voice_section = "- Voice analysis unavailable."
+        return f"""**1. Executive Summary**
+- Report generation failed due to processing error.
+**2. Communication and Vocal Dynamics**
+{voice_section}
+**3. Competency and Content**
+- Strengths: Unable to assess due to error.
+- Growth Areas: Recommend reprocessing the audio.
+**4. Role Fit and Potential**
+- Unable to assess due to error.
+**5. Recommendations**
+- Development: Investigate processing error.
+- Next Steps: Retry analysis with corrected audio."""
 def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text: str) -> bool:
     try:
             story.append(Paragraph("<b>HR Verdict:</b> Limited fit, significant improvement required.", body_text))
         story.append(Spacer(1, 0.2*inch))
         participants = sorted([p for p in set(u['speaker'] for u in analysis_data['transcript']) if p != 'Unknown'])
+        participants_str = ', '.join(participants)
         table_data = [
             ['Metric', 'Value'],
             ['Interview Duration', f"{analysis_data['text_analysis']['total_duration']:.2f} seconds"],
             ['Speaker Turns', f"{analysis_data['text_analysis']['speaker_turns']}"],
+            ['Participants', participants_str],
         ]
+        table = Table(table_data, colWidths=[2.0*inch, 4.0*inch])
         table.setStyle(TableStyle([
             ('BACKGROUND', (0,0), (-1,0), colors.HexColor('#0050BC')),
             ('TEXTCOLOR', (0,0), (-1,0), colors.white),
             ('TOPPADDING', (0,0), (-1,0), 6),
             ('BACKGROUND', (0,1), (-1,-1), colors.HexColor('#F5F6FA')),
             ('GRID', (0,0), (-1,-1), 0.4, colors.HexColor('#DDE4EB')),
+            ('LEFTPADDING', (1,3), (1,3), 10),  # Add padding for Participants
+            ('WORDWRAP', (1,3), (1,3), 'CJK'),  # Enable word wrapping
         ]))
         story.append(table)
         story.append(Spacer(1, 0.3*inch))
                 clean_line = line.lstrip('-').strip()
                 if not clean_line:
                     continue
+                clean_line = re.sub(r'[^\w\s.,;:-]', '', clean_line)  # Enhanced sanitization
                 if current_section == 'Competency':
+                    if any(k in clean_line.lower() for k in ['leader', 'problem', 'commun', 'adapt', 'achieve', 'skill', 'success']):
                         current_subsection = 'Strengths'
+                    elif any(k in clean_line.lower() for k in ['improv', 'grow', 'develop', 'weak', 'area']):
                         current_subsection = 'Growth Areas'
                     if current_subsection:
                         sections[current_section][current_subsection].append(clean_line)
                 elif current_section == 'Recommendations':
+                    if any(k in clean_line.lower() for k in ['commun', 'tech', 'train', 'skill', 'pres']):
                         current_subsection = 'Development'
+                    elif any(k in clean_line.lower() for k in ['adv', 'assess', 'next', 'schedule', 'mentor']):
                         current_subsection = 'Next Steps'
                     if current_subsection:
                         sections[current_section][current_subsection].append(clean_line)
                 else:
                     sections[current_section].append(clean_line)
+            elif current_section and line:  # Handle non-bulleted lines
+                clean_line = re.sub(r'[^\w\s.,;:-]', '', line)
+                sections[current_section].append(clean_line)
         # Executive Summary
         story.append(Paragraph("2. Executive Summary", h2))
             for line in sections['Executive Summary']:
                 story.append(Paragraph(line, bullet_style))
         else:
+            story.append(Paragraph("Candidate showed moderate engagement; further data needed for full assessment.", bullet_style))
+            story.append(Paragraph(f"Interview lasted {analysis_data['text_analysis']['total_duration']:.2f} seconds with {analysis_data['text_analysis']['speaker_turns']} turns.", bullet_style))
         story.append(Spacer(1, 0.15*inch))
         # Competency and Content
             for line in sections['Competency']['Strengths']:
                 story.append(Paragraph(line, bullet_style))
         else:
+            story.append(Paragraph("Strengths not fully assessed; candidate demonstrated consistent communication.", bullet_style))
         story.append(Spacer(1, 0.1*inch))
         story.append(Paragraph("Growth Areas", h3))
         if sections['Competency']['Growth Areas']:
             for line in sections['Competency']['Growth Areas']:
                 story.append(Paragraph(line, bullet_style))
         else:
+            story.append(Paragraph("Consider enhancing specificity in responses to highlight expertise.", bullet_style))
         story.append(Spacer(1, 0.15*inch))
         # Role Fit
             for line in sections['Role Fit']:
                 story.append(Paragraph(line, bullet_style))
         else:
+            story.append(Paragraph("Potential for role fit exists; further evaluation needed to confirm alignment.", bullet_style))
         story.append(Spacer(1, 0.15*inch))
         # Recommendations
             for line in sections['Recommendations']['Development']:
                 story.append(Paragraph(line, bullet_style))
         else:
+            story.append(Paragraph("Enroll in communication training to reduce filler words.", bullet_style))
         story.append(Spacer(1, 0.1*inch))
         story.append(Paragraph("Next Steps for Hiring Managers", h3))
         if sections['Recommendations']['Next Steps']:
             for line in sections['Recommendations']['Next Steps']:
                 story.append(Paragraph(line, bullet_style))
         else:
+            story.append(Paragraph("Schedule a technical assessment to evaluate role-specific skills.", bullet_style))
         story.append(Spacer(1, 0.15*inch))
         story.append(Paragraph("This report provides actionable insights to support hiring and candidate development.", body_text))
         for utterance in transcript['utterances']:
             utterance['prosodic_features'] = extract_prosodic_features(wav_file, utterance['start'], utterance['end'])
         utterances_with_speakers = identify_speakers(transcript, wav_file)
+        if not utterances_with_speakers:
+            raise ValueError("No utterances identified in the audio")
         clf, vectorizer, scaler = None, None, None
         if os.path.exists(os.path.join(OUTPUT_DIR, 'role_classifier.pkl')):
             clf = joblib.load(os.path.join(OUTPUT_DIR, 'role_classifier.pkl'))
         voice_analysis = analyze_interviewee_voice(wav_file, classified_utterances)
         analysis_data = {
             'transcript': classified_utterances,
+            'speakers': list(set(u['speaker'] for u in classified_utterances if u['speaker'] != 'Unknown')),
             'voice_analysis': voice_analysis,
             'text_analysis': {
                 'total_duration': sum(u['prosodic_features']['duration'] for u in classified_utterances),