Spaces:

EvalBot
/

Audio

Sleeping

App Files Files Community

norhan12 committed on Jun 10, 2025

Commit

fe4c2c5

verified ·

1 Parent(s): 52fdc1d

Update process_interview.py

Browse files

Files changed (1) hide show

process_interview.py +37 -253

process_interview.py CHANGED Viewed

@@ -116,8 +116,6 @@ def load_speaker_model():
         logger.error(f"Model loading failed: {str(e)}")
         raise RuntimeError("Could not load speaker verification model")
 def load_models():
     speaker_model = load_speaker_model()
     nlp = spacy.load("en_core_web_sm")
@@ -324,6 +322,7 @@ def classify_roles(utterances: List[Dict], clf, vectorizer, scaler):
     except Exception as e:
         logger.error(f"Role classification failed: {str(e)}")
         raise
 def analyze_interviewee_voice(audio_path: str, utterances: List[Dict]) -> Dict:
     try:
         y, sr = librosa.load(audio_path, sr=16000)
@@ -387,8 +386,6 @@ def analyze_interviewee_voice(audio_path: str, utterances: List[Dict]) -> Dict:
         logger.error(f"Voice analysis failed: {str(e)}", exc_info=True)
         return {'error': f'Voice analysis incomplete due to audio processing issues: {str(e)}'}
 def generate_voice_interpretation(analysis: Dict) -> str:
     try:
         if 'error' in analysis:
@@ -405,12 +402,7 @@ def generate_voice_interpretation(analysis: Dict) -> str:
             "- High filler word usage undermines perceived credibility.",
             "- Elevated anxiety suggests pressure; training can improve resilience.",
             "- Strong confidence supports leadership presence.",
-            "- Fluent speech enhances engagement in team settings.",
-            "",
-            "Candidate Tips:",
-            "- Practice pacing to maintain a steady speaking rate (2.0-3.0 words/sec).",
-            "- Reduce filler words (e.g., 'um', 'like') through mock interviews.",
-            "- Use breathing exercises to lower anxiety and stabilize pitch."
         ]
         return "\n".join(interpretation_lines)
     except Exception as e:
@@ -466,18 +458,10 @@ def generate_report(analysis_data: Dict) -> str:
     try:
         voice = analysis_data.get('voice_analysis', {})
         voice_interpretation = generate_voice_interpretation(voice)
-        interviewee_responses = [u['text'] for u in analysis_data['transcript'] if u['role'] == 'Interviewee'][:5]
         if not interviewee_responses:
             logger.warning("No interviewee responses found for report generation")
-            return f"""**Suitability Score: 50.00%**
-HR Verdict: Insufficient data for evaluation.
-**User Feedback**
-- Insufficient content to provide feedback.
-- Practice answering common interview questions to improve engagement.
-**HR Evaluation**
-**1. Executive Summary**
 - Insufficient interviewee content to generate a summary.
 - Interview duration suggests limited engagement.
@@ -504,9 +488,9 @@ HR Verdict: Insufficient data for evaluation.
             acceptance_line += "HR Verdict: Moderate potential, needs additional assessment."
         else:
             acceptance_line += "HR Verdict: Limited fit, significant improvement required."
-        transcript_text = "\n".join([f"- {u['speaker']}: {u['text']}" for u in analysis_data['transcript']][:10])
         prompt = f"""
-You are EvalBot, a senior HR consultant delivering a dual-purpose interview analysis report. Generate two sections: one for the candidate (**User Feedback**) with actionable self-improvement tips, and one for HR (**HR Evaluation**) with professional analysis. Use clear headings with '**', bullet points ('-'), complete sentences, and formal language for HR, friendly language for User Feedback. Avoid redundancy, vague terms, and special characters that could break formatting. Ensure each section is unique, actionable, and contains at least 2-3 bullet points.
 **Input Data**
 - Suitability Score: {acceptance_prob:.2f}%
@@ -516,17 +500,11 @@ You are EvalBot, a senior HR consultant delivering a dual-purpose interview anal
 - Voice Analysis:
 {voice_interpretation}
 - Transcript Sample:
-{transcript_text}
 **Report Structure**
 {acceptance_line}
-**User Feedback**
-- Provide friendly, actionable tips for the candidate to improve communication, confidence, and content.
-- Focus on practical steps (e.g., practice pacing, reduce fillers).
-- Keep tone motivational and concise.
-**HR Evaluation**
 **1. Executive Summary**
 - Provide a narrative overview of the candidate’s performance, focusing on key strengths and role fit.
 - Highlight communication style and engagement based on voice analysis and transcript.
@@ -551,19 +529,11 @@ You are EvalBot, a senior HR consultant delivering a dual-purpose interview anal
 """
         response = gemini_model.generate_content(prompt)
         report_text = re.sub(r'[^\x00-\x7F]+|[()]+', '', response.text)
-        logger.info(f"Generated Gemini report: {report_text[:500]}...")
         return report_text
     except Exception as e:
         logger.error(f"Report generation failed: {str(e)}", exc_info=True)
-        return f"""**Suitability Score: 50.00%**
-HR Verdict: Report generation failed.
-**User Feedback**
-- Unable to provide feedback due to processing error.
-- Practice answering questions clearly to improve future interviews.
-**HR Evaluation**
-**1. Executive Summary**
 - Report generation failed due to processing error.
 **2. Communication and Vocal Dynamics**
@@ -580,188 +550,7 @@ HR Verdict: Report generation failed.
 - Development: Investigate processing error.
 - Next Steps: Retry analysis with corrected audio."""
-def create_user_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text: str) -> bool:
-    try:
-        doc = SimpleDocTemplate(output_path, pagesize=letter,
-                                rightMargin=0.75*inch, leftMargin=0.75*inch,
-                                topMargin=1*inch, bottomMargin=1*inch)
-        styles = getSampleStyleSheet()
-        h1 = ParagraphStyle(name='Heading1', fontSize=18, leading=22, spaceAfter=16, alignment=1, textColor=colors.HexColor('#003087'), fontName='Helvetica-Bold')
-        h2 = ParagraphStyle(name='Heading2', fontSize=13, leading=15, spaceBefore=10, spaceAfter=6, textColor=colors.HexColor('#0050BC'), fontName='Helvetica-Bold')
-        h3 = ParagraphStyle(name='Heading3', fontSize=9, leading=11, spaceBefore=6, spaceAfter=4, textColor=colors.HexColor('#3F7CFF'), fontName='Helvetica')
-        body_text = ParagraphStyle(name='BodyText', fontSize=8, leading=10, spaceAfter=4, fontName='Helvetica', textColor=colors.HexColor('#333333'))
-        bullet_style = ParagraphStyle(name='Bullet', parent=body_text, leftIndent=16, bulletIndent=6, fontName='Helvetica', bulletFontName='Helvetica', bulletFontSize=8)
-        story = []
-        def header_footer(canvas, doc):
-            canvas.saveState()
-            canvas.setFont('Helvetica', 7)
-            canvas.setFillColor(colors.HexColor('#666666'))
-            canvas.drawString(doc.leftMargin, 0.5*inch, f"Page {doc.page} | EvalBot Personal Feedback Report")
-            canvas.setStrokeColor(colors.HexColor('#0050BC'))
-            canvas.setLineWidth(0.5)
-            canvas.line(doc.leftMargin, doc.height + 0.9*inch, doc.width + doc.leftMargin, doc.height + 0.9*inch)
-            canvas.setFont('Helvetica-Bold', 8)
-            canvas.drawString(doc.leftMargin, doc.height + 0.95*inch, "Personal Interview Feedback")
-            canvas.drawRightString(doc.width + doc.leftMargin, doc.height + 0.95*inch, time.strftime('%B %d, %Y'))
-            canvas.restoreState()
-        # Title Page
-        story.append(Paragraph("Your Interview Feedback Report", h1))
-        story.append(Paragraph(f"Generated: {time.strftime('%B %d, %Y')}", ParagraphStyle(name='Date', alignment=1, fontSize=8, textColor=colors.HexColor('#666666'), fontName='Helvetica')))
-        story.append(Spacer(1, 0.3*inch))
-        story.append(Paragraph("This report provides personalized tips to help you shine in future interviews.", body_text))
-        story.append(Spacer(1, 0.2*inch))
-        story.append(Paragraph("Prepared by: EvalBot - AI-Powered Interview Coach", body_text))
-        story.append(PageBreak())
-        # Parse Gemini Report
-        sections = {
-            "User Feedback": [],
-            "Executive Summary": [],
-            "Communication": [],
-            "Competency": {"Strengths": [], "Growth Areas": []},
-            "Recommendations": {"Development": [], "Next Steps": []},
-            "Role Fit": [],
-        }
-        current_section = None
-        current_subsection = None
-        lines = gemini_report_text.split('\n')
-        for line in lines:
-            line = line.strip()
-            if not line:
-                continue
-            logger.debug(f"Parsing line: {line}")
-            if line.startswith('**') and line.endswith('**'):
-                section_title = line.strip('**').strip()
-                if section_title.startswith(('1.', '2.', '3.', '4.', '5.')):
-                    section_title = section_title[2:].strip()
-                if 'User Feedback' in section_title:
-                    current_section = 'User Feedback'
-                    current_subsection = None
-                elif 'Executive Summary' in section_title:
-                    current_section = 'Executive Summary'
-                    current_subsection = None
-                elif 'Communication' in section_title:
-                    current_section = 'Communication'
-                    current_subsection = None
-                elif 'Competency' in section_title:
-                    current_section = 'Competency'
-                    current_subsection = None
-                elif 'Role Fit' in section_title:
-                    current_section = 'Role Fit'
-                    current_subsection = None
-                elif 'Recommendations' in section_title:
-                    current_section = 'Recommendations'
-                    current_subsection = None
-                logger.debug(f"Set section: {current_section}")
-            elif line.startswith('-') and current_section:
-                clean_line = line.lstrip('-').strip()
-                if not clean_line:
-                    continue
-                clean_line = re.sub(r'[^\w\s.,;:-]', '', clean_line)
-                logger.debug(f"Processing bullet: {clean_line}, section: {current_section}, subsection: {current_subsection}")
-                if current_section in ['Competency', 'Recommendations']:
-                    if current_subsection is None:
-                        if current_section == 'Competency':
-                            current_subsection = 'Strengths'
-                        elif current_section == 'Recommendations':
-                            current_subsection = 'Development'
-                        logger.debug(f"Default subsection set to: {current_subsection}")
-                    if current_subsection:
-                        sections[current_section][current_subsection].append(clean_line)
-                    else:
-                        logger.warning(f"Skipping line due to unset subsection: {clean_line}")
-                else:
-                    sections[current_section].append(clean_line)
-            elif current_section and line:
-                clean_line = re.sub(r'[^\w\s.,;:-]', '', line)
-                logger.debug(f"Processing non-bullet: {clean_line}, section: {current_section}, subsection: {current_subsection}")
-                if current_section in ['Competency', 'Recommendations']:
-                    if current_subsection:
-                        sections[current_section][current_subsection].append(clean_line)
-                    else:
-                        current_subsection = 'Strengths' if current_section == 'Competency' else 'Development'
-                        sections[current_section][current_subsection].append(clean_line)
-                        logger.debug(f"Default subsection for non-bullet set to: {current_subsection}")
-                else:
-                    sections[current_section].append(clean_line)
-        # Introduction
-        story.append(Paragraph("How to Use This Report", h2))
-        story.append(Paragraph("This report is designed to help you improve your interview skills. Review the feedback below and try the suggested tips to boost your confidence and clarity.", body_text))
-        story.append(Spacer(1, 0.15*inch))
-        # Your Communication Style
-        story.append(Paragraph("Your Communication Style", h2))
-        voice_analysis = analysis_data.get('voice_analysis', {})
-        if voice_analysis and 'error' not in voice_analysis:
-            table_data = [
-                ['Metric', 'Value', 'What It Means'],
-                ['Speaking Rate', f"{voice_analysis.get('speaking_rate', 0):.2f} words/sec", 'How fast you speak'],
-                ['Filler Words', f"{voice_analysis.get('filler_ratio', 0) * 100:.1f}%", 'Words like "um" or "like"'],
-                ['Anxiety', voice_analysis.get('interpretation', {}).get('anxiety_level', 'N/A'), 'Your stress level'],
-                ['Confidence', voice_analysis.get('interpretation', {}).get('confidence_level', 'N/A'), 'Your vocal strength'],
-                ['Fluency', voice_analysis.get('interpretation', {}).get('fluency_level', 'N/A'), 'How smoothly you speak'],
-            ]
-            table = Table(table_data, colWidths=[1.5*inch, 1.3*inch, 3.2*inch])
-            table.setStyle(TableStyle([
-                ('BACKGROUND', (0,0), (-1,0), colors.HexColor('#0050BC')),
-                ('TEXTCOLOR', (0,0), (-1,0), colors.white),
-                ('ALIGN', (0,0), (-1,-1), 'LEFT'),
-                ('VALIGN', (0,0), (-1,-1), 'MIDDLE'),
-                ('FONTNAME', (0,0), (-1,0), 'Helvetica-Bold'),
-                ('FONTSIZE', (0,0), (-1,-1), 8),
-                ('BOTTOMPADDING', (0,0), (-1,0), 6),
-                ('TOPPADDING', (0,0), (-1,0), 6),
-                ('BACKGROUND', (0,1), (-1,-1), colors.HexColor('#F5F6FA')),
-                ('GRID', (0,0), (-1,-1), 0.4, colors.HexColor('#DDE4EB')),
-            ]))
-            story.append(table)
-            story.append(Spacer(1, 0.15*inch))
-            story.append(Paragraph("Tips to Improve:", h3))
-            for line in sections['Communication'][-3:]:  # Use candidate tips from voice_interpretation
-                story.append(Paragraph(line, bullet_style))
-        else:
-            story.append(Paragraph(f"Voice analysis unavailable: {voice_analysis.get('error', 'Unknown error')}", body_text))
-        story.append(Spacer(1, 0.15*inch))
-        # Your Responses
-        story.append(Paragraph("Your Responses", h2))
-        if sections['Competency']['Strengths'] or sections['Competency']['Growth Areas']:
-            story.append(Paragraph("Strengths", h3))
-            for line in sections['Competency']['Strengths'][:3]:
-                story.append(Paragraph(line, bullet_style))
-            story.append(Spacer(1, 0.1*inch))
-            story.append(Paragraph("Areas to Work On", h3))
-            for line in sections['Competency']['Growth Areas'][:3]:
-                story.append(Paragraph(line, bullet_style))
-        else:
-            story.append(Paragraph("You showed effort in responding; try to provide more specific examples.", bullet_style))
-            story.append(Paragraph("Practice structuring answers using the STAR method (Situation, Task, Action, Result).", bullet_style))
-        story.append(Spacer(1, 0.15*inch))
-        # Action Plan
-        story.append(Paragraph("Your Action Plan", h2))
-        if sections['User Feedback']:
-            for line in sections['User Feedback']:
-                story.append(Paragraph(line, bullet_style))
-        else:
-            story.append(Paragraph("Practice mock interviews to build confidence.", bullet_style))
-            story.append(Paragraph("Record yourself to identify and reduce filler words.", bullet_style))
-            story.append(Paragraph("Join a public speaking group to improve fluency.", bullet_style))
-        story.append(Spacer(1, 0.15*inch))
-        story.append(Paragraph("Keep practicing, and you'll see improvement in your next interview!", body_text))
-        doc.build(story, onFirstPage=header_footer, onLaterPages=header_footer)
-        logger.info(f"User PDF report successfully generated at {output_path}")
-        return True
-    except Exception as e:
-        logger.error(f"User PDF generation failed: {str(e)}\nFull Gemini report text:\n{gemini_report_text}", exc_info=True)
-        return False
-def create_company_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text: str) -> bool:
     try:
         doc = SimpleDocTemplate(output_path, pagesize=letter,
                                 rightMargin=0.75*inch, leftMargin=0.75*inch,
@@ -772,7 +561,7 @@ def create_company_pdf_report(analysis_data: Dict, output_path: str, gemini_repo
         h3 = ParagraphStyle(name='Heading3', fontSize=9, leading=11, spaceBefore=6, spaceAfter=4, textColor=colors.HexColor('#3F7CFF'), fontName='Helvetica')
         body_text = ParagraphStyle(name='BodyText', fontSize=8, leading=10, spaceAfter=4, fontName='Helvetica', textColor=colors.HexColor('#333333'))
         bullet_style = ParagraphStyle(name='Bullet', parent=body_text, leftIndent=16, bulletIndent=6, fontName='Helvetica', bulletFontName='Helvetica', bulletFontSize=8)
         story = []
         def header_footer(canvas, doc):
@@ -790,13 +579,13 @@ def create_company_pdf_report(analysis_data: Dict, output_path: str, gemini_repo
         # Title Page
         story.append(Paragraph("Candidate Interview Analysis", h1))
-        story.append(Paragraph(f"Generated {time.strftime('%B %d, %Y')}", ParagraphStyle(name='Date', alignment=1, fontSize=8, textColor=colors.HexColor('#666666'), fontName='Helvetica')))
         story.append(Spacer(1, 0.3*inch))
         acceptance_prob = analysis_data.get('acceptance_probability', 50.0)
         story.append(Paragraph("Hiring Suitability Snapshot", h2))
-        prob_color = colors.HexColor('#2E7D32') if acceptance_prob >= 80 else colors.HexColor('#F57C00') if acceptance_prob >= 60 else colors.HexColor('#D32F2F')
         story.append(Paragraph(f"Suitability Score: <font size=14 color='{prob_color.hexval()}'><b>{acceptance_prob:.2f}%</b></font>",
-                               ParagraphStyle(name='Prob', fontSize=10, spaceAfter=8, alignment=1, fontName='Helvetica-Bold')))
         if acceptance_prob >= 80:
             story.append(Paragraph("<b>HR Verdict:</b> Outstanding candidate, recommended for immediate advancement.", body_text))
         elif acceptance_prob >= 60:
@@ -806,7 +595,7 @@ def create_company_pdf_report(analysis_data: Dict, output_path: str, gemini_repo
         else:
             story.append(Paragraph("<b>HR Verdict:</b> Limited fit, significant improvement required.", body_text))
         story.append(Spacer(1, 0.2*inch))
-        participants = sorted(set(u['speaker'] for u in analysis_data['transcript'] if u['speaker'] != 'Unknown'))
         participants_str = ', '.join(participants)
         table_data = [
             ['Metric', 'Value'],
@@ -831,12 +620,12 @@ def create_company_pdf_report(analysis_data: Dict, output_path: str, gemini_repo
         ]))
         story.append(table)
         story.append(Spacer(1, 0.3*inch))
-        story.append(Paragraph("Prepared by EvalBot - AI-Powered HR Analysis", body_text))
         story.append(PageBreak())
         # Detailed Analysis
         story.append(Paragraph("Detailed Candidate Evaluation", h1))
         # Communication and Vocal Dynamics
         story.append(Paragraph("1. Communication & Vocal Dynamics", h2))
         voice_analysis = analysis_data.get('voice_analysis', {})
@@ -876,7 +665,6 @@ def create_company_pdf_report(analysis_data: Dict, output_path: str, gemini_repo
         # Parse Gemini Report
         sections = {
-            "User Feedback": [],
             "Executive Summary": [],
             "Communication": [],
             "Competency": {"Strengths": [], "Growth Areas": []},
@@ -890,7 +678,7 @@ def create_company_pdf_report(analysis_data: Dict, output_path: str, gemini_repo
             line = line.strip()
             if not line:
                 continue
-            logger.debug(f"Parsing line: {line}")
             if line.startswith('**') and line.endswith('**'):
                 section_title = line.strip('**').strip()
                 if section_title.startswith(('1.', '2.', '3.', '4.', '5.')):
@@ -918,7 +706,9 @@ def create_company_pdf_report(analysis_data: Dict, output_path: str, gemini_repo
                 clean_line = re.sub(r'[^\w\s.,;:-]', '', clean_line)
                 logger.debug(f"Processing bullet: {clean_line}, section: {current_section}, subsection: {current_subsection}")
                 if current_section in ['Competency', 'Recommendations']:
                     if current_subsection is None:
                         if current_section == 'Competency':
                             current_subsection = 'Strengths'
                         elif current_section == 'Recommendations':
@@ -929,6 +719,7 @@ def create_company_pdf_report(analysis_data: Dict, output_path: str, gemini_repo
                     else:
                         logger.warning(f"Skipping line due to unset subsection: {clean_line}")
                 else:
                     sections[current_section].append(clean_line)
             elif current_section and line:
                 clean_line = re.sub(r'[^\w\s.,;:-]', '', line)
@@ -937,6 +728,7 @@ def create_company_pdf_report(analysis_data: Dict, output_path: str, gemini_repo
                     if current_subsection:
                         sections[current_section][current_subsection].append(clean_line)
                     else:
                         current_subsection = 'Strengths' if current_section == 'Competency' else 'Development'
                         sections[current_section][current_subsection].append(clean_line)
                         logger.debug(f"Default subsection for non-bullet set to: {current_subsection}")
@@ -976,17 +768,17 @@ def create_company_pdf_report(analysis_data: Dict, output_path: str, gemini_repo
             for line in sections['Role Fit']:
                 story.append(Paragraph(line, bullet_style))
         else:
-            story.append(Paragraph("Potential for role fit exists; further evaluation needed to confirm alignment.", bullet_style))
         story.append(Spacer(1, 0.15*inch))
         # Recommendations
         story.append(Paragraph("5. Recommendations", h2))
         story.append(Paragraph("Development Priorities", h3))
         if sections['Recommendations']['Development']:
             for line in sections['Recommendations']['Development']:
-                story.append(Paragraph(line, bullet_style))
         else:
-            story.append(Paragraph("Enrollment in communication training to reduce filler words.", bullet_style))
         story.append(Spacer(1, 0.1*inch))
         story.append(Paragraph("Next Steps for Hiring Managers", h3))
         if sections['Recommendations']['Next Steps']:
@@ -995,13 +787,13 @@ def create_company_pdf_report(analysis_data: Dict, output_path: str, gemini_repo
         else:
             story.append(Paragraph("Schedule a technical assessment to evaluate role-specific skills.", bullet_style))
         story.append(Spacer(1, 0.15*inch))
-        story.append(Paragraph("This report provides actionable insights to support hiring decisions.", body_text))
         doc.build(story, onFirstPage=header_footer, onLaterPages=header_footer)
-        logger.info(f"Company PDF report successfully generated at {output_path}")
         return True
     except Exception as e:
-        logger.error(f"Company PDF generation failed: {str(e)}\nFull Gemini report text:\n{gemini_report_text}", exc_info=True)
         return False
 def convert_to_serializable(obj):
@@ -1059,28 +851,21 @@ def process_interview(audio_url: str) -> Dict:
         analysis_data['acceptance_probability'] = calculate_acceptance_probability(analysis_data)
         gemini_report_text = generate_report(analysis_data)
         base_name = str(uuid.uuid4())
-        user_pdf_path = os.path.join(OUTPUT_DIR, f"{base_name}_user_report.pdf")
-        company_pdf_path = os.path.join(OUTPUT_DIR, f"{base_name}_company_report.pdf")
         json_path = os.path.join(OUTPUT_DIR, f"{base_name}_analysis.json")
-        user_pdf_success = create_user_pdf_report(analysis_data, user_pdf_path, gemini_report_text)
-        company_pdf_success = create_company_pdf_report(analysis_data, company_pdf_path, gemini_report_text)
         with open(json_path, 'w') as f:
             serializable_data = convert_to_serializable(analysis_data)
             json.dump(serializable_data, f, indent=2)
-        if not (user_pdf_success and company_pdf_success):
-            logger.warning(f"One or both PDF reports failed to generate for {audio_url}")
             return {
-                'user_pdf_path': user_pdf_path if user_pdf_success else None,
-                'company_pdf_path': company_pdf_path if company_pdf_success else None,
                 'json_path': json_path,
-                'error': 'One or both PDF generations failed'
             }
         logger.info(f"Processing completed for {audio_url}")
-        return {
-            'user_pdf_path': user_pdf_path,
-            'company_pdf_path': company_pdf_path,
-            'json_path': json_path
-        }
     except Exception as e:
         logger.error(f"Processing failed for {audio_url}: {str(e)}", exc_info=True)
         base_name = str(uuid.uuid4())
@@ -1088,8 +873,7 @@ def process_interview(audio_url: str) -> Dict:
         with open(json_path, 'w') as f:
             json.dump({'error': str(e)}, f, indent=2)
         return {
-            'user_pdf_path': None,
-            'company_pdf_path': None,
             'json_path': json_path,
             'error': str(e)
         }

         logger.error(f"Model loading failed: {str(e)}")
         raise RuntimeError("Could not load speaker verification model")
 def load_models():
     speaker_model = load_speaker_model()
     nlp = spacy.load("en_core_web_sm")
     except Exception as e:
         logger.error(f"Role classification failed: {str(e)}")
         raise
 def analyze_interviewee_voice(audio_path: str, utterances: List[Dict]) -> Dict:
     try:
         y, sr = librosa.load(audio_path, sr=16000)
         logger.error(f"Voice analysis failed: {str(e)}", exc_info=True)
         return {'error': f'Voice analysis incomplete due to audio processing issues: {str(e)}'}
 def generate_voice_interpretation(analysis: Dict) -> str:
     try:
         if 'error' in analysis:
             "- High filler word usage undermines perceived credibility.",
             "- Elevated anxiety suggests pressure; training can improve resilience.",
             "- Strong confidence supports leadership presence.",
+            "- Fluent speech enhances engagement in team settings."
         ]
         return "\n".join(interpretation_lines)
     except Exception as e:
     try:
         voice = analysis_data.get('voice_analysis', {})
         voice_interpretation = generate_voice_interpretation(voice)
+        interviewee_responses = [u['text'] for u in analysis_data['transcript'] if u['role'] == 'Interviewee']
         if not interviewee_responses:
             logger.warning("No interviewee responses found for report generation")
+            return f"""**1. Executive Summary**
 - Insufficient interviewee content to generate a summary.
 - Interview duration suggests limited engagement.
             acceptance_line += "HR Verdict: Moderate potential, needs additional assessment."
         else:
             acceptance_line += "HR Verdict: Limited fit, significant improvement required."
+        transcript_text = "\n".join([f"- {u['speaker']}: {u['text']}" for u in analysis_data['transcript']])
         prompt = f"""
+You are EvalBot, a senior HR consultant delivering a professional interview analysis report. Use clear headings with '**', bullet points ('-'), complete sentences, and formal language. Avoid redundancy, vague terms, and special characters that could break formatting (e.g., parentheses). Ensure each section is unique, actionable, and contains at least 2-3 bullet points. If content is limited, provide reasonable inferences based on available data.
 **Input Data**
 - Suitability Score: {acceptance_prob:.2f}%
 - Voice Analysis:
 {voice_interpretation}
 - Transcript Sample:
+{transcript_text[:1000]}...
 **Report Structure**
 {acceptance_line}
 **1. Executive Summary**
 - Provide a narrative overview of the candidate’s performance, focusing on key strengths and role fit.
 - Highlight communication style and engagement based on voice analysis and transcript.
 """
         response = gemini_model.generate_content(prompt)
         report_text = re.sub(r'[^\x00-\x7F]+|[()]+', '', response.text)
+        logger.info(f"Generated Gemini report: {report_text[:500]}...")  # Log for debugging
         return report_text
     except Exception as e:
         logger.error(f"Report generation failed: {str(e)}", exc_info=True)
+        return f"""**1. Executive Summary**
 - Report generation failed due to processing error.
 **2. Communication and Vocal Dynamics**
 - Development: Investigate processing error.
 - Next Steps: Retry analysis with corrected audio."""
+def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text: str) -> bool:
     try:
         doc = SimpleDocTemplate(output_path, pagesize=letter,
                                 rightMargin=0.75*inch, leftMargin=0.75*inch,
         h3 = ParagraphStyle(name='Heading3', fontSize=9, leading=11, spaceBefore=6, spaceAfter=4, textColor=colors.HexColor('#3F7CFF'), fontName='Helvetica')
         body_text = ParagraphStyle(name='BodyText', fontSize=8, leading=10, spaceAfter=4, fontName='Helvetica', textColor=colors.HexColor('#333333'))
         bullet_style = ParagraphStyle(name='Bullet', parent=body_text, leftIndent=16, bulletIndent=6, fontName='Helvetica', bulletFontName='Helvetica', bulletFontSize=8)
         story = []
         def header_footer(canvas, doc):
         # Title Page
         story.append(Paragraph("Candidate Interview Analysis", h1))
+        story.append(Paragraph(f"Generated: {time.strftime('%B %d, %Y')}", ParagraphStyle(name='Date', alignment=1, fontSize=8, textColor=colors.HexColor('#666666'), fontName='Helvetica')))
         story.append(Spacer(1, 0.3*inch))
         acceptance_prob = analysis_data.get('acceptance_probability', 50.0)
         story.append(Paragraph("Hiring Suitability Snapshot", h2))
+        prob_color = colors.HexColor('#2E7D32') if acceptance_prob >= 80 else (colors.HexColor('#F57C00') if acceptance_prob >= 60 else colors.HexColor('#D32F2F'))
         story.append(Paragraph(f"Suitability Score: <font size=14 color='{prob_color.hexval()}'><b>{acceptance_prob:.2f}%</b></font>",
+                             ParagraphStyle(name='Prob', fontSize=10, spaceAfter=8, alignment=1, fontName='Helvetica-Bold')))
         if acceptance_prob >= 80:
             story.append(Paragraph("<b>HR Verdict:</b> Outstanding candidate, recommended for immediate advancement.", body_text))
         elif acceptance_prob >= 60:
         else:
             story.append(Paragraph("<b>HR Verdict:</b> Limited fit, significant improvement required.", body_text))
         story.append(Spacer(1, 0.2*inch))
+        participants = sorted([p for p in set(u['speaker'] for u in analysis_data['transcript']) if p != 'Unknown'])
         participants_str = ', '.join(participants)
         table_data = [
             ['Metric', 'Value'],
         ]))
         story.append(table)
         story.append(Spacer(1, 0.3*inch))
+        story.append(Paragraph("Prepared by: EvalBot - AI-Powered HR Analysis", body_text))
         story.append(PageBreak())
         # Detailed Analysis
         story.append(Paragraph("Detailed Candidate Evaluation", h1))
         # Communication and Vocal Dynamics
         story.append(Paragraph("1. Communication & Vocal Dynamics", h2))
         voice_analysis = analysis_data.get('voice_analysis', {})
         # Parse Gemini Report
         sections = {
             "Executive Summary": [],
             "Communication": [],
             "Competency": {"Strengths": [], "Growth Areas": []},
             line = line.strip()
             if not line:
                 continue
+            logger.debug(f"Parsing line: {line}")  # Debug parsing
             if line.startswith('**') and line.endswith('**'):
                 section_title = line.strip('**').strip()
                 if section_title.startswith(('1.', '2.', '3.', '4.', '5.')):
                 clean_line = re.sub(r'[^\w\s.,;:-]', '', clean_line)
                 logger.debug(f"Processing bullet: {clean_line}, section: {current_section}, subsection: {current_subsection}")
                 if current_section in ['Competency', 'Recommendations']:
+                    # For dictionary sections, append to subsection
                     if current_subsection is None:
+                        # Set default subsection if unset
                         if current_section == 'Competency':
                             current_subsection = 'Strengths'
                         elif current_section == 'Recommendations':
                     else:
                         logger.warning(f"Skipping line due to unset subsection: {clean_line}")
                 else:
+                    # For list sections, append directly
                     sections[current_section].append(clean_line)
             elif current_section and line:
                 clean_line = re.sub(r'[^\w\s.,;:-]', '', line)
                     if current_subsection:
                         sections[current_section][current_subsection].append(clean_line)
                     else:
+                        # Default subsection
                         current_subsection = 'Strengths' if current_section == 'Competency' else 'Development'
                         sections[current_section][current_subsection].append(clean_line)
                         logger.debug(f"Default subsection for non-bullet set to: {current_subsection}")
             for line in sections['Role Fit']:
                 story.append(Paragraph(line, bullet_style))
         else:
+            story.append(Paragraph("Potential for role fit exists; further evaluation needed to confirm alignment.", bullet_style))
         story.append(Spacer(1, 0.15*inch))
         # Recommendations
         story.append(Paragraph("5. Recommendations", h2))
         story.append(Paragraph("Development Priorities", h3))
         if sections['Recommendations']['Development']:
             for line in sections['Recommendations']['Development']:
+                story.append(Paragraph(line, bullet_style))
         else:
+            story.append(Paragraph("Enroll in communication training to reduce filler words.", bullet_style))
         story.append(Spacer(1, 0.1*inch))
         story.append(Paragraph("Next Steps for Hiring Managers", h3))
         if sections['Recommendations']['Next Steps']:
         else:
             story.append(Paragraph("Schedule a technical assessment to evaluate role-specific skills.", bullet_style))
         story.append(Spacer(1, 0.15*inch))
+        story.append(Paragraph("This report provides actionable insights to support hiring and candidate development.", body_text))
         doc.build(story, onFirstPage=header_footer, onLaterPages=header_footer)
+        logger.info(f"PDF report successfully generated at {output_path}")
         return True
     except Exception as e:
+        logger.error(f"PDF generation failed: {str(e)}\nFull Gemini report text:\n{gemini_report_text}", exc_info=True)
         return False
 def convert_to_serializable(obj):
         analysis_data['acceptance_probability'] = calculate_acceptance_probability(analysis_data)
         gemini_report_text = generate_report(analysis_data)
         base_name = str(uuid.uuid4())
+        pdf_path = os.path.join(OUTPUT_DIR, f"{base_name}_report.pdf")
         json_path = os.path.join(OUTPUT_DIR, f"{base_name}_analysis.json")
+        pdf_success = create_pdf_report(analysis_data, pdf_path, gemini_report_text)
         with open(json_path, 'w') as f:
             serializable_data = convert_to_serializable(analysis_data)
             json.dump(serializable_data, f, indent=2)
+        if not pdf_success:
+            logger.warning(f"PDF report failed to generate for {audio_url}")
             return {
+                'pdf_path': None,
                 'json_path': json_path,
+                'error': 'PDF generation failed'
             }
         logger.info(f"Processing completed for {audio_url}")
+        return {'pdf_path': pdf_path, 'json_path': json_path}
     except Exception as e:
         logger.error(f"Processing failed for {audio_url}: {str(e)}", exc_info=True)
         base_name = str(uuid.uuid4())
         with open(json_path, 'w') as f:
             json.dump({'error': str(e)}, f, indent=2)
         return {
+            'pdf_path': None,
             'json_path': json_path,
             'error': str(e)
         }