norhan12 committed on
Commit
fe4c2c5
·
verified ·
1 Parent(s): 52fdc1d

Update process_interview.py

Browse files
Files changed (1) hide show
  1. process_interview.py +37 -253
process_interview.py CHANGED
@@ -116,8 +116,6 @@ def load_speaker_model():
116
  logger.error(f"Model loading failed: {str(e)}")
117
  raise RuntimeError("Could not load speaker verification model")
118
 
119
-
120
-
121
  def load_models():
122
  speaker_model = load_speaker_model()
123
  nlp = spacy.load("en_core_web_sm")
@@ -324,6 +322,7 @@ def classify_roles(utterances: List[Dict], clf, vectorizer, scaler):
324
  except Exception as e:
325
  logger.error(f"Role classification failed: {str(e)}")
326
  raise
 
327
  def analyze_interviewee_voice(audio_path: str, utterances: List[Dict]) -> Dict:
328
  try:
329
  y, sr = librosa.load(audio_path, sr=16000)
@@ -387,8 +386,6 @@ def analyze_interviewee_voice(audio_path: str, utterances: List[Dict]) -> Dict:
387
  logger.error(f"Voice analysis failed: {str(e)}", exc_info=True)
388
  return {'error': f'Voice analysis incomplete due to audio processing issues: {str(e)}'}
389
 
390
-
391
-
392
  def generate_voice_interpretation(analysis: Dict) -> str:
393
  try:
394
  if 'error' in analysis:
@@ -405,12 +402,7 @@ def generate_voice_interpretation(analysis: Dict) -> str:
405
  "- High filler word usage undermines perceived credibility.",
406
  "- Elevated anxiety suggests pressure; training can improve resilience.",
407
  "- Strong confidence supports leadership presence.",
408
- "- Fluent speech enhances engagement in team settings.",
409
- "",
410
- "Candidate Tips:",
411
- "- Practice pacing to maintain a steady speaking rate (2.0-3.0 words/sec).",
412
- "- Reduce filler words (e.g., 'um', 'like') through mock interviews.",
413
- "- Use breathing exercises to lower anxiety and stabilize pitch."
414
  ]
415
  return "\n".join(interpretation_lines)
416
  except Exception as e:
@@ -466,18 +458,10 @@ def generate_report(analysis_data: Dict) -> str:
466
  try:
467
  voice = analysis_data.get('voice_analysis', {})
468
  voice_interpretation = generate_voice_interpretation(voice)
469
- interviewee_responses = [u['text'] for u in analysis_data['transcript'] if u['role'] == 'Interviewee'][:5]
470
  if not interviewee_responses:
471
  logger.warning("No interviewee responses found for report generation")
472
- return f"""**Suitability Score: 50.00%**
473
- HR Verdict: Insufficient data for evaluation.
474
-
475
- **User Feedback**
476
- - Insufficient content to provide feedback.
477
- - Practice answering common interview questions to improve engagement.
478
-
479
- **HR Evaluation**
480
- **1. Executive Summary**
481
  - Insufficient interviewee content to generate a summary.
482
  - Interview duration suggests limited engagement.
483
 
@@ -504,9 +488,9 @@ HR Verdict: Insufficient data for evaluation.
504
  acceptance_line += "HR Verdict: Moderate potential, needs additional assessment."
505
  else:
506
  acceptance_line += "HR Verdict: Limited fit, significant improvement required."
507
- transcript_text = "\n".join([f"- {u['speaker']}: {u['text']}" for u in analysis_data['transcript']][:10])
508
  prompt = f"""
509
- You are EvalBot, a senior HR consultant delivering a dual-purpose interview analysis report. Generate two sections: one for the candidate (**User Feedback**) with actionable self-improvement tips, and one for HR (**HR Evaluation**) with professional analysis. Use clear headings with '**', bullet points ('-'), complete sentences, and formal language for HR, friendly language for User Feedback. Avoid redundancy, vague terms, and special characters that could break formatting. Ensure each section is unique, actionable, and contains at least 2-3 bullet points.
510
 
511
  **Input Data**
512
  - Suitability Score: {acceptance_prob:.2f}%
@@ -516,17 +500,11 @@ You are EvalBot, a senior HR consultant delivering a dual-purpose interview anal
516
  - Voice Analysis:
517
  {voice_interpretation}
518
  - Transcript Sample:
519
- {transcript_text}
520
 
521
  **Report Structure**
522
  {acceptance_line}
523
 
524
- **User Feedback**
525
- - Provide friendly, actionable tips for the candidate to improve communication, confidence, and content.
526
- - Focus on practical steps (e.g., practice pacing, reduce fillers).
527
- - Keep tone motivational and concise.
528
-
529
- **HR Evaluation**
530
  **1. Executive Summary**
531
  - Provide a narrative overview of the candidate’s performance, focusing on key strengths and role fit.
532
  - Highlight communication style and engagement based on voice analysis and transcript.
@@ -551,19 +529,11 @@ You are EvalBot, a senior HR consultant delivering a dual-purpose interview anal
551
  """
552
  response = gemini_model.generate_content(prompt)
553
  report_text = re.sub(r'[^\x00-\x7F]+|[()]+', '', response.text)
554
- logger.info(f"Generated Gemini report: {report_text[:500]}...")
555
  return report_text
556
  except Exception as e:
557
  logger.error(f"Report generation failed: {str(e)}", exc_info=True)
558
- return f"""**Suitability Score: 50.00%**
559
- HR Verdict: Report generation failed.
560
-
561
- **User Feedback**
562
- - Unable to provide feedback due to processing error.
563
- - Practice answering questions clearly to improve future interviews.
564
-
565
- **HR Evaluation**
566
- **1. Executive Summary**
567
  - Report generation failed due to processing error.
568
 
569
  **2. Communication and Vocal Dynamics**
@@ -580,188 +550,7 @@ HR Verdict: Report generation failed.
580
  - Development: Investigate processing error.
581
  - Next Steps: Retry analysis with corrected audio."""
582
 
583
- def create_user_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text: str) -> bool:
584
- try:
585
- doc = SimpleDocTemplate(output_path, pagesize=letter,
586
- rightMargin=0.75*inch, leftMargin=0.75*inch,
587
- topMargin=1*inch, bottomMargin=1*inch)
588
- styles = getSampleStyleSheet()
589
- h1 = ParagraphStyle(name='Heading1', fontSize=18, leading=22, spaceAfter=16, alignment=1, textColor=colors.HexColor('#003087'), fontName='Helvetica-Bold')
590
- h2 = ParagraphStyle(name='Heading2', fontSize=13, leading=15, spaceBefore=10, spaceAfter=6, textColor=colors.HexColor('#0050BC'), fontName='Helvetica-Bold')
591
- h3 = ParagraphStyle(name='Heading3', fontSize=9, leading=11, spaceBefore=6, spaceAfter=4, textColor=colors.HexColor('#3F7CFF'), fontName='Helvetica')
592
- body_text = ParagraphStyle(name='BodyText', fontSize=8, leading=10, spaceAfter=4, fontName='Helvetica', textColor=colors.HexColor('#333333'))
593
- bullet_style = ParagraphStyle(name='Bullet', parent=body_text, leftIndent=16, bulletIndent=6, fontName='Helvetica', bulletFontName='Helvetica', bulletFontSize=8)
594
-
595
- story = []
596
-
597
- def header_footer(canvas, doc):
598
- canvas.saveState()
599
- canvas.setFont('Helvetica', 7)
600
- canvas.setFillColor(colors.HexColor('#666666'))
601
- canvas.drawString(doc.leftMargin, 0.5*inch, f"Page {doc.page} | EvalBot Personal Feedback Report")
602
- canvas.setStrokeColor(colors.HexColor('#0050BC'))
603
- canvas.setLineWidth(0.5)
604
- canvas.line(doc.leftMargin, doc.height + 0.9*inch, doc.width + doc.leftMargin, doc.height + 0.9*inch)
605
- canvas.setFont('Helvetica-Bold', 8)
606
- canvas.drawString(doc.leftMargin, doc.height + 0.95*inch, "Personal Interview Feedback")
607
- canvas.drawRightString(doc.width + doc.leftMargin, doc.height + 0.95*inch, time.strftime('%B %d, %Y'))
608
- canvas.restoreState()
609
-
610
- # Title Page
611
- story.append(Paragraph("Your Interview Feedback Report", h1))
612
- story.append(Paragraph(f"Generated: {time.strftime('%B %d, %Y')}", ParagraphStyle(name='Date', alignment=1, fontSize=8, textColor=colors.HexColor('#666666'), fontName='Helvetica')))
613
- story.append(Spacer(1, 0.3*inch))
614
- story.append(Paragraph("This report provides personalized tips to help you shine in future interviews.", body_text))
615
- story.append(Spacer(1, 0.2*inch))
616
- story.append(Paragraph("Prepared by: EvalBot - AI-Powered Interview Coach", body_text))
617
- story.append(PageBreak())
618
-
619
- # Parse Gemini Report
620
- sections = {
621
- "User Feedback": [],
622
- "Executive Summary": [],
623
- "Communication": [],
624
- "Competency": {"Strengths": [], "Growth Areas": []},
625
- "Recommendations": {"Development": [], "Next Steps": []},
626
- "Role Fit": [],
627
- }
628
- current_section = None
629
- current_subsection = None
630
- lines = gemini_report_text.split('\n')
631
- for line in lines:
632
- line = line.strip()
633
- if not line:
634
- continue
635
- logger.debug(f"Parsing line: {line}")
636
- if line.startswith('**') and line.endswith('**'):
637
- section_title = line.strip('**').strip()
638
- if section_title.startswith(('1.', '2.', '3.', '4.', '5.')):
639
- section_title = section_title[2:].strip()
640
- if 'User Feedback' in section_title:
641
- current_section = 'User Feedback'
642
- current_subsection = None
643
- elif 'Executive Summary' in section_title:
644
- current_section = 'Executive Summary'
645
- current_subsection = None
646
- elif 'Communication' in section_title:
647
- current_section = 'Communication'
648
- current_subsection = None
649
- elif 'Competency' in section_title:
650
- current_section = 'Competency'
651
- current_subsection = None
652
- elif 'Role Fit' in section_title:
653
- current_section = 'Role Fit'
654
- current_subsection = None
655
- elif 'Recommendations' in section_title:
656
- current_section = 'Recommendations'
657
- current_subsection = None
658
- logger.debug(f"Set section: {current_section}")
659
- elif line.startswith('-') and current_section:
660
- clean_line = line.lstrip('-').strip()
661
- if not clean_line:
662
- continue
663
- clean_line = re.sub(r'[^\w\s.,;:-]', '', clean_line)
664
- logger.debug(f"Processing bullet: {clean_line}, section: {current_section}, subsection: {current_subsection}")
665
- if current_section in ['Competency', 'Recommendations']:
666
- if current_subsection is None:
667
- if current_section == 'Competency':
668
- current_subsection = 'Strengths'
669
- elif current_section == 'Recommendations':
670
- current_subsection = 'Development'
671
- logger.debug(f"Default subsection set to: {current_subsection}")
672
- if current_subsection:
673
- sections[current_section][current_subsection].append(clean_line)
674
- else:
675
- logger.warning(f"Skipping line due to unset subsection: {clean_line}")
676
- else:
677
- sections[current_section].append(clean_line)
678
- elif current_section and line:
679
- clean_line = re.sub(r'[^\w\s.,;:-]', '', line)
680
- logger.debug(f"Processing non-bullet: {clean_line}, section: {current_section}, subsection: {current_subsection}")
681
- if current_section in ['Competency', 'Recommendations']:
682
- if current_subsection:
683
- sections[current_section][current_subsection].append(clean_line)
684
- else:
685
- current_subsection = 'Strengths' if current_section == 'Competency' else 'Development'
686
- sections[current_section][current_subsection].append(clean_line)
687
- logger.debug(f"Default subsection for non-bullet set to: {current_subsection}")
688
- else:
689
- sections[current_section].append(clean_line)
690
-
691
- # Introduction
692
- story.append(Paragraph("How to Use This Report", h2))
693
- story.append(Paragraph("This report is designed to help you improve your interview skills. Review the feedback below and try the suggested tips to boost your confidence and clarity.", body_text))
694
- story.append(Spacer(1, 0.15*inch))
695
-
696
- # Your Communication Style
697
- story.append(Paragraph("Your Communication Style", h2))
698
- voice_analysis = analysis_data.get('voice_analysis', {})
699
- if voice_analysis and 'error' not in voice_analysis:
700
- table_data = [
701
- ['Metric', 'Value', 'What It Means'],
702
- ['Speaking Rate', f"{voice_analysis.get('speaking_rate', 0):.2f} words/sec", 'How fast you speak'],
703
- ['Filler Words', f"{voice_analysis.get('filler_ratio', 0) * 100:.1f}%", 'Words like "um" or "like"'],
704
- ['Anxiety', voice_analysis.get('interpretation', {}).get('anxiety_level', 'N/A'), 'Your stress level'],
705
- ['Confidence', voice_analysis.get('interpretation', {}).get('confidence_level', 'N/A'), 'Your vocal strength'],
706
- ['Fluency', voice_analysis.get('interpretation', {}).get('fluency_level', 'N/A'), 'How smoothly you speak'],
707
- ]
708
- table = Table(table_data, colWidths=[1.5*inch, 1.3*inch, 3.2*inch])
709
- table.setStyle(TableStyle([
710
- ('BACKGROUND', (0,0), (-1,0), colors.HexColor('#0050BC')),
711
- ('TEXTCOLOR', (0,0), (-1,0), colors.white),
712
- ('ALIGN', (0,0), (-1,-1), 'LEFT'),
713
- ('VALIGN', (0,0), (-1,-1), 'MIDDLE'),
714
- ('FONTNAME', (0,0), (-1,0), 'Helvetica-Bold'),
715
- ('FONTSIZE', (0,0), (-1,-1), 8),
716
- ('BOTTOMPADDING', (0,0), (-1,0), 6),
717
- ('TOPPADDING', (0,0), (-1,0), 6),
718
- ('BACKGROUND', (0,1), (-1,-1), colors.HexColor('#F5F6FA')),
719
- ('GRID', (0,0), (-1,-1), 0.4, colors.HexColor('#DDE4EB')),
720
- ]))
721
- story.append(table)
722
- story.append(Spacer(1, 0.15*inch))
723
- story.append(Paragraph("Tips to Improve:", h3))
724
- for line in sections['Communication'][-3:]: # Use candidate tips from voice_interpretation
725
- story.append(Paragraph(line, bullet_style))
726
- else:
727
- story.append(Paragraph(f"Voice analysis unavailable: {voice_analysis.get('error', 'Unknown error')}", body_text))
728
- story.append(Spacer(1, 0.15*inch))
729
-
730
- # Your Responses
731
- story.append(Paragraph("Your Responses", h2))
732
- if sections['Competency']['Strengths'] or sections['Competency']['Growth Areas']:
733
- story.append(Paragraph("Strengths", h3))
734
- for line in sections['Competency']['Strengths'][:3]:
735
- story.append(Paragraph(line, bullet_style))
736
- story.append(Spacer(1, 0.1*inch))
737
- story.append(Paragraph("Areas to Work On", h3))
738
- for line in sections['Competency']['Growth Areas'][:3]:
739
- story.append(Paragraph(line, bullet_style))
740
- else:
741
- story.append(Paragraph("You showed effort in responding; try to provide more specific examples.", bullet_style))
742
- story.append(Paragraph("Practice structuring answers using the STAR method (Situation, Task, Action, Result).", bullet_style))
743
- story.append(Spacer(1, 0.15*inch))
744
-
745
- # Action Plan
746
- story.append(Paragraph("Your Action Plan", h2))
747
- if sections['User Feedback']:
748
- for line in sections['User Feedback']:
749
- story.append(Paragraph(line, bullet_style))
750
- else:
751
- story.append(Paragraph("Practice mock interviews to build confidence.", bullet_style))
752
- story.append(Paragraph("Record yourself to identify and reduce filler words.", bullet_style))
753
- story.append(Paragraph("Join a public speaking group to improve fluency.", bullet_style))
754
- story.append(Spacer(1, 0.15*inch))
755
- story.append(Paragraph("Keep practicing, and you'll see improvement in your next interview!", body_text))
756
-
757
- doc.build(story, onFirstPage=header_footer, onLaterPages=header_footer)
758
- logger.info(f"User PDF report successfully generated at {output_path}")
759
- return True
760
- except Exception as e:
761
- logger.error(f"User PDF generation failed: {str(e)}\nFull Gemini report text:\n{gemini_report_text}", exc_info=True)
762
- return False
763
-
764
- def create_company_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text: str) -> bool:
765
  try:
766
  doc = SimpleDocTemplate(output_path, pagesize=letter,
767
  rightMargin=0.75*inch, leftMargin=0.75*inch,
@@ -772,7 +561,7 @@ def create_company_pdf_report(analysis_data: Dict, output_path: str, gemini_repo
772
  h3 = ParagraphStyle(name='Heading3', fontSize=9, leading=11, spaceBefore=6, spaceAfter=4, textColor=colors.HexColor('#3F7CFF'), fontName='Helvetica')
773
  body_text = ParagraphStyle(name='BodyText', fontSize=8, leading=10, spaceAfter=4, fontName='Helvetica', textColor=colors.HexColor('#333333'))
774
  bullet_style = ParagraphStyle(name='Bullet', parent=body_text, leftIndent=16, bulletIndent=6, fontName='Helvetica', bulletFontName='Helvetica', bulletFontSize=8)
775
-
776
  story = []
777
 
778
  def header_footer(canvas, doc):
@@ -790,13 +579,13 @@ def create_company_pdf_report(analysis_data: Dict, output_path: str, gemini_repo
790
 
791
  # Title Page
792
  story.append(Paragraph("Candidate Interview Analysis", h1))
793
- story.append(Paragraph(f"Generated {time.strftime('%B %d, %Y')}", ParagraphStyle(name='Date', alignment=1, fontSize=8, textColor=colors.HexColor('#666666'), fontName='Helvetica')))
794
  story.append(Spacer(1, 0.3*inch))
795
  acceptance_prob = analysis_data.get('acceptance_probability', 50.0)
796
  story.append(Paragraph("Hiring Suitability Snapshot", h2))
797
- prob_color = colors.HexColor('#2E7D32') if acceptance_prob >= 80 else colors.HexColor('#F57C00') if acceptance_prob >= 60 else colors.HexColor('#D32F2F')
798
  story.append(Paragraph(f"Suitability Score: <font size=14 color='{prob_color.hexval()}'><b>{acceptance_prob:.2f}%</b></font>",
799
- ParagraphStyle(name='Prob', fontSize=10, spaceAfter=8, alignment=1, fontName='Helvetica-Bold')))
800
  if acceptance_prob >= 80:
801
  story.append(Paragraph("<b>HR Verdict:</b> Outstanding candidate, recommended for immediate advancement.", body_text))
802
  elif acceptance_prob >= 60:
@@ -806,7 +595,7 @@ def create_company_pdf_report(analysis_data: Dict, output_path: str, gemini_repo
806
  else:
807
  story.append(Paragraph("<b>HR Verdict:</b> Limited fit, significant improvement required.", body_text))
808
  story.append(Spacer(1, 0.2*inch))
809
- participants = sorted(set(u['speaker'] for u in analysis_data['transcript'] if u['speaker'] != 'Unknown'))
810
  participants_str = ', '.join(participants)
811
  table_data = [
812
  ['Metric', 'Value'],
@@ -831,12 +620,12 @@ def create_company_pdf_report(analysis_data: Dict, output_path: str, gemini_repo
831
  ]))
832
  story.append(table)
833
  story.append(Spacer(1, 0.3*inch))
834
- story.append(Paragraph("Prepared by EvalBot - AI-Powered HR Analysis", body_text))
835
  story.append(PageBreak())
836
 
837
  # Detailed Analysis
838
  story.append(Paragraph("Detailed Candidate Evaluation", h1))
839
-
840
  # Communication and Vocal Dynamics
841
  story.append(Paragraph("1. Communication & Vocal Dynamics", h2))
842
  voice_analysis = analysis_data.get('voice_analysis', {})
@@ -876,7 +665,6 @@ def create_company_pdf_report(analysis_data: Dict, output_path: str, gemini_repo
876
 
877
  # Parse Gemini Report
878
  sections = {
879
- "User Feedback": [],
880
  "Executive Summary": [],
881
  "Communication": [],
882
  "Competency": {"Strengths": [], "Growth Areas": []},
@@ -890,7 +678,7 @@ def create_company_pdf_report(analysis_data: Dict, output_path: str, gemini_repo
890
  line = line.strip()
891
  if not line:
892
  continue
893
- logger.debug(f"Parsing line: {line}")
894
  if line.startswith('**') and line.endswith('**'):
895
  section_title = line.strip('**').strip()
896
  if section_title.startswith(('1.', '2.', '3.', '4.', '5.')):
@@ -918,7 +706,9 @@ def create_company_pdf_report(analysis_data: Dict, output_path: str, gemini_repo
918
  clean_line = re.sub(r'[^\w\s.,;:-]', '', clean_line)
919
  logger.debug(f"Processing bullet: {clean_line}, section: {current_section}, subsection: {current_subsection}")
920
  if current_section in ['Competency', 'Recommendations']:
 
921
  if current_subsection is None:
 
922
  if current_section == 'Competency':
923
  current_subsection = 'Strengths'
924
  elif current_section == 'Recommendations':
@@ -929,6 +719,7 @@ def create_company_pdf_report(analysis_data: Dict, output_path: str, gemini_repo
929
  else:
930
  logger.warning(f"Skipping line due to unset subsection: {clean_line}")
931
  else:
 
932
  sections[current_section].append(clean_line)
933
  elif current_section and line:
934
  clean_line = re.sub(r'[^\w\s.,;:-]', '', line)
@@ -937,6 +728,7 @@ def create_company_pdf_report(analysis_data: Dict, output_path: str, gemini_repo
937
  if current_subsection:
938
  sections[current_section][current_subsection].append(clean_line)
939
  else:
 
940
  current_subsection = 'Strengths' if current_section == 'Competency' else 'Development'
941
  sections[current_section][current_subsection].append(clean_line)
942
  logger.debug(f"Default subsection for non-bullet set to: {current_subsection}")
@@ -976,17 +768,17 @@ def create_company_pdf_report(analysis_data: Dict, output_path: str, gemini_repo
976
  for line in sections['Role Fit']:
977
  story.append(Paragraph(line, bullet_style))
978
  else:
979
- story.append(Paragraph("Potential for role fit exists; further evaluation needed to confirm alignment.", bullet_style))
980
  story.append(Spacer(1, 0.15*inch))
981
-
982
  # Recommendations
983
  story.append(Paragraph("5. Recommendations", h2))
984
  story.append(Paragraph("Development Priorities", h3))
985
  if sections['Recommendations']['Development']:
986
  for line in sections['Recommendations']['Development']:
987
- story.append(Paragraph(line, bullet_style))
988
  else:
989
- story.append(Paragraph("Enrollment in communication training to reduce filler words.", bullet_style))
990
  story.append(Spacer(1, 0.1*inch))
991
  story.append(Paragraph("Next Steps for Hiring Managers", h3))
992
  if sections['Recommendations']['Next Steps']:
@@ -995,13 +787,13 @@ def create_company_pdf_report(analysis_data: Dict, output_path: str, gemini_repo
995
  else:
996
  story.append(Paragraph("Schedule a technical assessment to evaluate role-specific skills.", bullet_style))
997
  story.append(Spacer(1, 0.15*inch))
998
- story.append(Paragraph("This report provides actionable insights to support hiring decisions.", body_text))
999
 
1000
  doc.build(story, onFirstPage=header_footer, onLaterPages=header_footer)
1001
- logger.info(f"Company PDF report successfully generated at {output_path}")
1002
  return True
1003
  except Exception as e:
1004
- logger.error(f"Company PDF generation failed: {str(e)}\nFull Gemini report text:\n{gemini_report_text}", exc_info=True)
1005
  return False
1006
 
1007
  def convert_to_serializable(obj):
@@ -1059,28 +851,21 @@ def process_interview(audio_url: str) -> Dict:
1059
  analysis_data['acceptance_probability'] = calculate_acceptance_probability(analysis_data)
1060
  gemini_report_text = generate_report(analysis_data)
1061
  base_name = str(uuid.uuid4())
1062
- user_pdf_path = os.path.join(OUTPUT_DIR, f"{base_name}_user_report.pdf")
1063
- company_pdf_path = os.path.join(OUTPUT_DIR, f"{base_name}_company_report.pdf")
1064
  json_path = os.path.join(OUTPUT_DIR, f"{base_name}_analysis.json")
1065
- user_pdf_success = create_user_pdf_report(analysis_data, user_pdf_path, gemini_report_text)
1066
- company_pdf_success = create_company_pdf_report(analysis_data, company_pdf_path, gemini_report_text)
1067
  with open(json_path, 'w') as f:
1068
  serializable_data = convert_to_serializable(analysis_data)
1069
  json.dump(serializable_data, f, indent=2)
1070
- if not (user_pdf_success and company_pdf_success):
1071
- logger.warning(f"One or both PDF reports failed to generate for {audio_url}")
1072
  return {
1073
- 'user_pdf_path': user_pdf_path if user_pdf_success else None,
1074
- 'company_pdf_path': company_pdf_path if company_pdf_success else None,
1075
  'json_path': json_path,
1076
- 'error': 'One or both PDF generations failed'
1077
  }
1078
  logger.info(f"Processing completed for {audio_url}")
1079
- return {
1080
- 'user_pdf_path': user_pdf_path,
1081
- 'company_pdf_path': company_pdf_path,
1082
- 'json_path': json_path
1083
- }
1084
  except Exception as e:
1085
  logger.error(f"Processing failed for {audio_url}: {str(e)}", exc_info=True)
1086
  base_name = str(uuid.uuid4())
@@ -1088,8 +873,7 @@ def process_interview(audio_url: str) -> Dict:
1088
  with open(json_path, 'w') as f:
1089
  json.dump({'error': str(e)}, f, indent=2)
1090
  return {
1091
- 'user_pdf_path': None,
1092
- 'company_pdf_path': None,
1093
  'json_path': json_path,
1094
  'error': str(e)
1095
  }
 
116
  logger.error(f"Model loading failed: {str(e)}")
117
  raise RuntimeError("Could not load speaker verification model")
118
 
 
 
119
  def load_models():
120
  speaker_model = load_speaker_model()
121
  nlp = spacy.load("en_core_web_sm")
 
322
  except Exception as e:
323
  logger.error(f"Role classification failed: {str(e)}")
324
  raise
325
+
326
  def analyze_interviewee_voice(audio_path: str, utterances: List[Dict]) -> Dict:
327
  try:
328
  y, sr = librosa.load(audio_path, sr=16000)
 
386
  logger.error(f"Voice analysis failed: {str(e)}", exc_info=True)
387
  return {'error': f'Voice analysis incomplete due to audio processing issues: {str(e)}'}
388
 
 
 
389
  def generate_voice_interpretation(analysis: Dict) -> str:
390
  try:
391
  if 'error' in analysis:
 
402
  "- High filler word usage undermines perceived credibility.",
403
  "- Elevated anxiety suggests pressure; training can improve resilience.",
404
  "- Strong confidence supports leadership presence.",
405
+ "- Fluent speech enhances engagement in team settings."
 
 
 
 
 
406
  ]
407
  return "\n".join(interpretation_lines)
408
  except Exception as e:
 
458
  try:
459
  voice = analysis_data.get('voice_analysis', {})
460
  voice_interpretation = generate_voice_interpretation(voice)
461
+ interviewee_responses = [u['text'] for u in analysis_data['transcript'] if u['role'] == 'Interviewee']
462
  if not interviewee_responses:
463
  logger.warning("No interviewee responses found for report generation")
464
+ return f"""**1. Executive Summary**
 
 
 
 
 
 
 
 
465
  - Insufficient interviewee content to generate a summary.
466
  - Interview duration suggests limited engagement.
467
 
 
488
  acceptance_line += "HR Verdict: Moderate potential, needs additional assessment."
489
  else:
490
  acceptance_line += "HR Verdict: Limited fit, significant improvement required."
491
+ transcript_text = "\n".join([f"- {u['speaker']}: {u['text']}" for u in analysis_data['transcript']])
492
  prompt = f"""
493
+ You are EvalBot, a senior HR consultant delivering a professional interview analysis report. Use clear headings with '**', bullet points ('-'), complete sentences, and formal language. Avoid redundancy, vague terms, and special characters that could break formatting (e.g., parentheses). Ensure each section is unique, actionable, and contains at least 2-3 bullet points. If content is limited, provide reasonable inferences based on available data.
494
 
495
  **Input Data**
496
  - Suitability Score: {acceptance_prob:.2f}%
 
500
  - Voice Analysis:
501
  {voice_interpretation}
502
  - Transcript Sample:
503
+ {transcript_text[:1000]}...
504
 
505
  **Report Structure**
506
  {acceptance_line}
507
 
 
 
 
 
 
 
508
  **1. Executive Summary**
509
  - Provide a narrative overview of the candidate’s performance, focusing on key strengths and role fit.
510
  - Highlight communication style and engagement based on voice analysis and transcript.
 
529
  """
530
  response = gemini_model.generate_content(prompt)
531
  report_text = re.sub(r'[^\x00-\x7F]+|[()]+', '', response.text)
532
+ logger.info(f"Generated Gemini report: {report_text[:500]}...") # Log for debugging
533
  return report_text
534
  except Exception as e:
535
  logger.error(f"Report generation failed: {str(e)}", exc_info=True)
536
+ return f"""**1. Executive Summary**
 
 
 
 
 
 
 
 
537
  - Report generation failed due to processing error.
538
 
539
  **2. Communication and Vocal Dynamics**
 
550
  - Development: Investigate processing error.
551
  - Next Steps: Retry analysis with corrected audio."""
552
 
553
+ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text: str) -> bool:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
554
  try:
555
  doc = SimpleDocTemplate(output_path, pagesize=letter,
556
  rightMargin=0.75*inch, leftMargin=0.75*inch,
 
561
  h3 = ParagraphStyle(name='Heading3', fontSize=9, leading=11, spaceBefore=6, spaceAfter=4, textColor=colors.HexColor('#3F7CFF'), fontName='Helvetica')
562
  body_text = ParagraphStyle(name='BodyText', fontSize=8, leading=10, spaceAfter=4, fontName='Helvetica', textColor=colors.HexColor('#333333'))
563
  bullet_style = ParagraphStyle(name='Bullet', parent=body_text, leftIndent=16, bulletIndent=6, fontName='Helvetica', bulletFontName='Helvetica', bulletFontSize=8)
564
+
565
  story = []
566
 
567
  def header_footer(canvas, doc):
 
579
 
580
  # Title Page
581
  story.append(Paragraph("Candidate Interview Analysis", h1))
582
+ story.append(Paragraph(f"Generated: {time.strftime('%B %d, %Y')}", ParagraphStyle(name='Date', alignment=1, fontSize=8, textColor=colors.HexColor('#666666'), fontName='Helvetica')))
583
  story.append(Spacer(1, 0.3*inch))
584
  acceptance_prob = analysis_data.get('acceptance_probability', 50.0)
585
  story.append(Paragraph("Hiring Suitability Snapshot", h2))
586
+ prob_color = colors.HexColor('#2E7D32') if acceptance_prob >= 80 else (colors.HexColor('#F57C00') if acceptance_prob >= 60 else colors.HexColor('#D32F2F'))
587
  story.append(Paragraph(f"Suitability Score: <font size=14 color='{prob_color.hexval()}'><b>{acceptance_prob:.2f}%</b></font>",
588
+ ParagraphStyle(name='Prob', fontSize=10, spaceAfter=8, alignment=1, fontName='Helvetica-Bold')))
589
  if acceptance_prob >= 80:
590
  story.append(Paragraph("<b>HR Verdict:</b> Outstanding candidate, recommended for immediate advancement.", body_text))
591
  elif acceptance_prob >= 60:
 
595
  else:
596
  story.append(Paragraph("<b>HR Verdict:</b> Limited fit, significant improvement required.", body_text))
597
  story.append(Spacer(1, 0.2*inch))
598
+ participants = sorted([p for p in set(u['speaker'] for u in analysis_data['transcript']) if p != 'Unknown'])
599
  participants_str = ', '.join(participants)
600
  table_data = [
601
  ['Metric', 'Value'],
 
620
  ]))
621
  story.append(table)
622
  story.append(Spacer(1, 0.3*inch))
623
+ story.append(Paragraph("Prepared by: EvalBot - AI-Powered HR Analysis", body_text))
624
  story.append(PageBreak())
625
 
626
  # Detailed Analysis
627
  story.append(Paragraph("Detailed Candidate Evaluation", h1))
628
+
629
  # Communication and Vocal Dynamics
630
  story.append(Paragraph("1. Communication & Vocal Dynamics", h2))
631
  voice_analysis = analysis_data.get('voice_analysis', {})
 
665
 
666
  # Parse Gemini Report
667
  sections = {
 
668
  "Executive Summary": [],
669
  "Communication": [],
670
  "Competency": {"Strengths": [], "Growth Areas": []},
 
678
  line = line.strip()
679
  if not line:
680
  continue
681
+ logger.debug(f"Parsing line: {line}") # Debug parsing
682
  if line.startswith('**') and line.endswith('**'):
683
  section_title = line.strip('**').strip()
684
  if section_title.startswith(('1.', '2.', '3.', '4.', '5.')):
 
706
  clean_line = re.sub(r'[^\w\s.,;:-]', '', clean_line)
707
  logger.debug(f"Processing bullet: {clean_line}, section: {current_section}, subsection: {current_subsection}")
708
  if current_section in ['Competency', 'Recommendations']:
709
+ # For dictionary sections, append to subsection
710
  if current_subsection is None:
711
+ # Set default subsection if unset
712
  if current_section == 'Competency':
713
  current_subsection = 'Strengths'
714
  elif current_section == 'Recommendations':
 
719
  else:
720
  logger.warning(f"Skipping line due to unset subsection: {clean_line}")
721
  else:
722
+ # For list sections, append directly
723
  sections[current_section].append(clean_line)
724
  elif current_section and line:
725
  clean_line = re.sub(r'[^\w\s.,;:-]', '', line)
 
728
  if current_subsection:
729
  sections[current_section][current_subsection].append(clean_line)
730
  else:
731
+ # Default subsection
732
  current_subsection = 'Strengths' if current_section == 'Competency' else 'Development'
733
  sections[current_section][current_subsection].append(clean_line)
734
  logger.debug(f"Default subsection for non-bullet set to: {current_subsection}")
 
768
  for line in sections['Role Fit']:
769
  story.append(Paragraph(line, bullet_style))
770
  else:
771
+ story.append(Paragraph("Potential for role fit exists; further evaluation needed to confirm alignment.", bullet_style)))
772
  story.append(Spacer(1, 0.15*inch))
773
+
774
  # Recommendations
775
  story.append(Paragraph("5. Recommendations", h2))
776
  story.append(Paragraph("Development Priorities", h3))
777
  if sections['Recommendations']['Development']:
778
  for line in sections['Recommendations']['Development']:
779
+ story.append(Paragraph(line, bullet_style)))
780
  else:
781
+ story.append(Paragraph("Enroll in communication training to reduce filler words.", bullet_style))
782
  story.append(Spacer(1, 0.1*inch))
783
  story.append(Paragraph("Next Steps for Hiring Managers", h3))
784
  if sections['Recommendations']['Next Steps']:
 
787
  else:
788
  story.append(Paragraph("Schedule a technical assessment to evaluate role-specific skills.", bullet_style))
789
  story.append(Spacer(1, 0.15*inch))
790
+ story.append(Paragraph("This report provides actionable insights to support hiring and candidate development.", body_text))
791
 
792
  doc.build(story, onFirstPage=header_footer, onLaterPages=header_footer)
793
+ logger.info(f"PDF report successfully generated at {output_path}")
794
  return True
795
  except Exception as e:
796
+ logger.error(f"PDF generation failed: {str(e)}\nFull Gemini report text:\n{gemini_report_text}", exc_info=True)
797
  return False
798
 
799
  def convert_to_serializable(obj):
 
851
  analysis_data['acceptance_probability'] = calculate_acceptance_probability(analysis_data)
852
  gemini_report_text = generate_report(analysis_data)
853
  base_name = str(uuid.uuid4())
854
+ pdf_path = os.path.join(OUTPUT_DIR, f"{base_name}_report.pdf")
 
855
  json_path = os.path.join(OUTPUT_DIR, f"{base_name}_analysis.json")
856
+ pdf_success = create_pdf_report(analysis_data, pdf_path, gemini_report_text)
 
857
  with open(json_path, 'w') as f:
858
  serializable_data = convert_to_serializable(analysis_data)
859
  json.dump(serializable_data, f, indent=2)
860
+ if not pdf_success:
861
+ logger.warning(f"PDF report failed to generate for {audio_url}")
862
  return {
863
+ 'pdf_path': None,
 
864
  'json_path': json_path,
865
+ 'error': 'PDF generation failed'
866
  }
867
  logger.info(f"Processing completed for {audio_url}")
868
+ return {'pdf_path': pdf_path, 'json_path': json_path}
 
 
 
 
869
  except Exception as e:
870
  logger.error(f"Processing failed for {audio_url}: {str(e)}", exc_info=True)
871
  base_name = str(uuid.uuid4())
 
873
  with open(json_path, 'w') as f:
874
  json.dump({'error': str(e)}, f, indent=2)
875
  return {
876
+ 'pdf_path': None,
 
877
  'json_path': json_path,
878
  'error': str(e)
879
  }