norhan12 committed on
Commit
885934a
·
verified ·
1 Parent(s): 17f00ad

Update process_interview.py

Browse files
Files changed (1) hide show
  1. process_interview.py +136 -181
process_interview.py CHANGED
@@ -18,9 +18,8 @@ import re
18
  from typing import Dict, List, Tuple
19
  import logging
20
  import tempfile
21
- # --- Imports for enhanced PDF ---
22
  from reportlab.lib.pagesizes import letter
23
- from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
24
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
25
  from reportlab.lib.units import inch
26
  from reportlab.lib import colors
@@ -29,21 +28,12 @@ import matplotlib
29
  matplotlib.use('Agg')
30
  from reportlab.platypus import Image
31
  import io
32
- # --- End Imports for enhanced PDF ---
33
  from transformers import AutoTokenizer, AutoModel
34
  import spacy
35
  import google.generativeai as genai
36
  import joblib
37
  from concurrent.futures import ThreadPoolExecutor
38
- # --- Imports to ensure are present at the top of process_interview.py ---
39
- from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, Image
40
- from reportlab.lib.pagesizes import letter
41
- from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
42
- from reportlab.lib.units import inch
43
- from reportlab.lib import colors
44
- import time
45
- import re
46
- import io
47
  # Setup logging
48
  logging.basicConfig(level=logging.INFO)
49
  logger = logging.getLogger(__name__)
@@ -60,15 +50,11 @@ PINECONE_KEY = os.getenv("PINECONE_KEY")
60
  ASSEMBLYAI_KEY = os.getenv("ASSEMBLYAI_KEY")
61
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
62
 
63
-
64
- # --- NEW HELPER FUNCTION to download from URL ---
65
  def download_audio_from_url(url: str) -> str:
66
  """Downloads an audio file from a URL to a temporary local path."""
67
  try:
68
- # Create a temporary file to store the downloaded audio
69
  temp_dir = tempfile.gettempdir()
70
  temp_path = os.path.join(temp_dir, f"{uuid.uuid4()}.tmp_audio")
71
-
72
  logger.info(f"Downloading audio from {url} to {temp_path}")
73
  with requests.get(url, stream=True) as r:
74
  r.raise_for_status()
@@ -79,10 +65,7 @@ def download_audio_from_url(url: str) -> str:
79
  except Exception as e:
80
  logger.error(f"Failed to download audio from URL {url}: {e}")
81
  raise
82
- # --- END NEW HELPER FUNCTION ---
83
-
84
 
85
- # Initialize services
86
  def initialize_services():
87
  try:
88
  pc = Pinecone(api_key=PINECONE_KEY)
@@ -104,11 +87,9 @@ def initialize_services():
104
 
105
  index, gemini_model = initialize_services()
106
 
107
- # Device setup
108
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
109
  logger.info(f"Using device: {device}")
110
 
111
-
112
  def load_speaker_model():
113
  try:
114
  import torch
@@ -123,8 +104,6 @@ def load_speaker_model():
123
  logger.error(f"Model loading failed: {str(e)}")
124
  raise RuntimeError("Could not load speaker verification model")
125
 
126
-
127
- # Load ML models
128
  def load_models():
129
  speaker_model = load_speaker_model()
130
  nlp = spacy.load("en_core_web_sm")
@@ -133,11 +112,8 @@ def load_models():
133
  llm_model.eval()
134
  return speaker_model, nlp, tokenizer, llm_model
135
 
136
-
137
  speaker_model, nlp, tokenizer, llm_model = load_models()
138
 
139
-
140
- # Audio processing functions
141
  def convert_to_wav(audio_path: str, output_dir: str = OUTPUT_DIR) -> str:
142
  try:
143
  audio = AudioSegment.from_file(audio_path)
@@ -151,7 +127,6 @@ def convert_to_wav(audio_path: str, output_dir: str = OUTPUT_DIR) -> str:
151
  logger.error(f"Audio conversion failed: {str(e)}")
152
  raise
153
 
154
-
155
  def extract_prosodic_features(audio_path: str, start_ms: int, end_ms: int) -> Dict:
156
  try:
157
  audio = AudioSegment.from_file(audio_path)
@@ -182,7 +157,6 @@ def extract_prosodic_features(audio_path: str, start_ms: int, end_ms: int) -> Di
182
  'intensityMax': 0.0, 'intensitySD': 0.0
183
  }
184
 
185
-
186
  def transcribe(audio_path: str) -> Dict:
187
  try:
188
  with open(audio_path, 'rb') as f:
@@ -216,7 +190,6 @@ def transcribe(audio_path: str) -> Dict:
216
  logger.error(f"Transcription failed: {str(e)}")
217
  raise
218
 
219
-
220
  def process_utterance(utterance, full_audio, wav_file):
221
  try:
222
  start = utterance['start']
@@ -255,7 +228,6 @@ def process_utterance(utterance, full_audio, wav_file):
255
  'embedding': None
256
  }
257
 
258
-
259
  def identify_speakers(transcript: Dict, wav_file: str) -> List[Dict]:
260
  try:
261
  full_audio = AudioSegment.from_wav(wav_file)
@@ -271,7 +243,6 @@ def identify_speakers(transcript: Dict, wav_file: str) -> List[Dict]:
271
  logger.error(f"Speaker identification failed: {str(e)}")
272
  raise
273
 
274
-
275
  def train_role_classifier(utterances: List[Dict]):
276
  try:
277
  texts = [u['text'] for u in utterances]
@@ -311,7 +282,6 @@ def train_role_classifier(utterances: List[Dict]):
311
  logger.error(f"Classifier training failed: {str(e)}")
312
  raise
313
 
314
-
315
  def classify_roles(utterances: List[Dict], clf, vectorizer, scaler):
316
  try:
317
  texts = [u['text'] for u in utterances]
@@ -341,7 +311,6 @@ def classify_roles(utterances: List[Dict], clf, vectorizer, scaler):
341
  logger.error(f"Role classification failed: {str(e)}")
342
  raise
343
 
344
-
345
  def analyze_interviewee_voice(audio_path: str, utterances: List[Dict]) -> Dict:
346
  try:
347
  y, sr = librosa.load(audio_path, sr=16000)
@@ -382,9 +351,9 @@ def analyze_interviewee_voice(audio_path: str, utterances: List[Dict]) -> Dict:
382
  anxiety_score = 0.6 * (pitch_std / pitch_mean) + 0.4 * (jitter + shimmer) if pitch_mean > 0 else 0
383
  confidence_score = 0.7 * (1 / (1 + intensity_std)) + 0.3 * (1 / (1 + filler_ratio))
384
  hesitation_score = filler_ratio + repetition_score
385
- anxiety_level = 'high' if anxiety_score > 0.15 else 'moderate' if anxiety_score > 0.07 else 'low'
386
- confidence_level = 'high' if confidence_score > 0.7 else 'moderate' if confidence_score > 0.5 else 'low'
387
- fluency_level = 'fluent' if (filler_ratio < 0.05 and repetition_score < 0.1) else 'moderate' if (filler_ratio < 0.1 and repetition_score < 0.2) else 'disfluent'
388
  return {
389
  'speaking_rate': float(round(speaking_rate, 2)),
390
  'filler_ratio': float(round(filler_ratio, 4)),
@@ -398,58 +367,57 @@ def analyze_interviewee_voice(audio_path: str, utterances: List[Dict]) -> Dict:
398
  logger.error(f"Voice analysis failed: {str(e)}")
399
  return {'error': str(e)}
400
 
401
-
402
  def generate_voice_interpretation(analysis: Dict) -> str:
403
  if 'error' in analysis:
404
- return "Voice analysis not available."
405
  interpretation_lines = [
406
- "Voice Analysis Summary:",
407
- f"- Speaking Rate: {analysis['speaking_rate']} words/sec (average)",
408
- f"- Filler Words: {analysis['filler_ratio'] * 100:.1f}% of words",
409
- f"- Repetition Score: {analysis['repetition_score']:.3f}",
410
- f"- Anxiety Level: {analysis['interpretation']['anxiety_level'].upper()} (score: {analysis['composite_scores']['anxiety']:.3f})",
411
- f"- Confidence Level: {analysis['interpretation']['confidence_level'].upper()} (score: {analysis['composite_scores']['confidence']:.3f})",
412
- f"- Fluency: {analysis['interpretation']['fluency_level'].upper()}",
413
  "",
414
- "Detailed Interpretation:",
415
- "1. A higher speaking rate indicates faster speech, which can suggest nervousness or enthusiasm.",
416
- "2. Filler words and repetitions reduce speech clarity and professionalism.",
417
- "3. Anxiety is measured through pitch variability and voice instability.",
418
- "4. Confidence is assessed through voice intensity and stability.",
419
- "5. Fluency combines filler words and repetition metrics."
420
  ]
421
  return "\n".join(interpretation_lines)
422
 
423
-
424
  def generate_anxiety_confidence_chart(composite_scores: Dict, chart_path_or_buffer):
425
  try:
426
  labels = ['Anxiety', 'Confidence']
427
  scores = [composite_scores.get('anxiety', 0), composite_scores.get('confidence', 0)]
428
  fig, ax = plt.subplots(figsize=(4, 2.5))
429
- ax.bar(labels, scores, color=['lightcoral', 'lightskyblue'])
430
- ax.set_ylabel('Score')
431
- ax.set_title('Anxiety vs. Confidence Scores')
432
- ax.set_ylim(0, 1.0)
433
- for i, v in enumerate(scores):
434
- ax.text(i, v + 0.05, f"{v:.2f}", color='black', ha='center', fontweight='bold')
 
 
435
  plt.tight_layout()
436
- plt.savefig(chart_path_or_buffer, format='png', bbox_inches='tight')
437
  plt.close(fig)
438
  except Exception as e:
439
  logger.error(f"Error generating chart: {str(e)}")
440
 
441
-
442
  def calculate_acceptance_probability(analysis_data: Dict) -> float:
443
  voice = analysis_data.get('voice_analysis', {})
444
  if 'error' in voice: return 0.0
445
  w_confidence, w_anxiety, w_fluency, w_speaking_rate, w_filler_repetition, w_content_strengths = 0.4, -0.3, 0.2, 0.1, -0.1, 0.2
446
  confidence_score = voice.get('composite_scores', {}).get('confidence', 0.0)
447
  anxiety_score = voice.get('composite_scores', {}).get('anxiety', 0.0)
448
- fluency_level = voice.get('interpretation', {}).get('fluency_level', 'disfluent')
449
  speaking_rate = voice.get('speaking_rate', 0.0)
450
  filler_ratio = voice.get('filler_ratio', 0.0)
451
  repetition_score = voice.get('repetition_score', 0.0)
452
- fluency_map = {'fluent': 1.0, 'moderate': 0.5, 'disfluent': 0.0}
453
  fluency_val = fluency_map.get(fluency_level, 0.0)
454
  ideal_speaking_rate = 2.5
455
  speaking_rate_deviation = abs(speaking_rate - ideal_speaking_rate)
@@ -464,7 +432,6 @@ def calculate_acceptance_probability(analysis_data: Dict) -> float:
464
  acceptance_probability = max(0.0, min(1.0, normalized_score))
465
  return float(f"{acceptance_probability * 100:.2f}")
466
 
467
-
468
  def generate_report(analysis_data: Dict) -> str:
469
  try:
470
  voice = analysis_data.get('voice_analysis', {})
@@ -473,28 +440,34 @@ def generate_report(analysis_data: Dict) -> str:
473
  acceptance_prob = analysis_data.get('acceptance_probability', None)
474
  acceptance_line = ""
475
  if acceptance_prob is not None:
476
- acceptance_line = f"\n**Estimated Acceptance Probability: {acceptance_prob:.2f}%**\n"
477
- if acceptance_prob >= 80: acceptance_line += "This indicates a very strong candidate. Well done!"
478
- elif acceptance_prob >= 50: acceptance_line += "This indicates a solid candidate with potential for improvement."
479
- else: acceptance_line += "This candidate may require significant development or may not be a strong fit."
480
  prompt = f"""
481
- As EvalBot, an AI interview analysis system, generate a highly professional, well-structured, and concise interview analysis report. Use clear headings and subheadings. For bullet points, use '- '.
482
  {acceptance_line}
483
  **1. Executive Summary**
484
- Provide a brief, high-level overview of the interview.
485
- - Overall interview duration: {analysis_data['text_analysis']['total_duration']:.2f} seconds
486
- - Number of speaker turns: {analysis_data['text_analysis']['speaker_turns']}
487
- - Main participants: {', '.join(analysis_data['speakers'])}
488
- **2. Voice Analysis Insights**
489
- Analyze key voice metrics and provide a detailed interpretation.
 
490
  {voice_interpretation}
491
- **3. Content Analysis & Strengths/Areas for Development**
492
- Analyze the key themes and identify both strengths and areas for development in the interviewee's responses.
493
- Key responses from interviewee (for context):
 
494
  {chr(10).join(interviewee_responses)}
495
- **4. Actionable Recommendations**
496
- Offer specific, actionable suggestions for improvement.
497
- Focus on: Communication Skills, Content Delivery, Professional Presentation.
 
 
 
 
498
  """
499
  response = gemini_model.generate_content(prompt)
500
  return response.text
@@ -502,158 +475,157 @@ def generate_report(analysis_data: Dict) -> str:
502
  logger.error(f"Report generation failed: {str(e)}")
503
  return f"Error generating report: {str(e)}"
504
 
505
-
506
-
507
- # --- NEW, ENHANCED PDF GENERATION FUNCTION ---
508
- # --- Make sure these imports are at the top of your process_interview.py file ---
509
- from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, Image
510
- from reportlab.lib.pagesizes import letter
511
- from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
512
- from reportlab.lib.units import inch
513
- from reportlab.lib import colors
514
- import time
515
- import re
516
- import io
517
-
518
- # --- New, Enhanced PDF Generation Function ---
519
  def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text: str):
520
  try:
521
  doc = SimpleDocTemplate(output_path, pagesize=letter,
522
  rightMargin=0.75*inch, leftMargin=0.75*inch,
523
  topMargin=1*inch, bottomMargin=1*inch)
524
-
525
  styles = getSampleStyleSheet()
526
- h1 = ParagraphStyle(name='Heading1', fontSize=20, leading=24, spaceAfter=18, alignment=1, textColor=colors.HexColor('#00205B'))
527
- h2 = ParagraphStyle(name='Heading2', fontSize=14, leading=18, spaceBefore=12, spaceAfter=6, textColor=colors.HexColor('#003366'))
528
- body_text = ParagraphStyle(name='BodyText', parent=styles['Normal'], fontSize=10, leading=14, spaceAfter=6)
529
- bullet_style = ParagraphStyle(name='Bullet', parent=body_text, leftIndent=18, bulletIndent=9)
530
 
531
  story = []
532
 
533
- # --- Header and Footer Logic ---
534
  def header_footer(canvas, doc):
535
  canvas.saveState()
536
- # Footer
537
  canvas.setFont('Helvetica', 9)
538
  canvas.setFillColor(colors.grey)
539
- canvas.drawString(doc.leftMargin, 0.5 * inch, f"Page {doc.page} | EvalBot Confidential Report")
540
- # Header line
541
- canvas.setStrokeColor(colors.HexColor('#003366'))
542
  canvas.setLineWidth(1)
543
- canvas.line(doc.leftMargin, doc.height + 0.75*inch, doc.width + doc.leftMargin, doc.height + 0.75*inch)
 
 
544
  canvas.restoreState()
545
 
546
- # --- First Page: Title and Summary ---
547
- story.append(Paragraph("Interview Performance Analysis", h1))
548
- story.append(Paragraph(f"Analysis Date: {time.strftime('%Y-%m-%d')}", ParagraphStyle(name='Date', alignment=1, fontSize=9, textColor=colors.grey)))
549
- story.append(Spacer(1, 0.4 * inch))
550
-
551
  acceptance_prob = analysis_data.get('acceptance_probability')
552
  if acceptance_prob is not None:
553
- story.append(Paragraph("Candidate Evaluation Summary", h2))
554
- prob_color = colors.green if acceptance_prob >= 70 else (colors.darkorange if acceptance_prob >= 40 else colors.red)
555
- story.append(Paragraph(f"Estimated Acceptance Probability: <font size=14 color='{prob_color.hexval()}'><b>{acceptance_prob:.2f}%</b></font>",
556
- ParagraphStyle(name='Prob', fontSize=12, spaceAfter=10)))
557
  if acceptance_prob >= 80:
558
- story.append(Paragraph("<b>Overall Assessment:</b> This indicates a very strong candidate with high potential. Recommended for the next round.", body_text))
559
  elif acceptance_prob >= 50:
560
- story.append(Paragraph("<b>Overall Assessment:</b> This candidate shows solid potential but has key areas for improvement.", body_text))
561
  else:
562
- story.append(Paragraph("<b>Overall Assessment:</b> This candidate may require significant development or may not be the ideal fit at this time.", body_text))
563
-
 
564
  story.append(PageBreak())
565
 
566
- # --- Second Page: Detailed Analysis ---
567
- story.append(Paragraph("Detailed Analysis", h1))
568
 
569
- story.append(Paragraph("1. Voice & Speech Metrics", h2))
570
  voice_analysis = analysis_data.get('voice_analysis', {})
571
  if voice_analysis and 'error' not in voice_analysis:
572
- # --- This is the corrected table ---
573
  table_data = [
574
- ['Metric', 'Value', 'Interpretation'],
575
- ['Speaking Rate', f"{voice_analysis.get('speaking_rate', 0):.2f} words/sec", 'Indicator of pace and confidence.'],
576
- ['Filler Words Ratio', f"{voice_analysis.get('filler_ratio', 0) * 100:.1f}%", 'Measures use of "um", "uh", etc.'],
577
- ['Anxiety Level', voice_analysis.get('interpretation', {}).get('anxiety_level', 'N/A').upper(), f"Score: {voice_analysis.get('composite_scores', {}).get('anxiety', 0):.3f}"],
578
- ['Confidence Level', voice_analysis.get('interpretation', {}).get('confidence_level', 'N/A').upper(), f"Score: {voice_analysis.get('composite_scores', {}).get('confidence', 0):.3f}"],
579
- ['Fluency Level', voice_analysis.get('interpretation', {}).get('fluency_level', 'N/A').upper(), 'Overall speech flow and coherence.']
580
  ]
581
- table = Table(table_data, colWidths=[1.6*inch, 1.2*inch, 3.7*inch])
582
  table.setStyle(TableStyle([
583
- ('BACKGROUND', (0,0), (-1,0), colors.HexColor('#003366')),
584
- ('TEXTCOLOR',(0,0),(-1,0),colors.whitesmoke),
585
  ('ALIGN', (0,0), (-1,-1), 'LEFT'),
586
  ('VALIGN', (0,0), (-1,-1), 'MIDDLE'),
587
  ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
588
  ('FONTSIZE', (0, 0), (-1, -1), 9),
589
- ('BOTTOMPADDING', (0, 0), (-1, 0), 10),
590
- ('TOPPADDING', (0, 0), (-1, 0), 10),
591
- ('BACKGROUND', (0, 1), (-1, -1), colors.HexColor('#F0F8FF')),
592
- ('GRID', (0,0), (-1,-1), 1, colors.lightgrey)
593
  ]))
594
  story.append(table)
595
- story.append(Spacer(1, 0.2 * inch))
596
-
597
  chart_buffer = io.BytesIO()
598
  generate_anxiety_confidence_chart(voice_analysis.get('composite_scores', {}), chart_buffer)
599
  chart_buffer.seek(0)
600
- img = Image(chart_buffer, width=4*inch, height=2.5*inch)
601
  img.hAlign = 'CENTER'
602
  story.append(img)
603
  else:
604
- story.append(Paragraph("Voice analysis data not available.", body_text))
 
605
 
606
- story.append(PageBreak())
607
-
608
- # --- Gemini Report Parsing and Display ---
609
  sections = {}
610
- # Pre-populate to maintain order
611
- section_titles = ["Executive Summary", "Voice Analysis Insights", "Content Analysis & Strengths/Areas for Development", "Actionable Recommendations"]
 
612
  for title in section_titles:
613
  sections[title] = []
614
-
615
- # Use a more robust way to capture content under each heading
616
- # This regex captures the heading line itself to exclude it from the content
617
  report_parts = re.split(r'(\s*\*\*\s*\d\.\s*.*?\s*\*\*)', gemini_report_text)
618
-
619
  current_section = None
620
  for part in report_parts:
621
  if not part.strip(): continue
622
-
623
  is_heading = False
624
  for title in section_titles:
625
- # Check if the part is a heading
626
  if title.lower() in part.lower():
627
  current_section = title
628
  is_heading = True
629
  break
630
-
631
  if not is_heading and current_section:
632
  sections[current_section].append(part.strip())
633
 
634
- # Display Content and Recommendations
635
- story.append(Paragraph("2. Content Analysis (from Gemini)", h2))
636
- if sections['Content Analysis & Strengths/Areas for Development']:
637
- for line in sections['Content Analysis & Strengths/Areas for Development']:
638
  if line.startswith(('-', '•', '*')):
639
  story.append(Paragraph(line.lstrip('-•* ').strip(), bullet_style))
640
  else:
641
  story.append(Paragraph(line, body_text))
642
  else:
643
- story.append(Paragraph("Content analysis not provided.", body_text))
 
644
 
645
- story.append(Spacer(1, 0.3*inch))
646
-
647
- story.append(Paragraph("3. Actionable Recommendations (from Gemini)", h2))
648
- if sections['Actionable Recommendations']:
649
- for line in sections['Actionable Recommendations']:
650
  if line.startswith(('-', '•', '*')):
651
  story.append(Paragraph(line.lstrip('-•* ').strip(), bullet_style))
652
  else:
653
  story.append(Paragraph(line, body_text))
654
  else:
655
- story.append(Paragraph("Recommendations not provided.", body_text))
656
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
657
  doc.build(story, onFirstPage=header_footer, onLaterPages=header_footer)
658
  return True
659
  except Exception as e:
@@ -667,28 +639,22 @@ def convert_to_serializable(obj):
667
  if isinstance(obj, np.ndarray): return obj.tolist()
668
  return obj
669
 
670
- # --- MODIFIED MAIN FUNCTION ---
671
  def process_interview(audio_path_or_url: str):
672
  local_audio_path = None
673
  wav_file = None
674
  is_downloaded = False
675
  try:
676
  logger.info(f"Starting processing for {audio_path_or_url}")
677
-
678
  if audio_path_or_url.startswith(('http://', 'https://')):
679
  local_audio_path = download_audio_from_url(audio_path_or_url)
680
  is_downloaded = True
681
  else:
682
  local_audio_path = audio_path_or_url
683
-
684
  wav_file = convert_to_wav(local_audio_path)
685
  transcript = transcribe(wav_file)
686
-
687
  for utterance in transcript['utterances']:
688
  utterance['prosodic_features'] = extract_prosodic_features(wav_file, utterance['start'], utterance['end'])
689
-
690
  utterances_with_speakers = identify_speakers(transcript, wav_file)
691
-
692
  clf, vectorizer, scaler = None, None, None
693
  if os.path.exists(os.path.join(OUTPUT_DIR, 'role_classifier.pkl')):
694
  clf = joblib.load(os.path.join(OUTPUT_DIR, 'role_classifier.pkl'))
@@ -696,10 +662,8 @@ def process_interview(audio_path_or_url: str):
696
  scaler = joblib.load(os.path.join(OUTPUT_DIR, 'feature_scaler.pkl'))
697
  else:
698
  clf, vectorizer, scaler = train_role_classifier(utterances_with_speakers)
699
-
700
  classified_utterances = classify_roles(utterances_with_speakers, clf, vectorizer, scaler)
701
  voice_analysis = analyze_interviewee_voice(wav_file, classified_utterances)
702
-
703
  analysis_data = {
704
  'transcript': classified_utterances,
705
  'speakers': list(set(u['speaker'] for u in classified_utterances)),
@@ -709,32 +673,23 @@ def process_interview(audio_path_or_url: str):
709
  'speaker_turns': len(classified_utterances)
710
  }
711
  }
712
-
713
  analysis_data['acceptance_probability'] = calculate_acceptance_probability(analysis_data)
714
  gemini_report_text = generate_report(analysis_data)
715
-
716
  base_name = str(uuid.uuid4())
717
  pdf_path = os.path.join(OUTPUT_DIR, f"{base_name}_report.pdf")
718
  json_path = os.path.join(OUTPUT_DIR, f"{base_name}_analysis.json")
719
-
720
  create_pdf_report(analysis_data, pdf_path, gemini_report_text=gemini_report_text)
721
-
722
  with open(json_path, 'w') as f:
723
  serializable_data = convert_to_serializable(analysis_data)
724
  json.dump(serializable_data, f, indent=2)
725
-
726
  logger.info(f"Processing completed for {audio_path_or_url}")
727
-
728
  return {'pdf_path': pdf_path, 'json_path': json_path}
729
-
730
  except Exception as e:
731
  logger.error(f"Processing failed for {audio_path_or_url}: {str(e)}", exc_info=True)
732
  raise
733
-
734
  finally:
735
  if wav_file and os.path.exists(wav_file):
736
  os.remove(wav_file)
737
  if is_downloaded and local_audio_path and os.path.exists(local_audio_path):
738
  os.remove(local_audio_path)
739
- logger.info(f"Cleaned up temporary downloaded file: {local_audio_path}")
740
- # --- END MODIFIED MAIN FUNCTION ---
 
18
  from typing import Dict, List, Tuple
19
  import logging
20
  import tempfile
 
21
  from reportlab.lib.pagesizes import letter
22
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, Image
23
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
24
  from reportlab.lib.units import inch
25
  from reportlab.lib import colors
 
28
  matplotlib.use('Agg')
29
  from reportlab.platypus import Image
30
  import io
 
31
  from transformers import AutoTokenizer, AutoModel
32
  import spacy
33
  import google.generativeai as genai
34
  import joblib
35
  from concurrent.futures import ThreadPoolExecutor
36
+
 
 
 
 
 
 
 
 
37
  # Setup logging
38
  logging.basicConfig(level=logging.INFO)
39
  logger = logging.getLogger(__name__)
 
50
  ASSEMBLYAI_KEY = os.getenv("ASSEMBLYAI_KEY")
51
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
52
 
 
 
53
  def download_audio_from_url(url: str) -> str:
54
  """Downloads an audio file from a URL to a temporary local path."""
55
  try:
 
56
  temp_dir = tempfile.gettempdir()
57
  temp_path = os.path.join(temp_dir, f"{uuid.uuid4()}.tmp_audio")
 
58
  logger.info(f"Downloading audio from {url} to {temp_path}")
59
  with requests.get(url, stream=True) as r:
60
  r.raise_for_status()
 
65
  except Exception as e:
66
  logger.error(f"Failed to download audio from URL {url}: {e}")
67
  raise
 
 
68
 
 
69
  def initialize_services():
70
  try:
71
  pc = Pinecone(api_key=PINECONE_KEY)
 
87
 
88
  index, gemini_model = initialize_services()
89
 
 
90
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
91
  logger.info(f"Using device: {device}")
92
 
 
93
  def load_speaker_model():
94
  try:
95
  import torch
 
104
  logger.error(f"Model loading failed: {str(e)}")
105
  raise RuntimeError("Could not load speaker verification model")
106
 
 
 
107
  def load_models():
108
  speaker_model = load_speaker_model()
109
  nlp = spacy.load("en_core_web_sm")
 
112
  llm_model.eval()
113
  return speaker_model, nlp, tokenizer, llm_model
114
 
 
115
  speaker_model, nlp, tokenizer, llm_model = load_models()
116
 
 
 
117
  def convert_to_wav(audio_path: str, output_dir: str = OUTPUT_DIR) -> str:
118
  try:
119
  audio = AudioSegment.from_file(audio_path)
 
127
  logger.error(f"Audio conversion failed: {str(e)}")
128
  raise
129
 
 
130
  def extract_prosodic_features(audio_path: str, start_ms: int, end_ms: int) -> Dict:
131
  try:
132
  audio = AudioSegment.from_file(audio_path)
 
157
  'intensityMax': 0.0, 'intensitySD': 0.0
158
  }
159
 
 
160
  def transcribe(audio_path: str) -> Dict:
161
  try:
162
  with open(audio_path, 'rb') as f:
 
190
  logger.error(f"Transcription failed: {str(e)}")
191
  raise
192
 
 
193
  def process_utterance(utterance, full_audio, wav_file):
194
  try:
195
  start = utterance['start']
 
228
  'embedding': None
229
  }
230
 
 
231
  def identify_speakers(transcript: Dict, wav_file: str) -> List[Dict]:
232
  try:
233
  full_audio = AudioSegment.from_wav(wav_file)
 
243
  logger.error(f"Speaker identification failed: {str(e)}")
244
  raise
245
 
 
246
  def train_role_classifier(utterances: List[Dict]):
247
  try:
248
  texts = [u['text'] for u in utterances]
 
282
  logger.error(f"Classifier training failed: {str(e)}")
283
  raise
284
 
 
285
  def classify_roles(utterances: List[Dict], clf, vectorizer, scaler):
286
  try:
287
  texts = [u['text'] for u in utterances]
 
311
  logger.error(f"Role classification failed: {str(e)}")
312
  raise
313
 
 
314
  def analyze_interviewee_voice(audio_path: str, utterances: List[Dict]) -> Dict:
315
  try:
316
  y, sr = librosa.load(audio_path, sr=16000)
 
351
  anxiety_score = 0.6 * (pitch_std / pitch_mean) + 0.4 * (jitter + shimmer) if pitch_mean > 0 else 0
352
  confidence_score = 0.7 * (1 / (1 + intensity_std)) + 0.3 * (1 / (1 + filler_ratio))
353
  hesitation_score = filler_ratio + repetition_score
354
+ anxiety_level = 'High' if anxiety_score > 0.15 else 'Moderate' if anxiety_score > 0.07 else 'Low'
355
+ confidence_level = 'High' if confidence_score > 0.7 else 'Moderate' if confidence_score > 0.5 else 'Low'
356
+ fluency_level = 'Fluent' if (filler_ratio < 0.05 and repetition_score < 0.1) else 'Moderate' if (filler_ratio < 0.1 and repetition_score < 0.2) else 'Disfluent'
357
  return {
358
  'speaking_rate': float(round(speaking_rate, 2)),
359
  'filler_ratio': float(round(filler_ratio, 4)),
 
367
  logger.error(f"Voice analysis failed: {str(e)}")
368
  return {'error': str(e)}
369
 
 
370
  def generate_voice_interpretation(analysis: Dict) -> str:
371
  if 'error' in analysis:
372
+ return "Voice analysis not available due to processing error."
373
  interpretation_lines = [
374
+ "Voice and Speech Profile:",
375
+ f"- Speaking Rate: {analysis['speaking_rate']} words/sec - Compared to optimal range (2.0-3.0 words/sec)",
376
+ f"- Filler Word Usage: {analysis['filler_ratio'] * 100:.1f}% - Frequency of non-content words (e.g., 'um', 'like')",
377
+ f"- Repetition Tendency: {analysis['repetition_score']:.3f} - Measure of repeated phrases",
378
+ f"- Anxiety Indicator: {analysis['interpretation']['anxiety_level']} (Score: {analysis['composite_scores']['anxiety']:.3f}) - Based on pitch and voice stability",
379
+ f"- Confidence Indicator: {analysis['interpretation']['confidence_level']} (Score: {analysis['composite_scores']['confidence']:.3f}) - Derived from vocal consistency",
380
+ f"- Fluency Assessment: {analysis['interpretation']['fluency_level']} - Reflects speech flow and coherence",
381
  "",
382
+ "HR Insights:",
383
+ "- Faster speaking rates may indicate confidence but can suggest nervousness if excessive.",
384
+ "- High filler word usage often reduces perceived professionalism and clarity.",
385
+ "- Elevated anxiety indicators (pitch variability, jitter) may reflect interview pressure.",
386
+ "- Strong confidence scores suggest effective vocal presence and control.",
387
+ "- Fluency impacts listener engagement; disfluency may hinder communication effectiveness."
388
  ]
389
  return "\n".join(interpretation_lines)
390
 
 
391
  def generate_anxiety_confidence_chart(composite_scores: Dict, chart_path_or_buffer):
392
  try:
393
  labels = ['Anxiety', 'Confidence']
394
  scores = [composite_scores.get('anxiety', 0), composite_scores.get('confidence', 0)]
395
  fig, ax = plt.subplots(figsize=(4, 2.5))
396
+ bars = ax.bar(labels, scores, color=['#FF6B6B', '#4ECDC4'], edgecolor='black')
397
+ ax.set_ylabel('Score (Normalized)')
398
+ ax.set_title('Vocal Dynamics: Anxiety vs. Confidence')
399
+ ax.set_ylim(0, 1.2)
400
+ for bar in bars:
401
+ height = bar.get_height()
402
+ ax.text(bar.get_x() + bar.get_width()/2, height + 0.05, f"{height:.2f}",
403
+ ha='center', color='black', fontweight='bold', fontsize=10)
404
  plt.tight_layout()
405
+ plt.savefig(chart_path_or_buffer, format='png', bbox_inches='tight', dpi=150)
406
  plt.close(fig)
407
  except Exception as e:
408
  logger.error(f"Error generating chart: {str(e)}")
409
 
 
410
  def calculate_acceptance_probability(analysis_data: Dict) -> float:
411
  voice = analysis_data.get('voice_analysis', {})
412
  if 'error' in voice: return 0.0
413
  w_confidence, w_anxiety, w_fluency, w_speaking_rate, w_filler_repetition, w_content_strengths = 0.4, -0.3, 0.2, 0.1, -0.1, 0.2
414
  confidence_score = voice.get('composite_scores', {}).get('confidence', 0.0)
415
  anxiety_score = voice.get('composite_scores', {}).get('anxiety', 0.0)
416
+ fluency_level = voice.get('interpretation', {}).get('fluency_level', 'Disfluent')
417
  speaking_rate = voice.get('speaking_rate', 0.0)
418
  filler_ratio = voice.get('filler_ratio', 0.0)
419
  repetition_score = voice.get('repetition_score', 0.0)
420
+ fluency_map = {'Fluent': 1.0, 'Moderate': 0.5, 'Disfluent': 0.0}
421
  fluency_val = fluency_map.get(fluency_level, 0.0)
422
  ideal_speaking_rate = 2.5
423
  speaking_rate_deviation = abs(speaking_rate - ideal_speaking_rate)
 
432
  acceptance_probability = max(0.0, min(1.0, normalized_score))
433
  return float(f"{acceptance_probability * 100:.2f}")
434
 
 
435
  def generate_report(analysis_data: Dict) -> str:
436
  try:
437
  voice = analysis_data.get('voice_analysis', {})
 
440
  acceptance_prob = analysis_data.get('acceptance_probability', None)
441
  acceptance_line = ""
442
  if acceptance_prob is not None:
443
+ acceptance_line = f"\n**Hiring Potential Score: {acceptance_prob:.2f}%**\n"
444
+ if acceptance_prob >= 80: acceptance_line += "Assessment: Exceptional candidate, strongly recommended for advancement."
445
+ elif acceptance_prob >= 50: acceptance_line += "Assessment: Promising candidate with moderate strengths; consider for further evaluation."
446
+ else: acceptance_line += "Assessment: Limited alignment with role expectations; significant development needed."
447
  prompt = f"""
448
+ You are an expert HR consultant, EvalBot, tasked with producing a professional, concise, and actionable interview analysis report. Structure the report with clear headings, subheadings, and bullet points (use '- ' for bullets). Adopt a formal, HR-professional tone, focusing on candidate evaluation, fit for role, and development insights.
449
  {acceptance_line}
450
  **1. Executive Summary**
451
+ - Provide a concise overview of the interview, highlighting key metrics and overall candidate performance.
452
+ - Interview duration: {analysis_data['text_analysis']['total_duration']:.2f} seconds
453
+ - Total speaker turns: {analysis_data['text_analysis']['speaker_turns']}
454
+ - Participants: {', '.join(analysis_data['speakers'])}
455
+ **2. Communication and Vocal Analysis**
456
+ - Evaluate the candidate's vocal delivery, including speaking rate, fluency, and confidence indicators.
457
+ - Provide HR-relevant insights into how these metrics impact perceived professionalism and role suitability.
458
  {voice_interpretation}
459
+ **3. Content Analysis and Competency Assessment**
460
+ - Analyze key themes in the candidate's responses to assess alignment with job competencies (e.g., problem-solving, communication, leadership).
461
+ - Identify strengths and areas for improvement, supported by specific examples.
462
+ - Sample responses for context:
463
  {chr(10).join(interviewee_responses)}
464
+ **4. Fit and Potential Evaluation**
465
+ - Assess the candidate's overall fit for a typical professional role based on communication, content, and vocal dynamics.
466
+ - Consider cultural fit, adaptability, and readiness for the role.
467
+ **5. Actionable HR Recommendations**
468
+ - Provide specific, prioritized recommendations for the candidate’s development.
469
+ - Focus areas: Effective Communication, Content Clarity and Depth, Professional Presence.
470
+ - Suggest next steps for hiring managers (e.g., advance to next round, additional assessments, training focus).
471
  """
472
  response = gemini_model.generate_content(prompt)
473
  return response.text
 
475
  logger.error(f"Report generation failed: {str(e)}")
476
  return f"Error generating report: {str(e)}"
477
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
478
  def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text: str):
479
  try:
480
  doc = SimpleDocTemplate(output_path, pagesize=letter,
481
  rightMargin=0.75*inch, leftMargin=0.75*inch,
482
  topMargin=1*inch, bottomMargin=1*inch)
 
483
  styles = getSampleStyleSheet()
484
+ h1 = ParagraphStyle(name='Heading1', fontSize=22, leading=26, spaceAfter=20, alignment=1, textColor=colors.HexColor('#1A3C5E'))
485
+ h2 = ParagraphStyle(name='Heading2', fontSize=14, leading=18, spaceBefore=14, spaceAfter=8, textColor=colors.HexColor('#2E5A87'))
486
+ body_text = ParagraphStyle(name='BodyText', parent=styles['Normal'], fontSize=10, leading=14, spaceAfter=8, fontName='Helvetica')
487
+ bullet_style = ParagraphStyle(name='Bullet', parent=body_text, leftIndent=20, bulletIndent=10, fontName='Helvetica')
488
 
489
  story = []
490
 
 
491
  def header_footer(canvas, doc):
492
  canvas.saveState()
 
493
  canvas.setFont('Helvetica', 9)
494
  canvas.setFillColor(colors.grey)
495
+ canvas.drawString(doc.leftMargin, 0.5 * inch, f"Page {doc.page} | EvalBot HR Interview Report | Confidential")
496
+ canvas.setStrokeColor(colors.HexColor('#2E5A87'))
 
497
  canvas.setLineWidth(1)
498
+ canvas.line(doc.leftMargin, doc.height + 0.85*inch, doc.width + doc.leftMargin, doc.height + 0.85*inch)
499
+ canvas.setFont('Helvetica-Bold', 10)
500
+ canvas.drawString(doc.leftMargin, doc.height + 0.9*inch, "Candidate Interview Analysis Report")
501
  canvas.restoreState()
502
 
503
+ # Title Page
504
+ story.append(Paragraph("Candidate Interview Analysis Report", h1))
505
+ story.append(Paragraph(f"Generated on: {time.strftime('%B %d, %Y')}", ParagraphStyle(name='Date', alignment=1, fontSize=10, textColor=colors.grey)))
506
+ story.append(Spacer(1, 0.5 * inch))
 
507
  acceptance_prob = analysis_data.get('acceptance_probability')
508
  if acceptance_prob is not None:
509
+ story.append(Paragraph("Hiring Potential Snapshot", h2))
510
+ prob_color = colors.HexColor('#2E7D32') if acceptance_prob >= 70 else (colors.HexColor('#F57C00') if acceptance_prob >= 40 else colors.HexColor('#D32F2F'))
511
+ story.append(Paragraph(f"Hiring Potential Score: <font size=16 color='{prob_color.hexval()}'><b>{acceptance_prob:.2f}%</b></font>",
512
+ ParagraphStyle(name='Prob', fontSize=12, spaceAfter=12, alignment=1)))
513
  if acceptance_prob >= 80:
514
+ story.append(Paragraph("<b>HR Assessment:</b> Exceptional candidate, strongly recommended for advancement to the next stage.", body_text))
515
  elif acceptance_prob >= 50:
516
+ story.append(Paragraph("<b>HR Assessment:</b> Promising candidate with moderate strengths; consider for further evaluation.", body_text))
517
  else:
518
+ story.append(Paragraph("<b>HR Assessment:</b> Limited alignment with role expectations; significant development needed.", body_text))
519
+ story.append(Spacer(1, 0.3 * inch))
520
+ story.append(Paragraph("Prepared by: EvalBot - AI-Powered HR Interview Analysis System", body_text))
521
  story.append(PageBreak())
522
 
523
+ # Detailed Analysis
524
+ story.append(Paragraph("Detailed Candidate Evaluation", h1))
525
 
526
+ story.append(Paragraph("1. Communication and Vocal Profile", h2))
527
  voice_analysis = analysis_data.get('voice_analysis', {})
528
  if voice_analysis and 'error' not in voice_analysis:
 
529
  table_data = [
530
+ ['Metric', 'Value', 'HR Insight'],
531
+ ['Speaking Rate', f"{voice_analysis.get('speaking_rate', 0):.2f} words/sec", 'Optimal: 2.0-3.0 wps; impacts clarity and confidence'],
532
+ ['Filler Word Usage', f"{voice_analysis.get('filler_ratio', 0) * 100:.1f}%", 'High usage may reduce perceived professionalism'],
533
+ ['Anxiety Indicator', voice_analysis.get('interpretation', {}).get('anxiety_level', 'N/A'), f"Score: {voice_analysis.get('composite_scores', {}).get('anxiety', 0):.3f}; reflects pressure response"],
534
+ ['Confidence Indicator', voice_analysis.get('interpretation', {}).get('confidence_level', 'N/A'), f"Score: {voice_analysis.get('composite_scores', {}).get('confidence', 0):.3f}; indicates vocal authority"],
535
+ ['Fluency Assessment', voice_analysis.get('interpretation', {}).get('fluency_level', 'N/A'), 'Affects engagement and message delivery']
536
  ]
537
+ table = Table(table_data, colWidths=[1.8*inch, 1.2*inch, 3.5*inch])
538
  table.setStyle(TableStyle([
539
+ ('BACKGROUND', (0,0), (-1,0), colors.HexColor('#2E5A87')),
540
+ ('TEXTCOLOR', (0,0), (-1,0), colors.whitesmoke),
541
  ('ALIGN', (0,0), (-1,-1), 'LEFT'),
542
  ('VALIGN', (0,0), (-1,-1), 'MIDDLE'),
543
  ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
544
  ('FONTSIZE', (0, 0), (-1, -1), 9),
545
+ ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
546
+ ('TOPPADDING', (0, 0), (-1, 0), 12),
547
+ ('BACKGROUND', (0, 1), (-1, -1), colors.HexColor('#F5F7FA')),
548
+ ('GRID', (0,0), (-1,-1), 1, colors.HexColor('#DDE4EB'))
549
  ]))
550
  story.append(table)
551
+ story.append(Spacer(1, 0.25 * inch))
 
552
  chart_buffer = io.BytesIO()
553
  generate_anxiety_confidence_chart(voice_analysis.get('composite_scores', {}), chart_buffer)
554
  chart_buffer.seek(0)
555
+ img = Image(chart_buffer, width=4.5*inch, height=2.8*inch)
556
  img.hAlign = 'CENTER'
557
  story.append(img)
558
  else:
559
+ story.append(Paragraph("Voice analysis unavailable due to processing limitations.", body_text))
560
+ story.append(Spacer(1, 0.3 * inch))
561
 
562
+ # Parse Gemini Report
 
 
563
  sections = {}
564
+ section_titles = ["Executive Summary", "Communication and Vocal Analysis",
565
+ "Content Analysis and Competency Assessment",
566
+ "Fit and Potential Evaluation", "Actionable HR Recommendations"]
567
  for title in section_titles:
568
  sections[title] = []
 
 
 
569
  report_parts = re.split(r'(\s*\*\*\s*\d\.\s*.*?\s*\*\*)', gemini_report_text)
 
570
  current_section = None
571
  for part in report_parts:
572
  if not part.strip(): continue
 
573
  is_heading = False
574
  for title in section_titles:
 
575
  if title.lower() in part.lower():
576
  current_section = title
577
  is_heading = True
578
  break
 
579
  if not is_heading and current_section:
580
  sections[current_section].append(part.strip())
581
 
582
+ # Executive Summary
583
+ story.append(Paragraph("2. Executive Summary", h2))
584
+ if sections['Executive Summary']:
585
+ for line in sections['Executive Summary']:
586
  if line.startswith(('-', '•', '*')):
587
  story.append(Paragraph(line.lstrip('-•* ').strip(), bullet_style))
588
  else:
589
  story.append(Paragraph(line, body_text))
590
  else:
591
+ story.append(Paragraph("Summary not available from analysis.", body_text))
592
+ story.append(Spacer(1, 0.3 * inch))
593
 
594
+ # Content and Competency
595
+ story.append(Paragraph("3. Content and Competency Assessment", h2))
596
+ if sections['Content Analysis and Competency Assessment']:
597
+ for line in sections['Content Analysis and Competency Assessment']:
 
598
  if line.startswith(('-', '•', '*')):
599
  story.append(Paragraph(line.lstrip('-•* ').strip(), bullet_style))
600
  else:
601
  story.append(Paragraph(line, body_text))
602
  else:
603
+ story.append(Paragraph("Content and competency analysis not provided.", body_text))
604
+ story.append(PageBreak())
605
+
606
+ # Fit and Potential
607
+ story.append(Paragraph("4. Fit and Potential Evaluation", h2))
608
+ if sections['Fit and Potential Evaluation']:
609
+ for line in sections['Fit and Potential Evaluation']:
610
+ if line.startswith(('-', '•', '*')):
611
+ story.append(Paragraph(line.lstrip('-•* ').strip(), bullet_style))
612
+ else:
613
+ story.append(Paragraph(line, body_text))
614
+ else:
615
+ story.append(Paragraph("Fit and potential evaluation not available.", body_text))
616
+ story.append(Spacer(1, 0.3 * inch))
617
+
618
+ # HR Recommendations
619
+ story.append(Paragraph("5. Actionable HR Recommendations", h2))
620
+ if sections['Actionable HR Recommendations']:
621
+ for line in sections['Actionable HR Recommendations']:
622
+ if line.startswith(('-', '•', '*')):
623
+ story.append(Paragraph(line.lstrip('-•* ').strip(), bullet_style))
624
+ else:
625
+ story.append(Paragraph(line, body_text))
626
+ else:
627
+ story.append(Paragraph("HR recommendations not provided.", body_text))
628
+
629
  doc.build(story, onFirstPage=header_footer, onLaterPages=header_footer)
630
  return True
631
  except Exception as e:
 
639
  if isinstance(obj, np.ndarray): return obj.tolist()
640
  return obj
641
 
 
642
  def process_interview(audio_path_or_url: str):
643
  local_audio_path = None
644
  wav_file = None
645
  is_downloaded = False
646
  try:
647
  logger.info(f"Starting processing for {audio_path_or_url}")
 
648
  if audio_path_or_url.startswith(('http://', 'https://')):
649
  local_audio_path = download_audio_from_url(audio_path_or_url)
650
  is_downloaded = True
651
  else:
652
  local_audio_path = audio_path_or_url
 
653
  wav_file = convert_to_wav(local_audio_path)
654
  transcript = transcribe(wav_file)
 
655
  for utterance in transcript['utterances']:
656
  utterance['prosodic_features'] = extract_prosodic_features(wav_file, utterance['start'], utterance['end'])
 
657
  utterances_with_speakers = identify_speakers(transcript, wav_file)
 
658
  clf, vectorizer, scaler = None, None, None
659
  if os.path.exists(os.path.join(OUTPUT_DIR, 'role_classifier.pkl')):
660
  clf = joblib.load(os.path.join(OUTPUT_DIR, 'role_classifier.pkl'))
 
662
  scaler = joblib.load(os.path.join(OUTPUT_DIR, 'feature_scaler.pkl'))
663
  else:
664
  clf, vectorizer, scaler = train_role_classifier(utterances_with_speakers)
 
665
  classified_utterances = classify_roles(utterances_with_speakers, clf, vectorizer, scaler)
666
  voice_analysis = analyze_interviewee_voice(wav_file, classified_utterances)
 
667
  analysis_data = {
668
  'transcript': classified_utterances,
669
  'speakers': list(set(u['speaker'] for u in classified_utterances)),
 
673
  'speaker_turns': len(classified_utterances)
674
  }
675
  }
 
676
  analysis_data['acceptance_probability'] = calculate_acceptance_probability(analysis_data)
677
  gemini_report_text = generate_report(analysis_data)
 
678
  base_name = str(uuid.uuid4())
679
  pdf_path = os.path.join(OUTPUT_DIR, f"{base_name}_report.pdf")
680
  json_path = os.path.join(OUTPUT_DIR, f"{base_name}_analysis.json")
 
681
  create_pdf_report(analysis_data, pdf_path, gemini_report_text=gemini_report_text)
 
682
  with open(json_path, 'w') as f:
683
  serializable_data = convert_to_serializable(analysis_data)
684
  json.dump(serializable_data, f, indent=2)
 
685
  logger.info(f"Processing completed for {audio_path_or_url}")
 
686
  return {'pdf_path': pdf_path, 'json_path': json_path}
 
687
  except Exception as e:
688
  logger.error(f"Processing failed for {audio_path_or_url}: {str(e)}", exc_info=True)
689
  raise
 
690
  finally:
691
  if wav_file and os.path.exists(wav_file):
692
  os.remove(wav_file)
693
  if is_downloaded and local_audio_path and os.path.exists(local_audio_path):
694
  os.remove(local_audio_path)
695
+ logger.info(f"Cleaned up temporary downloaded file: {local_audio_path}")