norhan12 commited on
Commit
58775af
·
verified ·
1 Parent(s): c3a988d

Update process_interview.py

Browse files
Files changed (1) hide show
  1. process_interview.py +177 -460
process_interview.py CHANGED
@@ -17,6 +17,7 @@ from sklearn.feature_extraction.text import TfidfVectorizer
17
  import re
18
  from typing import Dict, List, Tuple
19
  import logging
 
20
  # --- Imports for enhanced PDF ---
21
  from reportlab.lib.pagesizes import letter
22
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
@@ -25,10 +26,9 @@ from reportlab.lib.units import inch
25
  from reportlab.lib import colors
26
  import matplotlib.pyplot as plt
27
  import matplotlib
28
-
29
- matplotlib.use('Agg') # --- FIX: تحديد backend لـ matplotlib ---
30
  from reportlab.platypus import Image
31
- import io # --- FIX: إضافة import io لـ BytesIO ---
32
  # --- End Imports for enhanced PDF ---
33
  from transformers import AutoTokenizer, AutoModel
34
  import spacy
@@ -53,6 +53,27 @@ ASSEMBLYAI_KEY = os.getenv("ASSEMBLYAI_KEY")
53
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
54
 
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  # Initialize services
57
  def initialize_services():
58
  try:
@@ -66,16 +87,13 @@ def initialize_services():
66
  spec=ServerlessSpec(cloud="aws", region="us-east-1")
67
  )
68
  index = pc.Index(index_name)
69
-
70
  genai.configure(api_key=GEMINI_API_KEY)
71
  gemini_model = genai.GenerativeModel('gemini-1.5-flash')
72
-
73
  return index, gemini_model
74
  except Exception as e:
75
  logger.error(f"Error initializing services: {str(e)}")
76
  raise
77
 
78
-
79
  index, gemini_model = initialize_services()
80
 
81
  # Device setup
@@ -102,11 +120,9 @@ def load_speaker_model():
102
def load_models():
    """Load every model the analysis pipeline depends on.

    Returns:
        Tuple of (speaker_model, nlp, tokenizer, llm_model):
        the NeMo speaker-embedding model, the spaCy English pipeline,
        and the DistilBERT tokenizer/encoder pair moved to `device`.
    """
    voice_model = load_speaker_model()
    spacy_pipeline = spacy.load("en_core_web_sm")

    bert_tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
    bert_encoder = AutoModel.from_pretrained("distilbert-base-uncased").to(device)
    bert_encoder.eval()  # inference-only: disable dropout / training behavior

    return voice_model, spacy_pipeline, bert_tokenizer, bert_encoder
111
 
112
 
@@ -120,7 +136,6 @@ def convert_to_wav(audio_path: str, output_dir: str = OUTPUT_DIR) -> str:
120
  if audio.channels > 1:
121
  audio = audio.set_channels(1)
122
  audio = audio.set_frame_rate(16000)
123
-
124
  wav_file = os.path.join(output_dir, f"{uuid.uuid4()}.wav")
125
  audio.export(wav_file, format="wav")
126
  return wav_file
@@ -135,11 +150,9 @@ def extract_prosodic_features(audio_path: str, start_ms: int, end_ms: int) -> Di
135
  segment = audio[start_ms:end_ms]
136
  temp_path = os.path.join(OUTPUT_DIR, f"temp_{uuid.uuid4()}.wav")
137
  segment.export(temp_path, format="wav")
138
-
139
  y, sr = librosa.load(temp_path, sr=16000)
140
  pitches = librosa.piptrack(y=y, sr=sr)[0]
141
  pitches = pitches[pitches > 0]
142
-
143
  features = {
144
  'duration': (end_ms - start_ms) / 1000,
145
  'mean_pitch': float(np.mean(pitches)) if len(pitches) > 0 else 0.0,
@@ -151,21 +164,14 @@ def extract_prosodic_features(audio_path: str, start_ms: int, end_ms: int) -> Di
151
  'intensityMax': float(np.max(librosa.feature.rms(y=y)[0])),
152
  'intensitySD': float(np.std(librosa.feature.rms(y=y)[0])),
153
  }
154
-
155
  os.remove(temp_path)
156
  return features
157
  except Exception as e:
158
  logger.error(f"Feature extraction failed: {str(e)}")
159
  return {
160
- 'duration': 0.0,
161
- 'mean_pitch': 0.0,
162
- 'min_pitch': 0.0,
163
- 'max_pitch': 0.0,
164
- 'pitch_sd': 0.0,
165
- 'intensityMean': 0.0,
166
- 'intensityMin': 0.0,
167
- 'intensityMax': 0.0,
168
- 'intensitySD': 0.0,
169
  }
170
 
171
 
@@ -178,7 +184,6 @@ def transcribe(audio_path: str) -> Dict:
178
  data=f
179
  )
180
  audio_url = upload_response.json()['upload_url']
181
-
182
  transcript_response = requests.post(
183
  "https://api.assemblyai.com/v2/transcript",
184
  headers={"authorization": ASSEMBLYAI_KEY},
@@ -189,18 +194,15 @@ def transcribe(audio_path: str) -> Dict:
189
  }
190
  )
191
  transcript_id = transcript_response.json()['id']
192
-
193
  while True:
194
  result = requests.get(
195
  f"https://api.assemblyai.com/v2/transcript/{transcript_id}",
196
  headers={"authorization": ASSEMBLYAI_KEY}
197
  ).json()
198
-
199
  if result['status'] == 'completed':
200
  return result
201
  elif result['status'] == 'error':
202
  raise Exception(result['error'])
203
-
204
  time.sleep(5)
205
  except Exception as e:
206
  logger.error(f"Transcription failed: {str(e)}")
@@ -214,35 +216,27 @@ def process_utterance(utterance, full_audio, wav_file):
214
  segment = full_audio[start:end]
215
  temp_path = os.path.join(OUTPUT_DIR, f"temp_{uuid.uuid4()}.wav")
216
  segment.export(temp_path, format="wav")
217
-
218
  with torch.no_grad():
219
- embedding = speaker_model.get_embedding(temp_path).cpu().numpy() # Ensure numpy array
220
-
221
- # --- FIX: Convert embedding to a flat list for Pinecone query ---
222
  embedding_list = embedding.flatten().tolist()
223
- # --- End FIX ---
224
-
225
  query_result = index.query(
226
- vector=embedding_list, # Use the corrected flat list
227
  top_k=1,
228
  include_metadata=True
229
  )
230
-
231
  if query_result['matches'] and query_result['matches'][0]['score'] > 0.7:
232
  speaker_id = query_result['matches'][0]['id']
233
  speaker_name = query_result['matches'][0]['metadata']['speaker_name']
234
  else:
235
  speaker_id = f"unknown_{uuid.uuid4().hex[:6]}"
236
  speaker_name = f"Speaker_{speaker_id[-4:]}"
237
- index.upsert([(speaker_id, embedding_list, {"speaker_name": speaker_name})]) # Use corrected list
238
-
239
  os.remove(temp_path)
240
-
241
  return {
242
  **utterance,
243
  'speaker': speaker_name,
244
  'speaker_id': speaker_id,
245
- 'embedding': embedding_list # Store the corrected list
246
  }
247
  except Exception as e:
248
  logger.error(f"Utterance processing failed: {str(e)}", exc_info=True)
@@ -258,14 +252,12 @@ def identify_speakers(transcript: Dict, wav_file: str) -> List[Dict]:
258
  try:
259
  full_audio = AudioSegment.from_wav(wav_file)
260
  utterances = transcript['utterances']
261
-
262
- with ThreadPoolExecutor(max_workers=5) as executor: # Changed to 5 workers
263
  futures = [
264
  executor.submit(process_utterance, utterance, full_audio, wav_file)
265
  for utterance in utterances
266
  ]
267
  results = [f.result() for f in futures]
268
-
269
  return results
270
  except Exception as e:
271
  logger.error(f"Speaker identification failed: {str(e)}")
@@ -277,26 +269,16 @@ def train_role_classifier(utterances: List[Dict]):
277
  texts = [u['text'] for u in utterances]
278
  vectorizer = TfidfVectorizer(max_features=500, ngram_range=(1, 2))
279
  X_text = vectorizer.fit_transform(texts)
280
-
281
  features = []
282
  labels = []
283
-
284
  for i, utterance in enumerate(utterances):
285
  prosodic = utterance['prosodic_features']
286
  feat = [
287
- prosodic['duration'],
288
- prosodic['mean_pitch'],
289
- prosodic['min_pitch'],
290
- prosodic['max_pitch'],
291
- prosodic['pitch_sd'],
292
- prosodic['intensityMean'],
293
- prosodic['intensityMin'],
294
- prosodic['intensityMax'],
295
- prosodic['intensitySD'],
296
  ]
297
-
298
  feat.extend(X_text[i].toarray()[0].tolist())
299
-
300
  doc = nlp(utterance['text'])
301
  feat.extend([
302
  int(utterance['text'].endswith('?')),
@@ -305,25 +287,17 @@ def train_role_classifier(utterances: List[Dict]):
305
  sum(1 for token in doc if token.pos_ == 'VERB'),
306
  sum(1 for token in doc if token.pos_ == 'NOUN')
307
  ])
308
-
309
  features.append(feat)
310
  labels.append(0 if i % 2 == 0 else 1)
311
-
312
  scaler = StandardScaler()
313
  X = scaler.fit_transform(features)
314
-
315
  clf = RandomForestClassifier(
316
- n_estimators=150,
317
- max_depth=10,
318
- random_state=42,
319
- class_weight='balanced'
320
  )
321
  clf.fit(X, labels)
322
-
323
  joblib.dump(clf, os.path.join(OUTPUT_DIR, 'role_classifier.pkl'))
324
  joblib.dump(vectorizer, os.path.join(OUTPUT_DIR, 'text_vectorizer.pkl'))
325
  joblib.dump(scaler, os.path.join(OUTPUT_DIR, 'feature_scaler.pkl'))
326
-
327
  return clf, vectorizer, scaler
328
  except Exception as e:
329
  logger.error(f"Classifier training failed: {str(e)}")
@@ -334,24 +308,15 @@ def classify_roles(utterances: List[Dict], clf, vectorizer, scaler):
334
  try:
335
  texts = [u['text'] for u in utterances]
336
  X_text = vectorizer.transform(texts)
337
-
338
  results = []
339
  for i, utterance in enumerate(utterances):
340
  prosodic = utterance['prosodic_features']
341
  feat = [
342
- prosodic['duration'],
343
- prosodic['mean_pitch'],
344
- prosodic['min_pitch'],
345
- prosodic['max_pitch'],
346
- prosodic['pitch_sd'],
347
- prosodic['intensityMean'],
348
- prosodic['intensityMin'],
349
- prosodic['intensityMax'],
350
- prosodic['intensitySD'],
351
  ]
352
-
353
  feat.extend(X_text[i].toarray()[0].tolist())
354
-
355
  doc = nlp(utterance['text'])
356
  feat.extend([
357
  int(utterance['text'].endswith('?')),
@@ -360,12 +325,9 @@ def classify_roles(utterances: List[Dict], clf, vectorizer, scaler):
360
  sum(1 for token in doc if token.pos_ == 'VERB'),
361
  sum(1 for token in doc if token.pos_ == 'NOUN')
362
  ])
363
-
364
  X = scaler.transform([feat])
365
  role = 'Interviewer' if clf.predict(X)[0] == 0 else 'Interviewee'
366
-
367
  results.append({**utterance, 'role': role})
368
-
369
  return results
370
  except Exception as e:
371
  logger.error(f"Role classification failed: {str(e)}")
@@ -375,90 +337,54 @@ def classify_roles(utterances: List[Dict], clf, vectorizer, scaler):
375
  def analyze_interviewee_voice(audio_path: str, utterances: List[Dict]) -> Dict:
376
  try:
377
  y, sr = librosa.load(audio_path, sr=16000)
378
-
379
  interviewee_utterances = [u for u in utterances if u['role'] == 'Interviewee']
380
  if not interviewee_utterances:
381
  return {'error': 'No interviewee utterances found'}
382
-
383
  segments = []
384
  for u in interviewee_utterances:
385
  start = int(u['start'] * sr / 1000)
386
  end = int(u['end'] * sr / 1000)
387
  segments.append(y[start:end])
388
-
389
- combined_audio = np.concatenate(segments)
390
-
391
  total_duration = sum(u['prosodic_features']['duration'] for u in interviewee_utterances)
392
  total_words = sum(len(u['text'].split()) for u in interviewee_utterances)
393
  speaking_rate = total_words / total_duration if total_duration > 0 else 0
394
-
395
  filler_words = ['um', 'uh', 'like', 'you know', 'so', 'i mean']
396
- filler_count = sum(
397
- sum(u['text'].lower().count(fw) for fw in filler_words)
398
- for u in interviewee_utterances
399
- )
400
  filler_ratio = filler_count / total_words if total_words > 0 else 0
401
-
402
  all_words = ' '.join(u['text'].lower() for u in interviewee_utterances).split()
403
  word_counts = {}
404
  for i in range(len(all_words) - 1):
405
  bigram = (all_words[i], all_words[i + 1])
406
  word_counts[bigram] = word_counts.get(bigram, 0) + 1
407
- repetition_score = sum(1 for count in word_counts.values() if count > 1) / len(
408
- word_counts) if word_counts else 0
409
-
410
  pitches = []
411
  for segment in segments:
412
  f0, voiced_flag, _ = librosa.pyin(segment, fmin=80, fmax=300, sr=sr)
413
  pitches.extend(f0[voiced_flag])
414
-
415
  pitch_mean = np.mean(pitches) if len(pitches) > 0 else 0
416
  pitch_std = np.std(pitches) if len(pitches) > 0 else 0
417
  jitter = np.mean(np.abs(np.diff(pitches))) / pitch_mean if len(pitches) > 1 and pitch_mean > 0 else 0
418
-
419
  intensities = []
420
  for segment in segments:
421
  rms = librosa.feature.rms(y=segment)[0]
422
  intensities.extend(rms)
423
-
424
  intensity_mean = np.mean(intensities) if intensities else 0
425
  intensity_std = np.std(intensities) if intensities else 0
426
- shimmer = np.mean(np.abs(np.diff(intensities))) / intensity_mean if len(
427
- intensities) > 1 and intensity_mean > 0 else 0
428
-
429
  anxiety_score = 0.6 * (pitch_std / pitch_mean) + 0.4 * (jitter + shimmer) if pitch_mean > 0 else 0
430
  confidence_score = 0.7 * (1 / (1 + intensity_std)) + 0.3 * (1 / (1 + filler_ratio))
431
  hesitation_score = filler_ratio + repetition_score
432
-
433
  anxiety_level = 'high' if anxiety_score > 0.15 else 'moderate' if anxiety_score > 0.07 else 'low'
434
  confidence_level = 'high' if confidence_score > 0.7 else 'moderate' if confidence_score > 0.5 else 'low'
435
- fluency_level = 'fluent' if (filler_ratio < 0.05 and repetition_score < 0.1) else 'moderate' if (
436
- filler_ratio < 0.1 and repetition_score < 0.2) else 'disfluent'
437
-
438
  return {
439
  'speaking_rate': float(round(speaking_rate, 2)),
440
  'filler_ratio': float(round(filler_ratio, 4)),
441
  'repetition_score': float(round(repetition_score, 4)),
442
- 'pitch_analysis': {
443
- 'mean': float(round(pitch_mean, 2)),
444
- 'std_dev': float(round(pitch_std, 2)),
445
- 'jitter': float(round(jitter, 4))
446
- },
447
- 'intensity_analysis': {
448
- 'mean': float(round(intensity_mean, 2)),
449
- 'std_dev': float(round(intensity_std, 2)),
450
- 'shimmer': float(round(shimmer, 4))
451
- },
452
- 'composite_scores': {
453
- 'anxiety': float(round(anxiety_score, 4)),
454
- 'confidence': float(round(confidence_score, 4)),
455
- 'hesitation': float(round(hesitation_score, 4))
456
- },
457
- 'interpretation': {
458
- 'anxiety_level': anxiety_level,
459
- 'confidence_level': confidence_level,
460
- 'fluency_level': fluency_level
461
- }
462
  }
463
  except Exception as e:
464
  logger.error(f"Voice analysis failed: {str(e)}")
@@ -466,187 +392,102 @@ def analyze_interviewee_voice(audio_path: str, utterances: List[Dict]) -> Dict:
466
 
467
 
468
def generate_voice_interpretation(analysis: Dict) -> str:
    """Render the voice-analysis dict as plain text for the Gemini prompt.

    Returns a multi-line summary of the key voice metrics followed by a
    fixed explanatory section, or a short fallback message when the
    analysis dict carries an 'error' key.
    """
    if 'error' in analysis:
        return "Voice analysis not available."

    scores = analysis['composite_scores']
    levels = analysis['interpretation']
    report_lines = [
        "Voice Analysis Summary:",
        f"- Speaking Rate: {analysis['speaking_rate']} words/sec (average)",
        f"- Filler Words: {analysis['filler_ratio'] * 100:.1f}% of words",
        f"- Repetition Score: {analysis['repetition_score']:.3f}",
        f"- Anxiety Level: {levels['anxiety_level'].upper()} (score: {scores['anxiety']:.3f})",
        f"- Confidence Level: {levels['confidence_level'].upper()} (score: {scores['confidence']:.3f})",
        f"- Fluency: {levels['fluency_level'].upper()}",
        "",
        "Detailed Interpretation:",
        "1. A higher speaking rate indicates faster speech, which can suggest nervousness or enthusiasm.",
        "2. Filler words and repetitions reduce speech clarity and professionalism.",
        "3. Anxiety is measured through pitch variability and voice instability.",
        "4. Confidence is assessed through voice intensity and stability.",
        "5. Fluency combines filler words and repetition metrics.",
    ]
    return "\n".join(report_lines)
493
 
494
 
495
- # --- Chart Generation Function ---
496
def generate_anxiety_confidence_chart(composite_scores: Dict, chart_path: str):
    """Save a small bar chart of the anxiety and confidence scores to *chart_path*.

    Failures are logged, not raised, so PDF generation can proceed without
    the chart.
    """
    try:
        bar_labels = ['Anxiety', 'Confidence']
        bar_values = [composite_scores.get('anxiety', 0), composite_scores.get('confidence', 0)]

        # Compact figure: it is embedded inside the PDF report.
        figure, axis = plt.subplots(figsize=(4, 2.5))
        axis.bar(bar_labels, bar_values, color=['lightcoral', 'lightskyblue'])
        axis.set_ylabel('Score')
        axis.set_title('Anxiety vs. Confidence Scores')
        axis.set_ylim(0, 1.0)  # assumes scores are normalized to 0-1 — TODO confirm upstream

        # Annotate each bar with its numeric value.
        for position, value in enumerate(bar_values):
            axis.text(position, value + 0.05, f"{value:.2f}", color='black', ha='center', fontweight='bold')

        plt.tight_layout()
        plt.savefig(chart_path)
        plt.close(figure)  # free the figure's memory
    except Exception as e:
        logger.error(f"Error generating chart: {str(e)}")
516
 
517
 
518
- # --- Acceptance Probability Calculation ---
519
def calculate_acceptance_probability(analysis_data: Dict) -> float:
    """Estimate a hypothetical acceptance probability as a percentage (0-100).

    A simple heuristic linear model over the voice-analysis results: each
    factor is mapped to [0, 1], weighted, summed, normalized by the total
    weight mass, clamped, and returned as a percentage rounded to two
    decimals. The weights are hand-tuned and should be calibrated with
    real data.

    Args:
        analysis_data: Aggregate analysis dict; reads 'voice_analysis'
            (composite scores, interpretation, rate/filler/repetition
            metrics) and 'text_analysis.total_duration'.

    Returns:
        Acceptance probability in [0.0, 100.0]; 0.0 when voice analysis
        failed (carries an 'error' key).
    """
    voice = analysis_data.get('voice_analysis', {})
    if 'error' in voice:
        return 0.0  # cannot score a candidate without voice analysis

    # Factor weights (negative = penalizing factor; applied via abs() to
    # already-inverted scores below). Adjust to fine-tune the model.
    w_confidence = 0.4
    w_anxiety = -0.3
    w_fluency = 0.2
    w_speaking_rate = 0.1
    w_filler_repetition = -0.1
    w_content_strengths = 0.2

    confidence_score = voice.get('composite_scores', {}).get('confidence', 0.0)
    anxiety_score = voice.get('composite_scores', {}).get('anxiety', 0.0)
    fluency_level = voice.get('interpretation', {}).get('fluency_level', 'disfluent')
    speaking_rate = voice.get('speaking_rate', 0.0)
    filler_ratio = voice.get('filler_ratio', 0.0)
    repetition_score = voice.get('repetition_score', 0.0)

    # Fluency: categorical level -> [0, 1].
    fluency_map = {'fluent': 1.0, 'moderate': 0.5, 'disfluent': 0.0}
    fluency_val = fluency_map.get(fluency_level, 0.0)

    # Speaking rate: 1.0 at the ideal rate, linearly decaying with deviation.
    ideal_speaking_rate = 2.5
    speaking_rate_deviation = abs(speaking_rate - ideal_speaking_rate)
    speaking_rate_score = max(0, 1 - (speaking_rate_deviation / ideal_speaking_rate))

    # Filler/repetition: lower is better, so invert the averaged composite.
    filler_repetition_composite = (filler_ratio + repetition_score) / 2
    filler_repetition_score = max(0, 1 - filler_repetition_composite)

    # Content strength placeholder: assumes moderate strength whenever the
    # interview actually produced content — TODO replace with a structured
    # signal from the Gemini content analysis.
    content_strength_val = 0.0
    if analysis_data.get('text_analysis', {}).get('total_duration', 0) > 0:
        content_strength_val = 0.8

    raw_score = (
        confidence_score * w_confidence
        + (1 - anxiety_score) * abs(w_anxiety)  # lower anxiety is better
        + fluency_val * w_fluency
        + speaking_rate_score * w_speaking_rate
        + filler_repetition_score * abs(w_filler_repetition)  # score already inverted
        + content_strength_val * w_content_strengths
    )

    # Every factor lies in [0, 1], so raw_score ranges from 0 to the sum of
    # the absolute weights; normalize by that mass (guard against a
    # degenerate all-zero weight configuration).
    max_possible_score = (
        w_confidence + abs(w_anxiety) + w_fluency
        + w_speaking_rate + abs(w_filler_repetition) + w_content_strengths
    )
    if max_possible_score == 0:
        normalized_score = 0.5  # no weight mass: fall back to a neutral score
    else:
        normalized_score = raw_score / max_possible_score

    acceptance_probability = max(0.0, min(1.0, normalized_score))  # clamp to [0, 1]
    # round() replaces the original float(f"{...:.2f}") string round-trip.
    return round(acceptance_probability * 100, 2)
596
 
597
 
598
  def generate_report(analysis_data: Dict) -> str:
599
  try:
600
  voice = analysis_data.get('voice_analysis', {})
601
  voice_interpretation = generate_voice_interpretation(voice)
602
-
603
- interviewee_responses = [
604
- f"Speaker {u['speaker']} ({u['role']}): {u['text']}"
605
- for u in analysis_data['transcript']
606
- if u['role'] == 'Interviewee'
607
- ][:5] # Limit to first 5 for prompt brevity
608
-
609
  acceptance_prob = analysis_data.get('acceptance_probability', None)
610
  acceptance_line = ""
611
  if acceptance_prob is not None:
612
  acceptance_line = f"\n**Estimated Acceptance Probability: {acceptance_prob:.2f}%**\n"
613
- if acceptance_prob >= 80:
614
- acceptance_line += "This indicates a very strong candidate. Well done!"
615
- elif acceptance_prob >= 50:
616
- acceptance_line += "This indicates a solid candidate with potential for improvement."
617
- else:
618
- acceptance_line += "This candidate may require significant development or may not be a strong fit."
619
-
620
  prompt = f"""
621
- As EvalBot, an AI interview analysis system, generate a highly professional, well-structured, and concise interview analysis report.
622
- The report should be suitable for a professional setting and clearly highlight key findings and actionable recommendations.
623
- Use clear headings and subheadings. For bullet points, use '- '.
624
-
625
  {acceptance_line}
626
-
627
  **1. Executive Summary**
628
  Provide a brief, high-level overview of the interview.
629
  - Overall interview duration: {analysis_data['text_analysis']['total_duration']:.2f} seconds
630
  - Number of speaker turns: {analysis_data['text_analysis']['speaker_turns']}
631
  - Main participants: {', '.join(analysis_data['speakers'])}
632
-
633
  **2. Voice Analysis Insights**
634
  Analyze key voice metrics and provide a detailed interpretation.
635
  {voice_interpretation}
636
-
637
  **3. Content Analysis & Strengths/Areas for Development**
638
  Analyze the key themes and identify both strengths and areas for development in the interviewee's responses.
639
  Key responses from interviewee (for context):
640
  {chr(10).join(interviewee_responses)}
641
-
642
  **4. Actionable Recommendations**
643
  Offer specific, actionable suggestions for improvement.
644
- Focus on:
645
- - Communication Skills (e.g., pacing, clarity, filler words)
646
- - Content Delivery (e.g., quantifying achievements, structuring answers)
647
- - Professional Presentation (e.g., research, specific examples, mock interviews)
648
  """
649
-
650
  response = gemini_model.generate_content(prompt)
651
  return response.text
652
  except Exception as e:
@@ -654,73 +495,39 @@ def generate_report(analysis_data: Dict) -> str:
654
  return f"Error generating report: {str(e)}"
655
 
656
 
657
- # --- ENHANCED PDF GENERATION FUNCTION ---
658
  def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text: str):
659
  try:
660
  doc = SimpleDocTemplate(output_path, pagesize=letter)
661
  styles = getSampleStyleSheet()
662
-
663
- # Define custom styles
664
- h1 = ParagraphStyle(name='Heading1', parent=styles['h1'], fontSize=16, spaceAfter=14, alignment=1,
665
- textColor=colors.HexColor('#003366'))
666
- h2 = ParagraphStyle(name='Heading2', parent=styles['h2'], fontSize=12, spaceBefore=10, spaceAfter=8,
667
- textColor=colors.HexColor('#336699'))
668
- h3 = ParagraphStyle(name='Heading3', parent=styles['h3'], fontSize=10, spaceBefore=8, spaceAfter=4,
669
- textColor=colors.HexColor('#0055AA'))
670
  body_text = ParagraphStyle(name='BodyText', parent=styles['Normal'], fontSize=9, leading=12, spaceAfter=4)
671
- bullet_style = ParagraphStyle(name='Bullet', parent=styles['Normal'], fontSize=9, leading=12, leftIndent=18,
672
- bulletIndent=9)
673
-
674
  story = []
675
-
676
- # Title and Date
677
  story.append(Paragraph(f"<b>EvalBot Interview Analysis Report</b>", h1))
678
  story.append(Spacer(1, 0.2 * inch))
679
  story.append(Paragraph(f"<b>Date:</b> {time.strftime('%Y-%m-%d')}", body_text))
680
  story.append(Spacer(1, 0.3 * inch))
681
-
682
- # --- Acceptance Probability (New Section) ---
683
- acceptance_prob = analysis_data.get('acceptance_probability', None)
684
  if acceptance_prob is not None:
685
  story.append(Paragraph("<b>Candidate Evaluation Summary</b>", h2))
686
  story.append(Spacer(1, 0.1 * inch))
687
-
688
- prob_color = colors.green if acceptance_prob >= 70 else (
689
- colors.orange if acceptance_prob >= 40 else colors.red)
690
-
691
- # --- FIX: Call .hexval() as a method ---
692
- story.append(Paragraph(
693
- f"<font size='12' color='{prob_color.hexval()}'><b>Estimated Acceptance Probability: {acceptance_prob:.2f}%</b></font>",
694
- ParagraphStyle(name='AcceptanceProbability', parent=styles['Normal'], fontSize=12, spaceAfter=10,
695
- alignment=1)
696
- ))
697
- # --- End FIX ---
698
-
699
- if acceptance_prob >= 80:
700
- story.append(
701
- Paragraph("This indicates a very strong candidate with high potential. Well done!", body_text))
702
- elif acceptance_prob >= 50:
703
- story.append(Paragraph(
704
- "This candidate shows solid potential but has areas for improvement to become an even stronger fit.",
705
- body_text))
706
- else:
707
- story.append(Paragraph(
708
- "This candidate may require significant development or may not be the ideal fit at this time.",
709
- body_text))
710
  story.append(Spacer(1, 0.3 * inch))
711
- # --- End Acceptance Probability ---
712
-
713
- # Parse Gemini's report into sections for better PDF structuring
714
  sections = {}
715
  current_section = None
716
- # Use regex to robustly identify sections, especially with varied bullet points
717
  section_patterns = {
718
  r'^\s*\*\*\s*1\.\s*Executive Summary\s*\*\*': 'Executive Summary',
719
  r'^\s*\*\*\s*2\.\s*Voice Analysis Insights\s*\*\*': 'Voice Analysis Insights',
720
- r'^\s*\*\*\s*3\.\s*Content Analysis & Strengths/Areas for Development\s*\*\*': 'Content Analysis & Strengths/Areas for Development',
721
- r'^\s*\*\*\s*4\.\s*Actionable Recommendations\s*\*\*': 'Actionable Recommendations'
722
  }
723
-
724
  for line in gemini_report_text.split('\n'):
725
  matched_section = False
726
  for pattern, section_name in section_patterns.items():
@@ -731,132 +538,52 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text:
731
  break
732
  if not matched_section and current_section:
733
  sections[current_section].append(line)
734
-
735
- # 1. Executive Summary
736
- story.append(Paragraph("1. Executive Summary", h2))
737
- story.append(Spacer(1, 0.1 * inch))
738
- if 'Executive Summary' in sections:
739
- for line in sections['Executive Summary']:
740
- if line.strip():
741
- story.append(Paragraph(line.strip(), body_text))
742
- story.append(Spacer(1, 0.2 * inch))
743
-
744
- # 2. Voice Analysis (Detailed - using Table for summary)
745
- story.append(Paragraph("2. Voice Analysis", h2))
746
  voice_analysis = analysis_data.get('voice_analysis', {})
747
-
748
  if voice_analysis and 'error' not in voice_analysis:
749
- # Voice Analysis Summary Table
750
  table_data = [
751
  ['Metric', 'Value', 'Interpretation'],
752
  ['Speaking Rate', f"{voice_analysis['speaking_rate']:.2f} words/sec", 'Average rate'],
753
- ['Filler Words', f"{voice_analysis['filler_ratio'] * 100:.1f}%", 'Percentage of total words'],
754
- ['Repetition Score', f"{voice_analysis['repetition_score']:.3f}", 'Lower is better articulation'],
755
- ['Anxiety Level', voice_analysis['interpretation']['anxiety_level'].upper(),
756
- f"Score: {voice_analysis['composite_scores']['anxiety']:.3f}"],
757
- ['Confidence Level', voice_analysis['interpretation']['confidence_level'].upper(),
758
- f"Score: {voice_analysis['composite_scores']['confidence']:.3f}"],
759
  ['Fluency', voice_analysis['interpretation']['fluency_level'].upper(), 'Overall speech flow']
760
  ]
761
-
762
- table_style = TableStyle([
763
- ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#6699CC')),
764
- ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
765
- ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
766
  ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
767
- ('BOTTOMPADDING', (0, 0), (-1, 0), 10),
768
- ('BACKGROUND', (0, 1), (-1, -1), colors.HexColor('#EFEFEF')),
769
- ('GRID', (0, 0), (-1, -1), 0.5, colors.HexColor('#CCCCCC')),
770
- ('LEFTPADDING', (0, 0), (-1, -1), 6),
771
- ('RIGHTPADDING', (0, 0), (-1, -1), 6),
772
- ('TOPPADDING', (0, 0), (-1, -1), 6),
773
- ('BOTTOMPADDING', (0, 0), (-1, -1), 6),
774
- ])
775
-
776
- table = Table(table_data)
777
- table.setStyle(table_style)
778
  story.append(table)
779
  story.append(Spacer(1, 0.2 * inch))
780
 
781
- # --- Charts ---
782
- story.append(Paragraph("Score Visualization:", h3))
783
- chart_path = os.path.join(OUTPUT_DIR, f"anxiety_confidence_{uuid.uuid4().hex[:8]}.png")
784
- # --- FIX: Call generate_anxiety_confidence_chart if it is defined and imports are correct ---
785
- try:
786
- # The generate_anxiety_confidence_chart function is now expected to be defined.
787
- # It relies on matplotlib and Image (from reportlab.platypus)
788
- generate_anxiety_confidence_chart(voice_analysis['composite_scores'], chart_path)
789
- if os.path.exists(chart_path):
790
- img = Image(chart_path, width=3.5 * inch, height=2.0 * inch)
791
- story.append(img)
792
- story.append(Spacer(1, 0.1 * inch))
793
- os.remove(chart_path)
794
- except NameError: # Catch NameError if function is truly not defined
795
- logger.warning(
796
- "Chart generation function 'generate_anxiety_confidence_chart' is not defined. Skipping chart.")
797
- except Exception as chart_e:
798
- logger.warning(f"Could not add chart image to PDF: {chart_e}. Please check matplotlib installation.")
799
- # --- End FIX ---
800
- # --- End Charts ---
801
-
802
- # Detailed Interpretation from Gemini (if present)
803
- if 'Voice Analysis Insights' in sections:
804
- story.append(Paragraph("Detailed Interpretation:", h3))
805
- for line in sections['Voice Analysis Insights']:
806
- if line.strip():
807
- # Handle numbered lists from Gemini
808
- if re.match(r'^\d+\.\s', line.strip()):
809
- story.append(
810
- Paragraph(line.strip(), bullet_style))
811
- else:
812
- story.append(Paragraph(line.strip(), body_text))
813
- story.append(Spacer(1, 0.2 * inch))
814
-
815
  else:
816
- story.append(Paragraph("Voice analysis not available or encountered an error.", body_text))
817
- story.append(Spacer(1, 0.3 * inch))
818
-
819
- # 3. Content Analysis
820
- story.append(Paragraph("3. Content Analysis", h2))
821
- if 'Content Analysis & Strengths/Areas for Development' in sections:
822
- for line in sections['Content Analysis & Strengths/Areas for Development']:
823
- if line.strip():
824
- # Handle bullet points from Gemini
825
- if line.strip().startswith('-'):
826
- story.append(Paragraph(line.strip()[1:].strip(), bullet_style)) # Remove the '-' and strip
827
- else:
828
- story.append(Paragraph(line.strip(), body_text))
829
- story.append(Spacer(1, 0.2 * inch))
830
-
831
- # Add some interviewee responses to the report (can be formatted as a list)
832
- story.append(Paragraph("Key Interviewee Responses (Contextual):", h3))
833
- interviewee_responses = [
834
- f"Speaker {u['speaker']} ({u['role']}): {u['text']}"
835
- for u in analysis_data['transcript']
836
- if u['role'] == 'Interviewee'
837
- ][:5]
838
- for res in interviewee_responses:
839
- story.append(Paragraph(res, bullet_style))
840
- story.append(Spacer(1, 0.3 * inch))
841
-
842
- # 4. Recommendations
843
- story.append(Paragraph("4. Recommendations", h2))
844
- if 'Actionable Recommendations' in sections:
845
- for line in sections['Actionable Recommendations']:
846
- if line.strip():
847
- # Handle bullet points from Gemini
848
- if line.strip().startswith('-'):
849
- story.append(Paragraph(line.strip()[1:].strip(), bullet_style)) # Remove the '-' and strip
850
- else:
851
- story.append(Paragraph(line.strip(), body_text))
852
- story.append(Spacer(1, 0.2 * inch))
853
-
854
- # Footer Text
855
- story.append(Spacer(1, 0.5 * inch))
856
- story.append(Paragraph("--- Analysis by EvalBot ---", ParagraphStyle(
857
- name='FooterText', parent=styles['Normal'], fontSize=8, alignment=1, textColor=colors.HexColor('#666666')
858
- )))
859
-
860
  doc.build(story)
861
  return True
862
  except Exception as e:
@@ -865,53 +592,45 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text:
865
 
866
 
867
  def convert_to_serializable(obj):
868
- if isinstance(obj, np.generic):
869
- return obj.item()
870
- elif isinstance(obj, dict):
871
- return {key: convert_to_serializable(value) for key, value in obj.items()}
872
- elif isinstance(obj, list):
873
- return [convert_to_serializable(item) for item in obj]
874
- elif isinstance(obj, np.ndarray):
875
- return obj.tolist()
876
  return obj
877
 
878
-
879
- def process_interview(audio_path: str):
 
 
 
880
  try:
881
- logger.info(f"Starting processing for {audio_path}")
882
-
883
- wav_file = convert_to_wav(audio_path)
884
-
885
- logger.info("Starting transcription")
 
 
 
 
886
  transcript = transcribe(wav_file)
887
-
888
- logger.info("Extracting prosodic features")
889
  for utterance in transcript['utterances']:
890
- utterance['prosodic_features'] = extract_prosodic_features(
891
- wav_file,
892
- utterance['start'],
893
- utterance['end']
894
- )
895
-
896
- logger.info("Identifying speakers")
897
  utterances_with_speakers = identify_speakers(transcript, wav_file)
898
-
899
- logger.info("Classifying roles")
900
- # Ensure role classifier models are loaded/trained only once if possible,
901
- # or handled carefully in a multi-threaded context.
902
- # For simplicity, keeping it inside process_interview for now.
903
  if os.path.exists(os.path.join(OUTPUT_DIR, 'role_classifier.pkl')):
904
  clf = joblib.load(os.path.join(OUTPUT_DIR, 'role_classifier.pkl'))
905
  vectorizer = joblib.load(os.path.join(OUTPUT_DIR, 'text_vectorizer.pkl'))
906
  scaler = joblib.load(os.path.join(OUTPUT_DIR, 'feature_scaler.pkl'))
907
  else:
908
  clf, vectorizer, scaler = train_role_classifier(utterances_with_speakers)
909
-
910
  classified_utterances = classify_roles(utterances_with_speakers, clf, vectorizer, scaler)
911
-
912
- logger.info("Analyzing interviewee voice")
913
  voice_analysis = analyze_interviewee_voice(wav_file, classified_utterances)
914
-
915
  analysis_data = {
916
  'transcript': classified_utterances,
917
  'speakers': list(set(u['speaker'] for u in classified_utterances)),
@@ -921,34 +640,32 @@ def process_interview(audio_path: str):
921
  'speaker_turns': len(classified_utterances)
922
  }
923
  }
924
-
925
- # --- Calculate Acceptance Probability ---
926
- acceptance_probability = calculate_acceptance_probability(analysis_data)
927
- analysis_data['acceptance_probability'] = acceptance_probability
928
- # --- End Acceptance Probability ---
929
-
930
- logger.info("Generating report text using Gemini")
931
  gemini_report_text = generate_report(analysis_data)
932
-
933
- base_name = os.path.splitext(os.path.basename(audio_path))[0]
934
  pdf_path = os.path.join(OUTPUT_DIR, f"{base_name}_report.pdf")
935
- create_pdf_report(analysis_data, pdf_path, gemini_report_text=gemini_report_text)
936
-
937
  json_path = os.path.join(OUTPUT_DIR, f"{base_name}_analysis.json")
 
 
 
938
  with open(json_path, 'w') as f:
939
  serializable_data = convert_to_serializable(analysis_data)
940
  json.dump(serializable_data, f, indent=2)
 
 
 
 
941
 
942
- os.remove(wav_file) # Clean up WAV file after processing
943
-
944
- logger.info(f"Processing completed for {audio_path}")
945
- return {
946
- 'pdf_path': pdf_path,
947
- 'json_path': json_path
948
- }
949
  except Exception as e:
950
- logger.error(f"Processing failed: {str(e)}", exc_info=True)
951
- # Clean up wav_file in case of error
952
- if 'wav_file' in locals() and os.path.exists(wav_file):
 
 
953
  os.remove(wav_file)
954
- raise
 
 
 
 
17
  import re
18
  from typing import Dict, List, Tuple
19
  import logging
20
+ import tempfile
21
  # --- Imports for enhanced PDF ---
22
  from reportlab.lib.pagesizes import letter
23
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
 
26
  from reportlab.lib import colors
27
  import matplotlib.pyplot as plt
28
  import matplotlib
29
+ matplotlib.use('Agg')
 
30
  from reportlab.platypus import Image
31
+ import io
32
  # --- End Imports for enhanced PDF ---
33
  from transformers import AutoTokenizer, AutoModel
34
  import spacy
 
53
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
54
 
55
 
56
+ # --- NEW HELPER FUNCTION to download from URL ---
57
def download_audio_from_url(url: str, timeout: float = 60.0) -> str:
    """Download an audio file from *url* to a temporary local path.

    Args:
        url: HTTP(S) URL of the audio file.
        timeout: Connect/read timeout in seconds for the HTTP request.
            Without an explicit timeout, ``requests.get`` can block
            indefinitely on an unresponsive server.

    Returns:
        Path of the downloaded temporary file. The caller is responsible
        for deleting it when finished (``process_interview`` does so).

    Raises:
        requests.RequestException: on network errors or non-2xx status.
    """
    temp_path = os.path.join(tempfile.gettempdir(), f"{uuid.uuid4()}.tmp_audio")
    try:
        logger.info(f"Downloading audio from {url} to {temp_path}")
        with requests.get(url, stream=True, timeout=timeout) as r:
            r.raise_for_status()
            with open(temp_path, 'wb') as f:
                # Stream in 8 KiB chunks so large files never sit fully in memory.
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        return temp_path
    except Exception as e:
        logger.error(f"Failed to download audio from URL {url}: {e}")
        # Don't leave a partially-written file behind on failure.
        if os.path.exists(temp_path):
            os.remove(temp_path)
        raise
74
+ # --- END NEW HELPER FUNCTION ---
75
+
76
+
77
  # Initialize services
78
  def initialize_services():
79
  try:
 
87
  spec=ServerlessSpec(cloud="aws", region="us-east-1")
88
  )
89
  index = pc.Index(index_name)
 
90
  genai.configure(api_key=GEMINI_API_KEY)
91
  gemini_model = genai.GenerativeModel('gemini-1.5-flash')
 
92
  return index, gemini_model
93
  except Exception as e:
94
  logger.error(f"Error initializing services: {str(e)}")
95
  raise
96
 
 
97
  index, gemini_model = initialize_services()
98
 
99
  # Device setup
 
120
def load_models():
    """Load every NLP/speech model the pipeline depends on.

    Returns:
        Tuple of (speaker-embedding model, spaCy English pipeline,
        DistilBERT tokenizer, DistilBERT encoder in eval mode on `device`).
    """
    embedding_model = load_speaker_model()
    spacy_nlp = spacy.load("en_core_web_sm")
    bert_tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
    bert_encoder = AutoModel.from_pretrained("distilbert-base-uncased").to(device)
    # Inference only: disable dropout / switch batch-norm to eval statistics.
    bert_encoder.eval()
    return embedding_model, spacy_nlp, bert_tokenizer, bert_encoder
127
 
128
 
 
136
  if audio.channels > 1:
137
  audio = audio.set_channels(1)
138
  audio = audio.set_frame_rate(16000)
 
139
  wav_file = os.path.join(output_dir, f"{uuid.uuid4()}.wav")
140
  audio.export(wav_file, format="wav")
141
  return wav_file
 
150
  segment = audio[start_ms:end_ms]
151
  temp_path = os.path.join(OUTPUT_DIR, f"temp_{uuid.uuid4()}.wav")
152
  segment.export(temp_path, format="wav")
 
153
  y, sr = librosa.load(temp_path, sr=16000)
154
  pitches = librosa.piptrack(y=y, sr=sr)[0]
155
  pitches = pitches[pitches > 0]
 
156
  features = {
157
  'duration': (end_ms - start_ms) / 1000,
158
  'mean_pitch': float(np.mean(pitches)) if len(pitches) > 0 else 0.0,
 
164
  'intensityMax': float(np.max(librosa.feature.rms(y=y)[0])),
165
  'intensitySD': float(np.std(librosa.feature.rms(y=y)[0])),
166
  }
 
167
  os.remove(temp_path)
168
  return features
169
  except Exception as e:
170
  logger.error(f"Feature extraction failed: {str(e)}")
171
  return {
172
+ 'duration': 0.0, 'mean_pitch': 0.0, 'min_pitch': 0.0, 'max_pitch': 0.0,
173
+ 'pitch_sd': 0.0, 'intensityMean': 0.0, 'intensityMin': 0.0,
174
+ 'intensityMax': 0.0, 'intensitySD': 0.0
 
 
 
 
 
 
175
  }
176
 
177
 
 
184
  data=f
185
  )
186
  audio_url = upload_response.json()['upload_url']
 
187
  transcript_response = requests.post(
188
  "https://api.assemblyai.com/v2/transcript",
189
  headers={"authorization": ASSEMBLYAI_KEY},
 
194
  }
195
  )
196
  transcript_id = transcript_response.json()['id']
 
197
  while True:
198
  result = requests.get(
199
  f"https://api.assemblyai.com/v2/transcript/{transcript_id}",
200
  headers={"authorization": ASSEMBLYAI_KEY}
201
  ).json()
 
202
  if result['status'] == 'completed':
203
  return result
204
  elif result['status'] == 'error':
205
  raise Exception(result['error'])
 
206
  time.sleep(5)
207
  except Exception as e:
208
  logger.error(f"Transcription failed: {str(e)}")
 
216
  segment = full_audio[start:end]
217
  temp_path = os.path.join(OUTPUT_DIR, f"temp_{uuid.uuid4()}.wav")
218
  segment.export(temp_path, format="wav")
 
219
  with torch.no_grad():
220
+ embedding = speaker_model.get_embedding(temp_path).cpu().numpy()
 
 
221
  embedding_list = embedding.flatten().tolist()
 
 
222
  query_result = index.query(
223
+ vector=embedding_list,
224
  top_k=1,
225
  include_metadata=True
226
  )
 
227
  if query_result['matches'] and query_result['matches'][0]['score'] > 0.7:
228
  speaker_id = query_result['matches'][0]['id']
229
  speaker_name = query_result['matches'][0]['metadata']['speaker_name']
230
  else:
231
  speaker_id = f"unknown_{uuid.uuid4().hex[:6]}"
232
  speaker_name = f"Speaker_{speaker_id[-4:]}"
233
+ index.upsert([(speaker_id, embedding_list, {"speaker_name": speaker_name})])
 
234
  os.remove(temp_path)
 
235
  return {
236
  **utterance,
237
  'speaker': speaker_name,
238
  'speaker_id': speaker_id,
239
+ 'embedding': embedding_list
240
  }
241
  except Exception as e:
242
  logger.error(f"Utterance processing failed: {str(e)}", exc_info=True)
 
252
  try:
253
  full_audio = AudioSegment.from_wav(wav_file)
254
  utterances = transcript['utterances']
255
+ with ThreadPoolExecutor(max_workers=5) as executor:
 
256
  futures = [
257
  executor.submit(process_utterance, utterance, full_audio, wav_file)
258
  for utterance in utterances
259
  ]
260
  results = [f.result() for f in futures]
 
261
  return results
262
  except Exception as e:
263
  logger.error(f"Speaker identification failed: {str(e)}")
 
269
  texts = [u['text'] for u in utterances]
270
  vectorizer = TfidfVectorizer(max_features=500, ngram_range=(1, 2))
271
  X_text = vectorizer.fit_transform(texts)
 
272
  features = []
273
  labels = []
 
274
  for i, utterance in enumerate(utterances):
275
  prosodic = utterance['prosodic_features']
276
  feat = [
277
+ prosodic['duration'], prosodic['mean_pitch'], prosodic['min_pitch'],
278
+ prosodic['max_pitch'], prosodic['pitch_sd'], prosodic['intensityMean'],
279
+ prosodic['intensityMin'], prosodic['intensityMax'], prosodic['intensitySD'],
 
 
 
 
 
 
280
  ]
 
281
  feat.extend(X_text[i].toarray()[0].tolist())
 
282
  doc = nlp(utterance['text'])
283
  feat.extend([
284
  int(utterance['text'].endswith('?')),
 
287
  sum(1 for token in doc if token.pos_ == 'VERB'),
288
  sum(1 for token in doc if token.pos_ == 'NOUN')
289
  ])
 
290
  features.append(feat)
291
  labels.append(0 if i % 2 == 0 else 1)
 
292
  scaler = StandardScaler()
293
  X = scaler.fit_transform(features)
 
294
  clf = RandomForestClassifier(
295
+ n_estimators=150, max_depth=10, random_state=42, class_weight='balanced'
 
 
 
296
  )
297
  clf.fit(X, labels)
 
298
  joblib.dump(clf, os.path.join(OUTPUT_DIR, 'role_classifier.pkl'))
299
  joblib.dump(vectorizer, os.path.join(OUTPUT_DIR, 'text_vectorizer.pkl'))
300
  joblib.dump(scaler, os.path.join(OUTPUT_DIR, 'feature_scaler.pkl'))
 
301
  return clf, vectorizer, scaler
302
  except Exception as e:
303
  logger.error(f"Classifier training failed: {str(e)}")
 
308
  try:
309
  texts = [u['text'] for u in utterances]
310
  X_text = vectorizer.transform(texts)
 
311
  results = []
312
  for i, utterance in enumerate(utterances):
313
  prosodic = utterance['prosodic_features']
314
  feat = [
315
+ prosodic['duration'], prosodic['mean_pitch'], prosodic['min_pitch'],
316
+ prosodic['max_pitch'], prosodic['pitch_sd'], prosodic['intensityMean'],
317
+ prosodic['intensityMin'], prosodic['intensityMax'], prosodic['intensitySD'],
 
 
 
 
 
 
318
  ]
 
319
  feat.extend(X_text[i].toarray()[0].tolist())
 
320
  doc = nlp(utterance['text'])
321
  feat.extend([
322
  int(utterance['text'].endswith('?')),
 
325
  sum(1 for token in doc if token.pos_ == 'VERB'),
326
  sum(1 for token in doc if token.pos_ == 'NOUN')
327
  ])
 
328
  X = scaler.transform([feat])
329
  role = 'Interviewer' if clf.predict(X)[0] == 0 else 'Interviewee'
 
330
  results.append({**utterance, 'role': role})
 
331
  return results
332
  except Exception as e:
333
  logger.error(f"Role classification failed: {str(e)}")
 
337
  def analyze_interviewee_voice(audio_path: str, utterances: List[Dict]) -> Dict:
338
  try:
339
  y, sr = librosa.load(audio_path, sr=16000)
 
340
  interviewee_utterances = [u for u in utterances if u['role'] == 'Interviewee']
341
  if not interviewee_utterances:
342
  return {'error': 'No interviewee utterances found'}
 
343
  segments = []
344
  for u in interviewee_utterances:
345
  start = int(u['start'] * sr / 1000)
346
  end = int(u['end'] * sr / 1000)
347
  segments.append(y[start:end])
 
 
 
348
  total_duration = sum(u['prosodic_features']['duration'] for u in interviewee_utterances)
349
  total_words = sum(len(u['text'].split()) for u in interviewee_utterances)
350
  speaking_rate = total_words / total_duration if total_duration > 0 else 0
 
351
  filler_words = ['um', 'uh', 'like', 'you know', 'so', 'i mean']
352
+ filler_count = sum(sum(u['text'].lower().count(fw) for fw in filler_words) for u in interviewee_utterances)
 
 
 
353
  filler_ratio = filler_count / total_words if total_words > 0 else 0
 
354
  all_words = ' '.join(u['text'].lower() for u in interviewee_utterances).split()
355
  word_counts = {}
356
  for i in range(len(all_words) - 1):
357
  bigram = (all_words[i], all_words[i + 1])
358
  word_counts[bigram] = word_counts.get(bigram, 0) + 1
359
+ repetition_score = sum(1 for count in word_counts.values() if count > 1) / len(word_counts) if word_counts else 0
 
 
360
  pitches = []
361
  for segment in segments:
362
  f0, voiced_flag, _ = librosa.pyin(segment, fmin=80, fmax=300, sr=sr)
363
  pitches.extend(f0[voiced_flag])
 
364
  pitch_mean = np.mean(pitches) if len(pitches) > 0 else 0
365
  pitch_std = np.std(pitches) if len(pitches) > 0 else 0
366
  jitter = np.mean(np.abs(np.diff(pitches))) / pitch_mean if len(pitches) > 1 and pitch_mean > 0 else 0
 
367
  intensities = []
368
  for segment in segments:
369
  rms = librosa.feature.rms(y=segment)[0]
370
  intensities.extend(rms)
 
371
  intensity_mean = np.mean(intensities) if intensities else 0
372
  intensity_std = np.std(intensities) if intensities else 0
373
+ shimmer = np.mean(np.abs(np.diff(intensities))) / intensity_mean if len(intensities) > 1 and intensity_mean > 0 else 0
 
 
374
  anxiety_score = 0.6 * (pitch_std / pitch_mean) + 0.4 * (jitter + shimmer) if pitch_mean > 0 else 0
375
  confidence_score = 0.7 * (1 / (1 + intensity_std)) + 0.3 * (1 / (1 + filler_ratio))
376
  hesitation_score = filler_ratio + repetition_score
 
377
  anxiety_level = 'high' if anxiety_score > 0.15 else 'moderate' if anxiety_score > 0.07 else 'low'
378
  confidence_level = 'high' if confidence_score > 0.7 else 'moderate' if confidence_score > 0.5 else 'low'
379
+ fluency_level = 'fluent' if (filler_ratio < 0.05 and repetition_score < 0.1) else 'moderate' if (filler_ratio < 0.1 and repetition_score < 0.2) else 'disfluent'
 
 
380
  return {
381
  'speaking_rate': float(round(speaking_rate, 2)),
382
  'filler_ratio': float(round(filler_ratio, 4)),
383
  'repetition_score': float(round(repetition_score, 4)),
384
+ 'pitch_analysis': {'mean': float(round(pitch_mean, 2)), 'std_dev': float(round(pitch_std, 2)), 'jitter': float(round(jitter, 4))},
385
+ 'intensity_analysis': {'mean': float(round(intensity_mean, 2)), 'std_dev': float(round(intensity_std, 2)), 'shimmer': float(round(shimmer, 4))},
386
+ 'composite_scores': {'anxiety': float(round(anxiety_score, 4)), 'confidence': float(round(confidence_score, 4)), 'hesitation': float(round(hesitation_score, 4))},
387
+ 'interpretation': {'anxiety_level': anxiety_level, 'confidence_level': confidence_level, 'fluency_level': fluency_level}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
388
  }
389
  except Exception as e:
390
  logger.error(f"Voice analysis failed: {str(e)}")
 
392
 
393
 
394
def generate_voice_interpretation(analysis: Dict) -> str:
    """Render a human-readable summary of the voice-analysis metrics.

    Returns a fixed fallback string when the analysis dict carries an
    'error' key (i.e. no interviewee audio could be analysed).
    """
    if 'error' in analysis:
        return "Voice analysis not available."

    scores = analysis['composite_scores']
    levels = analysis['interpretation']

    summary = [
        "Voice Analysis Summary:",
        f"- Speaking Rate: {analysis['speaking_rate']} words/sec (average)",
        f"- Filler Words: {analysis['filler_ratio'] * 100:.1f}% of words",
        f"- Repetition Score: {analysis['repetition_score']:.3f}",
        f"- Anxiety Level: {levels['anxiety_level'].upper()} (score: {scores['anxiety']:.3f})",
        f"- Confidence Level: {levels['confidence_level'].upper()} (score: {scores['confidence']:.3f})",
        f"- Fluency: {levels['fluency_level'].upper()}",
    ]
    details = [
        "",
        "Detailed Interpretation:",
        "1. A higher speaking rate indicates faster speech, which can suggest nervousness or enthusiasm.",
        "2. Filler words and repetitions reduce speech clarity and professionalism.",
        "3. Anxiety is measured through pitch variability and voice instability.",
        "4. Confidence is assessed through voice intensity and stability.",
        "5. Fluency combines filler words and repetition metrics.",
    ]
    return "\n".join(summary + details)
414
 
415
 
416
def generate_anxiety_confidence_chart(composite_scores: Dict, chart_path_or_buffer):
    """Write a small PNG bar chart of the anxiety vs. confidence scores.

    Args:
        composite_scores: dict with optional 'anxiety' and 'confidence'
            keys; missing keys default to 0.
        chart_path_or_buffer: file path or binary buffer handed to
            matplotlib's ``savefig``.

    Errors are logged rather than raised so report generation continues.
    """
    try:
        bar_labels = ['Anxiety', 'Confidence']
        bar_values = [composite_scores.get('anxiety', 0), composite_scores.get('confidence', 0)]

        fig, ax = plt.subplots(figsize=(4, 2.5))
        ax.bar(bar_labels, bar_values, color=['lightcoral', 'lightskyblue'])
        ax.set_ylabel('Score')
        ax.set_title('Anxiety vs. Confidence Scores')
        ax.set_ylim(0, 1.0)

        # Annotate each bar with its numeric value just above its top.
        for pos, val in enumerate(bar_values):
            ax.text(pos, val + 0.05, f"{val:.2f}", color='black', ha='center', fontweight='bold')

        plt.tight_layout()
        plt.savefig(chart_path_or_buffer, format='png', bbox_inches='tight')
        plt.close(fig)
    except Exception as e:
        logger.error(f"Error generating chart: {str(e)}")
432
 
433
 
 
434
def calculate_acceptance_probability(analysis_data: Dict) -> float:
    """Estimate a 0-100 acceptance probability from the voice metrics.

    Combines confidence, (inverted) anxiety, fluency, closeness of the
    speaking rate to an ideal value, filler/repetition cleanliness and a
    crude content-strength proxy into a weighted, normalised percentage.
    Returns 0.0 when voice analysis failed.
    """
    voice = analysis_data.get('voice_analysis', {})
    if 'error' in voice:
        return 0.0

    # Component weights. A negative sign marks "lower raw value is better";
    # only the magnitudes enter the weighted sum below.
    w_confidence = 0.4
    w_anxiety = -0.3
    w_fluency = 0.2
    w_speaking_rate = 0.1
    w_filler_repetition = -0.1
    w_content_strengths = 0.2

    composite = voice.get('composite_scores', {})
    confidence_score = composite.get('confidence', 0.0)
    anxiety_score = composite.get('anxiety', 0.0)

    # Map the categorical fluency label onto [0, 1]; unknown labels -> 0.
    fluency_map = {'fluent': 1.0, 'moderate': 0.5, 'disfluent': 0.0}
    fluency_val = fluency_map.get(voice.get('interpretation', {}).get('fluency_level', 'disfluent'), 0.0)

    # Score the speaking rate by relative distance from an ideal 2.5 words/sec.
    ideal_speaking_rate = 2.5
    deviation = abs(voice.get('speaking_rate', 0.0) - ideal_speaking_rate)
    speaking_rate_score = max(0, 1 - (deviation / ideal_speaking_rate))

    # Fewer fillers / repetitions -> higher score.
    disfluency = (voice.get('filler_ratio', 0.0) + voice.get('repetition_score', 0.0)) / 2
    filler_repetition_score = max(0, 1 - disfluency)

    # Crude proxy: any transcribed content at all counts as "strong".
    has_content = analysis_data.get('text_analysis', {}).get('total_duration', 0) > 0
    content_strength_val = 0.8 if has_content else 0.0

    raw_score = (
        confidence_score * w_confidence
        + (1 - anxiety_score) * abs(w_anxiety)
        + fluency_val * w_fluency
        + speaking_rate_score * w_speaking_rate
        + filler_repetition_score * abs(w_filler_repetition)
        + content_strength_val * w_content_strengths
    )
    max_possible_score = (
        w_confidence + abs(w_anxiety) + w_fluency
        + w_speaking_rate + abs(w_filler_repetition) + w_content_strengths
    )
    if max_possible_score == 0:
        return 50.0

    probability = max(0.0, min(1.0, raw_score / max_possible_score))
    # Round to two decimals via the same string formatting as elsewhere.
    return float(f"{probability * 100:.2f}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
458
 
459
 
460
  def generate_report(analysis_data: Dict) -> str:
461
  try:
462
  voice = analysis_data.get('voice_analysis', {})
463
  voice_interpretation = generate_voice_interpretation(voice)
464
+ interviewee_responses = [f"Speaker {u['speaker']} ({u['role']}): {u['text']}" for u in analysis_data['transcript'] if u['role'] == 'Interviewee'][:5]
 
 
 
 
 
 
465
  acceptance_prob = analysis_data.get('acceptance_probability', None)
466
  acceptance_line = ""
467
  if acceptance_prob is not None:
468
  acceptance_line = f"\n**Estimated Acceptance Probability: {acceptance_prob:.2f}%**\n"
469
+ if acceptance_prob >= 80: acceptance_line += "This indicates a very strong candidate. Well done!"
470
+ elif acceptance_prob >= 50: acceptance_line += "This indicates a solid candidate with potential for improvement."
471
+ else: acceptance_line += "This candidate may require significant development or may not be a strong fit."
 
 
 
 
472
  prompt = f"""
473
+ As EvalBot, an AI interview analysis system, generate a highly professional, well-structured, and concise interview analysis report. Use clear headings and subheadings. For bullet points, use '- '.
 
 
 
474
  {acceptance_line}
 
475
  **1. Executive Summary**
476
  Provide a brief, high-level overview of the interview.
477
  - Overall interview duration: {analysis_data['text_analysis']['total_duration']:.2f} seconds
478
  - Number of speaker turns: {analysis_data['text_analysis']['speaker_turns']}
479
  - Main participants: {', '.join(analysis_data['speakers'])}
 
480
  **2. Voice Analysis Insights**
481
  Analyze key voice metrics and provide a detailed interpretation.
482
  {voice_interpretation}
 
483
  **3. Content Analysis & Strengths/Areas for Development**
484
  Analyze the key themes and identify both strengths and areas for development in the interviewee's responses.
485
  Key responses from interviewee (for context):
486
  {chr(10).join(interviewee_responses)}
 
487
  **4. Actionable Recommendations**
488
  Offer specific, actionable suggestions for improvement.
489
+ Focus on: Communication Skills, Content Delivery, Professional Presentation.
 
 
 
490
  """
 
491
  response = gemini_model.generate_content(prompt)
492
  return response.text
493
  except Exception as e:
 
495
  return f"Error generating report: {str(e)}"
496
 
497
 
 
498
  def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text: str):
499
  try:
500
  doc = SimpleDocTemplate(output_path, pagesize=letter)
501
  styles = getSampleStyleSheet()
502
+ h1 = ParagraphStyle(name='Heading1', parent=styles['h1'], fontSize=16, spaceAfter=14, alignment=1, textColor=colors.HexColor('#003366'))
503
+ h2 = ParagraphStyle(name='Heading2', parent=styles['h2'], fontSize=12, spaceBefore=10, spaceAfter=8, textColor=colors.HexColor('#336699'))
504
+ h3 = ParagraphStyle(name='Heading3', parent=styles['h3'], fontSize=10, spaceBefore=8, spaceAfter=4, textColor=colors.HexColor('#0055AA'))
 
 
 
 
 
505
  body_text = ParagraphStyle(name='BodyText', parent=styles['Normal'], fontSize=9, leading=12, spaceAfter=4)
506
+ bullet_style = ParagraphStyle(name='Bullet', parent=body_text, leftIndent=18, bulletIndent=9)
 
 
507
  story = []
 
 
508
  story.append(Paragraph(f"<b>EvalBot Interview Analysis Report</b>", h1))
509
  story.append(Spacer(1, 0.2 * inch))
510
  story.append(Paragraph(f"<b>Date:</b> {time.strftime('%Y-%m-%d')}", body_text))
511
  story.append(Spacer(1, 0.3 * inch))
512
+ acceptance_prob = analysis_data.get('acceptance_probability')
 
 
513
  if acceptance_prob is not None:
514
  story.append(Paragraph("<b>Candidate Evaluation Summary</b>", h2))
515
  story.append(Spacer(1, 0.1 * inch))
516
+ prob_color = colors.green if acceptance_prob >= 70 else (colors.orange if acceptance_prob >= 40 else colors.red)
517
+ story.append(Paragraph(f"<font size='12' color='{prob_color.hexval()}'><b>Estimated Acceptance Probability: {acceptance_prob:.2f}%</b></font>", ParagraphStyle(name='AcceptanceProbability', parent=styles['Normal'], fontSize=12, spaceAfter=10, alignment=1)))
518
+ if acceptance_prob >= 80: story.append(Paragraph("This indicates a very strong candidate with high potential. Well done!", body_text))
519
+ elif acceptance_prob >= 50: story.append(Paragraph("This candidate shows solid potential but has areas for improvement.", body_text))
520
+ else: story.append(Paragraph("This candidate may require significant development or may not be an ideal fit.", body_text))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
521
  story.append(Spacer(1, 0.3 * inch))
522
+
 
 
523
  sections = {}
524
  current_section = None
 
525
  section_patterns = {
526
  r'^\s*\*\*\s*1\.\s*Executive Summary\s*\*\*': 'Executive Summary',
527
  r'^\s*\*\*\s*2\.\s*Voice Analysis Insights\s*\*\*': 'Voice Analysis Insights',
528
+ r'^\s*\*\*\s*3\.\s*Content Analysis & Strengths/Areas for Development\s*\*\*': 'Content Analysis',
529
+ r'^\s*\*\*\s*4\.\s*Actionable Recommendations\s*\*\*': 'Recommendations'
530
  }
 
531
  for line in gemini_report_text.split('\n'):
532
  matched_section = False
533
  for pattern, section_name in section_patterns.items():
 
538
  break
539
  if not matched_section and current_section:
540
  sections[current_section].append(line)
541
+
542
+ story.append(PageBreak()) # Start detailed report on a new page
543
+
544
+ story.append(Paragraph("<b>1. Detailed Voice Analysis</b>", h2))
 
 
 
 
 
 
 
 
545
  voice_analysis = analysis_data.get('voice_analysis', {})
 
546
  if voice_analysis and 'error' not in voice_analysis:
 
547
  table_data = [
548
  ['Metric', 'Value', 'Interpretation'],
549
  ['Speaking Rate', f"{voice_analysis['speaking_rate']:.2f} words/sec", 'Average rate'],
550
+ ['Filler Words', f"{voice_analysis['filler_ratio'] * 100:.1f}%", '% of total words'],
551
+ ['Repetition Score', f"{voice_analysis['repetition_score']:.3f}", 'Lower is better'],
552
+ ['Anxiety Level', voice_analysis['interpretation']['anxiety_level'].upper(), f"Score: {voice_analysis['composite_scores']['anxiety']:.3f}"],
553
+ ['Confidence Level', voice_analysis['interpretation']['confidence_level'].upper(), f"Score: {voice_analysis['composite_scores']['confidence']:.3f}"],
 
 
554
  ['Fluency', voice_analysis['interpretation']['fluency_level'].upper(), 'Overall speech flow']
555
  ]
556
+ table = Table(table_data, colWidths=[1.5*inch, 1.5*inch, 3*inch])
557
+ table.setStyle(TableStyle([
558
+ ('BACKGROUND', (0,0), (-1,0), colors.HexColor('#4682B4')),
559
+ ('TEXTCOLOR',(0,0),(-1,0),colors.whitesmoke),
560
+ ('ALIGN', (0,0), (-1,-1), 'CENTER'),
561
  ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
562
+ ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
563
+ ('BACKGROUND', (0, 1), (-1, -1), colors.HexColor('#F0F8FF')),
564
+ ('GRID', (0,0), (-1,-1), 1, colors.black)
565
+ ]))
 
 
 
 
 
 
 
566
  story.append(table)
567
  story.append(Spacer(1, 0.2 * inch))
568
 
569
+ chart_buffer = io.BytesIO()
570
+ generate_anxiety_confidence_chart(voice_analysis['composite_scores'], chart_buffer)
571
+ chart_buffer.seek(0)
572
+ img = Image(chart_buffer, width=4*inch, height=2.5*inch)
573
+ story.append(img)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
574
  else:
575
+ story.append(Paragraph("Voice analysis not available.", body_text))
576
+
577
+ story.append(PageBreak())
578
+
579
+ for section_title, key in [("2. Content Analysis", "Content Analysis"), ("3. Recommendations", "Recommendations")]:
580
+ story.append(Paragraph(f"<b>{section_title}</b>", h2))
581
+ if key in sections:
582
+ for line in sections[key]:
583
+ if line.strip():
584
+ story.append(Paragraph(line.strip().lstrip('-').strip(), bullet if line.strip().startswith('-') else body_text))
585
+ story.append(Spacer(1, 0.2*inch))
586
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
587
  doc.build(story)
588
  return True
589
  except Exception as e:
 
592
 
593
 
594
def convert_to_serializable(obj):
    """Recursively convert numpy scalars/arrays inside *obj* to plain
    Python types so the structure can be passed to ``json.dump``.

    Handles dicts, lists, tuples and sets as containers (tuples/sets
    previously slipped through unconverted, and sets are not JSON
    serialisable at all); any other type is returned unchanged.
    """
    if isinstance(obj, np.generic):
        # numpy scalar (np.float32, np.int64, ...) -> native Python scalar
        return obj.item()
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, dict):
        return {key: convert_to_serializable(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [convert_to_serializable(item) for item in obj]
    if isinstance(obj, tuple):
        # json serialises tuples as lists, but members still need converting.
        return tuple(convert_to_serializable(item) for item in obj)
    if isinstance(obj, set):
        # Sets raise TypeError in json.dump; emit a list instead.
        return [convert_to_serializable(item) for item in obj]
    return obj
600
 
601
+ # --- MODIFIED MAIN FUNCTION ---
602
+ def process_interview(audio_path_or_url: str):
603
+ local_audio_path = None
604
+ wav_file = None
605
+ is_downloaded = False
606
  try:
607
+ logger.info(f"Starting processing for {audio_path_or_url}")
608
+
609
+ if audio_path_or_url.startswith(('http://', 'https://')):
610
+ local_audio_path = download_audio_from_url(audio_path_or_url)
611
+ is_downloaded = True
612
+ else:
613
+ local_audio_path = audio_path_or_url
614
+
615
+ wav_file = convert_to_wav(local_audio_path)
616
  transcript = transcribe(wav_file)
617
+
 
618
  for utterance in transcript['utterances']:
619
+ utterance['prosodic_features'] = extract_prosodic_features(wav_file, utterance['start'], utterance['end'])
620
+
 
 
 
 
 
621
  utterances_with_speakers = identify_speakers(transcript, wav_file)
622
+
623
+ clf, vectorizer, scaler = None, None, None
 
 
 
624
  if os.path.exists(os.path.join(OUTPUT_DIR, 'role_classifier.pkl')):
625
  clf = joblib.load(os.path.join(OUTPUT_DIR, 'role_classifier.pkl'))
626
  vectorizer = joblib.load(os.path.join(OUTPUT_DIR, 'text_vectorizer.pkl'))
627
  scaler = joblib.load(os.path.join(OUTPUT_DIR, 'feature_scaler.pkl'))
628
  else:
629
  clf, vectorizer, scaler = train_role_classifier(utterances_with_speakers)
630
+
631
  classified_utterances = classify_roles(utterances_with_speakers, clf, vectorizer, scaler)
 
 
632
  voice_analysis = analyze_interviewee_voice(wav_file, classified_utterances)
633
+
634
  analysis_data = {
635
  'transcript': classified_utterances,
636
  'speakers': list(set(u['speaker'] for u in classified_utterances)),
 
640
  'speaker_turns': len(classified_utterances)
641
  }
642
  }
643
+
644
+ analysis_data['acceptance_probability'] = calculate_acceptance_probability(analysis_data)
 
 
 
 
 
645
  gemini_report_text = generate_report(analysis_data)
646
+
647
+ base_name = str(uuid.uuid4())
648
  pdf_path = os.path.join(OUTPUT_DIR, f"{base_name}_report.pdf")
 
 
649
  json_path = os.path.join(OUTPUT_DIR, f"{base_name}_analysis.json")
650
+
651
+ create_pdf_report(analysis_data, pdf_path, gemini_report_text=gemini_report_text)
652
+
653
  with open(json_path, 'w') as f:
654
  serializable_data = convert_to_serializable(analysis_data)
655
  json.dump(serializable_data, f, indent=2)
656
+
657
+ logger.info(f"Processing completed for {audio_path_or_url}")
658
+
659
+ return {'pdf_path': pdf_path, 'json_path': json_path}
660
 
 
 
 
 
 
 
 
661
  except Exception as e:
662
+ logger.error(f"Processing failed for {audio_path_or_url}: {str(e)}", exc_info=True)
663
+ raise
664
+
665
+ finally:
666
+ if wav_file and os.path.exists(wav_file):
667
  os.remove(wav_file)
668
+ if is_downloaded and local_audio_path and os.path.exists(local_audio_path):
669
+ os.remove(local_audio_path)
670
+ logger.info(f"Cleaned up temporary downloaded file: {local_audio_path}")
671
+ # --- END MODIFIED MAIN FUNCTION ---