norhan12 commited on
Commit
dfaa2b7
·
verified ·
1 Parent(s): aef8aac

Update process_interview.py

Browse files
Files changed (1) hide show
  1. process_interview.py +219 -668
process_interview.py CHANGED
@@ -19,7 +19,7 @@ from typing import Dict, List, Tuple
19
  import logging
20
  import tempfile
21
  from reportlab.lib.pagesizes import letter
22
- from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, Image
23
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
24
  from reportlab.lib.units import inch
25
  from reportlab.lib import colors
@@ -27,7 +27,7 @@ import matplotlib.pyplot as plt
27
  import matplotlib
28
  matplotlib.use('Agg')
29
  import io
30
- from transformers import AutoTokenizer, AutoModel
31
  import spacy
32
  import google.generativeai as genai
33
  import joblib
@@ -35,61 +35,28 @@ from concurrent.futures import ThreadPoolExecutor
35
 
36
  # Setup logging
37
  logging.basicConfig(level=logging.INFO)
38
- logger = logging.getLogger(__name__)
39
- logging.getLogger("nemo_logger").setLevel(logging.WARNING)
 
40
 
41
  # Configuration
42
- AUDIO_DIR = "./Uploads"
43
  OUTPUT_DIR = "./processed_audio"
44
  os.makedirs(OUTPUT_DIR, exist_ok=True)
45
 
46
  # API Keys
47
- PINECONE_KEY = os.getenv("PINECONE_KEY", "your-pinecone-key")
48
- ASSEMBLYAI_KEY = os.getenv("ASSEMBLYAI_KEY", "your-assemblyai-key")
49
- GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "your-gemini-key")
50
 
51
def validate_url(url: str) -> bool:
    """Check whether *url* is reachable.

    Issues a lightweight HEAD request. Redirects are followed and any
    non-error status is accepted, so resources behind a 301/302 (common
    for shared-storage links) are not wrongly rejected.

    Args:
        url: The URL to probe.

    Returns:
        True if the server answered with a success status (< 400),
        False on a network failure or an error status.
    """
    try:
        # Fix: `requests.head` does not follow redirects by default and
        # `== 200` rejected other success codes; follow redirects and
        # accept any non-error response instead.
        response = requests.head(url, timeout=5, allow_redirects=True)
        return response.ok
    except requests.RequestException as e:
        logger.error(f"URL validation failed for {url}: {str(e)}")
        return False
60
def download_audio_from_url(url: str) -> str:
    """Download an audio file from *url* to a temporary local path.

    Streams the response to disk in 8 KiB chunks. On any failure the
    partially written temp file is removed, so failed downloads do not
    accumulate in the temp directory.

    Args:
        url: Remote location of the audio file.

    Returns:
        Filesystem path of the downloaded temporary file.

    Raises:
        ValueError: if the URL fails the reachability pre-check.
        requests.HTTPError: if the server returns an error status.
        Exception: re-raised for any other download failure.
    """
    if not validate_url(url):
        logger.error(f"Invalid or inaccessible URL: {url}")
        raise ValueError(f"Audio file not found at {url}")
    temp_dir = tempfile.gettempdir()
    temp_path = os.path.join(temp_dir, f"{uuid.uuid4()}.tmp_audio")
    try:
        logger.info(f"Downloading audio from {url} to {temp_path}")
        with requests.get(url, stream=True, timeout=10) as r:
            r.raise_for_status()
            with open(temp_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        return temp_path
    except requests.HTTPError as e:
        logger.error(f"HTTP error downloading audio from {url}: {str(e)}")
        # Fix: don't leave a partial file behind on failure.
        if os.path.exists(temp_path):
            os.remove(temp_path)
        raise
    except Exception as e:
        logger.error(f"Failed to download audio from URL {url}: {str(e)}")
        if os.path.exists(temp_path):
            os.remove(temp_path)
        raise
81
 
82
  def initialize_services():
83
  try:
84
  pc = Pinecone(api_key=PINECONE_KEY)
85
  index_name = "interview-speaker-embeddings"
86
  if index_name not in pc.list_indexes().names():
87
- pc.create_index(
88
- name=index_name,
89
- dimension=192,
90
- metric="cosine",
91
- spec=ServerlessSpec(cloud="aws", region="us-east-1")
92
- )
93
  index = pc.Index(index_name)
94
  genai.configure(api_key=GEMINI_API_KEY)
95
  gemini_model = genai.GenerativeModel('gemini-1.5-flash')
@@ -106,10 +73,7 @@ logger.info(f"Using device: {device}")
106
  def load_speaker_model():
107
  try:
108
  torch.set_num_threads(5)
109
- model = EncDecSpeakerLabelModel.from_pretrained(
110
- "nvidia/speakerverification_en_titanet_large",
111
- map_location=device
112
- )
113
  model.eval()
114
  return model
115
  except Exception as e:
@@ -129,8 +93,7 @@ speaker_model, nlp, tokenizer, llm_model = load_models()
129
  def convert_to_wav(audio_path: str, output_dir: str = OUTPUT_DIR) -> str:
130
  try:
131
  audio = AudioSegment.from_file(audio_path)
132
- if audio.channels > 1:
133
- audio = audio.set_channels(1)
134
  audio = audio.set_frame_rate(16000)
135
  wav_file = os.path.join(output_dir, f"{uuid.uuid4()}.wav")
136
  audio.export(wav_file, format="wav")
@@ -143,13 +106,14 @@ def extract_prosodic_features(audio_path: str, start_ms: int, end_ms: int) -> Di
143
  try:
144
  audio = AudioSegment.from_file(audio_path)
145
  segment = audio[start_ms:end_ms]
146
- temp_path = os.path.join(OUTPUT_DIR, f"temp_{uuid.uuid4()}.wav")
147
- segment.export(temp_path, format="wav")
148
- y, sr = librosa.load(temp_path, sr=16000)
149
- pitches = librosa.piptrack(y=y, sr=sr)[0]
 
150
  pitches = pitches[pitches > 0]
151
- features = {
152
- 'duration': (end_ms - start_ms) / 1000,
153
  'mean_pitch': float(np.mean(pitches)) if len(pitches) > 0 else 0.0,
154
  'min_pitch': float(np.min(pitches)) if len(pitches) > 0 else 0.0,
155
  'max_pitch': float(np.max(pitches)) if len(pitches) > 0 else 0.0,
@@ -159,733 +123,320 @@ def extract_prosodic_features(audio_path: str, start_ms: int, end_ms: int) -> Di
159
  'intensityMax': float(np.max(librosa.feature.rms(y=y)[0])),
160
  'intensitySD': float(np.std(librosa.feature.rms(y=y)[0])),
161
  }
162
- os.remove(temp_path)
163
- return features
164
  except Exception as e:
165
  logger.error(f"Feature extraction failed: {str(e)}")
166
- return {
167
- 'duration': 0.0, 'mean_pitch': 0.0, 'min_pitch': 0.0, 'max_pitch': 0.0,
168
- 'pitch_sd': 0.0, 'intensityMean': 0.0, 'intensityMin': 0.0,
169
- 'intensityMax': 0.0, 'intensitySD': 0.0
170
- }
171
 
172
def transcribe(audio_path: str) -> Dict:
    """Transcribe *audio_path* with the AssemblyAI API (speaker labels on).

    Uploads the file, submits a transcription job with speaker labels and
    profanity filtering enabled, then polls every 5 seconds until the job
    completes or reports an error.

    Args:
        audio_path: Local path of the audio file to transcribe.

    Returns:
        The completed transcript JSON from AssemblyAI.

    Raises:
        Exception: if the API reports an error or an HTTP request fails.
    """
    try:
        with open(audio_path, 'rb') as f:
            upload_response = requests.post(
                "https://api.assemblyai.com/v2/upload",
                headers={"authorization": ASSEMBLYAI_KEY},
                data=f
            )
        # Fix: surface HTTP failures explicitly instead of a confusing
        # KeyError on the missing 'upload_url' / 'id' fields.
        upload_response.raise_for_status()
        audio_url = upload_response.json()['upload_url']
        transcript_response = requests.post(
            "https://api.assemblyai.com/v2/transcript",
            headers={"authorization": ASSEMBLYAI_KEY},
            json={
                "audio_url": audio_url,
                "speaker_labels": True,
                "filter_profanity": True
            }
        )
        transcript_response.raise_for_status()
        transcript_id = transcript_response.json()['id']
        while True:
            result = requests.get(
                f"https://api.assemblyai.com/v2/transcript/{transcript_id}",
                headers={"authorization": ASSEMBLYAI_KEY}
            ).json()
            if result['status'] == 'completed':
                return result
            elif result['status'] == 'error':
                raise Exception(result['error'])
            time.sleep(5)
    except Exception as e:
        logger.error(f"Transcription failed: {str(e)}")
        raise
204
 
205
def process_utterance(utterance: Dict, full_audio: AudioSegment, wav_file: str) -> Dict:
    """Attach a speaker identity to one transcript utterance.

    Exports the utterance's audio span to a temp WAV, embeds it with the
    speaker model, and matches the embedding against the Pinecone index
    (cosine score > 0.7 counts as a known speaker). Unknown speakers are
    registered under a fresh id.

    Args:
        utterance: Transcript utterance with 'start'/'end' in milliseconds.
        full_audio: The full interview audio, already loaded.
        wav_file: Path of the source WAV (unused here; kept for interface).

    Returns:
        The utterance dict extended with 'speaker', 'speaker_id' and
        'embedding'; falls back to 'Unknown' on any failure.
    """
    try:
        start = utterance['start']
        end = utterance['end']
        segment = full_audio[start:end]
        temp_path = os.path.join(OUTPUT_DIR, f"temp_{uuid.uuid4()}.wav")
        try:
            segment.export(temp_path, format="wav")
            with torch.no_grad():
                embedding = speaker_model.get_embedding(temp_path).cpu().numpy()
        finally:
            # Fix: remove the temp WAV even when embedding fails, so
            # errors don't leak files into OUTPUT_DIR.
            if os.path.exists(temp_path):
                os.remove(temp_path)
        embedding_list = embedding.flatten().tolist()
        query_result = index.query(
            vector=embedding_list,
            top_k=1,
            include_metadata=True
        )
        if query_result['matches'] and query_result['matches'][0]['score'] > 0.7:
            speaker_id = query_result['matches'][0]['id']
            speaker_name = query_result['matches'][0]['metadata']['speaker_name']
        else:
            # New speaker: mint an id and register the embedding.
            speaker_id = f"unknown_{uuid.uuid4().hex[:6]}"
            speaker_name = f"Speaker_{speaker_id[-4:]}"
            index.upsert([(speaker_id, embedding_list, {"speaker_name": speaker_name})])
        return {
            **utterance,
            'speaker': speaker_name,
            'speaker_id': speaker_id,
            'embedding': embedding_list
        }
    except Exception as e:
        logger.error(f"Utterance processing failed: {str(e)}")
        return {
            **utterance,
            'speaker': 'Unknown',
            'speaker_id': 'unknown',
            'embedding': None
        }
242
 
243
def identify_speakers(transcript: Dict, wav_file: str) -> List[Dict]:
    """Run speaker identification for every utterance in *transcript*.

    Loads the interview WAV once and fans the per-utterance work out to a
    small thread pool; results come back in the original utterance order.

    Raises:
        Exception: re-raised after logging if any utterance task fails.
    """
    try:
        audio = AudioSegment.from_wav(wav_file)
        pending = []
        with ThreadPoolExecutor(max_workers=5) as pool:
            for utt in transcript['utterances']:
                pending.append(pool.submit(process_utterance, utt, audio, wav_file))
            labelled = [task.result() for task in pending]
        return labelled
    except Exception as e:
        logger.error(f"Speaker identification failed: {str(e)}")
        raise
257
 
258
def train_role_classifier(utterances: List[Dict]):
    """Fit a Random Forest that separates interviewer from interviewee turns.

    Combines prosodic features, TF-IDF text features and simple syntactic
    cues per utterance. Labels alternate 0/1 by turn order (acknowledged
    demo simplification in the original). The fitted classifier,
    vectorizer and scaler are persisted to OUTPUT_DIR and returned.

    Returns:
        (clf, vectorizer, scaler) tuple.

    Raises:
        Exception: re-raised after logging on any training failure.
    """
    try:
        corpus = [u['text'] for u in utterances]
        vectorizer = TfidfVectorizer(max_features=500, ngram_range=(1, 2))
        tfidf_matrix = vectorizer.fit_transform(corpus)

        def _feature_row(utt, tfidf_vec):
            # One numeric row: prosody, then TF-IDF, then syntax cues.
            pros = utt['prosodic_features']
            text = utt['text']
            row = [
                pros['duration'], pros['mean_pitch'], pros['min_pitch'],
                pros['max_pitch'], pros['pitch_sd'], pros['intensityMean'],
                pros['intensityMin'], pros['intensityMax'], pros['intensitySD'],
            ]
            row.extend(tfidf_vec.toarray()[0].tolist())
            parsed = nlp(text)
            row.extend([
                int(text.endswith('?')),
                len(re.findall(r'\b(why|how|what|when|where|who|which)\b', text.lower())),
                len(text.split()),
                sum(1 for tok in parsed if tok.pos_ == 'VERB'),
                sum(1 for tok in parsed if tok.pos_ == 'NOUN'),
            ])
            return row

        rows = []
        labels = []
        for idx, utt in enumerate(utterances):
            rows.append(_feature_row(utt, tfidf_matrix[idx]))
            labels.append(idx % 2)  # Simplified for demo: alternate speaker roles

        scaler = StandardScaler()
        X = scaler.fit_transform(rows)
        clf = RandomForestClassifier(
            n_estimators=150, max_depth=10, random_state=42, class_weight='balanced'
        )
        clf.fit(X, labels)
        joblib.dump(clf, os.path.join(OUTPUT_DIR, 'role_classifier.pkl'))
        joblib.dump(vectorizer, os.path.join(OUTPUT_DIR, 'text_vectorizer.pkl'))
        joblib.dump(scaler, os.path.join(OUTPUT_DIR, 'feature_scaler.pkl'))
        return clf, vectorizer, scaler
    except Exception as e:
        logger.error(f"Classifier training failed: {str(e)}")
        raise
-
297
def classify_roles(utterances: List[Dict], clf, vectorizer, scaler):
    """Label each utterance as 'Interviewer' or 'Interviewee'.

    Rebuilds the same feature layout used at training time (prosody +
    TF-IDF + syntax cues), scales it, and maps the classifier's 0/1
    prediction to a role string.

    Returns:
        The utterances, each extended with a 'role' key.

    Raises:
        Exception: re-raised after logging on any classification failure.
    """
    try:
        corpus = [u['text'] for u in utterances]
        tfidf_matrix = vectorizer.transform(corpus)

        def _feature_row(utt, tfidf_vec):
            # Must mirror the training feature order exactly.
            pros = utt['prosodic_features']
            text = utt['text']
            row = [
                pros['duration'], pros['mean_pitch'], pros['min_pitch'],
                pros['max_pitch'], pros['pitch_sd'], pros['intensityMean'],
                pros['intensityMin'], pros['intensityMax'], pros['intensitySD'],
            ]
            row.extend(tfidf_vec.toarray()[0].tolist())
            parsed = nlp(text)
            row.extend([
                int(text.endswith('?')),
                len(re.findall(r'\b(why|how|what|when|where|who|which)\b', text.lower())),
                len(text.split()),
                sum(1 for tok in parsed if tok.pos_ == 'VERB'),
                sum(1 for tok in parsed if tok.pos_ == 'NOUN'),
            ])
            return row

        labelled = []
        for idx, utt in enumerate(utterances):
            X = scaler.transform([_feature_row(utt, tfidf_matrix[idx])])
            role = 'Interviewer' if clf.predict(X)[0] == 0 else 'Interviewee'
            labelled.append({**utt, 'role': role})
        return labelled
    except Exception as e:
        logger.error(f"Role classification failed: {str(e)}")
        raise
-
326
def analyze_interviewee_voice(audio_path: str, utterances: List[Dict]) -> Dict:
    """Compute vocal-delivery metrics for the interviewee's speech.

    Extracts interviewee-only audio segments, then derives speaking rate,
    filler-word ratio, bigram repetition, pitch statistics (via pyin),
    intensity statistics, and composite anxiety/confidence/hesitation
    scores with coarse High/Moderate/Low interpretations.

    Returns:
        Metrics dict, or {'error': ...} when no usable interviewee audio
        exists or processing fails.
    """
    try:
        y, sr = librosa.load(audio_path, sr=16000)
        spoken = [u for u in utterances if u['role'] == 'Interviewee']
        if not spoken:
            logger.warning("No interviewee utterances found")
            return {'error': 'No interviewee utterances found'}

        # Slice out each interviewee utterance (times are in ms).
        segments = []
        for u in spoken:
            begin = int(u['start'] * sr / 1000)
            finish = int(u['end'] * sr / 1000)
            if finish > begin and len(y[begin:finish]) > 0:
                segments.append(y[begin:finish])
            else:
                logger.warning(f"Invalid segment for utterance: start={begin}, end={finish}")
        if not segments:
            logger.warning("No valid audio segments for voice analysis")
            return {'error': 'No valid audio segments found'}

        # Text-derived metrics.
        total_duration = sum(u['prosodic_features']['duration'] for u in spoken)
        total_words = sum(len(u['text'].split()) for u in spoken)
        speaking_rate = total_words / total_duration if total_duration > 0 else 0
        filler_words = ['um', 'uh', 'like', 'you know', 'so', 'i mean']
        filler_count = sum(sum(u['text'].lower().count(fw) for fw in filler_words) for u in spoken)
        filler_ratio = filler_count / total_words if total_words > 0 else 0
        all_words = ' '.join(u['text'].lower() for u in spoken).split()
        bigram_counts = {}
        for first, second in zip(all_words, all_words[1:]):
            bigram_counts[(first, second)] = bigram_counts.get((first, second), 0) + 1
        repetition_score = sum(1 for c in bigram_counts.values() if c > 1) / len(bigram_counts) if bigram_counts else 0

        # Pitch metrics over voiced frames only.
        pitches = []
        for seg in segments:
            f0, voiced_flag, _ = librosa.pyin(seg, fmin=80, fmax=300, sr=sr)
            pitches.extend(f0[voiced_flag])
        pitch_mean = np.mean(pitches) if len(pitches) > 0 else 0
        pitch_std = np.std(pitches) if len(pitches) > 0 else 0
        jitter = np.mean(np.abs(np.diff(pitches))) / pitch_mean if len(pitches) > 1 and pitch_mean > 0 else 0

        # Intensity metrics (RMS per segment).
        intensities = [float(np.mean(librosa.feature.rms(y=seg)[0])) if len(seg) > 0 else 0.0 for seg in segments]
        intensity_mean = np.mean(intensities) if intensities else 0
        intensity_std = np.std(intensities) if intensities else 0
        shimmer = np.mean(np.abs(np.diff(intensities))) / intensity_mean if len(intensities) > 1 and intensity_mean > 0 else 0

        # Composite scores; thresholds are heuristic.
        anxiety_score = 0.6 * (pitch_std / pitch_mean) + 0.4 * (jitter + shimmer) if pitch_mean > 0 else 0
        confidence_score = 0.7 * (1 / (1 + intensity_std)) + 0.3 * (1 - filler_ratio)
        hesitation_score = filler_ratio + repetition_score
        anxiety_level = 'High' if anxiety_score > 0.15 else 'Moderate' if anxiety_score > 0.07 else 'Low'
        confidence_level = 'High' if confidence_score > 0.75 else 'Moderate' if confidence_score > 0.5 else 'Low'
        fluency_level = 'Fluent' if (filler_ratio < 0.05 and repetition_score < 0.1) else 'Moderate' if (filler_ratio < 0.1 and repetition_score < 0.2) else 'Disfluent'

        return {
            'speaking_rate': float(round(speaking_rate, 2)),
            'filler_ratio': float(round(filler_ratio, 3)),
            'repetition_score': float(round(repetition_score, 3)),
            'pitch_analysis': {'mean': float(round(pitch_mean, 2)), 'std_dev': float(round(pitch_std, 2)), 'jitter': float(round(jitter, 4))},
            'intensity_analysis': {'mean': float(round(intensity_mean, 2)), 'std_dev': float(round(float(intensity_std), 2)), 'shimmer': float(round(shimmer, 4))},
            'composite_scores': {'anxiety': float(round(anxiety_score, 3)), 'confidence': float(round(confidence_score, 3)), 'hesitation': float(round(hesitation_score, 3))},
            'interpretation': {'anxiety_level': anxiety_level, 'confidence_level': confidence_level, 'fluency_level': fluency_level}
        }
    except Exception as e:
        logger.error(f"Voice analysis failed: {str(e)}", exc_info=True)
        return {'error': f'Voice analysis incomplete due to audio processing issues: {str(e)}'}
388
-
389
def generate_voice_interpretation(analysis: Dict) -> str:
    """Format the voice-analysis dict as a human-readable bullet list.

    Returns a short "Voice analysis unavailable: ..." string when the
    analysis carries an 'error' key or formatting itself fails.
    """
    try:
        if 'error' in analysis:
            return f"Voice analysis unavailable: {analysis['error']}"
        interp = analysis.get('interpretation', {})
        scores = analysis.get('composite_scores', {})
        lines = [
            f"- Speaking rate: {analysis.get('speaking_rate', 0):.2f} words/sec (Benchmark: 2.0-3.0; affects clarity)",
            f"- Filler words: {analysis.get('filler_ratio', 0) * 100:.1f}% (High usage reduces credibility)",
            f"- Anxiety: {interp.get('anxiety_level', 'N/A')} (Score: {scores.get('anxiety', 0):.3f}; stress response)",
            f"- Confidence: {interp.get('confidence_level', 'N/A')} (Score: {scores.get('confidence', 0):.3f}; vocal strength)",
            f"- Fluency: {interp.get('fluency_level', 'N/A')} (Drives engagement)",
            "",
            "HR Insights:",
            "- Rapid speech (>3.0 wps) may reduce clarity; slower pacing enhances professionalism.",
            "- High filler word usage undermines perceived credibility.",
            "- Elevated anxiety suggests pressure; training can improve resilience.",
            "- Strong confidence supports leadership presence.",
            "- Fluent speech enhances engagement in team settings.",
        ]
        return "\n".join(lines)
    except Exception as e:
        logger.error(f"Error generating voice interpretation: {str(e)}")
        return f"Voice analysis unavailable: Error in interpretation formatting"
411
-
412
def generate_anxiety_confidence_chart(composite_scores: Dict, chart_buffer):
    """Render a two-bar Anxiety vs. Confidence chart as PNG into *chart_buffer*.

    Missing scores default to 0. Failures are logged and swallowed so a
    broken chart never aborts report generation.
    """
    try:
        names = ['Anxiety', 'Confidence']
        values = [composite_scores.get('anxiety', 0), composite_scores.get('confidence', 0)]
        fig, ax = plt.subplots(figsize=(5, 3.5))
        rects = ax.bar(names, values, color=['#FF5252', '#26A69A'], edgecolor='black', width=0.45)
        ax.set_ylabel('Score', fontsize=12)
        ax.set_title('Vocal Dynamics: Anxiety vs. Confidence', fontsize=14, pad=15)
        ax.set_ylim(0, 1.2)
        # Annotate each bar with its numeric value.
        for rect in rects:
            top = rect.get_height()
            ax.text(rect.get_x() + rect.get_width()/2, top + 0.05, f"{top:.2f}",
                    ha='center', va='bottom', color='black', fontweight='bold', fontsize=10)
        ax.grid(True, axis='y', linestyle='--', alpha=0.7)
        plt.tight_layout()
        plt.savefig(chart_buffer, format='png', bbox_inches='tight', dpi=300)
        plt.close(fig)
    except Exception as e:
        logger.error(f"Error generating chart: {str(e)}")
431
 
432
def calculate_acceptance_probability(analysis_data: Dict) -> float:
    """Compute an overall hiring-suitability percentage (0-100).

    Blends weighted vocal metrics (confidence, inverted anxiety, fluency,
    speaking-rate proximity to 2.5 wps, inverted filler/repetition) with a
    crude content-strength proxy based on interview length, then
    normalizes by the maximum attainable score.

    Returns:
        Percentage rounded to two decimals; 50.0 when voice analysis
        carries an 'error' key.
    """
    voice = analysis_data.get('voice_analysis', {})
    if 'error' in voice:
        # Neutral score when voice analysis failed outright.
        return 50.0
    # Negative weights are applied through abs() below on inverted metrics.
    w_confidence, w_anxiety, w_fluency, w_speaking_rate, w_filler_repetition, w_content_strengths = 0.35, -0.25, 0.2, 0.15, -0.15, 0.25
    composites = voice.get('composite_scores', {})
    confidence_score = composites.get('confidence', 0.0)
    anxiety_score = composites.get('anxiety', 0.0)
    speaking_rate = voice.get('speaking_rate', 0.0)
    filler_ratio = voice.get('filler_ratio', 0.0)
    repetition_score = voice.get('repetition_score', 0.0)
    # Map the categorical fluency label onto [0.2, 1.0].
    fluency_map = {'Fluent': 1.0, 'Moderate': 0.6, 'Disfluent': 0.2}
    fluency_val = fluency_map.get(voice.get('interpretation', {}).get('fluency_level', 'Disfluent'), 0.2)
    # Score speaking rate by closeness to the 2.5 wps ideal.
    ideal_speaking_rate = 2.5
    speaking_rate_deviation = abs(speaking_rate - ideal_speaking_rate)
    speaking_rate_score = max(0, 1 - (speaking_rate_deviation / ideal_speaking_rate))
    filler_repetition_composite = (filler_ratio + repetition_score) / 2
    filler_repetition_score = max(0, 1 - filler_repetition_composite)
    # Content proxy: interviews longer than a minute score higher.
    content_strength_val = 0.85 if analysis_data.get('text_analysis', {}).get('total_duration', 0) > 60 else 0.4
    raw_score = (confidence_score * w_confidence + (1 - anxiety_score) * abs(w_anxiety) + fluency_val * w_fluency + speaking_rate_score * w_speaking_rate + filler_repetition_score * abs(w_filler_repetition) + content_strength_val * w_content_strengths)
    max_possible_score = (w_confidence + abs(w_anxiety) + w_fluency + w_speaking_rate + abs(w_filler_repetition) + w_content_strengths)
    normalized_score = raw_score / max_possible_score if max_possible_score > 0 else 0.5
    acceptance_probability = max(0.0, min(1.0, normalized_score))
    return float(f"{acceptance_probability * 100:.2f}")
456
 
 
 
 
 
 
 
 
 
457
def generate_report(analysis_data: Dict) -> str:
    """Produce the narrative HR report text via the Gemini model.

    Builds a structured prompt from the transcript, voice metrics and
    suitability score, calls Gemini, and strips non-ASCII characters and
    parentheses from the response (they break the PDF formatter). Fixed
    fallback reports are returned when there is no interviewee content or
    when generation fails.
    """
    try:
        voice = analysis_data.get('voice_analysis', {})
        voice_interpretation = generate_voice_interpretation(voice)
        interviewee_responses = [u['text'] for u in analysis_data['transcript'] if u['role'] == 'Interviewee']
        if not interviewee_responses:
            logger.warning("No interviewee responses found for report generation")
            return f"""**1. Executive Summary**
- Insufficient interviewee content to generate a summary.
- Interview duration suggests limited engagement.

**2. Communication and Vocal Dynamics**
{voice_interpretation}

**3. Competency and Content**
- Strengths: Unable to identify strengths due to limited content.
- Growth Areas: Recommend further interview to assess competencies.

**4. Role Fit and Potential**
- Unable to assess role fit due to insufficient content.

**5. Recommendations**
- Development: Schedule additional interview to gather more data.
- Next Steps: Conduct a follow-up interview with targeted questions."""

        acceptance_prob = analysis_data.get('acceptance_probability', 50.0)
        # Pick the verdict sentence by descending score threshold.
        verdict = "HR Verdict: Limited fit, significant improvement required."
        for cutoff, text in (
            (80, "HR Verdict: Outstanding candidate, recommended for immediate advancement."),
            (60, "HR Verdict: Strong candidate, suitable for further evaluation."),
            (40, "HR Verdict: Moderate potential, needs additional assessment."),
        ):
            if acceptance_prob >= cutoff:
                verdict = text
                break
        acceptance_line = f"\n**Suitability Score: {acceptance_prob:.2f}%**\n" + verdict
        transcript_text = "\n".join([f"- {u['speaker']}: {u['text']}" for u in analysis_data['transcript']])
        prompt = f"""
You are EvalBot, a senior HR consultant delivering a professional interview analysis report. Use clear headings with '**', bullet points ('-'), complete sentences, and formal language. Avoid redundancy, vague terms, and special characters that could break formatting (e.g., parentheses). Ensure each section is unique, actionable, and contains at least 2-3 bullet points. If content is limited, provide reasonable inferences based on available data.

**Input Data**
- Suitability Score: {acceptance_prob:.2f}%
- Interview Duration: {analysis_data['text_analysis']['total_duration']:.2f} seconds
- Speaker Turns: {analysis_data['text_analysis']['speaker_turns']}
- Participants: {', '.join(sorted(set(u['speaker'] for u in analysis_data['transcript'])))}
- Voice Analysis:
{voice_interpretation}
- Transcript Sample:
{transcript_text[:1000]}...

**Report Structure**
{acceptance_line}

**1. Executive Summary**
- Provide a narrative overview of the candidate’s performance, focusing on key strengths and role fit.
- Highlight communication style and engagement based on voice analysis and transcript.
- Note interview duration and participant dynamics.

**2. Communication and Vocal Dynamics**
- Evaluate vocal delivery (rate, fluency, confidence) with specific insights.
{voice_interpretation}

**3. Competency and Content**
- Assess leadership, problem-solving, communication, and adaptability with examples from the transcript.
- List strengths with quantifiable achievements where possible.
- Identify growth areas with constructive feedback.

**4. Role Fit and Potential**
- Analyze cultural fit, role readiness, and long-term growth potential.
- Align findings with typical role requirements (e.g., teamwork, technical skills).

**5. Recommendations**
- Provide prioritized development strategies (e.g., communication training, technical assessments).
- Suggest specific next steps for hiring managers (e.g., advance to next round, schedule tests).
"""
        response = gemini_model.generate_content(prompt)
        # Strip non-ASCII and parentheses: both break the PDF parser.
        report_text = re.sub(r'[^\x00-\x7F]+|[()]+', '', response.text)
        logger.info(f"Generated Gemini report: {report_text[:500]}...")  # Log for debugging
        return report_text
    except Exception as e:
        logger.error(f"Report generation failed: {str(e)}", exc_info=True)
        return f"""**1. Executive Summary**
- Report generation failed due to processing error.

**2. Communication and Vocal Dynamics**
{generate_voice_interpretation(analysis_data.get('voice_analysis', {}))}

**3. Competency and Content**
- Strengths: Unable to assess due to error.
- Growth Areas: Recommend reprocessing the audio.

**4. Role Fit and Potential**
- Unable to assess due to error.

**5. Recommendations**
- Development: Investigate processing error.
- Next Steps: Retry analysis with corrected audio."""
552
 
553
- def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text: str) -> bool:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
554
  try:
555
- doc = SimpleDocTemplate(output_path, pagesize=letter,
556
- rightMargin=0.75*inch, leftMargin=0.75*inch,
557
- topMargin=1*inch, bottomMargin=1*inch)
558
  styles = getSampleStyleSheet()
559
- h1 = ParagraphStyle(name='Heading1', fontSize=18, leading=22, spaceAfter=16, alignment=1, textColor=colors.HexColor('#003087'), fontName='Helvetica-Bold')
560
- h2 = ParagraphStyle(name='Heading2', fontSize=13, leading=15, spaceBefore=10, spaceAfter=6, textColor=colors.HexColor('#0050BC'), fontName='Helvetica-Bold')
561
- h3 = ParagraphStyle(name='Heading3', fontSize=9, leading=11, spaceBefore=6, spaceAfter=4, textColor=colors.HexColor('#3F7CFF'), fontName='Helvetica')
562
- body_text = ParagraphStyle(name='BodyText', fontSize=8, leading=10, spaceAfter=4, fontName='Helvetica', textColor=colors.HexColor('#333333'))
563
- bullet_style = ParagraphStyle(name='Bullet', parent=body_text, leftIndent=16, bulletIndent=6, fontName='Helvetica', bulletFontName='Helvetica', bulletFontSize=8)
564
-
565
  story = []
566
-
567
  def header_footer(canvas, doc):
568
  canvas.saveState()
569
- canvas.setFont('Helvetica', 7)
570
- canvas.setFillColor(colors.HexColor('#666666'))
571
- canvas.drawString(doc.leftMargin, 0.5*inch, f"Page {doc.page} | EvalBot HR Interview Report | Confidential")
572
- canvas.setStrokeColor(colors.HexColor('#0050BC'))
573
  canvas.setLineWidth(0.5)
574
- canvas.line(doc.leftMargin, doc.height + 0.9*inch, doc.width + doc.leftMargin, doc.height + 0.9*inch)
575
- canvas.setFont('Helvetica-Bold', 8)
576
- canvas.drawString(doc.leftMargin, doc.height + 0.95*inch, "Candidate Interview Analysis")
577
- canvas.drawRightString(doc.width + doc.leftMargin, doc.height + 0.95*inch, time.strftime('%B %d, %Y'))
578
  canvas.restoreState()
579
 
580
- # Title Page
581
- story.append(Paragraph("Candidate Interview Analysis", h1))
582
- story.append(Paragraph(f"Generated: {time.strftime('%B %d, %Y')}", ParagraphStyle(name='Date', alignment=1, fontSize=8, textColor=colors.HexColor('#666666'), fontName='Helvetica')))
583
- story.append(Spacer(1, 0.3*inch))
584
- acceptance_prob = analysis_data.get('acceptance_probability', 50.0)
585
- story.append(Paragraph("Hiring Suitability Snapshot", h2))
586
- prob_color = colors.HexColor('#2E7D32') if acceptance_prob >= 80 else (colors.HexColor('#F57C00') if acceptance_prob >= 60 else colors.HexColor('#D32F2F'))
587
- story.append(Paragraph(f"Suitability Score: <font size=14 color='{prob_color.hexval()}'><b>{acceptance_prob:.2f}%</b></font>",
588
- ParagraphStyle(name='Prob', fontSize=10, spaceAfter=8, alignment=1, fontName='Helvetica-Bold')))
589
- if acceptance_prob >= 80:
590
- story.append(Paragraph("<b>HR Verdict:</b> Outstanding candidate, recommended for immediate advancement.", body_text))
591
- elif acceptance_prob >= 60:
592
- story.append(Paragraph("<b>HR Verdict:</b> Strong candidate, suitable for further evaluation.", body_text))
593
- elif acceptance_prob >= 40:
594
- story.append(Paragraph("<b>HR Verdict:</b> Moderate potential, needs additional assessment.", body_text))
595
- else:
596
- story.append(Paragraph("<b>HR Verdict:</b> Limited fit, significant improvement required.", body_text))
597
- story.append(Spacer(1, 0.2*inch))
598
- participants = sorted([p for p in set(u['speaker'] for u in analysis_data['transcript']) if p != 'Unknown'])
599
- participants_str = ', '.join(participants)
600
- table_data = [
601
- ['Metric', 'Value'],
602
- ['Interview Duration', f"{analysis_data['text_analysis']['total_duration']:.2f} seconds"],
603
- ['Speaker Turns', f"{analysis_data['text_analysis']['speaker_turns']}"],
604
- ['Participants', participants_str],
605
- ]
606
- table = Table(table_data, colWidths=[2.0*inch, 4.0*inch])
607
- table.setStyle(TableStyle([
608
- ('BACKGROUND', (0,0), (-1,0), colors.HexColor('#0050BC')),
609
- ('TEXTCOLOR', (0,0), (-1,0), colors.white),
610
- ('ALIGN', (0,0), (-1,-1), 'LEFT'),
611
- ('VALIGN', (0,0), (-1,-1), 'MIDDLE'),
612
- ('FONTNAME', (0,0), (-1,0), 'Helvetica-Bold'),
613
- ('FONTSIZE', (0,0), (-1,-1), 8),
614
- ('BOTTOMPADDING', (0,0), (-1,0), 6),
615
- ('TOPPADDING', (0,0), (-1,0), 6),
616
- ('BACKGROUND', (0,1), (-1,-1), colors.HexColor('#F5F6FA')),
617
- ('GRID', (0,0), (-1,-1), 0.4, colors.HexColor('#DDE4EB')),
618
- ('LEFTPADDING', (1,3), (1,3), 10),
619
- ('WORDWRAP', (1,3), (1,3), 'CJK'),
620
- ]))
621
- story.append(table)
622
- story.append(Spacer(1, 0.3*inch))
623
- story.append(Paragraph("Prepared by: EvalBot - AI-Powered HR Analysis", body_text))
624
- story.append(PageBreak())
625
-
626
- # Detailed Analysis
627
- story.append(Paragraph("Detailed Candidate Evaluation", h1))
628
-
629
- # Communication and Vocal Dynamics
630
- story.append(Paragraph("1. Communication & Vocal Dynamics", h2))
631
- voice_analysis = analysis_data.get('voice_analysis', {})
632
- if voice_analysis and 'error' not in voice_analysis:
633
- table_data = [
634
- ['Metric', 'Value', 'HR Insight'],
635
- ['Speaking Rate', f"{voice_analysis.get('speaking_rate', 0):.2f} words/sec", 'Benchmark: 2.0-3.0 wps; impacts clarity'],
636
- ['Filler Words', f"{voice_analysis.get('filler_ratio', 0) * 100:.1f}%", 'High usage reduces credibility'],
637
- ['Anxiety', voice_analysis.get('interpretation', {}).get('anxiety_level', 'N/A'), f"Score: {voice_analysis.get('composite_scores', {}).get('anxiety', 0):.3f}"],
638
- ['Confidence', voice_analysis.get('interpretation', {}).get('confidence_level', 'N/A'), f"Score: {voice_analysis.get('composite_scores', {}).get('confidence', 0):.3f}"],
639
- ['Fluency', voice_analysis.get('interpretation', {}).get('fluency_level', 'N/A'), 'Drives engagement'],
640
- ]
641
- table = Table(table_data, colWidths=[1.5*inch, 1.3*inch, 3.2*inch])
642
- table.setStyle(TableStyle([
643
- ('BACKGROUND', (0,0), (-1,0), colors.HexColor('#0050BC')),
644
- ('TEXTCOLOR', (0,0), (-1,0), colors.white),
645
- ('ALIGN', (0,0), (-1,-1), 'LEFT'),
646
- ('VALIGN', (0,0), (-1,-1), 'MIDDLE'),
647
- ('FONTNAME', (0,0), (-1,0), 'Helvetica-Bold'),
648
- ('FONTSIZE', (0,0), (-1,-1), 8),
649
- ('BOTTOMPADDING', (0,0), (-1,0), 6),
650
- ('TOPPADDING', (0,0), (-1,0), 6),
651
- ('BACKGROUND', (0,1), (-1,-1), colors.HexColor('#F5F6FA')),
652
- ('GRID', (0,0), (-1,-1), 0.4, colors.HexColor('#DDE4EB')),
653
- ]))
654
- story.append(table)
655
- story.append(Spacer(1, 0.15*inch))
656
- chart_buffer = io.BytesIO()
657
- generate_anxiety_confidence_chart(voice_analysis.get('composite_scores', {}), chart_buffer)
658
- chart_buffer.seek(0)
659
- img = Image(chart_buffer, width=4.2*inch, height=2.8*inch)
660
- img.hAlign = 'CENTER'
661
- story.append(img)
662
- else:
663
- story.append(Paragraph(f"Voice analysis unavailable: {voice_analysis.get('error', 'Unknown error')}", body_text))
664
- story.append(Spacer(1, 0.15*inch))
665
-
666
- # Parse Gemini Report
667
- sections = {
668
- "Executive Summary": [],
669
- "Communication": [],
670
- "Competency": {"Strengths": [], "Growth Areas": []},
671
- "Recommendations": {"Development": [], "Next Steps": []},
672
- "Role Fit": [],
673
- }
674
- current_section = None
675
- current_subsection = None
676
- lines = gemini_report_text.split('\n')
677
- for line in lines:
678
- line = line.strip()
679
- if not line:
680
- continue
681
- logger.debug(f"Parsing line: {line}") # Debug parsing
682
- if line.startswith('**') and line.endswith('**'):
683
- section_title = line.strip('**').strip()
684
- if section_title.startswith(('1.', '2.', '3.', '4.', '5.')):
685
- section_title = section_title[2:].strip()
686
- if 'Executive Summary' in section_title:
687
- current_section = 'Executive Summary'
688
- current_subsection = None
689
- elif 'Communication' in section_title:
690
- current_section = 'Communication'
691
- current_subsection = None
692
- elif 'Competency' in section_title:
693
- current_section = 'Competency'
694
- current_subsection = None
695
- elif 'Role Fit' in section_title:
696
- current_section = 'Role Fit'
697
- current_subsection = None
698
- elif 'Recommendations' in section_title:
699
- current_section = 'Recommendations'
700
- current_subsection = None
701
- logger.debug(f"Set section: {current_section}")
702
- elif line.startswith('-') and current_section:
703
- clean_line = line.lstrip('-').strip()
704
- if not clean_line:
705
- continue
706
- clean_line = re.sub(r'[^\w\s.,;:-]', '', clean_line)
707
- logger.debug(f"Processing bullet: {clean_line}, section: {current_section}, subsection: {current_subsection}")
708
- if current_section in ['Competency', 'Recommendations']:
709
- # For dictionary sections, append to subsection
710
- if current_subsection is None:
711
- # Set default subsection if unset
712
- if current_section == 'Competency':
713
- current_subsection = 'Strengths'
714
- elif current_section == 'Recommendations':
715
- current_subsection = 'Development'
716
- logger.debug(f"Default subsection set to: {current_subsection}")
717
- if current_subsection:
718
- sections[current_section][current_subsection].append(clean_line)
719
- else:
720
- logger.warning(f"Skipping line due to unset subsection: {clean_line}")
721
- else:
722
- # For list sections, append directly
723
- sections[current_section].append(clean_line)
724
- elif current_section and line:
725
- clean_line = re.sub(r'[^\w\s.,;:-]', '', line)
726
- logger.debug(f"Processing non-bullet: {clean_line}, section: {current_section}, subsection: {current_subsection}")
727
- if current_section in ['Competency', 'Recommendations']:
728
- if current_subsection:
729
- sections[current_section][current_subsection].append(clean_line)
730
- else:
731
- # Default subsection
732
- current_subsection = 'Strengths' if current_section == 'Competency' else 'Development'
733
- sections[current_section][current_subsection].append(clean_line)
734
- logger.debug(f"Default subsection for non-bullet set to: {current_subsection}")
735
- else:
736
- sections[current_section].append(clean_line)
737
-
738
- # Executive Summary
739
- story.append(Paragraph("2. Executive Summary", h2))
740
- if sections['Executive Summary']:
741
- for line in sections['Executive Summary']:
742
- story.append(Paragraph(line, bullet_style))
743
- else:
744
- story.append(Paragraph("Candidate showed moderate engagement; further assessment needed.", bullet_style))
745
- story.append(Paragraph(f"Interview lasted {analysis_data['text_analysis']['total_duration']:.2f} seconds with {analysis_data['text_analysis']['speaker_turns']} turns.", bullet_style))
746
- story.append(Spacer(1, 0.15*inch))
747
-
748
- # Competency and Content
749
- story.append(Paragraph("3. Competency & Content", h2))
750
- story.append(Paragraph("Strengths", h3))
751
- if sections['Competency']['Strengths']:
752
- for line in sections['Competency']['Strengths']:
753
- story.append(Paragraph(line, bullet_style))
754
- else:
755
- story.append(Paragraph("Strengths not fully assessed; candidate demonstrated consistent communication.", bullet_style))
756
- story.append(Spacer(1, 0.1*inch))
757
- story.append(Paragraph("Growth Areas", h3))
758
- if sections['Competency']['Growth Areas']:
759
- for line in sections['Competency']['Growth Areas']:
760
- story.append(Paragraph(line, bullet_style))
761
- else:
762
- story.append(Paragraph("Consider enhancing specificity in responses to highlight expertise.", bullet_style))
763
- story.append(Spacer(1, 0.15*inch))
764
-
765
- # Role Fit
766
- story.append(Paragraph("4. Role Fit & Potential", h2))
767
- if sections['Role Fit']:
768
- for line in sections['Role Fit']:
769
- story.append(Paragraph(line, bullet_style))
770
- else:
771
- story.append(Paragraph("Potential for role fit exists; further evaluation needed to confirm alignment.", bullet_style))
772
- story.append(Spacer(1, 0.15*inch))
773
-
774
- # Recommendations
775
- story.append(Paragraph("5. Recommendations", h2))
776
- story.append(Paragraph("Development Priorities", h3))
777
- if sections['Recommendations']['Development']:
778
- for line in sections['Recommendations']['Development']:
779
- story.append(Paragraph(line, bullet_style))
780
- else:
781
- story.append(Paragraph("Enroll in communication training to reduce filler words.", bullet_style))
782
- story.append(Spacer(1, 0.1*inch))
783
- story.append(Paragraph("Next Steps for Hiring Managers", h3))
784
- if sections['Recommendations']['Next Steps']:
785
- for line in sections['Recommendations']['Next Steps']:
786
- story.append(Paragraph(line, bullet_style))
787
- else:
788
- story.append(Paragraph("Schedule a technical assessment to evaluate role-specific skills.", bullet_style))
789
- story.append(Spacer(1, 0.15*inch))
790
- story.append(Paragraph("This report provides actionable insights to support hiring and candidate development.", body_text))
791
 
792
  doc.build(story, onFirstPage=header_footer, onLaterPages=header_footer)
793
- logger.info(f"PDF report successfully generated at {output_path}")
794
  return True
795
  except Exception as e:
796
- logger.error(f"PDF generation failed: {str(e)}\nFull Gemini report text:\n{gemini_report_text}", exc_info=True)
797
  return False
798
 
799
- def convert_to_serializable(obj):
800
- if isinstance(obj, np.generic):
801
- return obj.item()
802
- if isinstance(obj, dict):
803
- return {k: convert_to_serializable(v) for k, v in obj.items()}
804
- if isinstance(obj, list):
805
- return [convert_to_serializable(item) for item in obj]
806
- if isinstance(obj, np.ndarray):
807
- return obj.tolist()
808
- return obj
809
 
810
- def process_interview(audio_url: str) -> Dict:
811
- """Process a single audio URL and generate analysis report."""
812
- local_audio_path = None
813
- wav_file = None
814
- is_downloaded = False
815
  try:
816
- if not isinstance(audio_url, str):
817
- raise ValueError("Input must be a single URL string")
818
- logger.info(f"Starting processing for {audio_url}")
819
- if audio_url.startswith(('http://', 'https://')):
820
- local_audio_path = download_audio_from_url(audio_url)
 
 
 
 
 
 
821
  is_downloaded = True
822
  else:
823
- local_audio_path = audio_url
824
- if not os.path.exists(local_audio_path):
825
- raise FileNotFoundError(f"Local audio file not found: {local_audio_path}")
826
  wav_file = convert_to_wav(local_audio_path)
827
  transcript = transcribe(wav_file)
828
- for utterance in transcript['utterances']:
829
- utterance['prosodic_features'] = extract_prosodic_features(wav_file, utterance['start'], utterance['end'])
 
 
830
  utterances_with_speakers = identify_speakers(transcript, wav_file)
831
- if not utterances_with_speakers:
832
- raise ValueError("No utterances identified in the audio")
833
- clf, vectorizer, scaler = None, None, None
834
- if os.path.exists(os.path.join(OUTPUT_DIR, 'role_classifier.pkl')):
835
- clf = joblib.load(os.path.join(OUTPUT_DIR, 'role_classifier.pkl'))
836
- vectorizer = joblib.load(os.path.join(OUTPUT_DIR, 'text_vectorizer.pkl'))
837
- scaler = joblib.load(os.path.join(OUTPUT_DIR, 'feature_scaler.pkl'))
838
- else:
839
- clf, vectorizer, scaler = train_role_classifier(utterances_with_speakers)
840
- classified_utterances = classify_roles(utterances_with_speakers, clf, vectorizer, scaler)
841
  voice_analysis = analyze_interviewee_voice(wav_file, classified_utterances)
 
 
842
  analysis_data = {
 
843
  'transcript': classified_utterances,
844
- 'speakers': list(set(u['speaker'] for u in classified_utterances if u['speaker'] != 'Unknown')),
845
  'voice_analysis': voice_analysis,
 
846
  'text_analysis': {
847
  'total_duration': sum(u['prosodic_features']['duration'] for u in classified_utterances),
848
  'speaker_turns': len(classified_utterances)
849
  }
850
  }
 
851
  analysis_data['acceptance_probability'] = calculate_acceptance_probability(analysis_data)
852
  gemini_report_text = generate_report(analysis_data)
 
853
  base_name = str(uuid.uuid4())
854
  pdf_path = os.path.join(OUTPUT_DIR, f"{base_name}_report.pdf")
855
  json_path = os.path.join(OUTPUT_DIR, f"{base_name}_analysis.json")
856
- pdf_success = create_pdf_report(analysis_data, pdf_path, gemini_report_text)
857
- with open(json_path, 'w') as f:
858
- serializable_data = convert_to_serializable(analysis_data)
859
- json.dump(serializable_data, f, indent=2)
860
- if not pdf_success:
861
- logger.warning(f"PDF report failed to generate for {audio_url}")
862
- return {
863
- 'pdf_path': None,
864
- 'json_path': json_path,
865
- 'error': 'PDF generation failed'
866
- }
867
- logger.info(f"Processing completed for {audio_url}")
868
- return {'pdf_path': pdf_path, 'json_path': json_path}
869
- except Exception as e:
870
- logger.error(f"Processing failed for {audio_url}: {str(e)}", exc_info=True)
871
- base_name = str(uuid.uuid4())
872
- json_path = os.path.join(OUTPUT_DIR, f"{base_name}_analysis.json")
873
  with open(json_path, 'w') as f:
874
- json.dump({'error': str(e)}, f, indent=2)
 
 
 
875
  return {
876
- 'pdf_path': None,
877
  'json_path': json_path,
878
- 'error': str(e)
 
879
  }
 
 
 
 
 
880
  finally:
881
- if wav_file and os.path.exists(wav_file):
882
- try:
883
- os.remove(wav_file)
884
- except Exception as e:
885
- logger.error(f"Failed to clean up wav file {wav_file}: {str(e)}")
886
  if is_downloaded and local_audio_path and os.path.exists(local_audio_path):
887
- try:
888
- os.remove(local_audio_path)
889
- logger.info(f"Cleaned up temporary file: {local_audio_path}")
890
- except Exception as e:
891
- logger.error(f"Failed to clean up local audio file {local_audio_path}: {str(e)}")
 
19
  import logging
20
  import tempfile
21
  from reportlab.lib.pagesizes import letter
22
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, Image, HRFlowable
23
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
24
  from reportlab.lib.units import inch
25
  from reportlab.lib import colors
 
27
  import matplotlib
28
  matplotlib.use('Agg')
29
  import io
30
+ from transformers import AutoTokenizer, AutoModel, pipeline
31
  import spacy
32
  import google.generativeai as genai
33
  import joblib
 
35
 
36
  # Setup logging
37
logging.basicConfig(level=logging.INFO)
# Fixed: was `logging.getLogger(_name_)` — the double underscores of the
# __name__ dunder were lost (markdown mangling); `_name_` raises NameError.
logger = logging.getLogger(__name__)
# Quiet NeMo's verbose loggers down to real errors only.
logging.getLogger("nemo_logging").setLevel(logging.ERROR)
logging.getLogger("nemo").setLevel(logging.ERROR)

# Configuration
OUTPUT_DIR = "./processed_audio"  # destination for generated wavs/reports
os.makedirs(OUTPUT_DIR, exist_ok=True)

# API Keys (read from the environment; intentionally no hard-coded fallbacks)
PINECONE_KEY = os.getenv("PINECONE_KEY")
ASSEMBLYAI_KEY = os.getenv("ASSEMBLYAI_KEY")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
50
 
51
# --- Audio, transcription, and analysis helper functions ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  def initialize_services():
55
  try:
56
  pc = Pinecone(api_key=PINECONE_KEY)
57
  index_name = "interview-speaker-embeddings"
58
  if index_name not in pc.list_indexes().names():
59
+ pc.create_index(name=index_name, dimension=192, metric="cosine", spec=ServerlessSpec(cloud="aws", region="us-east-1"))
 
 
 
 
 
60
  index = pc.Index(index_name)
61
  genai.configure(api_key=GEMINI_API_KEY)
62
  gemini_model = genai.GenerativeModel('gemini-1.5-flash')
 
73
  def load_speaker_model():
74
  try:
75
  torch.set_num_threads(5)
76
+ model = EncDecSpeakerLabelModel.from_pretrained("nvidia/speakerverification_en_titanet_large", map_location=device)
 
 
 
77
  model.eval()
78
  return model
79
  except Exception as e:
 
93
  def convert_to_wav(audio_path: str, output_dir: str = OUTPUT_DIR) -> str:
94
  try:
95
  audio = AudioSegment.from_file(audio_path)
96
+ if audio.channels > 1: audio = audio.set_channels(1)
 
97
  audio = audio.set_frame_rate(16000)
98
  wav_file = os.path.join(output_dir, f"{uuid.uuid4()}.wav")
99
  audio.export(wav_file, format="wav")
 
106
  try:
107
  audio = AudioSegment.from_file(audio_path)
108
  segment = audio[start_ms:end_ms]
109
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
110
+ segment.export(tmp.name, format="wav")
111
+ y, sr = librosa.load(tmp.name, sr=16000)
112
+ os.remove(tmp.name)
113
+ pitches, _ = librosa.piptrack(y=y, sr=sr)
114
  pitches = pitches[pitches > 0]
115
+ return {
116
+ 'duration': (end_ms - start_ms) / 1000.0,
117
  'mean_pitch': float(np.mean(pitches)) if len(pitches) > 0 else 0.0,
118
  'min_pitch': float(np.min(pitches)) if len(pitches) > 0 else 0.0,
119
  'max_pitch': float(np.max(pitches)) if len(pitches) > 0 else 0.0,
 
123
  'intensityMax': float(np.max(librosa.feature.rms(y=y)[0])),
124
  'intensitySD': float(np.std(librosa.feature.rms(y=y)[0])),
125
  }
 
 
126
  except Exception as e:
127
  logger.error(f"Feature extraction failed: {str(e)}")
128
+ return {}
 
 
 
 
129
 
130
def transcribe(audio_path: str) -> Dict:
    """Upload audio to AssemblyAI and poll until a diarized transcript is ready.

    Args:
        audio_path: Path to a local audio file.

    Returns:
        The completed AssemblyAI transcript JSON (includes 'utterances').

    Raises:
        Exception: on AssemblyAI job failure, HTTP errors, or polling timeout.
    """
    headers = {"authorization": ASSEMBLYAI_KEY}
    try:
        # Upload the raw audio bytes; fail fast on HTTP errors instead of the
        # confusing KeyError the original raised on a non-2xx response.
        with open(audio_path, 'rb') as f:
            upload_response = requests.post(
                "https://api.assemblyai.com/v2/upload",
                headers=headers, data=f, timeout=300,
            )
        upload_response.raise_for_status()
        audio_url = upload_response.json()['upload_url']

        # Kick off the transcription job with speaker diarization enabled.
        transcript_response = requests.post(
            "https://api.assemblyai.com/v2/transcript",
            headers=headers,
            json={"audio_url": audio_url, "speaker_labels": True, "filter_profanity": True},
            timeout=30,
        )
        transcript_response.raise_for_status()
        transcript_id = transcript_response.json()['id']

        # Poll with a hard deadline so a stuck job cannot hang the worker forever
        # (the original `while True` had no exit on a job that never completes).
        deadline = time.time() + 1800  # 30 minutes
        while time.time() < deadline:
            result = requests.get(
                f"https://api.assemblyai.com/v2/transcript/{transcript_id}",
                headers=headers, timeout=30,
            ).json()
            if result['status'] == 'completed':
                return result
            elif result['status'] == 'error':
                raise Exception(f"AssemblyAI Error: {result.get('error')}")
            time.sleep(5)
        raise TimeoutError(f"AssemblyAI transcription timed out for id {transcript_id}")
    except Exception as e:
        logger.error(f"Transcription failed: {str(e)}")
        raise
145
 
146
def process_utterance(utterance, full_audio):
    """Embed one utterance's audio slice and resolve its speaker via Pinecone.

    Args:
        utterance: AssemblyAI utterance dict with 'start'/'end' in ms.
        full_audio: The whole recording as a pydub AudioSegment.

    Returns:
        The utterance dict extended with 'speaker' and 'speaker_id'
        ('Unknown'/'unknown' on any failure — best-effort, never raises).

    NOTE(review): relies on module-level `speaker_model` and `index` being
    initialized elsewhere (initialize_services) — confirm before reuse.
    """
    tmp_path = None
    try:
        start, end = utterance['start'], utterance['end']
        segment = full_audio[start:end]
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            tmp_path = tmp.name
            segment.export(tmp_path, format="wav")
        with torch.no_grad():
            embedding = speaker_model.get_embedding(tmp_path).cpu().numpy()
        embedding_list = embedding.flatten().tolist()
        # Nearest known voiceprint; 0.75 cosine similarity = "same speaker".
        query_result = index.query(vector=embedding_list, top_k=1, include_metadata=True)
        if query_result['matches'] and query_result['matches'][0]['score'] > 0.75:
            speaker_id = query_result['matches'][0]['id']
            speaker_name = query_result['matches'][0]['metadata']['speaker_name']
        else:
            # New voice: register it so later utterances match this speaker.
            speaker_id = f"speaker_{uuid.uuid4().hex[:6]}"
            speaker_name = f"Speaker_{speaker_id[-4:].upper()}"
            index.upsert([(speaker_id, embedding_list, {"speaker_name": speaker_name})])
        return {**utterance, 'speaker': speaker_name, 'speaker_id': speaker_id}
    except Exception as e:
        logger.error(f"Utterance processing failed: {str(e)}", exc_info=True)
        return {**utterance, 'speaker': 'Unknown', 'speaker_id': 'unknown'}
    finally:
        # Fix: the original only removed the temp wav on success, leaking one
        # file per failed utterance; clean up on every path.
        if tmp_path and os.path.exists(tmp_path):
            os.remove(tmp_path)
 
 
 
 
 
168
 
169
def identify_speakers(transcript: Dict, wav_file: str) -> List[Dict]:
    """Attach a speaker identity to every utterance of the transcript.

    Loads the full recording once and fans the per-utterance embedding +
    lookup work out over a small thread pool. Order of results matches the
    order of transcript['utterances'].
    """
    try:
        full_audio = AudioSegment.from_wav(wav_file)
        with ThreadPoolExecutor(max_workers=5) as pool:
            pending = [
                pool.submit(process_utterance, utterance, full_audio)
                for utterance in transcript['utterances']
            ]
            labelled = [job.result() for job in pending]
        return labelled
    except Exception as e:
        logger.error(f"Speaker identification failed: {str(e)}")
        raise
179
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
def analyze_interviewee_voice(audio_path: str, utterances: List[Dict]) -> Dict:
    """Derive prosodic anxiety/confidence/fluency metrics for the interviewee.

    Only utterances tagged role == 'Interviewee' are analyzed. Returns a dict
    with speaking_rate, filler_ratio, repetition_score, composite_scores and
    a coarse textual interpretation, or {'error': ...} on any failure.
    """
    try:
        y, sr = librosa.load(audio_path, sr=16000)
        interviewee_utterances = [u for u in utterances if u.get('role') == 'Interviewee']
        if not interviewee_utterances: return {'error': 'No interviewee utterances found'}
        # Slice the waveform per utterance; start/end are in milliseconds.
        segments = [y[int(u['start']*sr/1000):int(u['end']*sr/1000)] for u in interviewee_utterances]
        # Lexical pacing metrics from the transcript text.
        total_duration = sum(u['prosodic_features']['duration'] for u in interviewee_utterances)
        total_words = sum(len(u['text'].split()) for u in interviewee_utterances)
        speaking_rate = total_words / total_duration if total_duration > 0 else 0
        filler_words = ['um', 'uh', 'like', 'you know', 'so', 'i mean']
        # Substring counting — 'so' will also match inside words; TODO confirm intended.
        filler_count = sum(sum(u['text'].lower().count(fw) for fw in filler_words) for u in interviewee_utterances)
        filler_ratio = filler_count / total_words if total_words > 0 else 0
        # Placeholder: repetition detection not implemented yet.
        repetition_score = 0
        pitches, intensities = [], []
        for segment in segments:
            if len(segment) == 0: continue
            # pyin gives per-frame f0 plus a voiced mask; keep voiced frames only.
            f0, voiced_flag, _ = librosa.pyin(segment, fmin=librosa.note_to_hz('C2'), fmax=librosa.note_to_hz('C7'), sr=sr)
            pitches.extend(f0[voiced_flag])
            intensities.extend(librosa.feature.rms(y=segment)[0])
        pitch_mean = np.mean(pitches) if len(pitches) > 0 else 0
        intensity_std = np.std(intensities) if len(intensities) > 0 else 0
        # Jitter/shimmer: mean relative frame-to-frame variation of pitch/energy.
        jitter = np.mean(np.abs(np.diff(pitches))) / pitch_mean if len(pitches) > 1 and pitch_mean > 0 else 0
        shimmer = np.mean(np.abs(np.diff(intensities))) / np.mean(intensities) if len(intensities) > 1 and np.mean(intensities) > 0 else 0
        # Heuristic composites; weights are hand-tuned, not calibrated.
        anxiety_score = (0.6 * (np.std(pitches)/pitch_mean if pitch_mean > 0 else 0) + 0.4 * (jitter + shimmer))
        confidence_score = 0.7 * (1/(1+intensity_std)) + 0.3 * (1/(1+filler_ratio))
        hesitation_score = filler_ratio + repetition_score
        return {
            'speaking_rate': float(round(speaking_rate, 2)), 'filler_ratio': float(round(filler_ratio, 4)), 'repetition_score': float(round(repetition_score, 4)),
            'composite_scores': {'anxiety': float(round(anxiety_score, 4)), 'confidence': float(round(confidence_score, 4)), 'hesitation': float(round(hesitation_score, 4))},
            'interpretation': {
                # Thresholds are empirical cut points for the heuristic scores above.
                'anxiety_level': 'high' if anxiety_score > 0.15 else 'moderate' if anxiety_score > 0.07 else 'low',
                'confidence_level': 'high' if confidence_score > 0.7 else 'moderate' if confidence_score > 0.5 else 'low',
                'fluency_level': 'fluent' if filler_ratio < 0.05 and repetition_score < 0.1 else 'disfluent'
            }
        }
    except Exception as e:
        logger.error(f"Voice analysis failed: {str(e)}")
        return {'error': str(e)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
 
219
def calculate_acceptance_probability(analysis_data: Dict) -> float:
    """Fold voice and content signals into a 0-100 acceptance probability.

    Returns 0.0 when voice analysis failed. The result is a weighted,
    normalized blend of confidence, (inverted) anxiety, fluency, speaking-rate
    proximity to an ideal pace, filler/repetition penalty, and a content flag,
    rounded to two decimal places.
    """
    voice = analysis_data.get('voice_analysis', {})
    if 'error' in voice:
        return 0.0

    # Signal weights; negative sign marks penalizing signals.
    w_confidence = 0.4
    w_anxiety = -0.3
    w_fluency = 0.2
    w_speaking_rate = 0.1
    w_filler_repetition = -0.1
    w_content_strengths = 0.2

    composites = voice.get('composite_scores', {})
    confidence_score = composites.get('confidence', 0.0)
    anxiety_score = composites.get('anxiety', 0.0)
    speaking_rate = voice.get('speaking_rate', 0.0)
    filler_ratio = voice.get('filler_ratio', 0.0)
    repetition_score = voice.get('repetition_score', 0.0)

    # Map the categorical fluency label onto [0, 1].
    fluency_label = voice.get('interpretation', {}).get('fluency_level', 'disfluent')
    fluency_val = {'fluent': 1.0, 'moderate': 0.5, 'disfluent': 0.0}.get(fluency_label, 0.0)

    # Score pace by its relative distance from the ideal words-per-second rate.
    ideal_speaking_rate = 2.5
    speaking_rate_deviation = abs(speaking_rate - ideal_speaking_rate)
    speaking_rate_score = max(0, 1 - (speaking_rate_deviation / ideal_speaking_rate))

    # Joint filler/repetition penalty, inverted so higher is better.
    filler_repetition_score = max(0, 1 - (filler_ratio + repetition_score) / 2)

    # Crude content signal: any analyzed speech at all earns a fixed credit.
    has_content = analysis_data.get('text_analysis', {}).get('total_duration', 0) > 0
    content_strength_val = 0.8 if has_content else 0.0

    raw_score = (
        confidence_score * w_confidence
        + (1 - anxiety_score) * abs(w_anxiety)
        + fluency_val * w_fluency
        + speaking_rate_score * w_speaking_rate
        + filler_repetition_score * abs(w_filler_repetition)
        + content_strength_val * w_content_strengths
    )
    max_possible_score = (
        w_confidence + abs(w_anxiety) + w_fluency
        + w_speaking_rate + abs(w_filler_repetition) + w_content_strengths
    )
    if max_possible_score == 0:
        return 50.0
    acceptance_probability = max(0.0, min(1.0, raw_score / max_possible_score))
    return float(f"{acceptance_probability * 100:.2f}")
244
 
245
def convert_to_serializable(obj):
    """Recursively replace numpy scalars/arrays with plain Python equivalents.

    Leaves anything that is already JSON-friendly untouched, so the result
    can be handed straight to json.dump.
    """
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, np.generic):
        return obj.item()
    if isinstance(obj, dict):
        return {key: convert_to_serializable(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [convert_to_serializable(element) for element in obj]
    return obj
251
+
252
+ # --- NEW: HR Persona Report Generation ---
253
def generate_report(analysis_data: Dict) -> str:
    """Build an HR-persona evaluation memo via Gemini from the analysis data.

    Summarizes voice and content findings into a prompt and returns Gemini's
    generated memo text, or an error string (never raises).

    NOTE(review): relies on the module-level `gemini_model` configured in
    initialize_services — confirm it is assigned before this is called.
    NOTE(review): the single asterisks in the prompt below look like mangled
    '**' markdown bold markers — verify against the intended prompt.
    """
    try:
        voice = analysis_data.get('voice_analysis', {})
        # Human-readable voice summary; falls back when analysis failed.
        voice_interp = "Voice analysis data was not available."
        if voice and 'error' not in voice:
            voice_interp = (f"The candidate's voice profile indicates a '{voice.get('interpretation', {}).get('confidence_level', 'N/A').upper()}' confidence level "
                            f"and a '{voice.get('interpretation', {}).get('anxiety_level', 'N/A').upper()}' anxiety level. "
                            f"Fluency was rated as '{voice.get('interpretation', {}).get('fluency_level', 'N/A').upper()}'.")

        # Content summary from the advanced text-analysis pass (may be empty).
        content = analysis_data.get('advanced_content_analysis', {})
        content_interp = (f"Sentiment of responses was generally '{content.get('overall_sentiment', {}).get('label', 'N/A')}'. "
                          f"Mentioned technical skills: {', '.join(content.get('mentioned_technologies', [])) or 'None'}. "
                          f"Mentioned soft skills: {', '.join(content.get('mentioned_soft_skills', [])) or 'None'}.")

        # Assumes acceptance_probability was already computed (float); the
        # :.2f format below would raise if it were missing/None — TODO confirm.
        prob = analysis_data.get('acceptance_probability')

        prompt = f"""
*Persona:* You are a Senior HR Partner writing a candidate evaluation memo for the hiring manager.
*Task:* Write a professional, objective, and concise evaluation based on the data below.
*Tone:* Analytical and formal.

*CANDIDATE EVALUATION MEMORANDUM*
*CONFIDENTIAL*

*Candidate ID:* {analysis_data.get('user_id', 'N/A')}
*Analysis Date:* {time.strftime('%Y-%m-%d')}
*Estimated Acceptance Probability:* {prob:.2f}%

*1. Overall Recommendation:*
Provide a clear, one-sentence recommendation (e.g., "Highly recommend proceeding to the final round," or "Recommend with reservations due to...").

*2. Key Competency Assessment (Content & Skills):*
- Summarize the candidate's key strengths and areas for development based on the content analysis.
- *Data for Content Analysis:* {content_interp}

*3. Communication Style (Voice & Speech Analysis):*
- Evaluate the candidate's communication style (confidence, clarity, nervousness).
- *Data for Voice Analysis:* {voice_interp}

*4. Actionable Next Steps:*
- Suggest specific questions or topics for the next interviewer to focus on.
"""
        response = gemini_model.generate_content(prompt)
        return response.text
    except Exception as e:
        logger.error(f"Report generation failed: {str(e)}")
        return f"Error generating report: {str(e)}"
300
+
301
+ # --- NEW: Polished PDF Creation ---
302
def parse_gemini_report(text: str) -> list:
    """Parse Gemini's markdown-style memo into typed elements for PDF layout.

    Args:
        text: The raw report text (markdown-ish: **bold headings**, - bullets).

    Returns:
        A list of {'type': 'h3' | 'bullet' | 'body', 'content': str} dicts,
        in document order; blank lines are dropped.
    """
    # Fix: the previous patterns were corrupted (r'\\\d\.\d\s+(.?)\\*:' etc. —
    # '\*\*' lost to markdown mangling, and '(.?)' captured at most ONE char),
    # so headings were never recognized. These match the intended markup.
    heading_pattern = re.compile(r'^\*\*(.+?)\*\*:?\s*$')  # **Heading** / **Heading:**
    bullet_pattern = re.compile(r'^[-•]\s*(.*)')

    parsed_elements = []
    for raw_line in text.split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        heading = heading_pattern.match(line)
        if heading:
            parsed_elements.append({'type': 'h3', 'content': heading.group(1).rstrip(':').strip()})
            continue
        bullet = bullet_pattern.match(line)
        if bullet:
            parsed_elements.append({'type': 'bullet', 'content': bullet.group(1)})
            continue
        parsed_elements.append({'type': 'body', 'content': line})
    return parsed_elements
327
+
328
def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text: str):
    """Render the Gemini memo as a styled PDF at output_path.

    Returns True on success, False on any failure (errors are logged, not
    raised). `analysis_data` is currently unused in the layout itself.
    """
    try:
        doc = SimpleDocTemplate(output_path, pagesize=letter, rightMargin=0.75*inch, leftMargin=0.75*inch, topMargin=1.2*inch, bottomMargin=1*inch)
        styles = getSampleStyleSheet()
        # Style palette. NOTE(review): h1 is defined but never applied below.
        h1 = ParagraphStyle(name='Heading1', fontSize=18, leading=22, spaceAfter=12, alignment=1, textColor=colors.HexColor('#00205B'), fontName='Helvetica-Bold')
        h2 = ParagraphStyle(name='Heading2', fontSize=14, leading=18, spaceBefore=18, spaceAfter=10, textColor=colors.HexColor('#003366'), fontName='Helvetica-Bold')
        h3 = ParagraphStyle(name='Heading3', parent=h2, fontSize=11, spaceBefore=10, spaceAfter=4, textColor=colors.HexColor('#2E8B57'), fontName='Helvetica-Bold')
        body_text = ParagraphStyle(name='BodyText', parent=styles['Normal'], fontSize=10, leading=14, spaceAfter=6, fontName='Helvetica')
        bullet_style = ParagraphStyle(name='Bullet', parent=body_text, leftIndent=20, bulletIndent=10, spaceAfter=4)
        story = []

        # Page decoration callback; `doc` here shadows the outer template on
        # purpose (reportlab passes the doc template to the callback).
        def header_footer(canvas, doc):
            canvas.saveState()
            canvas.setFont('Helvetica', 9)
            canvas.setFillColor(colors.grey)
            canvas.drawString(doc.leftMargin, 0.5 * inch, f"Page {doc.page} | EvalBot Confidential Report")
            canvas.setStrokeColor(colors.HexColor('#003366'))
            canvas.setLineWidth(0.5)
            canvas.line(doc.leftMargin, doc.height + 0.8*inch, doc.width + doc.leftMargin, doc.height + 0.8*inch)
            canvas.setFont('Helvetica-Bold', 10)
            canvas.setFillColor(colors.HexColor('#003366'))
            canvas.drawString(doc.leftMargin, doc.height + 0.9*inch, "Interview Performance Analysis")
            canvas.restoreState()

        # Build the story from the parsed Gemini report.
        # NOTE(review): parse_gemini_report emits only h3/bullet/body, so the
        # 'h2' branch below appears to be dead — confirm before removing.
        parsed_report = parse_gemini_report(gemini_report_text)
        for element in parsed_report:
            if element['type'] == 'h2': story.append(Paragraph(element['content'], h2))
            elif element['type'] == 'h3': story.append(Paragraph(element['content'], h3))
            elif element['type'] == 'bullet': story.append(Paragraph(f" {element['content']}", bullet_style))
            else: story.append(Paragraph(element['content'], body_text))

        doc.build(story, onFirstPage=header_footer, onLaterPages=header_footer)
        return True
    except Exception as e:
        logger.error(f"Enhanced PDF creation failed: {str(e)}", exc_info=True)
        return False
364
 
 
 
 
 
 
 
 
 
 
 
365
 
366
+ # --- MAIN ORCHESTRATOR FUNCTION ---
367
def process_interview(audio_path_or_url: str):
    """Run the full pipeline for one interview recording.

    Downloads (if given a URL), converts to wav, transcribes, diarizes,
    analyzes voice/content, and writes a PDF report plus a JSON dump.

    Args:
        audio_path_or_url: Local file path or http(s) URL of the recording.

    Returns:
        Dict with 'pdf_path', 'json_path', 'pdf_filename', 'json_filename'.

    Raises:
        Exception: re-raises any pipeline failure after logging it.
    """
    local_audio_path, wav_file, is_downloaded = None, None, False
    try:
        # Pull the user id off the current Celery task when running as a worker.
        user_id_from_task = "unknown_user"
        try:
            from celery_worker import celery_app
            if celery_app.current_task:
                user_id_from_task = celery_app.current_task.request.kwargs.get('item_data', {}).get('user_id', 'unknown_user')
        except (ImportError, AttributeError):
            pass  # Celery might not be in the context if run locally

        logger.info(f"Starting processing for {audio_path_or_url}")
        if audio_path_or_url.startswith(('http://', 'https://')):
            local_audio_path = download_audio_from_url(audio_path_or_url)
            is_downloaded = True
        else:
            local_audio_path = audio_path_or_url

        wav_file = convert_to_wav(local_audio_path)
        transcript = transcribe(wav_file)

        for u in transcript['utterances']:
            u['prosodic_features'] = extract_prosodic_features(wav_file, u['start'], u['end'])

        utterances_with_speakers = identify_speakers(transcript, wav_file)

        # NOTE: Using alternating role classification as decided.
        for i, u in enumerate(utterances_with_speakers):
            u['role'] = 'Interviewer' if i % 2 == 0 else 'Interviewee'
        classified_utterances = utterances_with_speakers

        voice_analysis = analyze_interviewee_voice(wav_file, classified_utterances)
        content_analysis = analyze_text_content(classified_utterances)

        analysis_data = {
            'user_id': user_id_from_task,
            'transcript': classified_utterances,
            'speakers': list(set(u['speaker'] for u in classified_utterances)),
            'voice_analysis': voice_analysis,
            'advanced_content_analysis': content_analysis,
            'text_analysis': {
                # Fix: extract_prosodic_features returns {} on failure, which
                # made the subscript here raise KeyError; default to 0.0.
                'total_duration': sum(u['prosodic_features'].get('duration', 0.0) for u in classified_utterances),
                'speaker_turns': len(classified_utterances)
            }
        }

        analysis_data['acceptance_probability'] = calculate_acceptance_probability(analysis_data)
        gemini_report_text = generate_report(analysis_data)

        base_name = str(uuid.uuid4())
        pdf_path = os.path.join(OUTPUT_DIR, f"{base_name}_report.pdf")
        json_path = os.path.join(OUTPUT_DIR, f"{base_name}_analysis.json")

        create_pdf_report(analysis_data, pdf_path, gemini_report_text)

        with open(json_path, 'w') as f:
            json.dump(convert_to_serializable(analysis_data), f, indent=2)

        logger.info(f"Processing completed for {audio_path_or_url}")

        return {
            'pdf_path': pdf_path,
            'json_path': json_path,
            'pdf_filename': os.path.basename(pdf_path),
            'json_filename': os.path.basename(json_path)
        }

    except Exception as e:
        logger.error(f"Processing failed for {audio_path_or_url}: {str(e)}", exc_info=True)
        raise

    finally:
        # Fix: cleanup was unguarded — an os.remove failure here would mask
        # the real result/exception. Cleanup is best-effort only.
        if wav_file and os.path.exists(wav_file):
            try:
                os.remove(wav_file)
            except OSError as cleanup_err:
                logger.warning(f"Failed to remove temp wav {wav_file}: {cleanup_err}")
        if is_downloaded and local_audio_path and os.path.exists(local_audio_path):
            try:
                os.remove(local_audio_path)
                logger.info(f"Cleaned up temporary downloaded file: {local_audio_path}")
            except OSError as cleanup_err:
                logger.warning(f"Failed to remove downloaded file {local_audio_path}: {cleanup_err}")