norhan12 committed on
Commit
8474847
·
verified ·
1 Parent(s): 28b5bc1

Update process_interview.py

Browse files
Files changed (1) hide show
  1. process_interview.py +432 -215
process_interview.py CHANGED
@@ -19,15 +19,16 @@ from typing import Dict, List, Tuple
19
  import logging
20
  import tempfile
21
  from reportlab.lib.pagesizes import letter
22
- from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, Image, HRFlowable
23
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
24
  from reportlab.lib.units import inch
25
  from reportlab.lib import colors
26
  import matplotlib.pyplot as plt
27
  import matplotlib
28
  matplotlib.use('Agg')
 
29
  import io
30
- from transformers import AutoTokenizer, AutoModel, pipeline
31
  import spacy
32
  import google.generativeai as genai
33
  import joblib
@@ -35,10 +36,12 @@ from concurrent.futures import ThreadPoolExecutor
35
 
36
  # Setup logging
37
  logging.basicConfig(level=logging.INFO)
38
- logger = logging.getLogger(__name__)
39
  logging.getLogger("nemo_logging").setLevel(logging.ERROR)
 
40
 
41
  # Configuration
 
42
  OUTPUT_DIR = "./processed_audio"
43
  os.makedirs(OUTPUT_DIR, exist_ok=True)
44
 
@@ -47,21 +50,18 @@ PINECONE_KEY = os.getenv("PINECONE_KEY")
47
  ASSEMBLYAI_KEY = os.getenv("ASSEMBLYAI_KEY")
48
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
49
 
50
- # --- All your original helper functions ---
51
- # I am including them exactly as you last provided them.
52
- # --- HELPER FUNCTION to download from URL ---
53
  def download_audio_from_url(url: str) -> str:
 
54
  try:
55
  temp_dir = tempfile.gettempdir()
56
- temp_filename = f"{uuid.uuid4()}.tmp_audio"
57
- local_filename = os.path.join(temp_dir, temp_filename)
58
- logger.info(f"Downloading audio from {url} to {local_filename}")
59
  with requests.get(url, stream=True) as r:
60
  r.raise_for_status()
61
- with open(local_filename, 'wb') as f:
62
  for chunk in r.iter_content(chunk_size=8192):
63
  f.write(chunk)
64
- return local_filename
65
  except Exception as e:
66
  logger.error(f"Failed to download audio from URL {url}: {e}")
67
  raise
@@ -71,7 +71,12 @@ def initialize_services():
71
  pc = Pinecone(api_key=PINECONE_KEY)
72
  index_name = "interview-speaker-embeddings"
73
  if index_name not in pc.list_indexes().names():
74
- pc.create_index(name=index_name, dimension=192, metric="cosine", spec=ServerlessSpec(cloud="aws", region="us-east-1"))
 
 
 
 
 
75
  index = pc.Index(index_name)
76
  genai.configure(api_key=GEMINI_API_KEY)
77
  gemini_model = genai.GenerativeModel('gemini-1.5-flash')
@@ -87,8 +92,12 @@ logger.info(f"Using device: {device}")
87
 
88
  def load_speaker_model():
89
  try:
 
90
  torch.set_num_threads(5)
91
- model = EncDecSpeakerLabelModel.from_pretrained("nvidia/speakerverification_en_titanet_large", map_location=device)
 
 
 
92
  model.eval()
93
  return model
94
  except Exception as e:
@@ -108,7 +117,8 @@ speaker_model, nlp, tokenizer, llm_model = load_models()
108
  def convert_to_wav(audio_path: str, output_dir: str = OUTPUT_DIR) -> str:
109
  try:
110
  audio = AudioSegment.from_file(audio_path)
111
- if audio.channels > 1: audio = audio.set_channels(1)
 
112
  audio = audio.set_frame_rate(16000)
113
  wav_file = os.path.join(output_dir, f"{uuid.uuid4()}.wav")
114
  audio.export(wav_file, format="wav")
@@ -121,14 +131,13 @@ def extract_prosodic_features(audio_path: str, start_ms: int, end_ms: int) -> Di
121
  try:
122
  audio = AudioSegment.from_file(audio_path)
123
  segment = audio[start_ms:end_ms]
124
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
125
- segment.export(tmp.name, format="wav")
126
- y, sr = librosa.load(tmp.name, sr=16000)
127
- os.remove(tmp.name)
128
- pitches, _ = librosa.piptrack(y=y, sr=sr)
129
  pitches = pitches[pitches > 0]
130
- return {
131
- 'duration': (end_ms - start_ms) / 1000.0,
132
  'mean_pitch': float(np.mean(pitches)) if len(pitches) > 0 else 0.0,
133
  'min_pitch': float(np.min(pitches)) if len(pitches) > 0 else 0.0,
134
  'max_pitch': float(np.max(pitches)) if len(pitches) > 0 else 0.0,
@@ -138,132 +147,277 @@ def extract_prosodic_features(audio_path: str, start_ms: int, end_ms: int) -> Di
138
  'intensityMax': float(np.max(librosa.feature.rms(y=y)[0])),
139
  'intensitySD': float(np.std(librosa.feature.rms(y=y)[0])),
140
  }
 
 
141
  except Exception as e:
142
  logger.error(f"Feature extraction failed: {str(e)}")
143
- return {}
 
 
 
 
144
 
145
  def transcribe(audio_path: str) -> Dict:
146
  try:
147
  with open(audio_path, 'rb') as f:
148
- upload_response = requests.post("https://api.assemblyai.com/v2/upload", headers={"authorization": ASSEMBLYAI_KEY}, data=f)
 
 
 
 
149
  audio_url = upload_response.json()['upload_url']
150
- transcript_response = requests.post("https://api.assemblyai.com/v2/transcript", headers={"authorization": ASSEMBLYAI_KEY}, json={"audio_url": audio_url, "speaker_labels": True, "filter_profanity": True})
 
 
 
 
 
 
 
 
151
  transcript_id = transcript_response.json()['id']
152
  while True:
153
- result = requests.get(f"https://api.assemblyai.com/v2/transcript/{transcript_id}", headers={"authorization": ASSEMBLYAI_KEY}).json()
154
- if result['status'] == 'completed': return result
155
- elif result['status'] == 'error': raise Exception(f"AssemblyAI Error: {result.get('error')}")
 
 
 
 
 
156
  time.sleep(5)
157
  except Exception as e:
158
  logger.error(f"Transcription failed: {str(e)}")
159
  raise
160
 
161
- def process_utterance(utterance, full_audio):
162
  try:
163
- start, end = utterance['start'], utterance['end']
 
164
  segment = full_audio[start:end]
165
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
166
- segment.export(tmp.name, format="wav")
167
- with torch.no_grad():
168
- embedding = speaker_model.get_embedding(tmp.name).cpu().numpy()
169
- os.remove(tmp.name)
170
  embedding_list = embedding.flatten().tolist()
171
- query_result = index.query(vector=embedding_list, top_k=1, include_metadata=True)
172
- if query_result['matches'] and query_result['matches'][0]['score'] > 0.75:
 
 
 
 
173
  speaker_id = query_result['matches'][0]['id']
174
  speaker_name = query_result['matches'][0]['metadata']['speaker_name']
175
  else:
176
- speaker_id = f"speaker_{uuid.uuid4().hex[:6]}"
177
- speaker_name = f"Speaker_{speaker_id[-4:].upper()}"
178
  index.upsert([(speaker_id, embedding_list, {"speaker_name": speaker_name})])
179
- return {**utterance, 'speaker': speaker_name, 'speaker_id': speaker_id}
 
 
 
 
 
 
180
  except Exception as e:
181
  logger.error(f"Utterance processing failed: {str(e)}", exc_info=True)
182
- return {**utterance, 'speaker': 'Unknown', 'speaker_id': 'unknown'}
 
 
 
 
 
183
 
184
  def identify_speakers(transcript: Dict, wav_file: str) -> List[Dict]:
185
  try:
186
  full_audio = AudioSegment.from_wav(wav_file)
 
187
  with ThreadPoolExecutor(max_workers=5) as executor:
188
- futures = [executor.submit(process_utterance, u, full_audio) for u in transcript['utterances']]
 
 
 
189
  results = [f.result() for f in futures]
190
  return results
191
  except Exception as e:
192
  logger.error(f"Speaker identification failed: {str(e)}")
193
  raise
194
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  def analyze_interviewee_voice(audio_path: str, utterances: List[Dict]) -> Dict:
196
  try:
197
  y, sr = librosa.load(audio_path, sr=16000)
198
- interviewee_utterances = [u for u in utterances if u.get('role') == 'Interviewee']
199
- if not interviewee_utterances: return {'error': 'No interviewee utterances found'}
200
- segments = [y[int(u['start']*sr/1000):int(u['end']*sr/1000)] for u in interviewee_utterances]
 
 
 
 
 
201
  total_duration = sum(u['prosodic_features']['duration'] for u in interviewee_utterances)
202
  total_words = sum(len(u['text'].split()) for u in interviewee_utterances)
203
  speaking_rate = total_words / total_duration if total_duration > 0 else 0
204
  filler_words = ['um', 'uh', 'like', 'you know', 'so', 'i mean']
205
  filler_count = sum(sum(u['text'].lower().count(fw) for fw in filler_words) for u in interviewee_utterances)
206
  filler_ratio = filler_count / total_words if total_words > 0 else 0
207
- repetition_score = 0
208
- pitches, intensities = [], []
 
 
 
 
 
209
  for segment in segments:
210
- if len(segment) == 0: continue
211
- f0, voiced_flag, _ = librosa.pyin(segment, fmin=librosa.note_to_hz('C2'), fmax=librosa.note_to_hz('C7'), sr=sr)
212
  pitches.extend(f0[voiced_flag])
213
- intensities.extend(librosa.feature.rms(y=segment)[0])
214
  pitch_mean = np.mean(pitches) if len(pitches) > 0 else 0
215
- intensity_std = np.std(intensities) if len(intensities) > 0 else 0
216
  jitter = np.mean(np.abs(np.diff(pitches))) / pitch_mean if len(pitches) > 1 and pitch_mean > 0 else 0
217
- shimmer = np.mean(np.abs(np.diff(intensities))) / np.mean(intensities) if len(intensities) > 1 and np.mean(intensities) > 0 else 0
218
- anxiety_score = (0.6 * (np.std(pitches)/pitch_mean if pitch_mean > 0 else 0) + 0.4 * (jitter + shimmer))
219
- confidence_score = 0.7 * (1/(1+intensity_std)) + 0.3 * (1/(1+filler_ratio))
 
 
 
 
 
 
220
  hesitation_score = filler_ratio + repetition_score
 
 
 
221
  return {
222
- 'speaking_rate': float(round(speaking_rate, 2)), 'filler_ratio': float(round(filler_ratio, 4)), 'repetition_score': float(round(repetition_score, 4)),
 
 
 
 
223
  'composite_scores': {'anxiety': float(round(anxiety_score, 4)), 'confidence': float(round(confidence_score, 4)), 'hesitation': float(round(hesitation_score, 4))},
224
- 'interpretation': {
225
- 'anxiety_level': 'high' if anxiety_score > 0.15 else 'moderate' if anxiety_score > 0.07 else 'low',
226
- 'confidence_level': 'high' if confidence_score > 0.7 else 'moderate' if confidence_score > 0.5 else 'low',
227
- 'fluency_level': 'fluent' if filler_ratio < 0.05 and repetition_score < 0.1 else 'disfluent'
228
- }
229
  }
230
  except Exception as e:
231
  logger.error(f"Voice analysis failed: {str(e)}")
232
  return {'error': str(e)}
233
 
234
-
235
- def analyze_text_content(utterances: List[Dict]) -> Dict:
236
- interviewee_utterances = [u['text'] for u in utterances if u.get('role') == 'Interviewee']
237
- if not interviewee_utterances:
238
- return {"overall_sentiment": {"label": "NEUTRAL", "score": 1.0}, "mentioned_technologies": [], "mentioned_soft_skills": []}
239
-
240
- full_text = " ".join(interviewee_utterances)
241
- sentiment_results = sentiment_pipeline(full_text, truncation=True, max_length=512)
242
-
243
- tech_keywords = ['python', 'react', 'aws', 'docker', 'api', 'fastapi', 'machine learning', 'pytorch', 'tensorflow']
244
- soft_skills = ['leadership', 'teamwork', 'communication', 'problem solving', 'management', 'planning']
245
-
246
- found_tech = [kw for kw in tech_keywords if kw in full_text.lower()]
247
- found_skills = [skill for skill in soft_skills if skill in full_text.lower()]
248
-
249
- return {
250
- "overall_sentiment": sentiment_results[0],
251
- "mentioned_technologies": list(set(found_tech)),
252
- "mentioned_soft_skills": list(set(found_skills))
253
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
254
 
255
  def calculate_acceptance_probability(analysis_data: Dict) -> float:
256
- # Your full, detailed function
257
  voice = analysis_data.get('voice_analysis', {})
258
  if 'error' in voice: return 0.0
259
  w_confidence, w_anxiety, w_fluency, w_speaking_rate, w_filler_repetition, w_content_strengths = 0.4, -0.3, 0.2, 0.1, -0.1, 0.2
260
  confidence_score = voice.get('composite_scores', {}).get('confidence', 0.0)
261
  anxiety_score = voice.get('composite_scores', {}).get('anxiety', 0.0)
262
- fluency_level = voice.get('interpretation', {}).get('fluency_level', 'disfluent')
263
  speaking_rate = voice.get('speaking_rate', 0.0)
264
  filler_ratio = voice.get('filler_ratio', 0.0)
265
  repetition_score = voice.get('repetition_score', 0.0)
266
- fluency_map = {'fluent': 1.0, 'moderate': 0.5, 'disfluent': 0.0}
267
  fluency_val = fluency_map.get(fluency_level, 0.0)
268
  ideal_speaking_rate = 2.5
269
  speaking_rate_deviation = abs(speaking_rate - ideal_speaking_rate)
@@ -278,55 +432,42 @@ def calculate_acceptance_probability(analysis_data: Dict) -> float:
278
  acceptance_probability = max(0.0, min(1.0, normalized_score))
279
  return float(f"{acceptance_probability * 100:.2f}")
280
 
281
- def convert_to_serializable(obj):
282
- if isinstance(obj, np.generic): return obj.item()
283
- if isinstance(obj, dict): return {k: convert_to_serializable(v) for k, v in obj.items()}
284
- if isinstance(obj, list): return [convert_to_serializable(i) for i in obj]
285
- if isinstance(obj, np.ndarray): return obj.tolist()
286
- return obj
287
-
288
- # --- NEW: HR Persona Report Generation ---
289
  def generate_report(analysis_data: Dict) -> str:
290
  try:
291
  voice = analysis_data.get('voice_analysis', {})
292
- voice_interp = "Voice analysis data was not available."
293
- if voice and 'error' not in voice:
294
- voice_interp = (f"The candidate's voice profile indicates a '{voice.get('interpretation', {}).get('confidence_level', 'N/A').upper()}' confidence level "
295
- f"and a '{voice.get('interpretation', {}).get('anxiety_level', 'N/A').upper()}' anxiety level. "
296
- f"Fluency was rated as '{voice.get('interpretation', {}).get('fluency_level', 'N/A').upper()}'.")
297
-
298
- content = analysis_data.get('advanced_content_analysis', {})
299
- content_interp = (f"Sentiment of responses was generally '{content.get('overall_sentiment', {}).get('label', 'N/A')}'. "
300
- f"Mentioned technical skills: {', '.join(content.get('mentioned_technologies', [])) or 'None'}. "
301
- f"Mentioned soft skills: {', '.join(content.get('mentioned_soft_skills', [])) or 'None'}.")
302
-
303
- prob = analysis_data.get('acceptance_probability')
304
-
305
  prompt = f"""
306
- *Persona:* You are a Senior HR Partner writing a candidate evaluation memo for the hiring manager.
307
- *Task:* Write a professional, objective, and concise evaluation based on the data below.
308
- *Tone:* Analytical and formal.
309
-
310
- *CANDIDATE EVALUATION MEMORANDUM*
311
- *CONFIDENTIAL*
312
-
313
- *Candidate ID:* {analysis_data.get('user_id', 'N/A')}
314
- *Analysis Date:* {time.strftime('%Y-%m-%d')}
315
- *Estimated Acceptance Probability:* {prob:.2f}%
316
-
317
- *1. Overall Recommendation:*
318
- Provide a clear, one-sentence recommendation (e.g., "Highly recommend proceeding to the final round," or "Recommend with reservations due to...").
319
-
320
- *2. Key Competency Assessment (Content & Skills):*
321
- - Summarize the candidate's key strengths and areas for development based on the content analysis.
322
- - *Data for Content Analysis:* {content_interp}
323
-
324
- *3. Communication Style (Voice & Speech Analysis):*
325
- - Evaluate the candidate's communication style (confidence, clarity, nervousness).
326
- - *Data for Voice Analysis:* {voice_interp}
327
-
328
- *4. Actionable Next Steps:*
329
- - Suggest specific questions or topics for the next interviewer to focus on.
330
  """
331
  response = gemini_model.generate_content(prompt)
332
  return response.text
@@ -334,63 +475,156 @@ def generate_report(analysis_data: Dict) -> str:
334
  logger.error(f"Report generation failed: {str(e)}")
335
  return f"Error generating report: {str(e)}"
336
 
337
- # --- NEW: Polished PDF Creation ---
338
- def parse_gemini_report(text: str) -> list:
339
- parsed_elements = []
340
- patterns = {
341
- 'h3': r'^\s*\\\d\.\d\s+(.?)\\*:',
342
- 'bullet': r'^\s*[-•]\s(.*)',
343
- 'bold': r'^\s*\\(.?)\\*'
344
- }
345
- for line in text.split('\n'):
346
- line = line.strip()
347
- if not line: continue
348
- match_h3 = re.match(patterns['h3'], line)
349
- if match_h3:
350
- parsed_elements.append({'type': 'h3', 'content': match_h3.group(1)})
351
- continue
352
- match_bold = re.match(patterns['bold'], line)
353
- if match_bold:
354
- if not re.match(r'^\d\.', match_bold.group(1)):
355
- parsed_elements.append({'type': 'h3', 'content': match_bold.group(1)})
356
- continue
357
- match_bullet = re.match(patterns['bullet'], line)
358
- if match_bullet:
359
- parsed_elements.append({'type': 'bullet', 'content': match_bullet.group(1)})
360
- continue
361
- parsed_elements.append({'type': 'body', 'content': line})
362
- return parsed_elements
363
-
364
  def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text: str):
365
  try:
366
- doc = SimpleDocTemplate(output_path, pagesize=letter, rightMargin=0.75*inch, leftMargin=0.75*inch, topMargin=1.2*inch, bottomMargin=1*inch)
 
 
367
  styles = getSampleStyleSheet()
368
- h1 = ParagraphStyle(name='Heading1', fontSize=18, leading=22, spaceAfter=12, alignment=1, textColor=colors.HexColor('#00205B'), fontName='Helvetica-Bold')
369
- h2 = ParagraphStyle(name='Heading2', fontSize=14, leading=18, spaceBefore=18, spaceAfter=10, textColor=colors.HexColor('#003366'), fontName='Helvetica-Bold')
370
- h3 = ParagraphStyle(name='Heading3', parent=h2, fontSize=11, spaceBefore=10, spaceAfter=4, textColor=colors.HexColor('#2E8B57'), fontName='Helvetica-Bold')
371
- body_text = ParagraphStyle(name='BodyText', parent=styles['Normal'], fontSize=10, leading=14, spaceAfter=6, fontName='Helvetica')
372
- bullet_style = ParagraphStyle(name='Bullet', parent=body_text, leftIndent=20, bulletIndent=10, spaceAfter=4)
373
  story = []
 
374
  def header_footer(canvas, doc):
375
  canvas.saveState()
376
  canvas.setFont('Helvetica', 9)
377
  canvas.setFillColor(colors.grey)
378
- canvas.drawString(doc.leftMargin, 0.5 * inch, f"Page {doc.page} | EvalBot Confidential Report")
379
- canvas.setStrokeColor(colors.HexColor('#003366'))
380
- canvas.setLineWidth(0.5)
381
- canvas.line(doc.leftMargin, doc.height + 0.8*inch, doc.width + doc.leftMargin, doc.height + 0.8*inch)
382
  canvas.setFont('Helvetica-Bold', 10)
383
- canvas.setFillColor(colors.HexColor('#003366'))
384
- canvas.drawString(doc.leftMargin, doc.height + 0.9*inch, "Interview Performance Analysis")
385
  canvas.restoreState()
386
 
387
- # Build the story from the parsed Gemini report
388
- parsed_report = parse_gemini_report(gemini_report_text)
389
- for element in parsed_report:
390
- if element['type'] == 'h2': story.append(Paragraph(element['content'], h2))
391
- elif element['type'] == 'h3': story.append(Paragraph(element['content'], h3))
392
- elif element['type'] == 'bullet': story.append(Paragraph(f"• {element['content']}", bullet_style))
393
- else: story.append(Paragraph(element['content'], body_text))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
394
 
395
  doc.build(story, onFirstPage=header_footer, onLaterPages=header_footer)
396
  return True
@@ -398,81 +632,64 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text:
398
  logger.error(f"Enhanced PDF creation failed: {str(e)}", exc_info=True)
399
  return False
400
 
 
 
 
 
 
 
401
 
402
- # --- MAIN ORCHESTRATOR FUNCTION ---
403
  def process_interview(audio_path_or_url: str):
404
- local_audio_path, wav_file, is_downloaded = None, None, False
 
 
405
  try:
406
- user_id_from_task = "unknown_user"
407
- try:
408
- from celery_worker import celery_app
409
- if celery_app.current_task:
410
- user_id_from_task = celery_app.current_task.request.kwargs.get('item_data', {}).get('user_id', 'unknown_user')
411
- except (ImportError, AttributeError):
412
- pass # Celery might not be in the context if run locally
413
-
414
  logger.info(f"Starting processing for {audio_path_or_url}")
415
  if audio_path_or_url.startswith(('http://', 'https://')):
416
  local_audio_path = download_audio_from_url(audio_path_or_url)
417
  is_downloaded = True
418
  else:
419
  local_audio_path = audio_path_or_url
420
-
421
  wav_file = convert_to_wav(local_audio_path)
422
  transcript = transcribe(wav_file)
423
-
424
- for u in transcript['utterances']:
425
- u['prosodic_features'] = extract_prosodic_features(wav_file, u['start'], u['end'])
426
-
427
  utterances_with_speakers = identify_speakers(transcript, wav_file)
428
-
429
- # NOTE: Using alternating role classification as decided.
430
- for i, u in enumerate(utterances_with_speakers):
431
- u['role'] = 'Interviewer' if i % 2 == 0 else 'Interviewee'
432
- classified_utterances = utterances_with_speakers
433
-
 
 
434
  voice_analysis = analyze_interviewee_voice(wav_file, classified_utterances)
435
- content_analysis = analyze_text_content(classified_utterances)
436
-
437
  analysis_data = {
438
- 'user_id': user_id_from_task,
439
  'transcript': classified_utterances,
440
  'speakers': list(set(u['speaker'] for u in classified_utterances)),
441
  'voice_analysis': voice_analysis,
442
- 'advanced_content_analysis': content_analysis,
443
  'text_analysis': {
444
  'total_duration': sum(u['prosodic_features']['duration'] for u in classified_utterances),
445
  'speaker_turns': len(classified_utterances)
446
  }
447
  }
448
-
449
  analysis_data['acceptance_probability'] = calculate_acceptance_probability(analysis_data)
450
  gemini_report_text = generate_report(analysis_data)
451
-
452
  base_name = str(uuid.uuid4())
453
  pdf_path = os.path.join(OUTPUT_DIR, f"{base_name}_report.pdf")
454
  json_path = os.path.join(OUTPUT_DIR, f"{base_name}_analysis.json")
455
-
456
- create_pdf_report(analysis_data, pdf_path, gemini_report_text)
457
-
458
  with open(json_path, 'w') as f:
459
- json.dump(convert_to_serializable(analysis_data), f, indent=2)
460
-
461
  logger.info(f"Processing completed for {audio_path_or_url}")
462
-
463
- return {
464
- 'pdf_path': pdf_path,
465
- 'json_path': json_path,
466
- 'pdf_filename': os.path.basename(pdf_path),
467
- 'json_filename': os.path.basename(json_path)
468
- }
469
-
470
  except Exception as e:
471
  logger.error(f"Processing failed for {audio_path_or_url}: {str(e)}", exc_info=True)
472
  raise
473
-
474
  finally:
475
- if wav_file and os.path.exists(wav_file): os.remove(wav_file)
 
476
  if is_downloaded and local_audio_path and os.path.exists(local_audio_path):
477
  os.remove(local_audio_path)
478
  logger.info(f"Cleaned up temporary downloaded file: {local_audio_path}")
 
19
  import logging
20
  import tempfile
21
  from reportlab.lib.pagesizes import letter
22
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, Image
23
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
24
  from reportlab.lib.units import inch
25
  from reportlab.lib import colors
26
  import matplotlib.pyplot as plt
27
  import matplotlib
28
  matplotlib.use('Agg')
29
+ from reportlab.platypus import Image
30
  import io
31
+ from transformers import AutoTokenizer, AutoModel
32
  import spacy
33
  import google.generativeai as genai
34
  import joblib
 
36
 
37
# Setup logging
logging.basicConfig(level=logging.INFO)
# Module-level logger named after this module. The dunder ``__name__`` is
# required here: a bare ``_name_`` is an undefined name and raises NameError
# as soon as the module is imported.
logger = logging.getLogger(__name__)
# Only ERROR and above is let through from the "nemo_logging" and "nemo" loggers.
logging.getLogger("nemo_logging").setLevel(logging.ERROR)
logging.getLogger("nemo").setLevel(logging.ERROR)
42
 
43
  # Configuration
44
+ AUDIO_DIR = "./uploads"
45
  OUTPUT_DIR = "./processed_audio"
46
  os.makedirs(OUTPUT_DIR, exist_ok=True)
47
 
 
50
  ASSEMBLYAI_KEY = os.getenv("ASSEMBLYAI_KEY")
51
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
52
 
 
 
 
53
def download_audio_from_url(url: str) -> str:
    """Download an audio file from a URL to a temporary local path.

    The response body is streamed to disk in 8 KiB chunks so the whole file is
    never held in memory.

    Args:
        url: Address of the audio file to fetch.

    Returns:
        Path of the downloaded file inside the system temp directory. The file
        is not deleted here; the caller owns it.

    Raises:
        Exception: Any error raised while requesting or writing the file is
            logged and re-raised unchanged.
    """
    temp_path = None
    try:
        temp_dir = tempfile.gettempdir()
        temp_path = os.path.join(temp_dir, f"{uuid.uuid4()}.tmp_audio")
        logger.info(f"Downloading audio from {url} to {temp_path}")
        # (connect, read) timeouts: without them a stalled server blocks the
        # caller indefinitely.
        with requests.get(url, stream=True, timeout=(10, 60)) as r:
            r.raise_for_status()
            with open(temp_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        return temp_path
    except Exception as e:
        logger.error(f"Failed to download audio from URL {url}: {e}")
        # Do not leave a truncated download behind; the caller never learns
        # the path when we raise, so nobody else could clean it up.
        if temp_path and os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError as cleanup_error:
                # Best-effort cleanup: keep the original error as the one raised.
                logger.warning(f"Could not remove partial download {temp_path}: {cleanup_error}")
        raise
 
71
  pc = Pinecone(api_key=PINECONE_KEY)
72
  index_name = "interview-speaker-embeddings"
73
  if index_name not in pc.list_indexes().names():
74
+ pc.create_index(
75
+ name=index_name,
76
+ dimension=192,
77
+ metric="cosine",
78
+ spec=ServerlessSpec(cloud="aws", region="us-east-1")
79
+ )
80
  index = pc.Index(index_name)
81
  genai.configure(api_key=GEMINI_API_KEY)
82
  gemini_model = genai.GenerativeModel('gemini-1.5-flash')
 
92
 
93
  def load_speaker_model():
94
  try:
95
+ import torch
96
  torch.set_num_threads(5)
97
+ model = EncDecSpeakerLabelModel.from_pretrained(
98
+ "nvidia/speakerverification_en_titanet_large",
99
+ map_location=torch.device('cpu')
100
+ )
101
  model.eval()
102
  return model
103
  except Exception as e:
 
117
  def convert_to_wav(audio_path: str, output_dir: str = OUTPUT_DIR) -> str:
118
  try:
119
  audio = AudioSegment.from_file(audio_path)
120
+ if audio.channels > 1:
121
+ audio = audio.set_channels(1)
122
  audio = audio.set_frame_rate(16000)
123
  wav_file = os.path.join(output_dir, f"{uuid.uuid4()}.wav")
124
  audio.export(wav_file, format="wav")
 
131
  try:
132
  audio = AudioSegment.from_file(audio_path)
133
  segment = audio[start_ms:end_ms]
134
+ temp_path = os.path.join(OUTPUT_DIR, f"temp_{uuid.uuid4()}.wav")
135
+ segment.export(temp_path, format="wav")
136
+ y, sr = librosa.load(temp_path, sr=16000)
137
+ pitches = librosa.piptrack(y=y, sr=sr)[0]
 
138
  pitches = pitches[pitches > 0]
139
+ features = {
140
+ 'duration': (end_ms - start_ms) / 1000,
141
  'mean_pitch': float(np.mean(pitches)) if len(pitches) > 0 else 0.0,
142
  'min_pitch': float(np.min(pitches)) if len(pitches) > 0 else 0.0,
143
  'max_pitch': float(np.max(pitches)) if len(pitches) > 0 else 0.0,
 
147
  'intensityMax': float(np.max(librosa.feature.rms(y=y)[0])),
148
  'intensitySD': float(np.std(librosa.feature.rms(y=y)[0])),
149
  }
150
+ os.remove(temp_path)
151
+ return features
152
  except Exception as e:
153
  logger.error(f"Feature extraction failed: {str(e)}")
154
+ return {
155
+ 'duration': 0.0, 'mean_pitch': 0.0, 'min_pitch': 0.0, 'max_pitch': 0.0,
156
+ 'pitch_sd': 0.0, 'intensityMean': 0.0, 'intensityMin': 0.0,
157
+ 'intensityMax': 0.0, 'intensitySD': 0.0
158
+ }
159
 
160
def transcribe(audio_path: str) -> Dict:
    """Transcribe an audio file through the AssemblyAI REST API.

    Uploads the file, requests a transcript with speaker labels and profanity
    filtering enabled, then polls every 5 seconds until the job reports
    ``completed`` or ``error``.

    Args:
        audio_path: Path of the local audio file to upload.

    Returns:
        The JSON body of the completed transcript as a dict.

    Raises:
        Exception: If the service reports ``status == 'error'``, or on any
            file, network, or HTTP failure. Every failure is logged and
            re-raised.
    """
    headers = {"authorization": ASSEMBLYAI_KEY}
    # (connect, read) timeouts so a stalled connection cannot hang forever.
    timeout = (10, 120)
    try:
        with open(audio_path, 'rb') as f:
            upload_response = requests.post(
                "https://api.assemblyai.com/v2/upload",
                headers=headers,
                data=f,
                timeout=timeout,
            )
        # Surface HTTP failures (bad key, quota, 5xx) as themselves instead of
        # as a misleading KeyError on the missing JSON field below.
        upload_response.raise_for_status()
        audio_url = upload_response.json()['upload_url']

        transcript_response = requests.post(
            "https://api.assemblyai.com/v2/transcript",
            headers=headers,
            json={
                "audio_url": audio_url,
                "speaker_labels": True,
                "filter_profanity": True
            },
            timeout=timeout,
        )
        transcript_response.raise_for_status()
        transcript_id = transcript_response.json()['id']

        while True:
            poll_response = requests.get(
                f"https://api.assemblyai.com/v2/transcript/{transcript_id}",
                headers=headers,
                timeout=timeout,
            )
            poll_response.raise_for_status()
            result = poll_response.json()
            if result['status'] == 'completed':
                return result
            if result['status'] == 'error':
                # .get(): a missing 'error' field must not mask the failure
                # with a KeyError.
                raise Exception(result.get('error'))
            time.sleep(5)
    except Exception as e:
        logger.error(f"Transcription failed: {str(e)}")
        raise
192
 
193
def process_utterance(utterance, full_audio, wav_file):
    """Attach a speaker identity to a single utterance.

    The utterance's slice of ``full_audio`` is exported to a temporary WAV,
    embedded with ``speaker_model``, and looked up in ``index``. A top match
    scoring above 0.7 reuses the stored speaker; otherwise a new
    ``unknown_<hex>`` speaker is created and upserted into the index.

    Args:
        utterance: Dict with at least ``'start'`` and ``'end'`` keys, used to
            slice ``full_audio`` (presumably milliseconds — verify against the
            transcript source).
        full_audio: Sliceable audio object whose slices support
            ``export(path, format="wav")``.
        wav_file: Unused here; kept so existing callers keep working.

    Returns:
        A copy of ``utterance`` extended with ``'speaker'``, ``'speaker_id'``
        and ``'embedding'``. On any failure the error is logged and the
        fallback values ``'Unknown'``, ``'unknown'`` and ``None`` are returned
        instead of raising.
    """
    try:
        start = utterance['start']
        end = utterance['end']
        segment = full_audio[start:end]
        temp_path = os.path.join(OUTPUT_DIR, f"temp_{uuid.uuid4()}.wav")
        try:
            segment.export(temp_path, format="wav")
            with torch.no_grad():
                embedding = speaker_model.get_embedding(temp_path).cpu().numpy()
        finally:
            # The file is only needed to compute the embedding. Removing it
            # here guarantees cleanup even when export or embedding fails;
            # removing it only on the success path leaked one file per error.
            if os.path.exists(temp_path):
                os.remove(temp_path)

        embedding_list = embedding.flatten().tolist()
        query_result = index.query(
            vector=embedding_list,
            top_k=1,
            include_metadata=True
        )
        matches = query_result['matches']
        if matches and matches[0]['score'] > 0.7:
            speaker_id = matches[0]['id']
            speaker_name = matches[0]['metadata']['speaker_name']
        else:
            speaker_id = f"unknown_{uuid.uuid4().hex[:6]}"
            speaker_name = f"Speaker_{speaker_id[-4:]}"
            index.upsert([(speaker_id, embedding_list, {"speaker_name": speaker_name})])
        return {
            **utterance,
            'speaker': speaker_name,
            'speaker_id': speaker_id,
            'embedding': embedding_list
        }
    except Exception as e:
        logger.error(f"Utterance processing failed: {str(e)}", exc_info=True)
        return {
            **utterance,
            'speaker': 'Unknown',
            'speaker_id': 'unknown',
            'embedding': None
        }
230
 
231
def identify_speakers(transcript: Dict, wav_file: str) -> List[Dict]:
    """Label every utterance of a transcript with a speaker.

    Loads the WAV once and fans the utterances out to ``process_utterance``
    on a pool of five worker threads.

    Args:
        transcript: Dict holding the utterances under the ``'utterances'`` key.
        wav_file: Path of the WAV file the utterances refer to.

    Returns:
        One result dict per utterance, in the same order as the input.

    Raises:
        Exception: Any failure is logged and re-raised.
    """
    try:
        audio = AudioSegment.from_wav(wav_file)
        items = transcript['utterances']

        def label(item):
            return process_utterance(item, audio, wav_file)

        # map() submits every task up front and yields results in input order.
        with ThreadPoolExecutor(max_workers=5) as pool:
            labelled = list(pool.map(label, items))
        return labelled
    except Exception as e:
        logger.error(f"Speaker identification failed: {str(e)}")
        raise
245
 
246
def train_role_classifier(utterances: List[Dict]):
    """Fit a random-forest role classifier on the given utterances.

    Each feature row is the nine prosodic values, the utterance's TF-IDF
    vector (up to 500 uni/bi-gram features), and five text cues: ends with
    '?', count of wh-words, word count, verb count, noun count. Labels are
    assigned purely by position: even-indexed utterances get 0, odd-indexed
    get 1.

    The fitted classifier, vectorizer and scaler are written to OUTPUT_DIR as
    'role_classifier.pkl', 'text_vectorizer.pkl' and 'feature_scaler.pkl'.

    Args:
        utterances: Dicts carrying ``'text'`` and ``'prosodic_features'``.

    Returns:
        Tuple ``(clf, vectorizer, scaler)``.

    Raises:
        Exception: Any failure is logged and re-raised.
    """
    prosodic_keys = (
        'duration', 'mean_pitch', 'min_pitch', 'max_pitch', 'pitch_sd',
        'intensityMean', 'intensityMin', 'intensityMax', 'intensitySD',
    )
    try:
        vectorizer = TfidfVectorizer(max_features=500, ngram_range=(1, 2))
        tfidf = vectorizer.fit_transform([item['text'] for item in utterances])

        features, labels = [], []
        for position, item in enumerate(utterances):
            prosodic = item['prosodic_features']
            row = [prosodic[key] for key in prosodic_keys]
            row += tfidf[position].toarray()[0].tolist()

            text = item['text']
            parsed = nlp(text)
            verb_total = len([tok for tok in parsed if tok.pos_ == 'VERB'])
            noun_total = len([tok for tok in parsed if tok.pos_ == 'NOUN'])
            wh_words = re.findall(r'\b(why|how|what|when|where|who|which)\b', text.lower())
            row += [
                int(text.endswith('?')),
                len(wh_words),
                len(text.split()),
                verb_total,
                noun_total,
            ]

            features.append(row)
            # Alternating labels: even position -> 0, odd position -> 1.
            labels.append(position % 2)

        scaler = StandardScaler()
        scaled = scaler.fit_transform(features)
        clf = RandomForestClassifier(
            n_estimators=150, max_depth=10, random_state=42, class_weight='balanced'
        )
        clf.fit(scaled, labels)

        artifacts = (
            (clf, 'role_classifier.pkl'),
            (vectorizer, 'text_vectorizer.pkl'),
            (scaler, 'feature_scaler.pkl'),
        )
        for obj, filename in artifacts:
            joblib.dump(obj, os.path.join(OUTPUT_DIR, filename))
        return clf, vectorizer, scaler
    except Exception as e:
        logger.error(f"Classifier training failed: {str(e)}")
        raise
284
+
285
def classify_roles(utterances: List[Dict], clf, vectorizer, scaler):
    """Predict 'Interviewer' / 'Interviewee' for every utterance.

    Builds, per utterance, a feature row of nine prosodic values, the TF-IDF
    vector from ``vectorizer``, and five text cues (ends with '?', wh-word
    count, word count, verb count, noun count). The row is scaled with
    ``scaler`` and classified with ``clf``. Prediction 0 maps to
    'Interviewer'; anything else maps to 'Interviewee'.

    Args:
        utterances: Dicts carrying ``'text'`` and ``'prosodic_features'``.
        clf: Fitted classifier exposing ``predict``.
        vectorizer: Fitted text vectorizer exposing ``transform``.
        scaler: Fitted scaler exposing ``transform``.

    Returns:
        A new list with a copy of each utterance plus a ``'role'`` key.

    Raises:
        Exception: Any failure is logged and re-raised.
    """
    prosodic_keys = (
        'duration', 'mean_pitch', 'min_pitch', 'max_pitch', 'pitch_sd',
        'intensityMean', 'intensityMin', 'intensityMax', 'intensitySD',
    )
    try:
        tfidf = vectorizer.transform([item['text'] for item in utterances])
        classified = []
        for position, item in enumerate(utterances):
            prosodic = item['prosodic_features']
            row = [prosodic[key] for key in prosodic_keys]
            row += tfidf[position].toarray()[0].tolist()

            text = item['text']
            parsed = nlp(text)
            verb_total = len([tok for tok in parsed if tok.pos_ == 'VERB'])
            noun_total = len([tok for tok in parsed if tok.pos_ == 'NOUN'])
            wh_words = re.findall(r'\b(why|how|what|when|where|who|which)\b', text.lower())
            row += [
                int(text.endswith('?')),
                len(wh_words),
                len(text.split()),
                verb_total,
                noun_total,
            ]

            predicted = clf.predict(scaler.transform([row]))[0]
            if predicted == 0:
                role = 'Interviewer'
            else:
                role = 'Interviewee'
            classified.append({**item, 'role': role})
        return classified
    except Exception as e:
        logger.error(f"Role classification failed: {str(e)}")
        raise
313
+
314
def analyze_interviewee_voice(audio_path: str, utterances: List[Dict]) -> Dict:
    """Compute speech-delivery metrics for the interviewee's turns.

    Only utterances whose ``'role'`` is ``'Interviewee'`` are analysed. Their
    ``'start'`` / ``'end'`` values are converted to sample offsets by dividing
    by 1000, i.e. they are treated as milliseconds.

    Args:
        audio_path: Audio file to load (resampled to 16 kHz).
        utterances: Role-labelled utterances providing ``'text'``, ``'start'``,
            ``'end'`` and ``'prosodic_features'['duration']``.

    Returns:
        A dict with ``speaking_rate``, ``filler_ratio``, ``repetition_score``,
        ``pitch_analysis``, ``intensity_analysis``, ``composite_scores`` and
        ``interpretation``; or ``{'error': message}`` when there is no
        interviewee speech or any step fails (failures are logged, not raised).
    """
    try:
        interviewee_utterances = [u for u in utterances if u['role'] == 'Interviewee']
        # Check before loading audio: no need to decode the file when there is
        # nothing to analyse.
        if not interviewee_utterances:
            return {'error': 'No interviewee utterances found'}

        y, sr = librosa.load(audio_path, sr=16000)
        segments = []
        for u in interviewee_utterances:
            start = int(u['start'] * sr / 1000)
            end = int(u['end'] * sr / 1000)
            segment = y[start:end]
            # Zero-length slices carry no signal; skip them so the pitch and
            # RMS estimators are never handed an empty array.
            if len(segment) > 0:
                segments.append(segment)

        # NOTE(review): 'duration' is presumably in seconds, since the rate is
        # reported downstream as words/sec -- confirm against the extractor.
        total_duration = sum(u['prosodic_features']['duration'] for u in interviewee_utterances)
        total_words = sum(len(u['text'].split()) for u in interviewee_utterances)
        speaking_rate = total_words / total_duration if total_duration > 0 else 0

        # Match fillers as whole words/phrases. Plain substring counting would
        # also hit 'so' in "also" or 'um' in "summer" and inflate the ratio.
        filler_words = ['um', 'uh', 'like', 'you know', 'so', 'i mean']
        filler_pattern = re.compile(
            r'\b(?:' + '|'.join(re.escape(fw) for fw in filler_words) + r')\b'
        )
        filler_count = sum(
            len(filler_pattern.findall(u['text'].lower())) for u in interviewee_utterances
        )
        filler_ratio = filler_count / total_words if total_words > 0 else 0

        # Repetition: share of distinct bigrams that occur more than once.
        all_words = ' '.join(u['text'].lower() for u in interviewee_utterances).split()
        word_counts = {}
        for bigram in zip(all_words, all_words[1:]):
            word_counts[bigram] = word_counts.get(bigram, 0) + 1
        repetition_score = (
            sum(1 for count in word_counts.values() if count > 1) / len(word_counts)
            if word_counts else 0
        )

        pitches = []
        for segment in segments:
            f0, voiced_flag, _ = librosa.pyin(segment, fmin=80, fmax=300, sr=sr)
            pitches.extend(f0[voiced_flag])
        pitch_mean = np.mean(pitches) if len(pitches) > 0 else 0
        pitch_std = np.std(pitches) if len(pitches) > 0 else 0
        # Jitter here is the mean absolute frame-to-frame pitch change,
        # relative to the mean pitch.
        jitter = np.mean(np.abs(np.diff(pitches))) / pitch_mean if len(pitches) > 1 and pitch_mean > 0 else 0

        intensities = []
        for segment in segments:
            rms = librosa.feature.rms(y=segment)[0]
            intensities.extend(rms)
        intensity_mean = np.mean(intensities) if intensities else 0
        intensity_std = np.std(intensities) if intensities else 0
        # Shimmer is the same construction as jitter, applied to RMS energy.
        shimmer = np.mean(np.abs(np.diff(intensities))) / intensity_mean if len(intensities) > 1 and intensity_mean > 0 else 0

        anxiety_score = 0.6 * (pitch_std / pitch_mean) + 0.4 * (jitter + shimmer) if pitch_mean > 0 else 0
        confidence_score = 0.7 * (1 / (1 + intensity_std)) + 0.3 * (1 / (1 + filler_ratio))
        hesitation_score = filler_ratio + repetition_score

        anxiety_level = 'High' if anxiety_score > 0.15 else 'Moderate' if anxiety_score > 0.07 else 'Low'
        confidence_level = 'High' if confidence_score > 0.7 else 'Moderate' if confidence_score > 0.5 else 'Low'
        fluency_level = 'Fluent' if (filler_ratio < 0.05 and repetition_score < 0.1) else 'Moderate' if (filler_ratio < 0.1 and repetition_score < 0.2) else 'Disfluent'

        return {
            'speaking_rate': float(round(speaking_rate, 2)),
            'filler_ratio': float(round(filler_ratio, 4)),
            'repetition_score': float(round(repetition_score, 4)),
            'pitch_analysis': {'mean': float(round(pitch_mean, 2)), 'std_dev': float(round(pitch_std, 2)), 'jitter': float(round(jitter, 4))},
            'intensity_analysis': {'mean': float(round(intensity_mean, 2)), 'std_dev': float(round(intensity_std, 2)), 'shimmer': float(round(shimmer, 4))},
            'composite_scores': {'anxiety': float(round(anxiety_score, 4)), 'confidence': float(round(confidence_score, 4)), 'hesitation': float(round(hesitation_score, 4))},
            'interpretation': {'anxiety_level': anxiety_level, 'confidence_level': confidence_level, 'fluency_level': fluency_level}
        }
    except Exception as e:
        logger.error(f"Voice analysis failed: {str(e)}")
        return {'error': str(e)}
369
 
370
def generate_voice_interpretation(analysis: Dict) -> str:
    """Render the voice-analysis dict as a plain-text profile for the report.

    Args:
        analysis: Voice-analysis result. A usable result provides
            ``speaking_rate``, ``filler_ratio``, ``repetition_score``,
            ``composite_scores`` and ``interpretation``.

    Returns:
        A multi-line string with the metric lines followed by fixed
        "HR Insights" bullets, or a one-line notice when the analysis is
        empty, ``None`` or carries an ``'error'`` key.
    """
    # An empty dict is what callers pass when no voice analysis was stored;
    # treat it like a failed analysis instead of raising KeyError below.
    if not analysis or 'error' in analysis:
        return "Voice analysis not available due to processing error."

    levels = analysis['interpretation']
    scores = analysis['composite_scores']
    interpretation_lines = [
        "Voice and Speech Profile:",
        f"- Speaking Rate: {analysis['speaking_rate']} words/sec - Compared to optimal range (2.0-3.0 words/sec)",
        f"- Filler Word Usage: {analysis['filler_ratio'] * 100:.1f}% - Frequency of non-content words (e.g., 'um', 'like')",
        f"- Repetition Tendency: {analysis['repetition_score']:.3f} - Measure of repeated phrases",
        f"- Anxiety Indicator: {levels['anxiety_level']} (Score: {scores['anxiety']:.3f}) - Based on pitch and voice stability",
        f"- Confidence Indicator: {levels['confidence_level']} (Score: {scores['confidence']:.3f}) - Derived from vocal consistency",
        f"- Fluency Assessment: {levels['fluency_level']} - Reflects speech flow and coherence",
        "",
        "HR Insights:",
        "- Faster speaking rates may indicate confidence but can suggest nervousness if excessive.",
        "- High filler word usage often reduces perceived professionalism and clarity.",
        "- Elevated anxiety indicators (pitch variability, jitter) may reflect interview pressure.",
        "- Strong confidence scores suggest effective vocal presence and control.",
        "- Fluency impacts listener engagement; disfluency may hinder communication effectiveness."
    ]
    return "\n".join(interpretation_lines)
390
+
391
def generate_anxiety_confidence_chart(composite_scores: Dict, chart_path_or_buffer):
    """Draw a two-bar anxiety/confidence chart and save it as PNG.

    Args:
        composite_scores: Mapping read for ``'anxiety'`` and ``'confidence'``;
            a missing key is plotted as 0.
        chart_path_or_buffer: File path or writable binary buffer handed to
            ``savefig``.

    Returns:
        None. Errors are logged and swallowed, so on failure nothing (or only
        partial data) may have been written to the target.
    """
    fig = None
    try:
        labels = ['Anxiety', 'Confidence']
        scores = [composite_scores.get('anxiety', 0), composite_scores.get('confidence', 0)]
        fig, ax = plt.subplots(figsize=(4, 2.5))
        bars = ax.bar(labels, scores, color=['#FF6B6B', '#4ECDC4'], edgecolor='black')
        ax.set_ylabel('Score (Normalized)')
        ax.set_title('Vocal Dynamics: Anxiety vs. Confidence')
        # Keep the original 0-1.2 range, but grow it when a score exceeds 1 so
        # the bar and its value label are not clipped.
        ax.set_ylim(0, max(1.2, max(scores) + 0.2))
        for bar in bars:
            height = bar.get_height()
            ax.text(bar.get_x() + bar.get_width()/2, height + 0.05, f"{height:.2f}",
                    ha='center', color='black', fontweight='bold', fontsize=10)
        # Operate on this figure explicitly rather than pyplot's implicit
        # "current figure".
        fig.tight_layout()
        fig.savefig(chart_path_or_buffer, format='png', bbox_inches='tight', dpi=150)
    except Exception as e:
        logger.error(f"Error generating chart: {str(e)}")
    finally:
        # Always release the figure, including when plotting or saving fails;
        # otherwise pyplot keeps it alive for the life of the process.
        if fig is not None:
            plt.close(fig)
409
 
410
  def calculate_acceptance_probability(analysis_data: Dict) -> float:
 
411
  voice = analysis_data.get('voice_analysis', {})
412
  if 'error' in voice: return 0.0
413
  w_confidence, w_anxiety, w_fluency, w_speaking_rate, w_filler_repetition, w_content_strengths = 0.4, -0.3, 0.2, 0.1, -0.1, 0.2
414
  confidence_score = voice.get('composite_scores', {}).get('confidence', 0.0)
415
  anxiety_score = voice.get('composite_scores', {}).get('anxiety', 0.0)
416
+ fluency_level = voice.get('interpretation', {}).get('fluency_level', 'Disfluent')
417
  speaking_rate = voice.get('speaking_rate', 0.0)
418
  filler_ratio = voice.get('filler_ratio', 0.0)
419
  repetition_score = voice.get('repetition_score', 0.0)
420
+ fluency_map = {'Fluent': 1.0, 'Moderate': 0.5, 'Disfluent': 0.0}
421
  fluency_val = fluency_map.get(fluency_level, 0.0)
422
  ideal_speaking_rate = 2.5
423
  speaking_rate_deviation = abs(speaking_rate - ideal_speaking_rate)
 
432
  acceptance_probability = max(0.0, min(1.0, normalized_score))
433
  return float(f"{acceptance_probability * 100:.2f}")
434
 
 
 
 
 
 
 
 
 
435
def generate_report(analysis_data: Dict) -> str:
    """Ask the Gemini model for a structured HR report on the interview.

    The prompt is assembled from the acceptance score (when present), the
    text summary of the voice analysis, interview statistics and up to five
    interviewee responses.

    Args:
        analysis_data: Analysis dict providing ``'transcript'``,
            ``'speakers'`` and ``'text_analysis'``; ``'voice_analysis'`` and
            ``'acceptance_probability'`` are optional.

    Returns:
        The model's response text, or an ``"Error generating report: ..."``
        string when anything fails (the failure is logged, not raised).
    """
    try:
        voice = analysis_data.get('voice_analysis', {})
        voice_interpretation = generate_voice_interpretation(voice)

        # Label every interviewee turn, then keep the first five as samples.
        labelled_turns = []
        for u in analysis_data['transcript']:
            if u['role'] == 'Interviewee':
                labelled_turns.append(f"Speaker {u['speaker']} ({u['role']}): {u['text']}")
        interviewee_responses = labelled_turns[:5]

        acceptance_prob = analysis_data.get('acceptance_probability', None)
        acceptance_line = ""
        if acceptance_prob is not None:
            score_header = f"\n*Hiring Potential Score: {acceptance_prob:.2f}%*\n"
            if acceptance_prob >= 80:
                verdict = "Assessment: Exceptional candidate, strongly recommended for advancement."
            elif acceptance_prob >= 50:
                verdict = "Assessment: Promising candidate with moderate strengths; consider for further evaluation."
            else:
                verdict = "Assessment: Limited alignment with role expectations; significant development needed."
            acceptance_line = score_header + verdict

        prompt = f"""
You are an expert HR consultant, EvalBot, tasked with producing a professional, concise, and actionable interview analysis report. Structure the report with clear headings, subheadings, and bullet points (use '- ' for bullets). Adopt a formal, HR-professional tone, focusing on candidate evaluation, fit for role, and development insights.
{acceptance_line}
*1. Executive Summary*
- Provide a concise overview of the interview, highlighting key metrics and overall candidate performance.
- Interview duration: {analysis_data['text_analysis']['total_duration']:.2f} seconds
- Total speaker turns: {analysis_data['text_analysis']['speaker_turns']}
- Participants: {', '.join(analysis_data['speakers'])}
*2. Communication and Vocal Analysis*
- Evaluate the candidate's vocal delivery, including speaking rate, fluency, and confidence indicators.
- Provide HR-relevant insights into how these metrics impact perceived professionalism and role suitability.
{voice_interpretation}
*3. Content Analysis and Competency Assessment*
- Analyze key themes in the candidate's responses to assess alignment with job competencies (e.g., problem-solving, communication, leadership).
- Identify strengths and areas for improvement, supported by specific examples.
- Sample responses for context:
{chr(10).join(interviewee_responses)}
*4. Fit and Potential Evaluation*
- Assess the candidate's overall fit for a typical professional role based on communication, content, and vocal dynamics.
- Consider cultural fit, adaptability, and readiness for the role.
*5. Actionable HR Recommendations*
- Provide specific, prioritized recommendations for the candidate’s development.
- Focus areas: Effective Communication, Content Clarity and Depth, Professional Presence.
- Suggest next steps for hiring managers (e.g., advance to next round, additional assessments, training focus).
"""
        response = gemini_model.generate_content(prompt)
        return response.text
    except Exception as e:
        logger.error(f"Report generation failed: {str(e)}")
        return f"Error generating report: {str(e)}"
477
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
478
def _match_report_heading(line: str, section_titles: List[str]):
    """Return the section title that ``line`` announces, or None for body text."""
    # Drop markdown decoration and an optional "N." / "N)" prefix, so that
    # "**1. Executive Summary**", "*1. Executive Summary*" and
    # "## Executive Summary:" all reduce to the bare title.
    cleaned = re.sub(r'^[\s#*_]*(?:\d+[.)]\s*)?', '', line)
    cleaned = cleaned.strip(' \t*_#:').lower()
    for title in section_titles:
        if cleaned == title.lower():
            return title
    return None


def _split_report_sections(report_text: str, section_titles: List[str]) -> Dict:
    """Group the non-empty lines of the report text under the known headings."""
    sections = {title: [] for title in section_titles}
    current_section = None
    for raw_line in (report_text or '').splitlines():
        line = raw_line.strip()
        if not line:
            continue
        heading = _match_report_heading(line, section_titles)
        if heading is not None:
            current_section = heading
        elif current_section is not None:
            # Text that precedes the first recognised heading is dropped.
            sections[current_section].append(line)
    return sections


def _to_paragraph_markup(text: str) -> str:
    """Escape free text for a reportlab Paragraph, keeping **bold** emphasis."""
    from xml.sax.saxutils import escape
    # Paragraph parses its input as XML-like markup; an unescaped '&' or '<'
    # in model output would abort the whole PDF build.
    return re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', escape(text))


def _append_report_section(story, lines, fallback, body_style, bullet_style):
    """Append section lines to the story as bullets/body text, or a fallback."""
    if not lines:
        story.append(Paragraph(fallback, body_style))
        return
    for line in lines:
        # A bullet is '-', '•' or a single '*'; a leading '**' is bold text.
        bullet = re.match(r'^(?:[-•]|\*(?!\*))\s*(.*)$', line)
        if bullet:
            story.append(Paragraph(_to_paragraph_markup(bullet.group(1).strip()), bullet_style))
        else:
            story.append(Paragraph(_to_paragraph_markup(line), body_style))


def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text: str):
    """Build the PDF interview report.

    The document has a title page with the hiring-potential score, a vocal
    profile table with the anxiety/confidence chart, and the narrative
    sections extracted from the generated report text.

    Args:
        analysis_data: Analysis dict; ``'acceptance_probability'`` and
            ``'voice_analysis'`` are read when present.
        output_path: Destination path of the PDF.
        gemini_report_text: Generated report text, expected to contain the
            five numbered section headings.

    Returns:
        True when the PDF was built, False when any step failed (the error is
        logged with a traceback, not raised).
    """
    try:
        doc = SimpleDocTemplate(output_path, pagesize=letter,
                                rightMargin=0.75*inch, leftMargin=0.75*inch,
                                topMargin=1*inch, bottomMargin=1*inch)
        styles = getSampleStyleSheet()
        h1 = ParagraphStyle(name='Heading1', fontSize=22, leading=26, spaceAfter=20, alignment=1, textColor=colors.HexColor('#1A3C5E'))
        h2 = ParagraphStyle(name='Heading2', fontSize=14, leading=18, spaceBefore=14, spaceAfter=8, textColor=colors.HexColor('#2E5A87'))
        body_text = ParagraphStyle(name='BodyText', parent=styles['Normal'], fontSize=10, leading=14, spaceAfter=8, fontName='Helvetica')
        bullet_style = ParagraphStyle(name='Bullet', parent=body_text, leftIndent=20, bulletIndent=10, fontName='Helvetica')

        story = []

        def header_footer(canvas, doc):
            # Drawn on every page: footer text, a rule and the header title.
            canvas.saveState()
            canvas.setFont('Helvetica', 9)
            canvas.setFillColor(colors.grey)
            canvas.drawString(doc.leftMargin, 0.5 * inch, f"Page {doc.page} | EvalBot HR Interview Report | Confidential")
            canvas.setStrokeColor(colors.HexColor('#2E5A87'))
            canvas.setLineWidth(1)
            canvas.line(doc.leftMargin, doc.height + 0.85*inch, doc.width + doc.leftMargin, doc.height + 0.85*inch)
            canvas.setFont('Helvetica-Bold', 10)
            canvas.drawString(doc.leftMargin, doc.height + 0.9*inch, "Candidate Interview Analysis Report")
            canvas.restoreState()

        # Title Page
        story.append(Paragraph("Candidate Interview Analysis Report", h1))
        story.append(Paragraph(f"Generated on: {time.strftime('%B %d, %Y')}", ParagraphStyle(name='Date', alignment=1, fontSize=10, textColor=colors.grey)))
        story.append(Spacer(1, 0.5 * inch))
        acceptance_prob = analysis_data.get('acceptance_probability')
        if acceptance_prob is not None:
            story.append(Paragraph("Hiring Potential Snapshot", h2))
            prob_color = colors.HexColor('#2E7D32') if acceptance_prob >= 70 else (colors.HexColor('#F57C00') if acceptance_prob >= 40 else colors.HexColor('#D32F2F'))
            story.append(Paragraph(f"Hiring Potential Score: <font size=16 color='{prob_color.hexval()}'><b>{acceptance_prob:.2f}%</b></font>",
                                   ParagraphStyle(name='Prob', fontSize=12, spaceAfter=12, alignment=1)))
            if acceptance_prob >= 80:
                story.append(Paragraph("<b>HR Assessment:</b> Exceptional candidate, strongly recommended for advancement to the next stage.", body_text))
            elif acceptance_prob >= 50:
                story.append(Paragraph("<b>HR Assessment:</b> Promising candidate with moderate strengths; consider for further evaluation.", body_text))
            else:
                story.append(Paragraph("<b>HR Assessment:</b> Limited alignment with role expectations; significant development needed.", body_text))
        story.append(Spacer(1, 0.3 * inch))
        story.append(Paragraph("Prepared by: EvalBot - AI-Powered HR Interview Analysis System", body_text))
        story.append(PageBreak())

        # Detailed Analysis
        story.append(Paragraph("Detailed Candidate Evaluation", h1))

        story.append(Paragraph("1. Communication and Vocal Profile", h2))
        voice_analysis = analysis_data.get('voice_analysis', {})
        if voice_analysis and 'error' not in voice_analysis:
            table_data = [
                ['Metric', 'Value', 'HR Insight'],
                ['Speaking Rate', f"{voice_analysis.get('speaking_rate', 0):.2f} words/sec", 'Optimal: 2.0-3.0 wps; impacts clarity and confidence'],
                ['Filler Word Usage', f"{voice_analysis.get('filler_ratio', 0) * 100:.1f}%", 'High usage may reduce perceived professionalism'],
                ['Anxiety Indicator', voice_analysis.get('interpretation', {}).get('anxiety_level', 'N/A'), f"Score: {voice_analysis.get('composite_scores', {}).get('anxiety', 0):.3f}; reflects pressure response"],
                ['Confidence Indicator', voice_analysis.get('interpretation', {}).get('confidence_level', 'N/A'), f"Score: {voice_analysis.get('composite_scores', {}).get('confidence', 0):.3f}; indicates vocal authority"],
                ['Fluency Assessment', voice_analysis.get('interpretation', {}).get('fluency_level', 'N/A'), 'Affects engagement and message delivery']
            ]
            table = Table(table_data, colWidths=[1.8*inch, 1.2*inch, 3.5*inch])
            table.setStyle(TableStyle([
                ('BACKGROUND', (0,0), (-1,0), colors.HexColor('#2E5A87')),
                ('TEXTCOLOR', (0,0), (-1,0), colors.whitesmoke),
                ('ALIGN', (0,0), (-1,-1), 'LEFT'),
                ('VALIGN', (0,0), (-1,-1), 'MIDDLE'),
                ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                ('FONTSIZE', (0, 0), (-1, -1), 9),
                ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
                ('TOPPADDING', (0, 0), (-1, 0), 12),
                ('BACKGROUND', (0, 1), (-1, -1), colors.HexColor('#F5F7FA')),
                ('GRID', (0,0), (-1,-1), 1, colors.HexColor('#DDE4EB'))
            ]))
            story.append(table)
            story.append(Spacer(1, 0.25 * inch))
            chart_buffer = io.BytesIO()
            generate_anxiety_confidence_chart(voice_analysis.get('composite_scores', {}), chart_buffer)
            # The chart helper logs and swallows its own errors, so the buffer
            # can be empty; embedding an empty image would fail the build.
            if chart_buffer.getbuffer().nbytes > 0:
                chart_buffer.seek(0)
                img = Image(chart_buffer, width=4.5*inch, height=2.8*inch)
                img.hAlign = 'CENTER'
                story.append(img)
        else:
            story.append(Paragraph("Voice analysis unavailable due to processing limitations.", body_text))
        story.append(Spacer(1, 0.3 * inch))

        # Parse Gemini Report
        # "Communication and Vocal Analysis" is listed so that its lines are
        # not attributed to the preceding section; it is not rendered below
        # because the table above covers the vocal profile.
        section_titles = ["Executive Summary", "Communication and Vocal Analysis",
                          "Content Analysis and Competency Assessment",
                          "Fit and Potential Evaluation", "Actionable HR Recommendations"]
        sections = _split_report_sections(gemini_report_text, section_titles)

        # Executive Summary
        story.append(Paragraph("2. Executive Summary", h2))
        _append_report_section(story, sections['Executive Summary'],
                               "Summary not available from analysis.", body_text, bullet_style)
        story.append(Spacer(1, 0.3 * inch))

        # Content and Competency
        story.append(Paragraph("3. Content and Competency Assessment", h2))
        _append_report_section(story, sections['Content Analysis and Competency Assessment'],
                               "Content and competency analysis not provided.", body_text, bullet_style)
        story.append(PageBreak())

        # Fit and Potential
        story.append(Paragraph("4. Fit and Potential Evaluation", h2))
        _append_report_section(story, sections['Fit and Potential Evaluation'],
                               "Fit and potential evaluation not available.", body_text, bullet_style)
        story.append(Spacer(1, 0.3 * inch))

        # HR Recommendations
        story.append(Paragraph("5. Actionable HR Recommendations", h2))
        _append_report_section(story, sections['Actionable HR Recommendations'],
                               "HR recommendations not provided.", body_text, bullet_style)

        doc.build(story, onFirstPage=header_footer, onLaterPages=header_footer)
        return True
    except Exception as e:
        logger.error(f"Enhanced PDF creation failed: {str(e)}", exc_info=True)
        return False
634
 
635
def convert_to_serializable(obj):
    """Recursively replace NumPy values with plain Python equivalents.

    NumPy scalars become Python scalars and arrays become nested lists.
    Dicts are rebuilt with converted values (and converted NumPy scalar keys).
    Lists, tuples, sets and frozensets are rebuilt as lists, which is how JSON
    represents them anyway. Any other object is returned unchanged.

    Args:
        obj: Arbitrary, possibly nested, value.

    Returns:
        A structure made of the converted values.
    """
    if isinstance(obj, np.generic):
        return obj.item()
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, dict):
        return {
            (k.item() if isinstance(k, np.generic) else k): convert_to_serializable(v)
            for k, v in obj.items()
        }
    # Tuples and sets can hide NumPy scalars too; without descending into
    # them json.dump would still fail on the nested values.
    if isinstance(obj, (list, tuple, set, frozenset)):
        return [convert_to_serializable(i) for i in obj]
    return obj
641
 
 
642
def process_interview(audio_path_or_url: str):
    """Run the full interview pipeline and write the PDF and JSON outputs.

    Steps: fetch the audio when given a URL, convert it to WAV, transcribe,
    attach prosodic features, identify speakers, classify roles, analyse the
    interviewee's voice, score the candidate, generate the narrative report
    and write both output files into ``OUTPUT_DIR``.

    Args:
        audio_path_or_url: Local audio path, or an ``http(s)://`` URL to
            download first.

    Returns:
        ``{'pdf_path': ..., 'json_path': ...}``. The PDF path is returned even
        if PDF creation failed; that failure is logged.

    Raises:
        Exception: Any pipeline failure is logged with a traceback and
            re-raised. Temporary files are cleaned up in all cases.
    """
    local_audio_path = None
    wav_file = None
    is_downloaded = False

    def _remove_temp_file(path) -> bool:
        """Delete ``path`` if it exists; never raise, so cleanup cannot mask
        the exception that is already propagating."""
        try:
            if path and os.path.exists(path):
                os.remove(path)
                return True
        except OSError as cleanup_error:
            logger.warning(f"Could not remove temporary file {path}: {cleanup_error}")
        return False

    try:
        logger.info(f"Starting processing for {audio_path_or_url}")
        if audio_path_or_url.startswith(('http://', 'https://')):
            local_audio_path = download_audio_from_url(audio_path_or_url)
            is_downloaded = True
        else:
            local_audio_path = audio_path_or_url

        wav_file = convert_to_wav(local_audio_path)
        transcript = transcribe(wav_file)
        for utterance in transcript['utterances']:
            utterance['prosodic_features'] = extract_prosodic_features(wav_file, utterance['start'], utterance['end'])
        utterances_with_speakers = identify_speakers(transcript, wav_file)

        # Reuse the persisted role model only when all three artifacts exist;
        # a partial set would fail in joblib.load, so retrain instead.
        # NOTE(review): joblib.load unpickles arbitrary objects -- OUTPUT_DIR
        # must only ever contain files written by this application.
        model_paths = [
            os.path.join(OUTPUT_DIR, name)
            for name in ('role_classifier.pkl', 'text_vectorizer.pkl', 'feature_scaler.pkl')
        ]
        if all(os.path.exists(path) for path in model_paths):
            clf, vectorizer, scaler = (joblib.load(path) for path in model_paths)
        else:
            clf, vectorizer, scaler = train_role_classifier(utterances_with_speakers)

        classified_utterances = classify_roles(utterances_with_speakers, clf, vectorizer, scaler)
        voice_analysis = analyze_interviewee_voice(wav_file, classified_utterances)

        analysis_data = {
            'transcript': classified_utterances,
            'speakers': list(set(u['speaker'] for u in classified_utterances)),
            'voice_analysis': voice_analysis,
            'text_analysis': {
                'total_duration': sum(u['prosodic_features']['duration'] for u in classified_utterances),
                'speaker_turns': len(classified_utterances)
            }
        }
        analysis_data['acceptance_probability'] = calculate_acceptance_probability(analysis_data)
        gemini_report_text = generate_report(analysis_data)

        base_name = str(uuid.uuid4())
        pdf_path = os.path.join(OUTPUT_DIR, f"{base_name}_report.pdf")
        json_path = os.path.join(OUTPUT_DIR, f"{base_name}_analysis.json")

        # create_pdf_report reports failure through its return value; surface
        # it so a missing PDF is not silent.
        if not create_pdf_report(analysis_data, pdf_path, gemini_report_text=gemini_report_text):
            logger.error(f"PDF report could not be created at {pdf_path}")

        with open(json_path, 'w', encoding='utf-8') as f:
            serializable_data = convert_to_serializable(analysis_data)
            json.dump(serializable_data, f, indent=2)

        logger.info(f"Processing completed for {audio_path_or_url}")
        return {'pdf_path': pdf_path, 'json_path': json_path}
    except Exception as e:
        logger.error(f"Processing failed for {audio_path_or_url}: {str(e)}", exc_info=True)
        raise
    finally:
        # Only delete the WAV when it is a file this run produced. If
        # convert_to_wav handed back the caller's own local path unchanged,
        # removing it would destroy the caller's input.
        # NOTE(review): convert_to_wav is not visible here -- confirm whether
        # it can return its input path.
        if wav_file and (is_downloaded or wav_file != local_audio_path):
            _remove_temp_file(wav_file)
        if is_downloaded and local_audio_path and _remove_temp_file(local_audio_path):
            logger.info(f"Cleaned up temporary downloaded file: {local_audio_path}")