norhan12 committed on
Commit
dda086c
·
verified ·
1 Parent(s): d505a88

Update process_interview.py

Browse files
Files changed (1) hide show
  1. process_interview.py +297 -365
process_interview.py CHANGED
@@ -10,16 +10,12 @@ import wave
10
  from nemo.collections.asr.models import EncDecSpeakerLabelModel
11
  from pinecone import Pinecone, ServerlessSpec
12
  import librosa
13
- import pandas as pd
14
- from sklearn.ensemble import RandomForestClassifier
15
- from sklearn.preprocessing import StandardScaler
16
- from sklearn.feature_extraction.text import TfidfVectorizer
17
  import re
18
- from typing import Dict, List, Tuple
19
  import logging
20
  import tempfile
21
  from reportlab.lib.pagesizes import letter
22
- from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, Image
23
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
24
  from reportlab.lib.units import inch
25
  from reportlab.lib import colors
@@ -28,20 +24,20 @@ import matplotlib
28
  matplotlib.use('Agg')
29
  from reportlab.platypus import Image
30
  import io
31
- from transformers import AutoTokenizer, AutoModel
32
  import spacy
33
  import google.generativeai as genai
34
- import joblib
35
  from concurrent.futures import ThreadPoolExecutor
36
- import urllib3
37
- # Setup logging
 
38
  logging.basicConfig(level=logging.INFO)
39
  logger = logging.getLogger(__name__)
40
- logging.getLogger("nemo_logging").setLevel(logging.INFO)
41
- logging.getLogger("nemo").setLevel(logging.INFO)
 
 
42
 
43
  # Configuration
44
- AUDIO_DIR = "./Uploads"
45
  OUTPUT_DIR = "./processed_audio"
46
  os.makedirs(OUTPUT_DIR, exist_ok=True)
47
 
@@ -50,29 +46,34 @@ PINECONE_KEY = os.getenv("PINECONE_KEY")
50
  ASSEMBLYAI_KEY = os.getenv("ASSEMBLYAI_KEY")
51
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
52
 
 
53
  def download_audio_from_url(url: str, retries=3) -> str:
54
  """Downloads an audio file from a URL to a temporary local path with retries."""
55
- try:
56
- temp_dir = tempfile.gettempdir()
57
- temp_path = os.path.join(temp_dir, f"{uuid.uuid4()}.tmp_audio")
58
- logger.info(f"Downloading audio from {url} to {temp_path}")
59
- for attempt in range(retries):
60
- try:
61
- with requests.get(url, stream=True, timeout=30) as r:
62
- r.raise_for_status()
63
- with open(temp_path, 'wb') as f:
64
- for chunk in r.iter_content(chunk_size=8192):
65
- f.write(chunk)
 
66
  return temp_path
67
- except (requests.exceptions.ChunkedEncodingError, urllib3.exceptions.ProtocolError) as e:
68
- logger.warning(f"Attempt {attempt + 1} failed: {e}. Retrying...")
69
- time.sleep(2 ** attempt) # Exponential backoff
70
- raise Exception(f"Failed to download audio after {retries} attempts.")
71
- except Exception as e:
72
- logger.error(f"Failed to download audio from URL {url}: {e}")
73
- raise
 
 
74
 
75
  def initialize_services():
 
76
  try:
77
  pc = Pinecone(api_key=PINECONE_KEY)
78
  index_name = "interview-speaker-embeddings"
@@ -84,6 +85,7 @@ def initialize_services():
84
  spec=ServerlessSpec(cloud="aws", region="us-east-1")
85
  )
86
  index = pc.Index(index_name)
 
87
  genai.configure(api_key=GEMINI_API_KEY)
88
  gemini_model = genai.GenerativeModel('gemini-1.5-flash')
89
  return index, gemini_model
@@ -92,14 +94,14 @@ def initialize_services():
92
  raise
93
 
94
  index, gemini_model = initialize_services()
95
-
96
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
97
  logger.info(f"Using device: {device}")
98
 
99
  def load_speaker_model():
 
100
  try:
101
- import torch
102
- torch.set_num_threads(5)
103
  model = EncDecSpeakerLabelModel.from_pretrained(
104
  "nvidia/speakerverification_en_titanet_large",
105
  map_location=torch.device('cpu')
@@ -111,21 +113,18 @@ def load_speaker_model():
111
  raise RuntimeError("Could not load speaker verification model")
112
 
113
  def load_models():
 
114
  speaker_model = load_speaker_model()
115
  nlp = spacy.load("en_core_web_sm")
116
- tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
117
- llm_model = AutoModel.from_pretrained("distilbert-base-uncased").to(device)
118
- llm_model.eval()
119
- return speaker_model, nlp, tokenizer, llm_model
120
 
121
- speaker_model, nlp, tokenizer, llm_model = load_models()
122
  def convert_to_wav(audio_path: str, output_dir: str = OUTPUT_DIR) -> str:
 
123
  try:
124
  audio = AudioSegment.from_file(audio_path)
125
- if audio.channels > 1:
126
- audio = audio.set_channels(1)
127
- audio = audio.set_frame_rate(16000)
128
-
129
  wav_file = os.path.join(output_dir, f"{uuid.uuid4()}.wav")
130
  audio.export(wav_file, format="wav")
131
  return wav_file
@@ -133,354 +132,236 @@ def convert_to_wav(audio_path: str, output_dir: str = OUTPUT_DIR) -> str:
133
  logger.error(f"Audio conversion failed: {str(e)}")
134
  raise
135
 
136
-
137
  def extract_prosodic_features(audio_path: str, start_ms: int, end_ms: int) -> Dict:
 
138
  try:
139
- audio = AudioSegment.from_file(audio_path)
140
- segment = audio[start_ms:end_ms]
141
- temp_path = os.path.join(OUTPUT_DIR, f"temp_{uuid.uuid4()}.wav")
142
- segment.export(temp_path, format="wav")
143
-
144
- y, sr = librosa.load(temp_path, sr=16000)
145
- pitches = librosa.piptrack(y=y, sr=sr)[0]
146
  pitches = pitches[pitches > 0]
 
 
147
 
148
- features = {
149
  'duration': (end_ms - start_ms) / 1000,
150
  'mean_pitch': float(np.mean(pitches)) if len(pitches) > 0 else 0.0,
151
- 'min_pitch': float(np.min(pitches)) if len(pitches) > 0 else 0.0,
152
- 'max_pitch': float(np.max(pitches)) if len(pitches) > 0 else 0.0,
153
  'pitch_sd': float(np.std(pitches)) if len(pitches) > 0 else 0.0,
154
- 'intensityMean': float(np.mean(librosa.feature.rms(y=y)[0])),
155
- 'intensityMin': float(np.min(librosa.feature.rms(y=y)[0])),
156
- 'intensityMax': float(np.max(librosa.feature.rms(y=y)[0])),
157
- 'intensitySD': float(np.std(librosa.feature.rms(y=y)[0])),
158
  }
159
-
160
- os.remove(temp_path)
161
- return features
162
  except Exception as e:
163
  logger.error(f"Feature extraction failed: {str(e)}")
164
- return {
165
- 'duration': (end_ms - start_ms) / 1000,
166
- 'mean_pitch': 0.0,
167
- 'min_pitch': 0.0,
168
- 'max_pitch': 0.0,
169
- 'pitch_sd': 0.0,
170
- 'intensityMean': 0.0,
171
- 'intensityMin': 0.0,
172
- 'intensityMax': 0.0,
173
- 'intensitySD': 0.0,
174
- }
175
-
176
 
177
  def transcribe(audio_path: str) -> Dict:
 
178
  try:
 
179
  with open(audio_path, 'rb') as f:
180
- upload_response = requests.post(
181
- "https://api.assemblyai.com/v2/upload",
182
- headers={"authorization": ASSEMBLYAI_KEY},
183
- data=f
184
- )
185
  audio_url = upload_response.json()['upload_url']
186
-
187
- transcript_response = requests.post(
188
- "https://api.assemblyai.com/v2/transcript",
189
- headers={"authorization": ASSEMBLYAI_KEY},
190
- json={
191
- "audio_url": audio_url,
192
- "speaker_labels": True,
193
- "filter_profanity": True
194
- }
195
- )
196
  transcript_id = transcript_response.json()['id']
197
 
198
  while True:
199
- result = requests.get(
200
- f"https://api.assemblyai.com/v2/transcript/{transcript_id}",
201
- headers={"authorization": ASSEMBLYAI_KEY}
202
- ).json()
203
-
204
  if result['status'] == 'completed':
 
 
205
  return result
206
  elif result['status'] == 'error':
207
- raise Exception(result['error'])
208
-
209
  time.sleep(5)
210
  except Exception as e:
211
  logger.error(f"Transcription failed: {str(e)}")
212
  raise
213
 
214
-
215
- def process_utterance(utterance, full_audio, wav_file):
216
  try:
217
- start = utterance['start']
218
- end = utterance['end']
219
  segment = full_audio[start:end]
220
- temp_path = os.path.join(OUTPUT_DIR, f"temp_{uuid.uuid4()}.wav")
221
- segment.export(temp_path, format="wav")
222
-
223
- with torch.no_grad():
224
- embedding = speaker_model.get_embedding(temp_path).to(device)
225
-
226
- query_result = index.query(
227
- vector=embedding.cpu().numpy().tolist(),
228
- top_k=1,
229
- include_metadata=True
230
- )
231
-
232
- if query_result['matches'] and query_result['matches'][0]['score'] > 0.7:
233
- speaker_id = query_result['matches'][0]['id']
234
- speaker_name = query_result['matches'][0]['metadata']['speaker_name']
235
- else:
236
- speaker_id = f"unknown_{uuid.uuid4().hex[:6]}"
237
- speaker_name = f"Speaker_{speaker_id[-4:]}"
238
- index.upsert([(speaker_id, embedding.tolist(), {"speaker_name": speaker_name})])
239
-
240
- os.remove(temp_path)
241
-
242
- return {
243
- **utterance,
244
- 'speaker': speaker_name,
245
- 'speaker_id': speaker_id,
246
- 'embedding': embedding.cpu().numpy().tolist()
247
- }
248
  except Exception as e:
249
  logger.error(f"Utterance processing failed: {str(e)}")
250
- return {
251
- **utterance,
252
- 'speaker': 'Unknown',
253
- 'speaker_id': 'unknown',
254
- 'embedding': None
255
- }
256
-
257
 
258
  def identify_speakers(transcript: Dict, wav_file: str) -> List[Dict]:
 
259
  try:
260
  full_audio = AudioSegment.from_wav(wav_file)
261
  utterances = transcript['utterances']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
 
263
- with ThreadPoolExecutor(max_workers=5) as executor: # Changed to 5 workers
264
- futures = [
265
- executor.submit(process_utterance, utterance, full_audio, wav_file)
266
- for utterance in utterances
267
- ]
268
- results = [f.result() for f in futures]
269
-
270
- return results
271
  except Exception as e:
272
  logger.error(f"Speaker identification failed: {str(e)}")
273
  raise
274
 
275
-
276
- def train_role_classifier(utterances: List[Dict]):
277
- try:
278
- texts = [u['text'] for u in utterances]
279
- vectorizer = TfidfVectorizer(max_features=500, ngram_range=(1, 2))
280
- X_text = vectorizer.fit_transform(texts)
281
-
282
- features = []
283
- labels = []
284
-
285
- for i, utterance in enumerate(utterances):
286
- prosodic = utterance['prosodic_features']
287
- feat = [
288
- prosodic['duration'],
289
- prosodic['mean_pitch'],
290
- prosodic['min_pitch'],
291
- prosodic['max_pitch'],
292
- prosodic['pitch_sd'],
293
- prosodic['intensityMean'],
294
- prosodic['intensityMin'],
295
- prosodic['intensityMax'],
296
- prosodic['intensitySD'],
297
- ]
298
-
299
- feat.extend(X_text[i].toarray()[0].tolist())
300
-
301
- doc = nlp(utterance['text'])
302
- feat.extend([
303
- int(utterance['text'].endswith('?')),
304
- len(re.findall(r'\b(why|how|what|when|where|who|which)\b', utterance['text'].lower())),
305
- len(utterance['text'].split()),
306
- sum(1 for token in doc if token.pos_ == 'VERB'),
307
- sum(1 for token in doc if token.pos_ == 'NOUN')
308
- ])
309
-
310
- features.append(feat)
311
- labels.append(0 if i % 2 == 0 else 1)
312
-
313
- scaler = StandardScaler()
314
- X = scaler.fit_transform(features)
315
-
316
- clf = RandomForestClassifier(
317
- n_estimators=150,
318
- max_depth=10,
319
- random_state=42,
320
- class_weight='balanced'
321
- )
322
- clf.fit(X, labels)
323
-
324
- joblib.dump(clf, os.path.join(OUTPUT_DIR, 'role_classifier.pkl'))
325
- joblib.dump(vectorizer, os.path.join(OUTPUT_DIR, 'text_vectorizer.pkl'))
326
- joblib.dump(scaler, os.path.join(OUTPUT_DIR, 'feature_scaler.pkl'))
327
-
328
- return clf, vectorizer, scaler
329
- except Exception as e:
330
- logger.error(f"Classifier training failed: {str(e)}")
331
- raise
332
-
333
-
334
- def classify_roles(utterances: List[Dict], clf, vectorizer, scaler):
335
  try:
336
- texts = [u['text'] for u in utterances]
337
- X_text = vectorizer.transform(texts)
338
-
339
- results = []
340
- for i, utterance in enumerate(utterances):
341
- prosodic = utterance['prosodic_features']
342
- feat = [
343
- prosodic['duration'],
344
- prosodic['mean_pitch'],
345
- prosodic['min_pitch'],
346
- prosodic['max_pitch'],
347
- prosodic['pitch_sd'],
348
- prosodic['intensityMean'],
349
- prosodic['intensityMin'],
350
- prosodic['intensityMax'],
351
- prosodic['intensitySD'],
352
- ]
353
-
354
- feat.extend(X_text[i].toarray()[0].tolist())
355
-
356
- doc = nlp(utterance['text'])
357
- feat.extend([
358
- int(utterance['text'].endswith('?')),
359
- len(re.findall(r'\b(why|how|what|when|where|who|which)\b', utterance['text'].lower())),
360
- len(utterance['text'].split()),
361
- sum(1 for token in doc if token.pos_ == 'VERB'),
362
- sum(1 for token in doc if token.pos_ == 'NOUN')
363
- ])
364
-
365
- X = scaler.transform([feat])
366
- role = 'Interviewer' if clf.predict(X)[0] == 0 else 'Interviewee'
367
-
368
- results.append({**utterance, 'role': role})
369
 
370
- return results
 
 
 
 
 
 
371
  except Exception as e:
372
  logger.error(f"Role classification failed: {str(e)}")
373
- raise
374
-
375
-
 
376
 
377
 
378
  def analyze_interviewee_voice(audio_path: str, utterances: List[Dict]) -> Dict:
 
379
  try:
380
- y, sr = librosa.load(audio_path, sr=16000)
381
-
382
- interviewee_utterances = [u for u in utterances if u['role'] == 'Interviewee']
383
  if not interviewee_utterances:
384
  return {'error': 'No interviewee utterances found'}
385
-
386
- segments = []
387
- for u in interviewee_utterances:
388
- start = int(u['start'] * sr / 1000)
389
- end = int(u['end'] * sr / 1000)
390
- segments.append(y[start:end])
391
-
392
- combined_audio = np.concatenate(segments)
393
-
394
  total_duration = sum(u['prosodic_features']['duration'] for u in interviewee_utterances)
395
  total_words = sum(len(u['text'].split()) for u in interviewee_utterances)
396
- speaking_rate = total_words / total_duration if total_duration > 0 else 0
397
 
398
- filler_words = ['um', 'uh', 'like', 'you know', 'so', 'i mean']
399
- filler_count = sum(
400
- sum(u['text'].lower().count(fw) for fw in filler_words)
401
- for u in interviewee_utterances
402
- )
403
  filler_ratio = filler_count / total_words if total_words > 0 else 0
404
 
 
405
  all_words = ' '.join(u['text'].lower() for u in interviewee_utterances).split()
406
- word_counts = {}
407
- for i in range(len(all_words) - 1):
408
- bigram = (all_words[i], all_words[i + 1])
409
- word_counts[bigram] = word_counts.get(bigram, 0) + 1
410
- repetition_score = sum(1 for count in word_counts.values() if count > 1) / len(
411
- word_counts) if word_counts else 0
412
-
413
- pitches = []
414
- for segment in segments:
415
- f0, voiced_flag, _ = librosa.pyin(segment, fmin=80, fmax=300, sr=sr)
416
- pitches.extend(f0[voiced_flag])
417
 
418
  pitch_mean = np.mean(pitches) if len(pitches) > 0 else 0
419
  pitch_std = np.std(pitches) if len(pitches) > 0 else 0
420
- jitter = np.mean(np.abs(np.diff(pitches))) / pitch_mean if len(pitches) > 1 and pitch_mean > 0 else 0
 
421
 
422
- intensities = []
423
- for segment in segments:
424
- rms = librosa.feature.rms(y=segment)[0]
425
- intensities.extend(rms)
426
-
427
- intensity_mean = np.mean(intensities) if intensities else 0
428
- intensity_std = np.std(intensities) if intensities else 0
429
- shimmer = np.mean(np.abs(np.diff(intensities))) / intensity_mean if len(
430
- intensities) > 1 and intensity_mean > 0 else 0
431
-
432
- anxiety_score = 0.6 * (pitch_std / pitch_mean) + 0.4 * (jitter + shimmer) if pitch_mean > 0 else 0
433
- confidence_score = 0.7 * (1 / (1 + intensity_std)) + 0.3 * (1 / (1 + filler_ratio))
434
- hesitation_score = filler_ratio + repetition_score
435
-
436
- anxiety_level = 'high' if anxiety_score > 0.15 else 'moderate' if anxiety_score > 0.07 else 'low'
437
- confidence_level = 'high' if confidence_score > 0.7 else 'moderate' if confidence_score > 0.5 else 'low'
438
- fluency_level = 'fluent' if (filler_ratio < 0.05 and repetition_score < 0.1) else 'moderate' if (
439
- filler_ratio < 0.1 and repetition_score < 0.2) else 'disfluent'
440
 
 
 
 
 
441
  return {
442
  'speaking_rate': float(round(speaking_rate, 2)),
443
  'filler_ratio': float(round(filler_ratio, 4)),
444
  'repetition_score': float(round(repetition_score, 4)),
445
- 'pitch_analysis': {
446
- 'mean': float(round(pitch_mean, 2)),
447
- 'std_dev': float(round(pitch_std, 2)),
448
- 'jitter': float(round(jitter, 4))
449
- },
450
- 'intensity_analysis': {
451
- 'mean': float(round(intensity_mean, 2)),
452
- 'std_dev': float(round(intensity_std, 2)),
453
- 'shimmer': float(round(shimmer, 4))
454
- },
455
  'composite_scores': {
456
  'anxiety': float(round(anxiety_score, 4)),
457
  'confidence': float(round(confidence_score, 4)),
458
  'hesitation': float(round(hesitation_score, 4))
459
  },
460
  'interpretation': {
461
- 'anxiety_level': anxiety_level,
462
- 'confidence_level': confidence_level,
463
- 'fluency_level': fluency_level
464
  }
465
  }
466
  except Exception as e:
467
- logger.error(f"Voice analysis failed: {str(e)}")
468
  return {'error': str(e)}
469
 
470
-
471
  def generate_anxiety_confidence_chart(composite_scores: Dict, chart_path_or_buffer):
 
472
  try:
473
  labels = ['Anxiety', 'Confidence']
474
  scores = [composite_scores.get('anxiety', 0), composite_scores.get('confidence', 0)]
 
475
  fig, ax = plt.subplots(figsize=(5, 3.5))
476
  bars = ax.bar(labels, scores, color=['#FF5252', '#26A69A'], edgecolor='black', width=0.45)
477
- ax.set_ylabel('Score (Normalized)', fontsize=12)
 
478
  ax.set_title('Vocal Dynamics: Anxiety vs. Confidence', fontsize=14, pad=15)
479
- ax.set_ylim(0, 1.3)
 
480
  for bar in bars:
481
  height = bar.get_height()
482
- ax.text(bar.get_x() + bar.get_width()/2, height + 0.05, f"{height:.2f}",
483
- ha='center', color='black', fontweight='bold', fontsize=11)
 
484
  ax.grid(True, axis='y', linestyle='--', alpha=0.7)
485
  plt.tight_layout()
486
  plt.savefig(chart_path_or_buffer, format='png', bbox_inches='tight', dpi=300)
@@ -489,67 +370,101 @@ def generate_anxiety_confidence_chart(composite_scores: Dict, chart_path_or_buff
489
  logger.error(f"Error generating chart: {str(e)}")
490
 
491
  def calculate_acceptance_probability(analysis_data: Dict) -> float:
 
492
  voice = analysis_data.get('voice_analysis', {})
493
  if 'error' in voice: return 0.0
494
- w_confidence, w_anxiety, w_fluency, w_speaking_rate, w_filler_repetition, w_content_strengths = 0.35, -0.25, 0.2, 0.15, -0.15, 0.25
495
- confidence_score = voice.get('composite_scores', {}).get('confidence', 0.0)
496
- anxiety_score = voice.get('composite_scores', {}).get('anxiety', 0.0)
497
- fluency_level = voice.get('interpretation', {}).get('fluency_level', 'Disfluent')
498
- speaking_rate = voice.get('speaking_rate', 0.0)
499
- filler_ratio = voice.get('filler_ratio', 0.0)
500
- repetition_score = voice.get('repetition_score', 0.0)
501
- fluency_map = {'Fluent': 1.0, 'Moderate': 0.6, 'Disfluent': 0.2}
502
- fluency_val = fluency_map.get(fluency_level, 0.2)
503
- ideal_speaking_rate = 2.5
504
- speaking_rate_deviation = abs(speaking_rate - ideal_speaking_rate)
505
- speaking_rate_score = max(0, 1 - (speaking_rate_deviation / ideal_speaking_rate))
506
- filler_repetition_composite = (filler_ratio + repetition_score) / 2
507
- filler_repetition_score = max(0, 1 - filler_repetition_composite)
508
- content_strength_val = 0.85 if analysis_data.get('text_analysis', {}).get('total_duration', 0) > 60 else 0.4
509
- raw_score = (confidence_score * w_confidence + (1 - anxiety_score) * abs(w_anxiety) + fluency_val * w_fluency + speaking_rate_score * w_speaking_rate + filler_repetition_score * abs(w_filler_repetition) + content_strength_val * w_content_strengths)
510
- max_possible_score = (w_confidence + abs(w_anxiety) + w_fluency + w_speaking_rate + abs(w_filler_repetition) + w_content_strengths)
511
- if max_possible_score == 0: return 50.0
512
- normalized_score = raw_score / max_possible_score
 
 
 
 
 
 
 
513
  acceptance_probability = max(0.0, min(1.0, normalized_score))
 
514
  return float(f"{acceptance_probability * 100:.2f}")
515
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
516
  def generate_report(analysis_data: Dict) -> str:
 
517
  try:
518
- voice = analysis_data.get('voice_analysis', {})
519
- voice_interpretation = generate_voice_interpretation(voice)
520
- interviewee_responses = [f"Speaker {u['speaker']} ({u['role']}): {u['text']}" for u in analysis_data['transcript'] if u['role'] == 'Interviewee'][:6]
521
- acceptance_prob = analysis_data.get('acceptance_probability', None)
 
522
  acceptance_line = ""
523
  if acceptance_prob is not None:
524
  acceptance_line = f"\n**Hiring Suitability Score: {acceptance_prob:.2f}%**\n"
525
- if acceptance_prob >= 80: acceptance_line += "HR Verdict: Outstanding candidate, highly recommended for immediate advancement."
526
- elif acceptance_prob >= 60: acceptance_line += "HR Verdict: Strong candidate, suitable for further evaluation with targeted development."
527
- elif acceptance_prob >= 40: acceptance_line += "HR Verdict: Moderate potential, requires additional assessment and skill-building."
528
- else: acceptance_line += "HR Verdict: Limited fit, significant improvement needed for role alignment."
 
529
  prompt = f"""
530
- You are EvalBot, a senior HR consultant with 20+ years of experience, delivering a polished, concise, and engaging interview analysis report. Use a professional tone, clear headings, and bullet points ('- ') for readability. Avoid redundancy and ensure distinct sections for strengths, growth areas, and recommendations.
 
531
  {acceptance_line}
 
532
  **1. Executive Summary**
533
- - Provide a concise overview of performance, key metrics, and hiring potential.
534
  - Interview length: {analysis_data['text_analysis']['total_duration']:.2f} seconds
535
- - Speaker turns: {analysis_data['text_analysis']['speaker_turns']}
536
  - Participants: {', '.join(analysis_data['speakers'])}
 
537
  **2. Communication and Vocal Dynamics**
538
- - Evaluate vocal delivery (rate, fluency, confidence) and professional impact.
539
- - Offer HR insights on workplace alignment.
540
  {voice_interpretation}
 
541
  **3. Competency and Content Evaluation**
542
- - Assess competencies: leadership, problem-solving, communication, adaptability.
543
  - List strengths and growth areas separately, with specific examples.
544
- - Sample responses:
545
- {chr(10).join(interviewee_responses)}
546
- **4. Role Fit and Growth Potential**
547
- - Analyze cultural fit, role readiness, and long-term potential.
548
- - Highlight enthusiasm and scalability.
549
- **5. Strategic HR Recommendations**
550
- - Provide distinct, prioritized strategies for candidate growth.
551
- - Target: Communication, Response Depth, Professional Presence.
552
- - List clear next steps for hiring managers (e.g., advance, train, assess).
553
  """
554
  response = gemini_model.generate_content(prompt)
555
  return response.text
@@ -675,7 +590,7 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text:
675
  "Role Fit and Growth Potential": [],
676
  "Strategic HR Recommendations": {"Development Priorities": [], "Next Steps": []}
677
  }
678
- report_parts = re.split(r'(\s*\*\*\s*\d\.\s*.*?\s*\*\*)', gemini_report_text)
679
  current_section = None
680
  for part in report_parts:
681
  if not part.strip(): continue
@@ -771,10 +686,19 @@ def convert_to_serializable(obj):
771
  if isinstance(obj, np.ndarray): return obj.tolist()
772
  return obj
773
 
 
 
 
 
 
 
 
 
774
  def process_interview(audio_path_or_url: str):
775
- local_audio_path = None
776
- wav_file = None
777
  is_downloaded = False
 
778
  try:
779
  logger.info(f"Starting processing for {audio_path_or_url}")
780
  if audio_path_or_url.startswith(('http://', 'https://')):
@@ -782,44 +706,52 @@ def process_interview(audio_path_or_url: str):
782
  is_downloaded = True
783
  else:
784
  local_audio_path = audio_path_or_url
 
785
  wav_file = convert_to_wav(local_audio_path)
786
  transcript = transcribe(wav_file)
787
- for utterance in transcript['utterances']:
788
- utterance['prosodic_features'] = extract_prosodic_features(wav_file, utterance['start'], utterance['end'])
 
 
789
  utterances_with_speakers = identify_speakers(transcript, wav_file)
790
- clf, vectorizer, scaler = None, None, None
791
- if os.path.exists(os.path.join(OUTPUT_DIR, 'role_classifier.pkl')):
792
- clf = joblib.load(os.path.join(OUTPUT_DIR, 'role_classifier.pkl'))
793
- vectorizer = joblib.load(os.path.join(OUTPUT_DIR, 'text_vectorizer.pkl'))
794
- scaler = joblib.load(os.path.join(OUTPUT_DIR, 'feature_scaler.pkl'))
795
- else:
796
- clf, vectorizer, scaler = train_role_classifier(utterances_with_speakers)
797
- classified_utterances = classify_roles(utterances_with_speakers, clf, vectorizer, scaler)
798
  voice_analysis = analyze_interviewee_voice(wav_file, classified_utterances)
 
799
  analysis_data = {
800
  'transcript': classified_utterances,
801
- 'speakers': list(set(u['speaker'] for u in classified_utterances)),
802
  'voice_analysis': voice_analysis,
803
  'text_analysis': {
804
  'total_duration': sum(u['prosodic_features']['duration'] for u in classified_utterances),
805
  'speaker_turns': len(classified_utterances)
806
  }
807
  }
 
808
  analysis_data['acceptance_probability'] = calculate_acceptance_probability(analysis_data)
 
809
  gemini_report_text = generate_report(analysis_data)
 
810
  base_name = str(uuid.uuid4())
811
  pdf_path = os.path.join(OUTPUT_DIR, f"{base_name}_report.pdf")
812
  json_path = os.path.join(OUTPUT_DIR, f"{base_name}_analysis.json")
813
- create_pdf_report(analysis_data, pdf_path, gemini_report_text=gemini_report_text)
 
 
814
  with open(json_path, 'w') as f:
815
  serializable_data = convert_to_serializable(analysis_data)
816
  json.dump(serializable_data, f, indent=2)
817
- logger.info(f"Processing completed for {audio_path_or_url}")
818
- return {'pdf_path': pdf_path, 'json_path': json_path}
 
 
819
  except Exception as e:
820
  logger.error(f"Processing failed for {audio_path_or_url}: {str(e)}", exc_info=True)
821
  raise
822
  finally:
 
823
  if wav_file and os.path.exists(wav_file):
824
  os.remove(wav_file)
825
  if is_downloaded and local_audio_path and os.path.exists(local_audio_path):
 
10
  from nemo.collections.asr.models import EncDecSpeakerLabelModel
11
  from pinecone import Pinecone, ServerlessSpec
12
  import librosa
 
 
 
 
13
  import re
14
+ from typing import Dict, List
15
  import logging
16
  import tempfile
17
  from reportlab.lib.pagesizes import letter
18
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
19
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
20
  from reportlab.lib.units import inch
21
  from reportlab.lib import colors
 
24
  matplotlib.use('Agg')
25
  from reportlab.platypus import Image
26
  import io
 
27
  import spacy
28
  import google.generativeai as genai
 
29
  from concurrent.futures import ThreadPoolExecutor
30
+ import urllib3 # <-- تم الإصلاح: إضافة استيراد urllib3
31
+
32
+ # Logging configuration
33
  logging.basicConfig(level=logging.INFO)
34
  logger = logging.getLogger(__name__)
35
+ # Reduce log verbosity from the NeMo library
36
+ logging.getLogger("nemo_logging").setLevel(logging.WARNING)
37
+ logging.getLogger("nemo").setLevel(logging.WARNING)
38
+
39
 
40
  # Configuration
 
41
  OUTPUT_DIR = "./processed_audio"
42
  os.makedirs(OUTPUT_DIR, exist_ok=True)
43
 
 
46
  ASSEMBLYAI_KEY = os.getenv("ASSEMBLYAI_KEY")
47
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
48
 
49
+ # --- Fixed: improved download function with retries ---
def download_audio_from_url(url: str, retries=3) -> str:
    """Download an audio file from a URL to a temporary local path, with retries.

    Each attempt streams the response body to a uniquely named file in the
    system temp directory. Failed attempts are retried with exponential
    backoff (1s, 2s, 4s, ...).

    Args:
        url: HTTP(S) location of the audio file.
        retries: Maximum number of download attempts.

    Returns:
        Path of the downloaded file. The caller owns the file and is
        responsible for deleting it.

    Raises:
        requests.exceptions.RequestException, urllib3.exceptions.ProtocolError:
            The error from the final attempt, once every attempt has failed.
        Exception: If ``retries`` is less than 1, so no attempt was made.
    """
    temp_path = os.path.join(tempfile.gettempdir(), f"{uuid.uuid4()}.tmp_audio")
    logger.info(f"Downloading audio from {url} to {temp_path}")

    for attempt in range(retries):
        try:
            with requests.get(url, stream=True, timeout=60) as r:
                r.raise_for_status()
                with open(temp_path, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)
            logger.info("Download completed successfully.")
            return temp_path
        except (requests.exceptions.RequestException, urllib3.exceptions.ProtocolError) as e:
            # A failed attempt may have written part of the body; never leave
            # a truncated file behind in the temp directory.
            try:
                os.remove(temp_path)
            except OSError:
                pass  # nothing was written, or it is already gone

            if attempt < retries - 1:
                logger.warning(f"Attempt {attempt + 1}/{retries} failed: {e}. Retrying...")
                time.sleep(2 ** attempt)  # Exponential backoff
            else:
                # Last attempt: do not claim a retry that will not happen.
                logger.warning(f"Attempt {attempt + 1}/{retries} failed: {e}.")
                logger.error(f"Failed to download audio after {retries} attempts.")
                raise

    # Only reachable when the loop body never ran (retries < 1).
    raise Exception(f"Failed to download audio from URL {url}")
73
+
74
 
75
  def initialize_services():
76
+ """Initializes Pinecone and Gemini services."""
77
  try:
78
  pc = Pinecone(api_key=PINECONE_KEY)
79
  index_name = "interview-speaker-embeddings"
 
85
  spec=ServerlessSpec(cloud="aws", region="us-east-1")
86
  )
87
  index = pc.Index(index_name)
88
+
89
  genai.configure(api_key=GEMINI_API_KEY)
90
  gemini_model = genai.GenerativeModel('gemini-1.5-flash')
91
  return index, gemini_model
 
94
  raise
95
 
96
  index, gemini_model = initialize_services()
 
97
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
98
  logger.info(f"Using device: {device}")
99
 
100
  def load_speaker_model():
101
+ """Loads the speaker verification model."""
102
  try:
103
+ # يضمن عدم استخدام عدد كبير جدًا من الخيوط
104
+ torch.set_num_threads(1)
105
  model = EncDecSpeakerLabelModel.from_pretrained(
106
  "nvidia/speakerverification_en_titanet_large",
107
  map_location=torch.device('cpu')
 
113
  raise RuntimeError("Could not load speaker verification model")
114
 
def load_models():
    """Load every model the pipeline needs.

    Returns:
        A ``(speaker_model, nlp)`` tuple: the speaker verification model
        produced by ``load_speaker_model()`` and the spaCy pipeline loaded
        from ``en_core_web_sm``.
    """
    # Tuple elements are evaluated left to right, so the speaker model is
    # still loaded before the spaCy pipeline.
    return load_speaker_model(), spacy.load("en_core_web_sm")
120
+
121
+ speaker_model, nlp = load_models()
 
122
 
 
123
  def convert_to_wav(audio_path: str, output_dir: str = OUTPUT_DIR) -> str:
124
+ """Converts any audio file to a 16kHz mono WAV file."""
125
  try:
126
  audio = AudioSegment.from_file(audio_path)
127
+ audio = audio.set_frame_rate(16000).set_channels(1)
 
 
 
128
  wav_file = os.path.join(output_dir, f"{uuid.uuid4()}.wav")
129
  audio.export(wav_file, format="wav")
130
  return wav_file
 
132
  logger.error(f"Audio conversion failed: {str(e)}")
133
  raise
134
 
 
def extract_prosodic_features(audio_path: str, start_ms: int, end_ms: int) -> Dict:
    """Extract prosodic features from one segment of an audio file.

    Only the requested window is decoded (resampled to 16 kHz), rather than
    loading the whole file and slicing it.

    Args:
        audio_path: Path of the audio file to read.
        start_ms: Segment start, in milliseconds from the start of the file.
        end_ms: Segment end, in milliseconds from the start of the file.

    Returns:
        A dict with the keys ``duration`` (seconds), ``mean_pitch``,
        ``pitch_sd``, ``intensityMean`` and ``intensitySD``. If feature
        extraction fails, the pitch and intensity values are 0.0 but
        ``duration`` still reflects the requested window, because it is known
        from the arguments alone and consumers that total durations would
        otherwise undercount.
    """
    duration_s = (end_ms - start_ms) / 1000

    try:
        y, sr = librosa.load(
            audio_path,
            sr=16000,
            offset=start_ms / 1000.0,
            duration=duration_s,
        )

        # piptrack returns (pitches, magnitudes); keep only bins where a
        # pitch was actually detected.
        pitches, _ = librosa.piptrack(y=y, sr=sr)
        pitches = pitches[pitches > 0]
        has_pitch = len(pitches) > 0

        rms = librosa.feature.rms(y=y)[0]

        return {
            'duration': duration_s,
            'mean_pitch': float(np.mean(pitches)) if has_pitch else 0.0,
            'pitch_sd': float(np.std(pitches)) if has_pitch else 0.0,
            'intensityMean': float(np.mean(rms)),
            'intensitySD': float(np.std(rms)),
        }
    except Exception as e:
        logger.error(f"Feature extraction failed: {str(e)}")
        # Best-effort fallback: same keys and float types as the success path.
        return {
            'duration': duration_s,
            'mean_pitch': 0.0,
            'pitch_sd': 0.0,
            'intensityMean': 0.0,
            'intensitySD': 0.0,
        }
 
 
 
 
 
 
 
 
 
 
 
155
 
def transcribe(audio_path: str, poll_interval: float = 5.0, max_wait=3600.0) -> Dict:
    """Transcribe an audio file with AssemblyAI, with speaker labels enabled.

    The file is uploaded, a transcript job is created, and the job is polled
    until it completes or fails.

    Args:
        audio_path: Path of the local audio file to upload.
        poll_interval: Seconds to sleep between status polls.
        max_wait: Maximum number of seconds to wait for the job to finish.
            Pass ``None`` to poll indefinitely.

    Returns:
        The completed transcript JSON, which contains a non-empty
        ``utterances`` list.

    Raises:
        requests.exceptions.RequestException: If any HTTP call fails or
            returns an error status.
        ValueError: If the job completes without any utterances.
        TimeoutError: If the job is still unfinished after ``max_wait``.
        Exception: If AssemblyAI reports the job as failed.
    """
    try:
        headers = {"authorization": ASSEMBLYAI_KEY}

        with open(audio_path, 'rb') as f:
            upload_response = requests.post(
                "https://api.assemblyai.com/v2/upload",
                headers=headers,
                data=f,
                timeout=(30, 600),
            )
        # Surface auth/server errors here instead of as a KeyError below.
        upload_response.raise_for_status()
        audio_url = upload_response.json()['upload_url']

        transcript_request = {
            "audio_url": audio_url,
            "speaker_labels": True,
        }
        transcript_response = requests.post(
            "https://api.assemblyai.com/v2/transcript",
            json=transcript_request,
            headers=headers,
            timeout=60,
        )
        transcript_response.raise_for_status()
        transcript_id = transcript_response.json()['id']

        status_url = f"https://api.assemblyai.com/v2/transcript/{transcript_id}"
        deadline = None if max_wait is None else time.monotonic() + max_wait

        while True:
            poll_response = requests.get(status_url, headers=headers, timeout=60)
            poll_response.raise_for_status()
            result = poll_response.json()

            if result['status'] == 'completed':
                if not result.get('utterances'):
                    raise ValueError("Transcription completed but no utterances were returned. The audio may be too short or silent.")
                return result
            elif result['status'] == 'error':
                raise Exception(f"Transcription failed: {result['error']}")

            # Bound the wait so a stuck job cannot hang the pipeline forever.
            if deadline is not None and time.monotonic() >= deadline:
                raise TimeoutError(
                    f"Transcription {transcript_id} did not finish within {max_wait} seconds."
                )
            time.sleep(poll_interval)
    except Exception as e:
        logger.error(f"Transcription failed: {str(e)}")
        raise
185
 
def process_utterance(utterance, full_audio):
    """Attach a speaker embedding to a single utterance.

    The utterance's ``start``..``end`` slice of ``full_audio`` is exported to a
    temporary WAV file and passed to ``speaker_model``. A copy of the utterance
    with an added ``embedding`` key is returned; on any failure the error is
    logged and the embedding is a 192-element zero vector.
    """
    try:
        clip = full_audio[utterance['start']:utterance['end']]

        with tempfile.NamedTemporaryFile(suffix=".wav", delete=True) as wav_tmp:
            clip.export(wav_tmp.name, format="wav")
            # Inference only, so gradient tracking is switched off.
            with torch.no_grad():
                raw_embedding = speaker_model.get_embedding(wav_tmp.name)
                vector = raw_embedding.cpu().numpy().flatten()

        enriched = {**utterance}
        enriched['embedding'] = vector
        return enriched
    except Exception as e:
        logger.error(f"Utterance processing failed: {str(e)}")
        # Fall back to a zero vector so the caller still gets one result per utterance.
        fallback = {**utterance}
        fallback['embedding'] = np.zeros(192)
        return fallback
 
 
 
 
 
 
201
 
def identify_speakers(transcript: Dict, wav_file: str) -> List[Dict]:
    """Compute per-utterance embeddings and assign stable speaker names.

    Each entry of ``transcript['utterances']`` is passed through
    ``process_utterance`` on a small thread pool. The transcript's own speaker
    labels are then renamed ``Speaker_1``, ``Speaker_2``, ... in order of first
    appearance and stored under ``speaker_name``.

    Returns:
        The processed utterances, in their original order.

    Raises:
        Exception: Any failure is logged and re-raised.
    """
    try:
        audio = AudioSegment.from_wav(wav_file)

        with ThreadPoolExecutor(max_workers=4) as pool:
            # map() preserves input order, so results line up with the transcript.
            processed = list(pool.map(lambda utt: process_utterance(utt, audio), transcript['utterances']))

        # Translate the transcript's speaker labels into sequential names.
        names_by_label = {}
        for item in processed:
            label = item['speaker']
            if label not in names_by_label:
                names_by_label[label] = f"Speaker_{len(names_by_label) + 1}"
            item['speaker_name'] = names_by_label[label]

        return processed
    except Exception as e:
        logger.error(f"Speaker identification failed: {str(e)}")
        raise
228
 
# --- Fixed: the role classification model was replaced with a heuristic approach ---
def classify_roles(utterances: List[Dict]) -> List[Dict]:
    """Label every utterance as 'Interviewer' or 'Interviewee' using a heuristic.

    Each speaker earns one point per utterance that ends with a question mark
    and one point per distinct question word found in an utterance. The speaker
    with the highest total is taken to be the interviewer; every other speaker
    is an interviewee. With a single speaker, that speaker is the interviewer.

    Args:
        utterances: Utterance dicts carrying ``speaker_name`` and ``text``.

    Returns:
        The same list, with a ``role`` key set on each utterance. If
        classification fails, the error is logged and every role is 'Unknown'.
    """
    question_words = {'what', 'why', 'how', 'when', 'where', 'who', 'which', 'tell', 'describe', 'explain'}
    try:
        speaker_stats = {}

        for u in utterances:
            stats = speaker_stats.setdefault(u['speaker_name'], {'question_score': 0, 'utterance_count': 0})
            stats['utterance_count'] += 1

            text_lower = (u.get('text') or '').lower().strip()

            # Raise the score when the utterance ends with a question mark.
            if text_lower.endswith('?'):
                stats['question_score'] += 1

            # Raise the score for each distinct question word. Tokenising on
            # letters keeps attached punctuation ("why?", "how,") from hiding a word.
            tokens = set(re.findall(r"[a-z']+", text_lower))
            stats['question_score'] += len(question_words & tokens)

        if not speaker_stats:
            # No speakers were found, so nothing can be classified.
            return utterances

        # The interviewer is the speaker with the highest question score.
        interviewer_speaker = max(speaker_stats, key=lambda s: speaker_stats[s]['question_score'])

        logger.info(f"Speaker stats for role classification: {speaker_stats}")
        logger.info(f"Identified Interviewer: {interviewer_speaker}")

        for u in utterances:
            u['role'] = 'Interviewer' if u['speaker_name'] == interviewer_speaker else 'Interviewee'

        return utterances
    except Exception as e:
        logger.error(f"Role classification failed: {str(e)}")
        # Assign a default role on failure.
        for u in utterances:
            u['role'] = 'Unknown'
        return utterances
279
 
280
 
def analyze_interviewee_voice(audio_path: str, utterances: List[Dict]) -> Dict:
    """Analyse the voice characteristics of the interviewee.

    Only utterances whose ``role`` is 'Interviewee' are considered. Speaking
    rate, filler-word ratio and word repetition come from the transcript text;
    pitch and intensity statistics come from the matching audio slices.

    Args:
        audio_path: Path of the audio file the utterances refer to.
        utterances: Utterance dicts with ``role``, ``start``/``end``
            (milliseconds), ``text`` and ``prosodic_features['duration']``.

    Returns:
        A dict with ``speaking_rate``, ``filler_ratio``, ``repetition_score``,
        ``pitch_analysis``, ``intensity_analysis``, ``composite_scores`` and
        ``interpretation``; or ``{'error': message}`` when there are no
        interviewee utterances or the analysis fails.
    """
    try:
        interviewee_utterances = [u for u in utterances if u.get('role') == 'Interviewee']
        if not interviewee_utterances:
            return {'error': 'No interviewee utterances found'}

        y, sr = librosa.load(audio_path, sr=16000)

        # Cut out the candidate's audio segments, dropping empty slices.
        segments = [y[int(u['start'] * sr / 1000):int(u['end'] * sr / 1000)] for u in interviewee_utterances]
        segments = [s for s in segments if len(s) > 0]

        total_duration = sum(u['prosodic_features']['duration'] for u in interviewee_utterances)
        total_words = sum(len(u['text'].split()) for u in interviewee_utterances)
        speaking_rate = total_words / (total_duration / 60) if total_duration > 0 else 0  # Words per minute

        # Filler-word analysis. Multi-word fillers are matched as adjacent token
        # pairs, because a single token can never equal "you know" or "i mean".
        single_fillers = {'um', 'uh', 'like', 'so', 'actually'}
        phrase_fillers = {('you', 'know'), ('i', 'mean')}
        filler_count = 0
        for u in interviewee_utterances:
            tokens = re.findall(r"[a-z']+", u['text'].lower())
            filler_count += sum(1 for token in tokens if token in single_fillers)
            filler_count += sum(1 for pair in zip(tokens, tokens[1:]) if pair in phrase_fillers)
        filler_ratio = filler_count / total_words if total_words > 0 else 0

        # Word-repetition analysis: share of words that repeat an earlier word.
        all_words = ' '.join(u['text'].lower() for u in interviewee_utterances).split()
        repetition_score = (len(all_words) - len(set(all_words))) / len(all_words) if all_words else 0

        # Pitch and intensity analysis. pyin must be given the real sample rate;
        # otherwise it assumes its own default and scales the pitch estimates.
        fmin = librosa.note_to_hz('C2')
        fmax = librosa.note_to_hz('C7')
        pitch_tracks = [librosa.pyin(s, fmin=fmin, fmax=fmax, sr=sr)[0] for s in segments]
        pitches = np.concatenate(pitch_tracks) if pitch_tracks else np.array([])
        pitches = pitches[~np.isnan(pitches)]  # unvoiced frames are reported as NaN

        rms_tracks = [librosa.feature.rms(y=s)[0] for s in segments]
        intensities = np.concatenate(rms_tracks) if rms_tracks else np.array([])

        pitch_mean = np.mean(pitches) if len(pitches) > 0 else 0
        pitch_std = np.std(pitches) if len(pitches) > 0 else 0
        intensity_mean = np.mean(intensities) if len(intensities) > 0 else 0
        intensity_std = np.std(intensities) if len(intensities) > 0 else 0

        # Composite scores.
        anxiety_score = (pitch_std / 150) if pitch_std > 0 else 0  # simple normalisation
        confidence_score = 1 - (intensity_std * 5) if intensity_std > 0 else 1  # simple normalisation
        hesitation_score = (filler_ratio + repetition_score) / 2

        # Clamp the scores to the range 0..1.
        anxiety_score = max(0, min(1, anxiety_score))
        confidence_score = max(0, min(1, confidence_score))

        return {
            'speaking_rate': float(round(speaking_rate, 2)),
            'filler_ratio': float(round(filler_ratio, 4)),
            'repetition_score': float(round(repetition_score, 4)),
            'pitch_analysis': {'mean': float(round(pitch_mean, 2)), 'std_dev': float(round(pitch_std, 2))},
            'intensity_analysis': {'mean': float(round(intensity_mean, 4)), 'std_dev': float(round(intensity_std, 4))},
            'composite_scores': {
                'anxiety': float(round(anxiety_score, 4)),
                'confidence': float(round(confidence_score, 4)),
                'hesitation': float(round(hesitation_score, 4))
            },
            'interpretation': {
                'anxiety_level': 'high' if anxiety_score > 0.6 else 'moderate' if anxiety_score > 0.3 else 'low',
                'confidence_level': 'high' if confidence_score > 0.7 else 'moderate' if confidence_score > 0.4 else 'low',
                'fluency_level': 'disfluent' if hesitation_score > 0.1 else 'moderate' if hesitation_score > 0.05 else 'fluent'
            }
        }
    except Exception as e:
        logger.error(f"Voice analysis failed: {str(e)}", exc_info=True)
        return {'error': str(e)}
346
 
 
347
  def generate_anxiety_confidence_chart(composite_scores: Dict, chart_path_or_buffer):
348
+ """Generates a bar chart for anxiety and confidence scores."""
349
  try:
350
  labels = ['Anxiety', 'Confidence']
351
  scores = [composite_scores.get('anxiety', 0), composite_scores.get('confidence', 0)]
352
+
353
  fig, ax = plt.subplots(figsize=(5, 3.5))
354
  bars = ax.bar(labels, scores, color=['#FF5252', '#26A69A'], edgecolor='black', width=0.45)
355
+
356
+ ax.set_ylabel('Score (0 to 1)', fontsize=12)
357
  ax.set_title('Vocal Dynamics: Anxiety vs. Confidence', fontsize=14, pad=15)
358
+ ax.set_ylim(0, 1.1)
359
+
360
  for bar in bars:
361
  height = bar.get_height()
362
+ ax.text(bar.get_x() + bar.get_width()/2, height + 0.02, f"{height:.2f}",
363
+ ha='center', va='bottom', color='black', fontweight='bold', fontsize=11)
364
+
365
  ax.grid(True, axis='y', linestyle='--', alpha=0.7)
366
  plt.tight_layout()
367
  plt.savefig(chart_path_or_buffer, format='png', bbox_inches='tight', dpi=300)
 
370
  logger.error(f"Error generating chart: {str(e)}")
371
 
def calculate_acceptance_probability(analysis_data: Dict) -> float:
    """Calculate a suitability score (0-100) from the voice analysis.

    The score is a weighted sum of confidence, calmness (``1 - anxiety``),
    fluency (``1 - hesitation``) and a speaking-rate rating, normalised by the
    total weight and clamped to the range 0..100.

    Args:
        analysis_data: Analysis dict; only its ``voice_analysis`` entry is read.

    Returns:
        The score as a percentage rounded to two decimals, or ``0.0`` when the
        voice analysis reported an error.
    """
    # Treat a missing or None analysis as "no data" and fall back to the defaults.
    voice = analysis_data.get('voice_analysis') or {}
    if 'error' in voice:
        return 0.0

    # Weights. The anxiety weight is applied to (1 - anxiety), so it is positive.
    w_confidence, w_anxiety, w_fluency, w_speaking_rate = 0.4, 0.2, 0.2, 0.2

    composite = voice.get('composite_scores') or {}
    confidence_score = composite.get('confidence', 0.5)
    anxiety_score = composite.get('anxiety', 0.5)
    hesitation_score = composite.get('hesitation', 0.5)
    fluency_score = 1 - hesitation_score

    # Rate the speaking speed (words per minute).
    rate = voice.get('speaking_rate', 150)
    if 120 <= rate <= 180:
        speaking_rate_score = 1.0
    elif 100 <= rate < 120 or 180 < rate <= 200:
        speaking_rate_score = 0.7
    else:
        speaking_rate_score = 0.4

    raw_score = (confidence_score * w_confidence +
                 (1 - anxiety_score) * w_anxiety +
                 fluency_score * w_fluency +
                 speaking_rate_score * w_speaking_rate)

    max_possible_score = w_confidence + w_anxiety + w_fluency + w_speaking_rate

    normalized_score = raw_score / max_possible_score if max_possible_score != 0 else 0
    acceptance_probability = max(0.0, min(1.0, normalized_score))

    return float(f"{acceptance_probability * 100:.2f}")
405
 
# --- Fixed: added the missing function ---
def generate_voice_interpretation(voice: Dict) -> str:
    """Generate a human-readable interpretation of the voice analysis.

    Args:
        voice: Voice-analysis dict with ``interpretation``, ``composite_scores``
            and ``speaking_rate`` entries.

    Returns:
        Four Markdown bullet lines joined by newlines (confidence, anxiety,
        fluency, speaking rate), or a single explanatory bullet when ``voice``
        is empty or carries an ``error`` key. Missing levels are shown as
        'N/A' and missing scores as 0.
    """
    if not voice or 'error' in voice:
        return "- Vocal analysis could not be performed as no interviewee was identified."

    interp = voice.get('interpretation') or {}
    scores = voice.get('composite_scores') or {}

    def _level(key: str) -> str:
        # Capitalise real levels only: 'N/A'.capitalize() would yield 'N/a'.
        value = interp.get(key)
        return str(value).capitalize() if value else 'N/A'

    def _score(key: str) -> float:
        return scores.get(key) or 0

    confidence = _level('confidence_level')
    anxiety = _level('anxiety_level')
    fluency = _level('fluency_level')
    rate = voice.get('speaking_rate') or 0

    lines = [
        f"- **Confidence:** {confidence} (Score: {_score('confidence'):.2f}). The candidate's vocal tone suggests their level of assurance.",
        f"- **Anxiety:** {anxiety} (Score: {_score('anxiety'):.2f}). Vocal stress indicators point to their comfort level during the interview.",
        f"- **Fluency & Hesitation:** {fluency} (Hesitation Score: {_score('hesitation'):.2f}). Reflects the smoothness of speech and use of filler words.",
        f"- **Speaking Rate:** {rate:.0f} words per minute. A normal conversational pace is typically between 120-180 WPM."
    ]
    return "\n".join(lines)
427
+
428
+
429
  def generate_report(analysis_data: Dict) -> str:
430
+ """Generates a comprehensive report using Gemini AI."""
431
  try:
432
+ voice_interpretation = generate_voice_interpretation(analysis_data.get('voice_analysis', {}))
433
+
434
+ interviewee_responses = [f"- {u['text']}" for u in analysis_data['transcript'] if u.get('role') == 'Interviewee'][:4]
435
+
436
+ acceptance_prob = analysis_data.get('acceptance_probability')
437
  acceptance_line = ""
438
  if acceptance_prob is not None:
439
  acceptance_line = f"\n**Hiring Suitability Score: {acceptance_prob:.2f}%**\n"
440
+ if acceptance_prob >= 80: acceptance_line += "HR Verdict: Outstanding candidate. Highly recommended for advancement."
441
+ elif acceptance_prob >= 60: acceptance_line += "HR Verdict: Strong candidate. Suitable for further evaluation."
442
+ elif acceptance_prob >= 40: acceptance_line += "HR Verdict: Moderate potential. Requires additional assessment."
443
+ else: acceptance_line += "HR Verdict: Limited fit for the role at this time."
444
+
445
  prompt = f"""
446
+ You are EvalBot, a senior HR consultant. Generate a polished, concise, and engaging interview analysis report. Use a professional tone, clear headings, and bullet points.
447
+
448
  {acceptance_line}
449
+
450
  **1. Executive Summary**
451
+ - Provide a concise overview of the candidate's performance, key metrics, and hiring potential.
452
  - Interview length: {analysis_data['text_analysis']['total_duration']:.2f} seconds
 
453
  - Participants: {', '.join(analysis_data['speakers'])}
454
+
455
  **2. Communication and Vocal Dynamics**
456
+ - Evaluate vocal delivery based on the following analysis. Offer HR insights on its impact.
 
457
  {voice_interpretation}
458
+
459
  **3. Competency and Content Evaluation**
460
+ - Based on the sample responses below, assess competencies like leadership, problem-solving, and self-awareness.
461
  - List strengths and growth areas separately, with specific examples.
462
+ - Sample Responses from Candidate:
463
+ {' '.join(interviewee_responses) if interviewee_responses else "No responses from interviewee were identified."}
464
+
465
+ **4. Strategic HR Recommendations**
466
+ - Provide prioritized strategies for the candidate's growth.
467
+ - List clear next steps for hiring managers (e.g., advance, further technical assessment, reject).
 
 
 
468
  """
469
  response = gemini_model.generate_content(prompt)
470
  return response.text
 
590
  "Role Fit and Growth Potential": [],
591
  "Strategic HR Recommendations": {"Development Priorities": [], "Next Steps": []}
592
  }
593
+ report_parts = re.split(r'(\s*\\\s*\d\.\s*.?\s\\)', gemini_report_text)
594
  current_section = None
595
  for part in report_parts:
596
  if not part.strip(): continue
 
686
  if isinstance(obj, np.ndarray): return obj.tolist()
687
  return obj
688
 
def convert_to_serializable(obj):
    """Convert NumPy values to native Python types for JSON serialization.

    Dicts, lists and tuples are walked recursively. NumPy scalars become the
    matching Python scalar and arrays become nested lists. NumPy scalar dict
    keys are converted as well, because ``json.dump`` rejects them. Any other
    object is returned unchanged.
    """
    if isinstance(obj, np.generic):
        return obj.item()
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, dict):
        return {
            (k.item() if isinstance(k, np.generic) else k): convert_to_serializable(v)
            for k, v in obj.items()
        }
    if isinstance(obj, list):
        return [convert_to_serializable(i) for i in obj]
    if isinstance(obj, tuple):
        # Tuples keep their type; json encodes them as arrays either way.
        return tuple(convert_to_serializable(i) for i in obj)
    return obj
696
+
697
  def process_interview(audio_path_or_url: str):
698
+ """Main function to process an interview from an audio file or URL."""
699
+ local_audio_path, wav_file = None, None
700
  is_downloaded = False
701
+
702
  try:
703
  logger.info(f"Starting processing for {audio_path_or_url}")
704
  if audio_path_or_url.startswith(('http://', 'https://')):
 
706
  is_downloaded = True
707
  else:
708
  local_audio_path = audio_path_or_url
709
+
710
  wav_file = convert_to_wav(local_audio_path)
711
  transcript = transcribe(wav_file)
712
+
713
+ for u in transcript['utterances']:
714
+ u['prosodic_features'] = extract_prosodic_features(wav_file, u['start'], u['end'])
715
+
716
  utterances_with_speakers = identify_speakers(transcript, wav_file)
717
+
718
+ # التصنيف باستخدام المنهجية الإرشادية
719
+ classified_utterances = classify_roles(utterances_with_speakers)
720
+
 
 
 
 
721
  voice_analysis = analyze_interviewee_voice(wav_file, classified_utterances)
722
+
723
  analysis_data = {
724
  'transcript': classified_utterances,
725
+ 'speakers': list(set(u['speaker_name'] for u in classified_utterances)),
726
  'voice_analysis': voice_analysis,
727
  'text_analysis': {
728
  'total_duration': sum(u['prosodic_features']['duration'] for u in classified_utterances),
729
  'speaker_turns': len(classified_utterances)
730
  }
731
  }
732
+
733
  analysis_data['acceptance_probability'] = calculate_acceptance_probability(analysis_data)
734
+
735
  gemini_report_text = generate_report(analysis_data)
736
+
737
  base_name = str(uuid.uuid4())
738
  pdf_path = os.path.join(OUTPUT_DIR, f"{base_name}_report.pdf")
739
  json_path = os.path.join(OUTPUT_DIR, f"{base_name}_analysis.json")
740
+
741
+ # create_pdf_report(analysis_data, pdf_path, gemini_report_text=gemini_report_text)
742
+
743
  with open(json_path, 'w') as f:
744
  serializable_data = convert_to_serializable(analysis_data)
745
  json.dump(serializable_data, f, indent=2)
746
+
747
+ logger.info(f"Processing completed. JSON report at: {json_path}")
748
+ return {'pdf_path': pdf_path, 'json_path': json_path, 'report_text': gemini_report_text}
749
+
750
  except Exception as e:
751
  logger.error(f"Processing failed for {audio_path_or_url}: {str(e)}", exc_info=True)
752
  raise
753
  finally:
754
+ # تنظيف الملفات المؤقتة
755
  if wav_file and os.path.exists(wav_file):
756
  os.remove(wav_file)
757
  if is_downloaded and local_audio_path and os.path.exists(local_audio_path):