norhan12 committed on
Commit
eabbb82
·
verified ·
1 Parent(s): fc3466f

Upload process_interview.py

Browse files
Files changed (1) hide show
  1. process_interview.py +954 -0
process_interview.py ADDED
@@ -0,0 +1,954 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import numpy as np
4
+ import uuid
5
+ import requests
6
+ import time
7
+ import json
8
+ from pydub import AudioSegment
9
+ import wave
10
+ from nemo.collections.asr.models import EncDecSpeakerLabelModel
11
+ from pinecone import Pinecone, ServerlessSpec
12
+ import librosa
13
+ import pandas as pd
14
+ from sklearn.ensemble import RandomForestClassifier
15
+ from sklearn.preprocessing import StandardScaler
16
+ from sklearn.feature_extraction.text import TfidfVectorizer
17
+ import re
18
+ from typing import Dict, List, Tuple
19
+ import logging
20
+ # --- Imports for enhanced PDF ---
21
+ from reportlab.lib.pagesizes import letter
22
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
23
+ from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
24
+ from reportlab.lib.units import inch
25
+ from reportlab.lib import colors
26
+ import matplotlib.pyplot as plt
27
+ import matplotlib
28
+
29
+ matplotlib.use('Agg') # --- FIX: select the matplotlib backend ---
30
+ from reportlab.platypus import Image
31
+ import io # --- FIX: add import io for BytesIO ---
32
+ # --- End Imports for enhanced PDF ---
33
+ from transformers import AutoTokenizer, AutoModel
34
+ import spacy
35
+ import google.generativeai as genai
36
+ import joblib
37
+ from concurrent.futures import ThreadPoolExecutor
38
+
39
+ # Setup logging
40
+ logging.basicConfig(level=logging.INFO)
41
+ logger = logging.getLogger(__name__)
42
+ logging.getLogger("nemo_logging").setLevel(logging.ERROR)
43
+ logging.getLogger("nemo").setLevel(logging.ERROR)
44
+
45
+ # Configuration
46
+ AUDIO_DIR = "./uploads"
47
+ OUTPUT_DIR = "./processed_audio"
48
+ os.makedirs(OUTPUT_DIR, exist_ok=True)
49
+
50
+ # API Keys
51
+ PINECONE_KEY = os.getenv("PINECONE_KEY")
52
+ ASSEMBLYAI_KEY = os.getenv("ASSEMBLYAI_KEY")
53
+ GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
54
+
55
+
56
+ # Initialize services
57
def initialize_services():
    """Connect to Pinecone and Gemini and return the handles the pipeline uses.

    Looks up the ``interview-speaker-embeddings`` Pinecone index, creating it
    as a 192-dimension cosine serverless index (AWS ``us-east-1``) when it is
    not listed yet, then configures the Gemini client.

    Returns:
        A ``(index, gemini_model)`` tuple: the Pinecone index handle and a
        ``gemini-1.5-flash`` generative model.

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    try:
        client = Pinecone(api_key=PINECONE_KEY)
        target_index = "interview-speaker-embeddings"

        already_present = target_index in client.list_indexes().names()
        if not already_present:
            client.create_index(
                name=target_index,
                dimension=192,
                metric="cosine",
                spec=ServerlessSpec(cloud="aws", region="us-east-1"),
            )
        speaker_index = client.Index(target_index)

        genai.configure(api_key=GEMINI_API_KEY)
        report_model = genai.GenerativeModel('gemini-1.5-flash')
    except Exception as err:
        logger.error(f"Error initializing services: {str(err)}")
        raise

    return speaker_index, report_model
77
+
78
+
79
+ index, gemini_model = initialize_services()
80
+
81
+ # Device setup
82
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
83
+ logger.info(f"Using device: {device}")
84
+
85
+
86
def load_speaker_model():
    """Load the pretrained speaker-verification model on CPU in eval mode.

    Returns:
        The ``EncDecSpeakerLabelModel`` loaded from
        ``nvidia/speakerverification_en_titanet_large``.

    Raises:
        RuntimeError: If the model cannot be loaded. The original exception
            is attached as ``__cause__`` so the root failure stays visible.
    """
    try:
        # Relies on the module-level ``torch`` import; the thread count is
        # capped at 5 before the model is instantiated.
        torch.set_num_threads(5)
        model = EncDecSpeakerLabelModel.from_pretrained(
            "nvidia/speakerverification_en_titanet_large",
            map_location=torch.device('cpu'),
        )
        model.eval()
        return model
    except Exception as e:
        logger.error(f"Model loading failed: {str(e)}")
        raise RuntimeError("Could not load speaker verification model") from e
99
+
100
+
101
+ # Load ML models
102
def load_models():
    """Load every model the pipeline needs, once, at start-up.

    Returns:
        ``(speaker_model, nlp, tokenizer, llm_model)``: the speaker
        verification model, the ``en_core_web_sm`` spaCy pipeline, and the
        ``distilbert-base-uncased`` tokenizer and encoder (the encoder is
        moved to the module-level ``device`` and put in eval mode).
    """
    voice_encoder = load_speaker_model()
    spacy_pipeline = spacy.load("en_core_web_sm")

    text_tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

    text_encoder = AutoModel.from_pretrained("distilbert-base-uncased")
    text_encoder = text_encoder.to(device)
    text_encoder.eval()

    return voice_encoder, spacy_pipeline, text_tokenizer, text_encoder
111
+
112
+
113
+ speaker_model, nlp, tokenizer, llm_model = load_models()
114
+
115
+
116
+ # Audio processing functions
117
def convert_to_wav(audio_path: str, output_dir: str = OUTPUT_DIR) -> str:
    """Convert an audio file to a mono, 16 kHz WAV file.

    Args:
        audio_path: Path of the source audio file (any format pydub can read).
        output_dir: Directory that receives the converted file. It is created
            if it does not exist yet.

    Returns:
        Path of the newly written WAV file (a random UUID file name).

    Raises:
        Exception: Any decoding/export failure is logged and re-raised.
    """
    try:
        audio = AudioSegment.from_file(audio_path)
        if audio.channels > 1:
            audio = audio.set_channels(1)
        audio = audio.set_frame_rate(16000)

        # Only the default OUTPUT_DIR is created at import time; a custom
        # directory passed by the caller must be created here.
        os.makedirs(output_dir, exist_ok=True)
        wav_file = os.path.join(output_dir, f"{uuid.uuid4()}.wav")
        audio.export(wav_file, format="wav")
        return wav_file
    except Exception as e:
        logger.error(f"Audio conversion failed: {str(e)}")
        raise
130
+
131
+
132
def extract_prosodic_features(audio_path: str, start_ms: int, end_ms: int) -> Dict:
    """Compute pitch and intensity statistics for one slice of an audio file.

    The slice ``[start_ms, end_ms)`` is exported to a temporary WAV file,
    reloaded at 16 kHz with librosa, and summarised.

    Args:
        audio_path: Path of the audio file to slice.
        start_ms: Slice start, in milliseconds.
        end_ms: Slice end, in milliseconds.

    Returns:
        A dict with ``duration`` (seconds), ``mean_pitch``, ``min_pitch``,
        ``max_pitch``, ``pitch_sd``, ``intensityMean``, ``intensityMin``,
        ``intensityMax`` and ``intensitySD``. On any failure the error is
        logged and every value is ``0.0`` (best-effort: never raises).
    """
    feature_keys = (
        'duration', 'mean_pitch', 'min_pitch', 'max_pitch', 'pitch_sd',
        'intensityMean', 'intensityMin', 'intensityMax', 'intensitySD',
    )
    temp_path = None
    try:
        audio = AudioSegment.from_file(audio_path)
        segment = audio[start_ms:end_ms]
        temp_path = os.path.join(OUTPUT_DIR, f"temp_{uuid.uuid4()}.wav")
        segment.export(temp_path, format="wav")

        y, sr = librosa.load(temp_path, sr=16000)
        pitches = librosa.piptrack(y=y, sr=sr)[0]
        pitches = pitches[pitches > 0]  # keep only bins with a detected pitch
        has_pitch = len(pitches) > 0

        # Compute the RMS envelope once instead of once per statistic.
        rms = librosa.feature.rms(y=y)[0]

        return {
            'duration': (end_ms - start_ms) / 1000,
            'mean_pitch': float(np.mean(pitches)) if has_pitch else 0.0,
            'min_pitch': float(np.min(pitches)) if has_pitch else 0.0,
            'max_pitch': float(np.max(pitches)) if has_pitch else 0.0,
            'pitch_sd': float(np.std(pitches)) if has_pitch else 0.0,
            'intensityMean': float(np.mean(rms)),
            'intensityMin': float(np.min(rms)),
            'intensityMax': float(np.max(rms)),
            'intensitySD': float(np.std(rms)),
        }
    except Exception as e:
        logger.error(f"Feature extraction failed: {str(e)}")
        return dict.fromkeys(feature_keys, 0.0)
    finally:
        # Always remove the temporary slice, including on the error path.
        if temp_path is not None and os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError as cleanup_error:
                logger.warning(f"Could not remove temp file {temp_path}: {cleanup_error}")
170
+
171
+
172
def transcribe(audio_path: str, poll_interval: float = 5.0, request_timeout: float = 60.0) -> Dict:
    """Upload an audio file to AssemblyAI and wait for its transcript.

    The transcript is requested with speaker labels and profanity filtering
    enabled, then polled until it reaches a terminal status.

    Args:
        audio_path: Path of the audio file to upload.
        poll_interval: Seconds to sleep between status polls.
        request_timeout: Timeout, in seconds, applied to each HTTP request so
            a stalled connection cannot hang the pipeline forever.

    Returns:
        The completed transcript JSON as returned by the API.

    Raises:
        Exception: On HTTP errors, on a transcript that ends in the ``error``
            status (``RuntimeError``), or on any other failure. The error is
            logged before being re-raised.
    """
    api_root = "https://api.assemblyai.com/v2"
    headers = {"authorization": ASSEMBLYAI_KEY}
    try:
        with open(audio_path, 'rb') as f:
            upload_response = requests.post(
                f"{api_root}/upload",
                headers=headers,
                data=f,
                timeout=request_timeout,
            )
        # Fail with the real HTTP error (e.g. 401) rather than a KeyError on
        # the missing 'upload_url' field.
        upload_response.raise_for_status()
        audio_url = upload_response.json()['upload_url']

        transcript_response = requests.post(
            f"{api_root}/transcript",
            headers=headers,
            json={
                "audio_url": audio_url,
                "speaker_labels": True,
                "filter_profanity": True,
            },
            timeout=request_timeout,
        )
        transcript_response.raise_for_status()
        transcript_id = transcript_response.json()['id']

        while True:
            poll_response = requests.get(
                f"{api_root}/transcript/{transcript_id}",
                headers=headers,
                timeout=request_timeout,
            )
            poll_response.raise_for_status()
            result = poll_response.json()

            status = result['status']
            if status == 'completed':
                return result
            if status == 'error':
                raise RuntimeError(result.get('error', 'Transcription ended with an error status'))

            time.sleep(poll_interval)
    except Exception as e:
        logger.error(f"Transcription failed: {str(e)}")
        raise
208
+
209
+
210
def process_utterance(utterance, full_audio, wav_file):
    """Attach a speaker identity to one transcript utterance.

    The utterance's audio slice is exported to a temporary WAV file, embedded
    with the speaker model, and matched against the Pinecone index. A top
    match scoring above 0.7 reuses that speaker; otherwise a new
    ``unknown_<hex>`` speaker is registered in the index.

    Args:
        utterance: Dict with at least ``start`` and ``end`` keys, used to
            slice ``full_audio``.
        full_audio: The full recording as a sliceable audio object.
        wav_file: Unused here; kept for interface compatibility.

    Returns:
        The utterance dict extended with ``speaker``, ``speaker_id`` and
        ``embedding`` (a flat list of floats). On failure the error is logged
        and the fields are ``'Unknown'``, ``'unknown'`` and ``None``.
    """
    temp_path = None
    try:
        start = utterance['start']
        end = utterance['end']
        segment = full_audio[start:end]
        temp_path = os.path.join(OUTPUT_DIR, f"temp_{uuid.uuid4()}.wav")
        segment.export(temp_path, format="wav")

        with torch.no_grad():
            embedding = speaker_model.get_embedding(temp_path).cpu().numpy()

        # Pinecone expects a flat list of floats, not a nested array.
        embedding_list = embedding.flatten().tolist()

        query_result = index.query(
            vector=embedding_list,
            top_k=1,
            include_metadata=True,
        )
        matches = query_result['matches']

        if matches and matches[0]['score'] > 0.7:
            speaker_id = matches[0]['id']
            speaker_name = matches[0]['metadata']['speaker_name']
        else:
            speaker_id = f"unknown_{uuid.uuid4().hex[:6]}"
            speaker_name = f"Speaker_{speaker_id[-4:]}"
            index.upsert([(speaker_id, embedding_list, {"speaker_name": speaker_name})])

        return {
            **utterance,
            'speaker': speaker_name,
            'speaker_id': speaker_id,
            'embedding': embedding_list,
        }
    except Exception as e:
        logger.error(f"Utterance processing failed: {str(e)}", exc_info=True)
        return {
            **utterance,
            'speaker': 'Unknown',
            'speaker_id': 'unknown',
            'embedding': None,
        }
    finally:
        # Remove the temporary slice on every path, not only on success.
        if temp_path is not None and os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError as cleanup_error:
                logger.warning(f"Could not remove temp file {temp_path}: {cleanup_error}")
255
+
256
+
257
def identify_speakers(transcript: Dict, wav_file: str) -> List[Dict]:
    """Label every utterance of a transcript with a speaker identity.

    Args:
        transcript: Transcript dict containing an ``utterances`` list.
        wav_file: Path of the WAV recording the transcript was made from.

    Returns:
        One result dict per utterance, in transcript order, as produced by
        ``process_utterance``.

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    try:
        recording = AudioSegment.from_wav(wav_file)

        def _label(single_utterance):
            return process_utterance(single_utterance, recording, wav_file)

        # Up to 5 utterances are processed concurrently; map() keeps the
        # results in input order.
        with ThreadPoolExecutor(max_workers=5) as pool:
            labelled = list(pool.map(_label, transcript['utterances']))

        return labelled
    except Exception as err:
        logger.error(f"Speaker identification failed: {str(err)}")
        raise
273
+
274
+
275
# Order of the prosodic features inside every feature vector. Training and
# inference must agree on it, so it is defined exactly once.
_PROSODIC_FEATURE_KEYS = (
    'duration',
    'mean_pitch',
    'min_pitch',
    'max_pitch',
    'pitch_sd',
    'intensityMean',
    'intensityMin',
    'intensityMax',
    'intensitySD',
)

# Question words counted as a lexical cue; compiled once, not per utterance.
_WH_WORD_RE = re.compile(r'\b(why|how|what|when|where|who|which)\b')


def _role_feature_vector(utterance: Dict, tfidf_row) -> List[float]:
    """Build one feature vector: prosody, then TF-IDF, then lexical/POS counts."""
    text = utterance['text']
    prosodic = utterance['prosodic_features']
    doc = nlp(text)

    feat = [prosodic[key] for key in _PROSODIC_FEATURE_KEYS]
    feat.extend(tfidf_row.toarray()[0].tolist())
    feat.extend([
        int(text.endswith('?')),
        len(_WH_WORD_RE.findall(text.lower())),
        len(text.split()),
        sum(1 for token in doc if token.pos_ == 'VERB'),
        sum(1 for token in doc if token.pos_ == 'NOUN'),
    ])
    return feat


def train_role_classifier(utterances: List[Dict]):
    """Fit the interviewer/interviewee classifier on the given utterances.

    Labels are not read from the data: utterances are pseudo-labelled by
    position (even index -> 0, odd index -> 1).

    Args:
        utterances: Dicts with ``text`` and ``prosodic_features`` keys.

    Returns:
        ``(clf, vectorizer, scaler)``: the fitted random forest, TF-IDF
        vectorizer and feature scaler. All three are also dumped with joblib
        into ``OUTPUT_DIR``.

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    try:
        texts = [u['text'] for u in utterances]
        vectorizer = TfidfVectorizer(max_features=500, ngram_range=(1, 2))
        X_text = vectorizer.fit_transform(texts)

        features = [
            _role_feature_vector(utterance, X_text[i])
            for i, utterance in enumerate(utterances)
        ]
        labels = [i % 2 for i in range(len(utterances))]

        scaler = StandardScaler()
        X = scaler.fit_transform(features)

        clf = RandomForestClassifier(
            n_estimators=150,
            max_depth=10,
            random_state=42,
            class_weight='balanced',
        )
        clf.fit(X, labels)

        joblib.dump(clf, os.path.join(OUTPUT_DIR, 'role_classifier.pkl'))
        joblib.dump(vectorizer, os.path.join(OUTPUT_DIR, 'text_vectorizer.pkl'))
        joblib.dump(scaler, os.path.join(OUTPUT_DIR, 'feature_scaler.pkl'))

        return clf, vectorizer, scaler
    except Exception as e:
        logger.error(f"Classifier training failed: {str(e)}")
        raise


def classify_roles(utterances: List[Dict], clf, vectorizer, scaler):
    """Assign an ``Interviewer``/``Interviewee`` role to every utterance.

    Args:
        utterances: Dicts with ``text`` and ``prosodic_features`` keys.
        clf: Fitted classifier; class 0 is mapped to ``Interviewer`` and any
            other class to ``Interviewee``.
        vectorizer: Fitted TF-IDF vectorizer used at training time.
        scaler: Fitted feature scaler used at training time.

    Returns:
        A new list of utterance dicts, each extended with a ``role`` key.
        An empty input yields an empty list.

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    try:
        if not utterances:
            return []

        texts = [u['text'] for u in utterances]
        X_text = vectorizer.transform(texts)

        features = [
            _role_feature_vector(utterance, X_text[i])
            for i, utterance in enumerate(utterances)
        ]

        # Scale and predict the whole batch in one call instead of one call
        # per utterance.
        predictions = clf.predict(scaler.transform(features))

        return [
            {**utterance, 'role': 'Interviewer' if predicted == 0 else 'Interviewee'}
            for utterance, predicted in zip(utterances, predictions)
        ]
    except Exception as e:
        logger.error(f"Role classification failed: {str(e)}")
        raise
373
+
374
+
375
def analyze_interviewee_voice(audio_path: str, utterances: List[Dict]) -> Dict:
    """Derive speech-delivery metrics for the interviewee from the recording.

    Only utterances whose ``role`` is ``'Interviewee'`` are analysed.

    Args:
        audio_path: Path of the recording (loaded at 16 kHz).
        utterances: Dicts with ``role``, ``text``, ``start``, ``end`` and
            ``prosodic_features`` keys. ``start``/``end`` are treated as
            milliseconds when converted to sample offsets.

    Returns:
        A dict with ``speaking_rate`` (words per unit of
        ``prosodic_features['duration']``), ``filler_ratio``,
        ``repetition_score``, ``pitch_analysis``, ``intensity_analysis``,
        ``composite_scores`` and ``interpretation``. If there is no
        interviewee utterance, or on any failure, returns
        ``{'error': <message>}`` instead of raising.
    """
    try:
        y, sr = librosa.load(audio_path, sr=16000)

        interviewee_utterances = [u for u in utterances if u['role'] == 'Interviewee']
        if not interviewee_utterances:
            return {'error': 'No interviewee utterances found'}

        segments = []
        for u in interviewee_utterances:
            start = int(u['start'] * sr / 1000)
            end = int(u['end'] * sr / 1000)
            segment = y[start:end]
            # Zero-length slices carry no signal; skip them rather than
            # handing empty arrays to the pitch/intensity estimators.
            if len(segment) > 0:
                segments.append(segment)

        total_duration = sum(u['prosodic_features']['duration'] for u in interviewee_utterances)
        total_words = sum(len(u['text'].split()) for u in interviewee_utterances)
        speaking_rate = total_words / total_duration if total_duration > 0 else 0

        # Match fillers on word boundaries: plain substring counting would
        # count 'so' inside "also" or 'um' inside "number".
        filler_words = ['um', 'uh', 'like', 'you know', 'so', 'i mean']
        filler_pattern = re.compile(
            r'\b(?:' + '|'.join(re.escape(fw) for fw in filler_words) + r')\b'
        )
        filler_count = sum(
            len(filler_pattern.findall(u['text'].lower()))
            for u in interviewee_utterances
        )
        filler_ratio = filler_count / total_words if total_words > 0 else 0

        # Repetition: share of distinct bigrams that occur more than once.
        all_words = ' '.join(u['text'].lower() for u in interviewee_utterances).split()
        word_counts = {}
        for bigram in zip(all_words, all_words[1:]):
            word_counts[bigram] = word_counts.get(bigram, 0) + 1
        repetition_score = (
            sum(1 for count in word_counts.values() if count > 1) / len(word_counts)
            if word_counts else 0
        )

        pitches = []
        for segment in segments:
            f0, voiced_flag, _ = librosa.pyin(segment, fmin=80, fmax=300, sr=sr)
            pitches.extend(f0[voiced_flag])

        pitch_mean = np.mean(pitches) if len(pitches) > 0 else 0
        pitch_std = np.std(pitches) if len(pitches) > 0 else 0
        jitter = np.mean(np.abs(np.diff(pitches))) / pitch_mean if len(pitches) > 1 and pitch_mean > 0 else 0

        intensities = []
        for segment in segments:
            intensities.extend(librosa.feature.rms(y=segment)[0])

        intensity_mean = np.mean(intensities) if intensities else 0
        intensity_std = np.std(intensities) if intensities else 0
        shimmer = (
            np.mean(np.abs(np.diff(intensities))) / intensity_mean
            if len(intensities) > 1 and intensity_mean > 0 else 0
        )

        anxiety_score = 0.6 * (pitch_std / pitch_mean) + 0.4 * (jitter + shimmer) if pitch_mean > 0 else 0
        confidence_score = 0.7 * (1 / (1 + intensity_std)) + 0.3 * (1 / (1 + filler_ratio))
        hesitation_score = filler_ratio + repetition_score

        anxiety_level = 'high' if anxiety_score > 0.15 else 'moderate' if anxiety_score > 0.07 else 'low'
        confidence_level = 'high' if confidence_score > 0.7 else 'moderate' if confidence_score > 0.5 else 'low'
        if filler_ratio < 0.05 and repetition_score < 0.1:
            fluency_level = 'fluent'
        elif filler_ratio < 0.1 and repetition_score < 0.2:
            fluency_level = 'moderate'
        else:
            fluency_level = 'disfluent'

        return {
            'speaking_rate': float(round(speaking_rate, 2)),
            'filler_ratio': float(round(filler_ratio, 4)),
            'repetition_score': float(round(repetition_score, 4)),
            'pitch_analysis': {
                'mean': float(round(pitch_mean, 2)),
                'std_dev': float(round(pitch_std, 2)),
                'jitter': float(round(jitter, 4)),
            },
            'intensity_analysis': {
                'mean': float(round(intensity_mean, 2)),
                'std_dev': float(round(intensity_std, 2)),
                'shimmer': float(round(shimmer, 4)),
            },
            'composite_scores': {
                'anxiety': float(round(anxiety_score, 4)),
                'confidence': float(round(confidence_score, 4)),
                'hesitation': float(round(hesitation_score, 4)),
            },
            'interpretation': {
                'anxiety_level': anxiety_level,
                'confidence_level': confidence_level,
                'fluency_level': fluency_level,
            },
        }
    except Exception as e:
        logger.error(f"Voice analysis failed: {str(e)}")
        return {'error': str(e)}
466
+
467
+
468
def generate_voice_interpretation(analysis: Dict) -> str:
    """Render the voice-analysis metrics as plain text for the Gemini prompt.

    Args:
        analysis: Result of the voice analysis. May be empty/``None`` or
            contain an ``error`` key when the analysis was not produced.

    Returns:
        A multi-line summary followed by a fixed explanatory legend, or
        ``"Voice analysis not available."`` when there is nothing to render.
    """
    # An empty dict is what callers pass when no analysis exists at all;
    # treat it like a failed analysis instead of raising KeyError below.
    if not analysis or 'error' in analysis:
        return "Voice analysis not available."

    levels = analysis['interpretation']
    scores = analysis['composite_scores']

    interpretation_lines = [
        "Voice Analysis Summary:",
        f"- Speaking Rate: {analysis['speaking_rate']} words/sec (average)",
        f"- Filler Words: {analysis['filler_ratio'] * 100:.1f}% of words",
        f"- Repetition Score: {analysis['repetition_score']:.3f}",
        f"- Anxiety Level: {levels['anxiety_level'].upper()} (score: {scores['anxiety']:.3f})",
        f"- Confidence Level: {levels['confidence_level'].upper()} (score: {scores['confidence']:.3f})",
        f"- Fluency: {levels['fluency_level'].upper()}",
        "",
        "Detailed Interpretation:",
        "1. A higher speaking rate indicates faster speech, which can suggest nervousness or enthusiasm.",
        "2. Filler words and repetitions reduce speech clarity and professionalism.",
        "3. Anxiety is measured through pitch variability and voice instability.",
        "4. Confidence is assessed through voice intensity and stability.",
        "5. Fluency combines filler words and repetition metrics.",
    ]

    return "\n".join(interpretation_lines)
493
+
494
+
495
+ # --- Chart Generation Function ---
496
def generate_anxiety_confidence_chart(composite_scores: Dict, chart_path: str):
    """Save a two-bar chart comparing the anxiety and confidence scores.

    Args:
        composite_scores: Dict that may contain ``anxiety`` and
            ``confidence``; a missing key is plotted as 0.
        chart_path: Destination path of the image file.

    Returns:
        None. Failures are logged, not raised (best-effort rendering).
    """
    fig = None
    try:
        labels = ['Anxiety', 'Confidence']
        scores = [composite_scores.get('anxiety', 0), composite_scores.get('confidence', 0)]

        fig, ax = plt.subplots(figsize=(4, 2.5))  # small size for embedding in the PDF
        ax.bar(labels, scores, color=['lightcoral', 'lightskyblue'])
        ax.set_ylabel('Score')
        ax.set_title('Anxiety vs. Confidence Scores')
        # Scores are expected in 0-1; leave headroom so the value label drawn
        # above the tallest bar stays inside the axes.
        ax.set_ylim(0, max(1.0, max(scores) + 0.15))

        for i, v in enumerate(scores):
            ax.text(i, v + 0.05, f"{v:.2f}", color='black', ha='center', fontweight='bold')

        # Layout and save run once, after every element has been drawn, and
        # act on this figure explicitly rather than on pyplot's current one.
        fig.tight_layout()
        fig.savefig(chart_path)
    except Exception as e:
        logger.error(f"Error generating chart: {str(e)}")
    finally:
        # Close the figure on every path so failed renders do not leak memory.
        if fig is not None:
            plt.close(fig)
516
+
517
+
518
+ # --- Acceptance Probability Calculation ---
519
def calculate_acceptance_probability(analysis_data: Dict) -> float:
    """Estimate a hypothetical acceptance probability, as a percentage.

    This is a simplified heuristic: a weighted sum of voice-derived
    components, each scaled to 0-1, normalised by the total weight. It is not
    a trained model.

    Args:
        analysis_data: Analysis dict. Reads ``voice_analysis`` (composite
            scores, fluency level, speaking rate, filler ratio, repetition
            score) and ``text_analysis['total_duration']``.

    Returns:
        A percentage in ``[0.0, 100.0]`` rounded to two decimals. Returns
        ``0.0`` when the voice analysis is missing, empty or failed, since
        there is nothing to score.
    """
    voice = analysis_data.get('voice_analysis') or {}
    if not voice or 'error' in voice:
        return 0.0

    # Component weights (adjust to tune the heuristic). Anxiety and
    # filler/repetition are "lower is better", so their scores are inverted
    # below and the weights are applied as positive magnitudes.
    w_confidence = 0.4
    w_anxiety = 0.3
    w_fluency = 0.2
    w_speaking_rate = 0.1
    w_filler_repetition = 0.1
    w_content_strengths = 0.2  # placeholder until real content analysis exists

    composite = voice.get('composite_scores', {})
    confidence_score = composite.get('confidence', 0.0)
    anxiety_score = composite.get('anxiety', 0.0)
    fluency_level = voice.get('interpretation', {}).get('fluency_level', 'disfluent')
    speaking_rate = voice.get('speaking_rate', 0.0)
    filler_ratio = voice.get('filler_ratio', 0.0)
    repetition_score = voice.get('repetition_score', 0.0)

    # The anxiety score is not bounded above by 1; clamp the inverted value
    # so this component stays in the 0-1 range the normalisation assumes.
    calm_score = max(0.0, min(1.0, 1 - anxiety_score))

    fluency_map = {'fluent': 1.0, 'moderate': 0.5, 'disfluent': 0.0}
    fluency_val = fluency_map.get(fluency_level, 0.0)

    # Speaking rate: 1.0 at the ideal 2.5 words/sec, falling linearly to 0.
    ideal_speaking_rate = 2.5
    speaking_rate_deviation = abs(speaking_rate - ideal_speaking_rate)
    speaking_rate_score = max(0, 1 - (speaking_rate_deviation / ideal_speaking_rate))

    # Filler/repetition: average the two ratios, then invert.
    filler_repetition_composite = (filler_ratio + repetition_score) / 2
    filler_repetition_score = max(0, 1 - filler_repetition_composite)

    # Content strength is a fixed assumption: 0.8 whenever the interview has
    # a positive recorded duration, otherwise 0.
    text_analysis = analysis_data.get('text_analysis') or {}
    content_strength_val = 0.8 if text_analysis.get('total_duration', 0) > 0 else 0.0

    raw_score = (
        confidence_score * w_confidence +
        calm_score * w_anxiety +
        fluency_val * w_fluency +
        speaking_rate_score * w_speaking_rate +
        filler_repetition_score * w_filler_repetition +
        content_strength_val * w_content_strengths
    )

    # Every component lies in 0-1, so the raw score ranges from 0 to the sum
    # of the weights.
    max_possible_score = sum([
        w_confidence,
        w_anxiety,
        w_fluency,
        w_speaking_rate,
        w_filler_repetition,
        w_content_strengths,
    ])
    normalized_score = raw_score / max_possible_score

    acceptance_probability = max(0.0, min(1.0, normalized_score))
    return round(acceptance_probability * 100, 2)
596
+
597
+
598
def generate_report(analysis_data: Dict) -> str:
    """Ask Gemini to write the interview analysis report.

    The prompt embeds the voice-analysis summary, basic interview statistics
    and up to five interviewee responses. Its four numbered ``**N. Title**``
    headings are part of the prompt text.

    Args:
        analysis_data: Analysis dict. Reads ``voice_analysis``,
            ``transcript``, ``text_analysis`` (``total_duration``,
            ``speaker_turns``), ``speakers`` and, optionally,
            ``acceptance_probability``.

    Returns:
        The generated report text. On any failure the error is logged and a
        string starting with ``"Error generating report: "`` is returned
        instead of raising.
    """
    try:
        voice = analysis_data.get('voice_analysis', {})
        voice_interpretation = generate_voice_interpretation(voice)

        # Only the first 5 interviewee responses are sent, for prompt brevity.
        # Utterances without a 'role' key are skipped rather than raising.
        interviewee_responses = [
            f"Speaker {u['speaker']} ({u['role']}): {u['text']}"
            for u in analysis_data['transcript']
            if u.get('role') == 'Interviewee'
        ][:5]

        acceptance_prob = analysis_data.get('acceptance_probability', None)
        acceptance_line = ""
        if acceptance_prob is not None:
            acceptance_line = f"\n**Estimated Acceptance Probability: {acceptance_prob:.2f}%**\n"
            if acceptance_prob >= 80:
                acceptance_line += "This indicates a very strong candidate. Well done!"
            elif acceptance_prob >= 50:
                acceptance_line += "This indicates a solid candidate with potential for improvement."
            else:
                acceptance_line += "This candidate may require significant development or may not be a strong fit."

        text_stats = analysis_data['text_analysis']

        # The prompt is assembled line by line so that source-code
        # indentation never leaks into the text sent to the model.
        prompt_lines = [
            "",
            "As EvalBot, an AI interview analysis system, generate a highly professional, well-structured, and concise interview analysis report.",
            "The report should be suitable for a professional setting and clearly highlight key findings and actionable recommendations.",
            "Use clear headings and subheadings. For bullet points, use '- '.",
            "",
            acceptance_line,
            "",
            "**1. Executive Summary**",
            "Provide a brief, high-level overview of the interview.",
            f"- Overall interview duration: {text_stats['total_duration']:.2f} seconds",
            f"- Number of speaker turns: {text_stats['speaker_turns']}",
            f"- Main participants: {', '.join(analysis_data['speakers'])}",
            "",
            "**2. Voice Analysis Insights**",
            "Analyze key voice metrics and provide a detailed interpretation.",
            voice_interpretation,
            "",
            "**3. Content Analysis & Strengths/Areas for Development**",
            "Analyze the key themes and identify both strengths and areas for development in the interviewee's responses.",
            "Key responses from interviewee (for context):",
            "\n".join(interviewee_responses),
            "",
            "**4. Actionable Recommendations**",
            "Offer specific, actionable suggestions for improvement.",
            "Focus on:",
            "- Communication Skills (e.g., pacing, clarity, filler words)",
            "- Content Delivery (e.g., quantifying achievements, structuring answers)",
            "- Professional Presentation (e.g., research, specific examples, mock interviews)",
            "",
        ]
        prompt = "\n".join(prompt_lines)

        response = gemini_model.generate_content(prompt)
        return response.text
    except Exception as e:
        logger.error(f"Report generation failed: {str(e)}", exc_info=True)
        return f"Error generating report: {str(e)}"
655
+
656
+
657
+ # --- ENHANCED PDF GENERATION FUNCTION ---
658
+ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text: str):
659
+ try:
660
+ doc = SimpleDocTemplate(output_path, pagesize=letter)
661
+ styles = getSampleStyleSheet()
662
+
663
+ # Define custom styles
664
+ h1 = ParagraphStyle(name='Heading1', parent=styles['h1'], fontSize=16, spaceAfter=14, alignment=1,
665
+ textColor=colors.HexColor('#003366'))
666
+ h2 = ParagraphStyle(name='Heading2', parent=styles['h2'], fontSize=12, spaceBefore=10, spaceAfter=8,
667
+ textColor=colors.HexColor('#336699'))
668
+ h3 = ParagraphStyle(name='Heading3', parent=styles['h3'], fontSize=10, spaceBefore=8, spaceAfter=4,
669
+ textColor=colors.HexColor('#0055AA'))
670
+ body_text = ParagraphStyle(name='BodyText', parent=styles['Normal'], fontSize=9, leading=12, spaceAfter=4)
671
+ bullet_style = ParagraphStyle(name='Bullet', parent=styles['Normal'], fontSize=9, leading=12, leftIndent=18,
672
+ bulletIndent=9)
673
+
674
+ story = []
675
+
676
+ # Title and Date
677
+ story.append(Paragraph(f"<b>EvalBot Interview Analysis Report</b>", h1))
678
+ story.append(Spacer(1, 0.2 * inch))
679
+ story.append(Paragraph(f"<b>Date:</b> {time.strftime('%Y-%m-%d')}", body_text))
680
+ story.append(Spacer(1, 0.3 * inch))
681
+
682
+ # --- Acceptance Probability (New Section) ---
683
+ acceptance_prob = analysis_data.get('acceptance_probability', None)
684
+ if acceptance_prob is not None:
685
+ story.append(Paragraph("<b>Candidate Evaluation Summary</b>", h2))
686
+ story.append(Spacer(1, 0.1 * inch))
687
+
688
+ prob_color = colors.green if acceptance_prob >= 70 else (
689
+ colors.orange if acceptance_prob >= 40 else colors.red)
690
+
691
+ # --- FIX: Call .hexval() as a method ---
692
+ story.append(Paragraph(
693
+ f"<font size='12' color='{prob_color.hexval()}'><b>Estimated Acceptance Probability: {acceptance_prob:.2f}%</b></font>",
694
+ ParagraphStyle(name='AcceptanceProbability', parent=styles['Normal'], fontSize=12, spaceAfter=10,
695
+ alignment=1)
696
+ ))
697
+ # --- End FIX ---
698
+
699
+ if acceptance_prob >= 80:
700
+ story.append(
701
+ Paragraph("This indicates a very strong candidate with high potential. Well done!", body_text))
702
+ elif acceptance_prob >= 50:
703
+ story.append(Paragraph(
704
+ "This candidate shows solid potential but has areas for improvement to become an even stronger fit.",
705
+ body_text))
706
+ else:
707
+ story.append(Paragraph(
708
+ "This candidate may require significant development or may not be the ideal fit at this time.",
709
+ body_text))
710
+ story.append(Spacer(1, 0.3 * inch))
711
+ # --- End Acceptance Probability ---
712
+
713
+ # Parse Gemini's report into sections for better PDF structuring
714
+ sections = {}
715
+ current_section = None
716
+ # Use regex to robustly identify sections, especially with varied bullet points
717
+ section_patterns = {
718
+ r'^\s*\*\*\s*1\.\s*Executive Summary\s*\*\*': 'Executive Summary',
719
+ r'^\s*\*\*\s*2\.\s*Voice Analysis Insights\s*\*\*': 'Voice Analysis Insights',
720
+ r'^\s*\*\*\s*3\.\s*Content Analysis & Strengths/Areas for Development\s*\*\*': 'Content Analysis & Strengths/Areas for Development',
721
+ r'^\s*\*\*\s*4\.\s*Actionable Recommendations\s*\*\*': 'Actionable Recommendations'
722
+ }
723
+
724
+ for line in gemini_report_text.split('\n'):
725
+ matched_section = False
726
+ for pattern, section_name in section_patterns.items():
727
+ if re.match(pattern, line):
728
+ current_section = section_name
729
+ sections[current_section] = []
730
+ matched_section = True
731
+ break
732
+ if not matched_section and current_section:
733
+ sections[current_section].append(line)
734
+
735
+ # 1. Executive Summary
736
+ story.append(Paragraph("1. Executive Summary", h2))
737
+ story.append(Spacer(1, 0.1 * inch))
738
+ if 'Executive Summary' in sections:
739
+ for line in sections['Executive Summary']:
740
+ if line.strip():
741
+ story.append(Paragraph(line.strip(), body_text))
742
+ story.append(Spacer(1, 0.2 * inch))
743
+
744
+ # 2. Voice Analysis (Detailed - using Table for summary)
745
+ story.append(Paragraph("2. Voice Analysis", h2))
746
+ voice_analysis = analysis_data.get('voice_analysis', {})
747
+
748
+ if voice_analysis and 'error' not in voice_analysis:
749
+ # Voice Analysis Summary Table
750
+ table_data = [
751
+ ['Metric', 'Value', 'Interpretation'],
752
+ ['Speaking Rate', f"{voice_analysis['speaking_rate']:.2f} words/sec", 'Average rate'],
753
+ ['Filler Words', f"{voice_analysis['filler_ratio'] * 100:.1f}%", 'Percentage of total words'],
754
+ ['Repetition Score', f"{voice_analysis['repetition_score']:.3f}", 'Lower is better articulation'],
755
+ ['Anxiety Level', voice_analysis['interpretation']['anxiety_level'].upper(),
756
+ f"Score: {voice_analysis['composite_scores']['anxiety']:.3f}"],
757
+ ['Confidence Level', voice_analysis['interpretation']['confidence_level'].upper(),
758
+ f"Score: {voice_analysis['composite_scores']['confidence']:.3f}"],
759
+ ['Fluency', voice_analysis['interpretation']['fluency_level'].upper(), 'Overall speech flow']
760
+ ]
761
+
762
+ table_style = TableStyle([
763
+ ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#6699CC')),
764
+ ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
765
+ ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
766
+ ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
767
+ ('BOTTOMPADDING', (0, 0), (-1, 0), 10),
768
+ ('BACKGROUND', (0, 1), (-1, -1), colors.HexColor('#EFEFEF')),
769
+ ('GRID', (0, 0), (-1, -1), 0.5, colors.HexColor('#CCCCCC')),
770
+ ('LEFTPADDING', (0, 0), (-1, -1), 6),
771
+ ('RIGHTPADDING', (0, 0), (-1, -1), 6),
772
+ ('TOPPADDING', (0, 0), (-1, -1), 6),
773
+ ('BOTTOMPADDING', (0, 0), (-1, -1), 6),
774
+ ])
775
+
776
+ table = Table(table_data)
777
+ table.setStyle(table_style)
778
+ story.append(table)
779
+ story.append(Spacer(1, 0.2 * inch))
780
+
781
+ # --- Charts ---
782
+ story.append(Paragraph("Score Visualization:", h3))
783
+ chart_path = os.path.join(OUTPUT_DIR, f"anxiety_confidence_{uuid.uuid4().hex[:8]}.png")
784
+ # --- FIX: Call generate_anxiety_confidence_chart if it is defined and imports are correct ---
785
+ try:
786
+ # The generate_anxiety_confidence_chart function is now expected to be defined.
787
+ # It relies on matplotlib and Image (from reportlab.platypus)
788
+ generate_anxiety_confidence_chart(voice_analysis['composite_scores'], chart_path)
789
+ if os.path.exists(chart_path):
790
+ img = Image(chart_path, width=3.5 * inch, height=2.0 * inch)
791
+ story.append(img)
792
+ story.append(Spacer(1, 0.1 * inch))
793
+ os.remove(chart_path)
794
+ except NameError: # Catch NameError if function is truly not defined
795
+ logger.warning(
796
+ "Chart generation function 'generate_anxiety_confidence_chart' is not defined. Skipping chart.")
797
+ except Exception as chart_e:
798
+ logger.warning(f"Could not add chart image to PDF: {chart_e}. Please check matplotlib installation.")
799
+ # --- End FIX ---
800
+ # --- End Charts ---
801
+
802
+ # Detailed Interpretation from Gemini (if present)
803
+ if 'Voice Analysis Insights' in sections:
804
+ story.append(Paragraph("Detailed Interpretation:", h3))
805
+ for line in sections['Voice Analysis Insights']:
806
+ if line.strip():
807
+ # Handle numbered lists from Gemini
808
+ if re.match(r'^\d+\.\s', line.strip()):
809
+ story.append(
810
+ Paragraph(line.strip(), bullet_style))
811
+ else:
812
+ story.append(Paragraph(line.strip(), body_text))
813
+ story.append(Spacer(1, 0.2 * inch))
814
+
815
+ else:
816
+ story.append(Paragraph("Voice analysis not available or encountered an error.", body_text))
817
+ story.append(Spacer(1, 0.3 * inch))
818
+
819
+ # 3. Content Analysis
820
+ story.append(Paragraph("3. Content Analysis", h2))
821
+ if 'Content Analysis & Strengths/Areas for Development' in sections:
822
+ for line in sections['Content Analysis & Strengths/Areas for Development']:
823
+ if line.strip():
824
+ # Handle bullet points from Gemini
825
+ if line.strip().startswith('-'):
826
+ story.append(Paragraph(line.strip()[1:].strip(), bullet_style)) # Remove the '-' and strip
827
+ else:
828
+ story.append(Paragraph(line.strip(), body_text))
829
+ story.append(Spacer(1, 0.2 * inch))
830
+
831
+ # Add some interviewee responses to the report (can be formatted as a list)
832
+ story.append(Paragraph("Key Interviewee Responses (Contextual):", h3))
833
+ interviewee_responses = [
834
+ f"Speaker {u['speaker']} ({u['role']}): {u['text']}"
835
+ for u in analysis_data['transcript']
836
+ if u['role'] == 'Interviewee'
837
+ ][:5]
838
+ for res in interviewee_responses:
839
+ story.append(Paragraph(res, bullet_style))
840
+ story.append(Spacer(1, 0.3 * inch))
841
+
842
+ # 4. Recommendations
843
+ story.append(Paragraph("4. Recommendations", h2))
844
+ if 'Actionable Recommendations' in sections:
845
+ for line in sections['Actionable Recommendations']:
846
+ if line.strip():
847
+ # Handle bullet points from Gemini
848
+ if line.strip().startswith('-'):
849
+ story.append(Paragraph(line.strip()[1:].strip(), bullet_style)) # Remove the '-' and strip
850
+ else:
851
+ story.append(Paragraph(line.strip(), body_text))
852
+ story.append(Spacer(1, 0.2 * inch))
853
+
854
+ # Footer Text
855
+ story.append(Spacer(1, 0.5 * inch))
856
+ story.append(Paragraph("--- Analysis by EvalBot ---", ParagraphStyle(
857
+ name='FooterText', parent=styles['Normal'], fontSize=8, alignment=1, textColor=colors.HexColor('#666666')
858
+ )))
859
+
860
+ doc.build(story)
861
+ return True
862
+ except Exception as e:
863
+ logger.error(f"PDF creation failed: {str(e)}", exc_info=True)
864
+ return False
865
+
866
+
867
def convert_to_serializable(obj):
    """Recursively replace NumPy values inside *obj* with plain Python types.

    The result is intended to be passed to ``json.dump``.

    Conversions performed:
      * NumPy scalars (``np.generic``) -> the Python scalar from ``.item()``.
      * ``np.ndarray`` -> nested lists via ``.tolist()``.
      * ``dict`` -> new dict with converted values; keys that are NumPy
        scalars are converted with ``.item()`` as well, because ``json``
        rejects keys such as ``np.int64``.
      * ``list``, ``tuple``, ``set``, ``frozenset`` -> list of converted
        items (JSON has a single array type, and sets are not serializable).

    Any other object is returned unchanged.

    Args:
        obj: Arbitrary (possibly nested) value.

    Returns:
        A structure equivalent to *obj* with the conversions above applied.
    """
    if isinstance(obj, np.generic):
        return obj.item()
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, dict):
        return {
            (key.item() if isinstance(key, np.generic) else key):
                convert_to_serializable(value)
            for key, value in obj.items()
        }
    if isinstance(obj, (list, tuple, set, frozenset)):
        # Tuples and sets can hide NumPy scalars just like lists do, so they
        # are walked too and emitted as lists.
        return [convert_to_serializable(item) for item in obj]
    return obj
877
+
878
+
879
def _load_or_train_role_classifier(utterances):
    """Return ``(clf, vectorizer, scaler)`` used for role classification.

    The three artifacts are loaded from ``OUTPUT_DIR`` only when all of them
    are present; otherwise ``train_role_classifier`` is called on
    *utterances*. Checking a single file could crash on a partial cache.
    """
    artifact_paths = [
        os.path.join(OUTPUT_DIR, file_name)
        for file_name in ('role_classifier.pkl', 'text_vectorizer.pkl', 'feature_scaler.pkl')
    ]
    if all(os.path.exists(path) for path in artifact_paths):
        # NOTE(review): joblib.load unpickles arbitrary objects; OUTPUT_DIR
        # must only ever contain files written by this application.
        clf, vectorizer, scaler = (joblib.load(path) for path in artifact_paths)
        return clf, vectorizer, scaler
    return train_role_classifier(utterances)


def process_interview(audio_path: str):
    """Run the full interview analysis pipeline for one audio file.

    Steps, in order: convert the input to WAV, transcribe it, attach prosodic
    features to every utterance, identify speakers, classify speaker roles,
    analyze the interviewee's voice, compute the acceptance probability,
    generate the report text, then write a PDF report and a JSON dump of the
    analysis into ``OUTPUT_DIR``. The intermediate WAV file is always removed,
    whether processing succeeds or fails.

    Args:
        audio_path: Path of the interview recording to process.

    Returns:
        dict with two keys:
          * ``'pdf_path'``: target path of the PDF report. If
            ``create_pdf_report`` reported failure, a warning is logged and
            the file may not exist.
          * ``'json_path'``: path of the written JSON analysis file.

    Raises:
        Exception: any error raised by a pipeline step is logged and
            re-raised unchanged.
    """
    # Initialised up front so the cleanup in ``finally`` can tell whether the
    # conversion step ever produced a file.
    wav_file = None
    try:
        logger.info(f"Starting processing for {audio_path}")

        wav_file = convert_to_wav(audio_path)

        logger.info("Starting transcription")
        transcript = transcribe(wav_file)

        logger.info("Extracting prosodic features")
        for utterance in transcript['utterances']:
            utterance['prosodic_features'] = extract_prosodic_features(
                wav_file,
                utterance['start'],
                utterance['end']
            )

        logger.info("Identifying speakers")
        utterances_with_speakers = identify_speakers(transcript, wav_file)

        logger.info("Classifying roles")
        clf, vectorizer, scaler = _load_or_train_role_classifier(utterances_with_speakers)
        classified_utterances = classify_roles(utterances_with_speakers, clf, vectorizer, scaler)

        logger.info("Analyzing interviewee voice")
        voice_analysis = analyze_interviewee_voice(wav_file, classified_utterances)

        analysis_data = {
            'transcript': classified_utterances,
            'speakers': list(set(u['speaker'] for u in classified_utterances)),
            'voice_analysis': voice_analysis,
            'text_analysis': {
                'total_duration': sum(u['prosodic_features']['duration'] for u in classified_utterances),
                'speaker_turns': len(classified_utterances)
            }
        }

        analysis_data['acceptance_probability'] = calculate_acceptance_probability(analysis_data)

        logger.info("Generating report text using Gemini")
        gemini_report_text = generate_report(analysis_data)

        base_name = os.path.splitext(os.path.basename(audio_path))[0]
        pdf_path = os.path.join(OUTPUT_DIR, f"{base_name}_report.pdf")
        pdf_created = create_pdf_report(analysis_data, pdf_path, gemini_report_text=gemini_report_text)
        if not pdf_created:
            # create_pdf_report signals failure through its return value
            # instead of raising; surface that here rather than silently
            # handing back a path that may not exist.
            logger.warning(f"PDF report was not created at {pdf_path}; continuing with JSON output only")

        json_path = os.path.join(OUTPUT_DIR, f"{base_name}_analysis.json")
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump(convert_to_serializable(analysis_data), f, indent=2)

        logger.info(f"Processing completed for {audio_path}")
        return {
            'pdf_path': pdf_path,
            'json_path': json_path
        }
    except Exception as e:
        logger.error(f"Processing failed: {str(e)}", exc_info=True)
        raise
    finally:
        # Single cleanup point for the temporary WAV on both the success and
        # the error path.
        if wav_file and os.path.exists(wav_file):
            os.remove(wav_file)