Update process_interview.py
Browse files
- process_interview.py +50 -47
process_interview.py
CHANGED
|
@@ -39,7 +39,7 @@ matplotlib.use('Agg')
|
|
| 39 |
|
| 40 |
# Concurrency
|
| 41 |
from concurrent.futures import ThreadPoolExecutor
|
| 42 |
-
import joblib
|
| 43 |
|
| 44 |
# ==============================================================================
|
| 45 |
# 2. CONFIGURATION AND INITIALIZATION
|
|
@@ -67,7 +67,6 @@ if not all([PINECONE_KEY, ASSEMBLYAI_KEY, GEMINI_API_KEY]):
|
|
| 67 |
index, gemini_model, speaker_model, nlp, tokenizer, text_embedding_model = (None,) * 6
|
| 68 |
|
| 69 |
def initialize_all_services_and_models():
|
| 70 |
-
"""Initializes all external services and loads all AI models into memory."""
|
| 71 |
global index, gemini_model, speaker_model, nlp, tokenizer, text_embedding_model
|
| 72 |
logger.info("Initializing all services and loading all models...")
|
| 73 |
pc = Pinecone(api_key=PINECONE_KEY)
|
|
@@ -144,7 +143,7 @@ def identify_speakers(transcript: Dict, wav_file_path: str) -> List[Dict]:
|
|
| 144 |
|
| 145 |
def process_utterance(utterance):
|
| 146 |
start_ms, end_ms = utterance['start'], utterance['end']
|
| 147 |
-
if end_ms - start_ms < 1000:
|
| 148 |
return {**utterance, 'speaker_id': 'unknown_short_utterance'}
|
| 149 |
with temp_audio_file() as temp_path:
|
| 150 |
full_audio[start_ms:end_ms].export(temp_path, format="wav")
|
|
@@ -199,7 +198,7 @@ def convert_to_serializable(obj):
|
|
| 199 |
return obj.tolist()
|
| 200 |
if isinstance(obj, bytes):
|
| 201 |
import base64
|
| 202 |
-
return base64.b64encode(obj).decode('utf-8')
|
| 203 |
if isinstance(obj, dict):
|
| 204 |
return {k: convert_to_serializable(v) for k, v in obj.items()}
|
| 205 |
if isinstance(obj, list):
|
|
@@ -219,7 +218,7 @@ def classify_roles_ultimate(utterances: List[Dict], audio_path: str) -> List[Dic
|
|
| 219 |
interviewer_keywords = r'\b(what|why|how|when|where|who|which|tell me about|can you explain|describe|give me an example)\b'
|
| 220 |
for u in utterances:
|
| 221 |
sid, text = u.get('speaker_id'), u.get('text', '').lower()
|
| 222 |
-
if sid not in speaker_data or not text:
|
| 223 |
continue
|
| 224 |
rule_score = 10 if text.endswith('?') else 0
|
| 225 |
rule_score += 5 * len(re.findall(interviewer_keywords, text))
|
|
@@ -259,9 +258,9 @@ def analyze_interviewee_voice(audio_path: str, utterances: List[Dict]) -> Dict:
|
|
| 259 |
logger.info("Performing detailed voice analysis using your custom function...")
|
| 260 |
try:
|
| 261 |
y, sr = librosa.load(audio_path, sr=16000)
|
| 262 |
-
interviewee_utterances = [u for u in utterances if u.get('role') == 'Interviewee']
|
| 263 |
if not interviewee_utterances:
|
| 264 |
-
return {'error': 'No interviewee utterances found'}
|
| 265 |
segments = [y[int(u['start'] * sr / 1000):int(u['end'] * sr / 1000)] for u in interviewee_utterances]
|
| 266 |
if not segments:
|
| 267 |
return {'error': 'No valid interviewee segments to analyze.'}
|
|
@@ -351,13 +350,11 @@ def calculate_acceptance_probability(analysis_data: Dict) -> float:
|
|
| 351 |
# 6. AI-POWERED NARRATIVE AND PDF REPORTING
|
| 352 |
# ==============================================================================
|
| 353 |
def generate_gemini_report_text(analysis_data: Dict) -> str:
|
| 354 |
-
"""Generates a comprehensive narrative report using the Gemini model, based on your prompt structure."""
|
| 355 |
logger.info("Generating AI-powered narrative report with Gemini...")
|
| 356 |
voice = analysis_data.get('voice_analysis_metrics', {})
|
| 357 |
interviewee_text = "\n".join([f"- {u['text']}" for u in analysis_data['transcript_with_roles'] if u.get('role') == 'Interviewee'])
|
| 358 |
acceptance_prob = analysis_data.get('acceptance_probability', 50.0)
|
| 359 |
|
| 360 |
-
# Format numbers only if they are not 'N/A' or strings
|
| 361 |
def format_value(val):
|
| 362 |
return f"{val:.2f}" if isinstance(val, (int, float)) else val
|
| 363 |
|
|
@@ -397,7 +394,6 @@ def generate_gemini_report_text(analysis_data: Dict) -> str:
|
|
| 397 |
return "Error: Could not generate AI analysis report."
|
| 398 |
|
| 399 |
def create_pdf_report(analysis_data: Dict, output_path: str):
|
| 400 |
-
"""Generates a detailed, professional PDF report including all analysis sections, based on your structure."""
|
| 401 |
logger.info(f"Generating comprehensive PDF report at {output_path}...")
|
| 402 |
doc = SimpleDocTemplate(output_path, pagesize=letter, topMargin=inch, bottomMargin=inch)
|
| 403 |
styles = getSampleStyleSheet()
|
|
@@ -407,47 +403,55 @@ def create_pdf_report(analysis_data: Dict, output_path: str):
|
|
| 407 |
textColor=colors.HexColor('#0050BC'), fontName='Helvetica-Bold'))
|
| 408 |
styles.add(ParagraphStyle(name='H3', fontSize=12, leading=16, spaceBefore=10, spaceAfter=6,
|
| 409 |
textColor=colors.HexColor('#333333'), fontName='Helvetica-Bold'))
|
| 410 |
-
# Adjust the Body style to include indentation
|
| 411 |
styles.add(ParagraphStyle(name='Body', fontSize=10, leading=14, spaceAfter=6, alignment=TA_JUSTIFY,
|
| 412 |
-
leftIndent=10))
|
| 413 |
-
story = []
|
| 414 |
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
story.append(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 429 |
story.append(Spacer(1, 0.2 * inch))
|
| 430 |
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
|
| 434 |
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 448 |
|
| 449 |
-
doc.build(story)
|
| 450 |
-
logger.info("PDF report generated successfully.")
|
| 451 |
# ==============================================================================
|
| 452 |
# 7. MAIN PROCESSING PIPELINE
|
| 453 |
# ==============================================================================
|
|
@@ -471,7 +475,6 @@ def process_interview(audio_path: str, user_id: str = "candidate-123") -> Dict:
|
|
| 471 |
logger.info("Identifying speakers")
|
| 472 |
utterances_with_speakers = identify_speakers(transcript, wav_file)
|
| 473 |
|
| 474 |
-
# Add duration feature here
|
| 475 |
logger.info("Extracting duration features")
|
| 476 |
utterances_with_duration = extract_duration_feature(utterances_with_speakers)
|
| 477 |
|
|
@@ -498,7 +501,7 @@ def process_interview(audio_path: str, user_id: str = "candidate-123") -> Dict:
|
|
| 498 |
},
|
| 499 |
'acceptance_probability': calculate_acceptance_probability({'voice_analysis_metrics': voice_analysis}),
|
| 500 |
'voice_interpretation_text': voice_interpretation,
|
| 501 |
-
'chart_image_bytes': chart_buffer.getvalue()
|
| 502 |
}
|
| 503 |
|
| 504 |
logger.info("Generating report text using Gemini")
|
|
|
|
| 39 |
|
| 40 |
# Concurrency
|
| 41 |
from concurrent.futures import ThreadPoolExecutor
|
| 42 |
+
import joblib
|
| 43 |
|
| 44 |
# ==============================================================================
|
| 45 |
# 2. CONFIGURATION AND INITIALIZATION
|
|
|
|
| 67 |
index, gemini_model, speaker_model, nlp, tokenizer, text_embedding_model = (None,) * 6
|
| 68 |
|
| 69 |
def initialize_all_services_and_models():
|
|
|
|
| 70 |
global index, gemini_model, speaker_model, nlp, tokenizer, text_embedding_model
|
| 71 |
logger.info("Initializing all services and loading all models...")
|
| 72 |
pc = Pinecone(api_key=PINECONE_KEY)
|
|
|
|
| 143 |
|
| 144 |
def process_utterance(utterance):
|
| 145 |
start_ms, end_ms = utterance['start'], utterance['end']
|
| 146 |
+
if end_ms - start_ms < 1000: # Skip short utterances
|
| 147 |
return {**utterance, 'speaker_id': 'unknown_short_utterance'}
|
| 148 |
with temp_audio_file() as temp_path:
|
| 149 |
full_audio[start_ms:end_ms].export(temp_path, format="wav")
|
|
|
|
| 198 |
return obj.tolist()
|
| 199 |
if isinstance(obj, bytes):
|
| 200 |
import base64
|
| 201 |
+
return base64.b64encode(obj).decode('utf-8')
|
| 202 |
if isinstance(obj, dict):
|
| 203 |
return {k: convert_to_serializable(v) for k, v in obj.items()}
|
| 204 |
if isinstance(obj, list):
|
|
|
|
| 218 |
interviewer_keywords = r'\b(what|why|how|when|where|who|which|tell me about|can you explain|describe|give me an example)\b'
|
| 219 |
for u in utterances:
|
| 220 |
sid, text = u.get('speaker_id'), u.get('text', '').lower()
|
| 221 |
+
if sid not in speaker_data or not text or sid.startswith('unknown'):
|
| 222 |
continue
|
| 223 |
rule_score = 10 if text.endswith('?') else 0
|
| 224 |
rule_score += 5 * len(re.findall(interviewer_keywords, text))
|
|
|
|
| 258 |
logger.info("Performing detailed voice analysis using your custom function...")
|
| 259 |
try:
|
| 260 |
y, sr = librosa.load(audio_path, sr=16000)
|
| 261 |
+
interviewee_utterances = [u for u in utterances if u.get('role') == 'Interviewee' and not u['speaker_id'].startswith('unknown')]
|
| 262 |
if not interviewee_utterances:
|
| 263 |
+
return {'error': 'No valid interviewee utterances found'}
|
| 264 |
segments = [y[int(u['start'] * sr / 1000):int(u['end'] * sr / 1000)] for u in interviewee_utterances]
|
| 265 |
if not segments:
|
| 266 |
return {'error': 'No valid interviewee segments to analyze.'}
|
|
|
|
| 350 |
# 6. AI-POWERED NARRATIVE AND PDF REPORTING
|
| 351 |
# ==============================================================================
|
| 352 |
def generate_gemini_report_text(analysis_data: Dict) -> str:
|
|
|
|
| 353 |
logger.info("Generating AI-powered narrative report with Gemini...")
|
| 354 |
voice = analysis_data.get('voice_analysis_metrics', {})
|
| 355 |
interviewee_text = "\n".join([f"- {u['text']}" for u in analysis_data['transcript_with_roles'] if u.get('role') == 'Interviewee'])
|
| 356 |
acceptance_prob = analysis_data.get('acceptance_probability', 50.0)
|
| 357 |
|
|
|
|
| 358 |
def format_value(val):
|
| 359 |
return f"{val:.2f}" if isinstance(val, (int, float)) else val
|
| 360 |
|
|
|
|
| 394 |
return "Error: Could not generate AI analysis report."
|
| 395 |
|
| 396 |
def create_pdf_report(analysis_data: Dict, output_path: str):
|
|
|
|
| 397 |
logger.info(f"Generating comprehensive PDF report at {output_path}...")
|
| 398 |
doc = SimpleDocTemplate(output_path, pagesize=letter, topMargin=inch, bottomMargin=inch)
|
| 399 |
styles = getSampleStyleSheet()
|
|
|
|
| 403 |
textColor=colors.HexColor('#0050BC'), fontName='Helvetica-Bold'))
|
| 404 |
styles.add(ParagraphStyle(name='H3', fontSize=12, leading=16, spaceBefore=10, spaceAfter=6,
|
| 405 |
textColor=colors.HexColor('#333333'), fontName='Helvetica-Bold'))
|
|
|
|
| 406 |
styles.add(ParagraphStyle(name='Body', fontSize=10, leading=14, spaceAfter=6, alignment=TA_JUSTIFY,
|
| 407 |
+
leftIndent=10))
|
|
|
|
| 408 |
|
| 409 |
+
story = []
|
| 410 |
+
try:
|
| 411 |
+
# Cover Page
|
| 412 |
+
story.append(Paragraph("Candidate Interview Analysis Report", styles['H1']))
|
| 413 |
+
story.append(Spacer(1, 0.2 * inch))
|
| 414 |
+
story.append(Paragraph(f"Candidate ID: {analysis_data.get('user_id', 'N/A')}", styles['Body']))
|
| 415 |
+
story.append(Paragraph(f"Date of Analysis: {time.strftime('%B %d, %Y')}", styles['Body']))
|
| 416 |
+
prob = analysis_data.get('acceptance_probability', 0)
|
| 417 |
+
prob_color = 'green' if prob >= 75 else 'orange' if prob >= 50 else 'red'
|
| 418 |
+
story.append(Paragraph(f"<b>Overall Suitability Score:</b> <font size=16 color='{prob_color}'>{prob}%</font>", styles['H2']))
|
| 419 |
+
story.append(PageBreak())
|
| 420 |
+
|
| 421 |
+
# Quantitative Analysis Page
|
| 422 |
+
story.append(Paragraph("Quantitative Vocal Analysis", styles['H2']))
|
| 423 |
+
if analysis_data.get('chart_image_bytes'):
|
| 424 |
+
logger.debug("Adding chart image to PDF")
|
| 425 |
+
img_buffer = io.BytesIO(analysis_data['chart_image_bytes'])
|
| 426 |
+
story.append(Image(img_buffer, width=5.5 * inch, height=3.3 * inch))
|
| 427 |
+
else:
|
| 428 |
+
story.append(Paragraph("No chart data available.", styles['Body']))
|
| 429 |
story.append(Spacer(1, 0.2 * inch))
|
| 430 |
|
| 431 |
+
voice_text = analysis_data.get('voice_interpretation_text', 'Not available.').replace('\n', '<br/>')
|
| 432 |
+
story.append(Paragraph(voice_text, styles['Body']))
|
| 433 |
+
story.append(Spacer(1, 0.2 * inch))
|
| 434 |
|
| 435 |
+
# AI-Generated Narrative Page
|
| 436 |
+
story.append(Paragraph("Qualitative AI-Powered Report", styles['H2']))
|
| 437 |
+
gemini_text = analysis_data.get('gemini_report_text', 'Not available.')
|
| 438 |
+
for line in gemini_text.split('\n'):
|
| 439 |
+
line = line.strip()
|
| 440 |
+
if not line:
|
| 441 |
+
continue
|
| 442 |
+
if line.startswith('**') and line.endswith('**'):
|
| 443 |
+
story.append(Paragraph(line.strip('*'), styles['H3']))
|
| 444 |
+
elif line.startswith('- ') or line.startswith('* '):
|
| 445 |
+
story.append(Paragraph(f"• {line[2:]}", styles['Body']))
|
| 446 |
+
else:
|
| 447 |
+
story.append(Paragraph(line, styles['Body']))
|
| 448 |
+
|
| 449 |
+
doc.build(story)
|
| 450 |
+
logger.info("PDF report generated successfully.")
|
| 451 |
+
except Exception as e:
|
| 452 |
+
logger.error(f"Error generating PDF: {e}", exc_info=True)
|
| 453 |
+
raise
|
| 454 |
|
|
|
|
|
|
|
| 455 |
# ==============================================================================
|
| 456 |
# 7. MAIN PROCESSING PIPELINE
|
| 457 |
# ==============================================================================
|
|
|
|
| 475 |
logger.info("Identifying speakers")
|
| 476 |
utterances_with_speakers = identify_speakers(transcript, wav_file)
|
| 477 |
|
|
|
|
| 478 |
logger.info("Extracting duration features")
|
| 479 |
utterances_with_duration = extract_duration_feature(utterances_with_speakers)
|
| 480 |
|
|
|
|
| 501 |
},
|
| 502 |
'acceptance_probability': calculate_acceptance_probability({'voice_analysis_metrics': voice_analysis}),
|
| 503 |
'voice_interpretation_text': voice_interpretation,
|
| 504 |
+
'chart_image_bytes': chart_buffer.getvalue() if chart_buffer.tell() > 0 else None
|
| 505 |
}
|
| 506 |
|
| 507 |
logger.info("Generating report text using Gemini")
|