Update process_interview.py
Browse files- process_interview.py +64 -35
process_interview.py
CHANGED
|
@@ -442,45 +442,74 @@ def create_pdf_report(analysis_data: Dict, output_path: str):
|
|
| 442 |
# 7. MAIN PROCESSING PIPELINE
|
| 443 |
# ==============================================================================
|
| 444 |
def process_interview(audio_path: str, user_id: str = "candidate-123") -> Dict:
|
| 445 |
-
"""The main orchestrator function to process an interview from start to finish."""
|
| 446 |
-
wav_file = None
|
| 447 |
try:
|
| 448 |
-
logger.info(f"
|
| 449 |
wav_file = convert_to_wav(audio_path)
|
|
|
|
|
|
|
| 450 |
transcript = transcribe(wav_file)
|
| 451 |
-
|
| 452 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 453 |
utterances_with_speakers = identify_speakers(transcript, wav_file)
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 480 |
except Exception as e:
|
| 481 |
-
logger.
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
|
| 486 |
|
|
|
|
# 7. MAIN PROCESSING PIPELINE
# ==============================================================================
def process_interview(audio_path: str, user_id: str = "candidate-123") -> Dict:
    """Run the full interview-analysis pipeline for one audio file.

    Steps: convert to WAV -> transcribe -> extract prosodic features ->
    diarize speakers -> classify roles -> analyze interviewee voice ->
    score acceptance probability -> generate report text -> write PDF and
    JSON artifacts into OUTPUT_DIR.

    Args:
        audio_path: Path to the source audio file (any format
            convert_to_wav accepts).
        user_id: Identifier embedded in output file names, the report and
            the returned summary.

    Returns:
        Dict with 'summary', 'json_path' and 'pdf_path' keys.

    Raises:
        ValueError: when transcription yields no utterances.
        RuntimeError: when PDF generation fails.
        Exception: any upstream failure is logged (with traceback) and
            re-raised to the caller.
    """
    wav_file = None  # defined up-front so the finally-cleanup is always safe
    try:
        logger.info(f"Starting processing for {audio_path} (User ID: {user_id})")
        wav_file = convert_to_wav(audio_path)
        logger.debug(f"Created WAV file: {wav_file}")

        logger.info("Starting transcription")
        transcript = transcribe(wav_file)
        logger.info("Transcript result: %s", transcript)
        if not transcript or 'utterances' not in transcript or not transcript['utterances']:
            logger.error("Transcription failed or returned empty utterances")
            raise ValueError("Transcription failed or returned empty utterances")

        logger.info("Extracting prosodic features")
        for utterance in transcript['utterances']:
            utterance['prosodic_features'] = extract_prosodic_features(
                wav_file,
                utterance['start'],
                utterance['end']
            )

        logger.info("Identifying speakers")
        utterances_with_speakers = identify_speakers(transcript, wav_file)

        logger.info("Classifying roles")
        # Reuse previously trained artifacts when present; otherwise train a
        # fresh classifier from this interview's utterances.
        clf_path = os.path.join(OUTPUT_DIR, 'role_classifier.pkl')
        if os.path.exists(clf_path):
            clf = joblib.load(clf_path)
            vectorizer = joblib.load(os.path.join(OUTPUT_DIR, 'text_vectorizer.pkl'))
            scaler = joblib.load(os.path.join(OUTPUT_DIR, 'feature_scaler.pkl'))
        else:
            clf, vectorizer, scaler = train_role_classifier(utterances_with_speakers)
        classified_utterances = classify_roles(utterances_with_speakers, clf, vectorizer, scaler)

        logger.info("Analyzing interviewee voice")
        voice_analysis = analyze_interviewee_voice(wav_file, classified_utterances)

        analysis_data = {
            'user_id': user_id,
            'transcript': classified_utterances,
            'speakers': list(set(u['speaker'] for u in classified_utterances)),
            'voice_analysis': voice_analysis,
            'text_analysis': {
                'total_duration': sum(u['prosodic_features']['duration'] for u in classified_utterances),
                'speaker_turns': len(classified_utterances)
            }
        }
        analysis_data['acceptance_probability'] = calculate_acceptance_probability(analysis_data)

        logger.info("Generating report text using Gemini")
        gemini_report_text = generate_report(analysis_data)

        # BUGFIX: the original `split('_', 1)[1]` raised IndexError whenever
        # the audio file name had no underscore; fall back to the full stem.
        # Behavior is unchanged for names that do contain an underscore.
        stem = os.path.splitext(os.path.basename(audio_path))[0]
        _, sep, tail = stem.partition('_')
        base_name = f"{user_id}_{tail if sep else stem}"

        pdf_path = os.path.join(OUTPUT_DIR, f"{base_name}_report.pdf")
        if not create_pdf_report(analysis_data, pdf_path, gemini_report_text=gemini_report_text):
            logger.error(f"Failed to create PDF report: {pdf_path}")
            raise RuntimeError("PDF report generation failed")

        json_path = os.path.join(OUTPUT_DIR, f"{base_name}_analysis.json")
        try:
            logger.debug(f"Serializing analysis_data with keys: {list(analysis_data.keys())}")
            # Serialize BEFORE opening the file so a conversion error cannot
            # leave a truncated/empty JSON artifact on disk.
            serializable_data = convert_to_serializable(analysis_data)
            with open(json_path, 'w', encoding='utf-8') as f:
                json.dump(serializable_data, f, indent=2)
        except Exception as e:
            logger.error(f"Failed to serialize analysis_data to JSON: {str(e)}", exc_info=True)
            raise

        logger.info(f"Processing completed for {audio_path} (User ID: {user_id})")
        return {
            'summary': f"User ID: {user_id}\nspeakers: {', '.join(analysis_data['speakers'])}",
            'json_path': json_path,
            'pdf_path': pdf_path
        }
    except Exception as e:
        logger.error(f"Processing failed: {str(e)}", exc_info=True)
        raise
    finally:
        # Single cleanup point for the temporary WAV, covering both the
        # success and failure paths; replaces the duplicated os.remove plus
        # the fragile `'wav_file' in locals()` check.
        if wav_file is not None and os.path.exists(wav_file):
            os.remove(wav_file)