Update process_interview.py
Browse files- process_interview.py +110 -50
process_interview.py
CHANGED
|
@@ -368,18 +368,18 @@ def analyze_interviewee_voice(audio_path: str, utterances: List[Dict]) -> Dict:
|
|
| 368 |
intensity_std = np.std(intensities) if intensities else 0
|
| 369 |
shimmer = np.mean(np.abs(np.diff(intensities))) / intensity_mean if len(intensities) > 1 and intensity_mean > 0 else 0
|
| 370 |
anxiety_score = 0.6 * (pitch_std / pitch_mean) + 0.4 * (jitter + shimmer) if pitch_mean > 0 else 0
|
| 371 |
-
confidence_score = 0.7 * (1 / (1 + intensity_std)) + 0.3 * (1
|
| 372 |
hesitation_score = filler_ratio + repetition_score
|
| 373 |
anxiety_level = 'High' if anxiety_score > 0.15 else 'Moderate' if anxiety_score > 0.07 else 'Low'
|
| 374 |
-
confidence_level = 'High' if confidence_score > 0.
|
| 375 |
fluency_level = 'Fluent' if (filler_ratio < 0.05 and repetition_score < 0.1) else 'Moderate' if (filler_ratio < 0.1 and repetition_score < 0.2) else 'Disfluent'
|
| 376 |
return {
|
| 377 |
'speaking_rate': float(round(speaking_rate, 2)),
|
| 378 |
-
'filler_ratio': float(round(filler_ratio,
|
| 379 |
-
'repetition_score': float(round(repetition_score,
|
| 380 |
'pitch_analysis': {'mean': float(round(pitch_mean, 2)), 'std_dev': float(round(pitch_std, 2)), 'jitter': float(round(jitter, 4))},
|
| 381 |
-
'intensity_analysis': {'mean': float(round(intensity_mean, 2)), 'std_dev': float(round(intensity_std, 2)), 'shimmer': float(round(shimmer, 4))},
|
| 382 |
-
'composite_scores': {'anxiety': float(round(anxiety_score,
|
| 383 |
'interpretation': {'anxiety_level': anxiety_level, 'confidence_level': confidence_level, 'fluency_level': fluency_level}
|
| 384 |
}
|
| 385 |
except Exception as e:
|
|
@@ -390,15 +390,15 @@ def generate_voice_interpretation(analysis: Dict) -> str:
|
|
| 390 |
if 'error' in analysis:
|
| 391 |
return f"Voice analysis unavailable: {analysis['error']}"
|
| 392 |
interpretation_lines = [
|
| 393 |
-
f"- Speaking
|
| 394 |
-
f"- Filler
|
| 395 |
f"- Anxiety: {analysis['interpretation']['anxiety_level']} (Score: {analysis['composite_scores']['anxiety']:.3f}; stress response)",
|
| 396 |
f"- Confidence: {analysis['interpretation']['confidence_level']} (Score: {analysis['composite_scores']['confidence']:.3f}; vocal strength)",
|
| 397 |
f"- Fluency: {analysis['interpretation']['fluency_level']} (Drives engagement)",
|
| 398 |
"",
|
| 399 |
"HR Insights:",
|
| 400 |
"- Rapid speech (>3.0 wps) may reduce clarity; slower pacing enhances professionalism.",
|
| 401 |
-
"- High filler word usage undermines perceived
|
| 402 |
"- Elevated anxiety suggests pressure; training can improve resilience.",
|
| 403 |
"- Strong confidence supports leadership presence.",
|
| 404 |
"- Fluent speech enhances engagement in team settings."
|
|
@@ -413,11 +413,11 @@ def generate_anxiety_confidence_chart(composite_scores: Dict, chart_buffer):
|
|
| 413 |
bars = ax.bar(labels, scores, color=['#FF5252', '#26A69A'], edgecolor='black', width=0.45)
|
| 414 |
ax.set_ylabel('Score', fontsize=12)
|
| 415 |
ax.set_title('Vocal Dynamics: Anxiety vs. Confidence', fontsize=14, pad=15)
|
| 416 |
-
ax.set_ylim(0, 1.
|
| 417 |
for bar in bars:
|
| 418 |
height = bar.get_height()
|
| 419 |
ax.text(bar.get_x() + bar.get_width()/2, height + 0.05, f"{height:.2f}",
|
| 420 |
-
ha='center', color='black', fontweight='bold', fontsize=10)
|
| 421 |
ax.grid(True, axis='y', linestyle='--', alpha=0.7)
|
| 422 |
plt.tight_layout()
|
| 423 |
plt.savefig(chart_buffer, format='png', bbox_inches='tight', dpi=300)
|
|
@@ -454,7 +454,25 @@ def generate_report(analysis_data: Dict) -> str:
|
|
| 454 |
try:
|
| 455 |
voice = analysis_data.get('voice_analysis', {})
|
| 456 |
voice_interpretation = generate_voice_interpretation(voice)
|
| 457 |
-
interviewee_responses = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 458 |
acceptance_prob = analysis_data.get('acceptance_probability', 50.0)
|
| 459 |
acceptance_line = f"\n**Suitability Score: {acceptance_prob:.2f}%**\n"
|
| 460 |
if acceptance_prob >= 80:
|
|
@@ -465,33 +483,66 @@ def generate_report(analysis_data: Dict) -> str:
|
|
| 465 |
acceptance_line += "HR Verdict: Moderate potential, needs additional assessment."
|
| 466 |
else:
|
| 467 |
acceptance_line += "HR Verdict: Limited fit, significant improvement required."
|
|
|
|
| 468 |
prompt = f"""
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 490 |
response = gemini_model.generate_content(prompt)
|
| 491 |
-
|
|
|
|
|
|
|
| 492 |
except Exception as e:
|
| 493 |
-
logger.error(f"Report generation failed: {str(e)}")
|
| 494 |
-
return f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 495 |
|
| 496 |
def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text: str) -> bool:
|
| 497 |
try:
|
|
@@ -539,13 +590,14 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text:
|
|
| 539 |
story.append(Paragraph("<b>HR Verdict:</b> Limited fit, significant improvement required.", body_text))
|
| 540 |
story.append(Spacer(1, 0.2*inch))
|
| 541 |
participants = sorted([p for p in set(u['speaker'] for u in analysis_data['transcript']) if p != 'Unknown'])
|
|
|
|
| 542 |
table_data = [
|
| 543 |
['Metric', 'Value'],
|
| 544 |
['Interview Duration', f"{analysis_data['text_analysis']['total_duration']:.2f} seconds"],
|
| 545 |
['Speaker Turns', f"{analysis_data['text_analysis']['speaker_turns']}"],
|
| 546 |
-
['Participants',
|
| 547 |
]
|
| 548 |
-
table = Table(table_data, colWidths=[2.
|
| 549 |
table.setStyle(TableStyle([
|
| 550 |
('BACKGROUND', (0,0), (-1,0), colors.HexColor('#0050BC')),
|
| 551 |
('TEXTCOLOR', (0,0), (-1,0), colors.white),
|
|
@@ -557,6 +609,8 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text:
|
|
| 557 |
('TOPPADDING', (0,0), (-1,0), 6),
|
| 558 |
('BACKGROUND', (0,1), (-1,-1), colors.HexColor('#F5F6FA')),
|
| 559 |
('GRID', (0,0), (-1,-1), 0.4, colors.HexColor('#DDE4EB')),
|
|
|
|
|
|
|
| 560 |
]))
|
| 561 |
story.append(table)
|
| 562 |
story.append(Spacer(1, 0.3*inch))
|
|
@@ -641,23 +695,26 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text:
|
|
| 641 |
clean_line = line.lstrip('-').strip()
|
| 642 |
if not clean_line:
|
| 643 |
continue
|
| 644 |
-
clean_line = re.sub(r'[
|
| 645 |
if current_section == 'Competency':
|
| 646 |
-
if any(k in clean_line.lower() for k in ['leader', 'problem', 'commun', 'adapt', '
|
| 647 |
current_subsection = 'Strengths'
|
| 648 |
-
elif any(k in clean_line.lower() for k in ['improv', 'grow', '
|
| 649 |
current_subsection = 'Growth Areas'
|
| 650 |
if current_subsection:
|
| 651 |
sections[current_section][current_subsection].append(clean_line)
|
| 652 |
elif current_section == 'Recommendations':
|
| 653 |
-
if any(k in clean_line.lower() for k in ['commun', 'tech', '
|
| 654 |
current_subsection = 'Development'
|
| 655 |
-
elif any(k in clean_line.lower() for k in ['adv', '
|
| 656 |
current_subsection = 'Next Steps'
|
| 657 |
if current_subsection:
|
| 658 |
sections[current_section][current_subsection].append(clean_line)
|
| 659 |
else:
|
| 660 |
sections[current_section].append(clean_line)
|
|
|
|
|
|
|
|
|
|
| 661 |
|
| 662 |
# Executive Summary
|
| 663 |
story.append(Paragraph("2. Executive Summary", h2))
|
|
@@ -665,7 +722,8 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text:
|
|
| 665 |
for line in sections['Executive Summary']:
|
| 666 |
story.append(Paragraph(line, bullet_style))
|
| 667 |
else:
|
| 668 |
-
story.append(Paragraph("
|
|
|
|
| 669 |
story.append(Spacer(1, 0.15*inch))
|
| 670 |
|
| 671 |
# Competency and Content
|
|
@@ -675,14 +733,14 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text:
|
|
| 675 |
for line in sections['Competency']['Strengths']:
|
| 676 |
story.append(Paragraph(line, bullet_style))
|
| 677 |
else:
|
| 678 |
-
story.append(Paragraph("
|
| 679 |
story.append(Spacer(1, 0.1*inch))
|
| 680 |
story.append(Paragraph("Growth Areas", h3))
|
| 681 |
if sections['Competency']['Growth Areas']:
|
| 682 |
for line in sections['Competency']['Growth Areas']:
|
| 683 |
story.append(Paragraph(line, bullet_style))
|
| 684 |
else:
|
| 685 |
-
story.append(Paragraph("
|
| 686 |
story.append(Spacer(1, 0.15*inch))
|
| 687 |
|
| 688 |
# Role Fit
|
|
@@ -691,7 +749,7 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text:
|
|
| 691 |
for line in sections['Role Fit']:
|
| 692 |
story.append(Paragraph(line, bullet_style))
|
| 693 |
else:
|
| 694 |
-
story.append(Paragraph("
|
| 695 |
story.append(Spacer(1, 0.15*inch))
|
| 696 |
|
| 697 |
# Recommendations
|
|
@@ -701,14 +759,14 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text:
|
|
| 701 |
for line in sections['Recommendations']['Development']:
|
| 702 |
story.append(Paragraph(line, bullet_style))
|
| 703 |
else:
|
| 704 |
-
story.append(Paragraph("
|
| 705 |
story.append(Spacer(1, 0.1*inch))
|
| 706 |
story.append(Paragraph("Next Steps for Hiring Managers", h3))
|
| 707 |
if sections['Recommendations']['Next Steps']:
|
| 708 |
for line in sections['Recommendations']['Next Steps']:
|
| 709 |
story.append(Paragraph(line, bullet_style))
|
| 710 |
else:
|
| 711 |
-
story.append(Paragraph("
|
| 712 |
story.append(Spacer(1, 0.15*inch))
|
| 713 |
story.append(Paragraph("This report provides actionable insights to support hiring and candidate development.", body_text))
|
| 714 |
|
|
@@ -751,6 +809,8 @@ def process_interview(audio_url: str) -> Dict:
|
|
| 751 |
for utterance in transcript['utterances']:
|
| 752 |
utterance['prosodic_features'] = extract_prosodic_features(wav_file, utterance['start'], utterance['end'])
|
| 753 |
utterances_with_speakers = identify_speakers(transcript, wav_file)
|
|
|
|
|
|
|
| 754 |
clf, vectorizer, scaler = None, None, None
|
| 755 |
if os.path.exists(os.path.join(OUTPUT_DIR, 'role_classifier.pkl')):
|
| 756 |
clf = joblib.load(os.path.join(OUTPUT_DIR, 'role_classifier.pkl'))
|
|
@@ -762,7 +822,7 @@ def process_interview(audio_url: str) -> Dict:
|
|
| 762 |
voice_analysis = analyze_interviewee_voice(wav_file, classified_utterances)
|
| 763 |
analysis_data = {
|
| 764 |
'transcript': classified_utterances,
|
| 765 |
-
'speakers': list(set(u['speaker'] for u in classified_utterances)),
|
| 766 |
'voice_analysis': voice_analysis,
|
| 767 |
'text_analysis': {
|
| 768 |
'total_duration': sum(u['prosodic_features']['duration'] for u in classified_utterances),
|
|
|
|
| 368 |
intensity_std = np.std(intensities) if intensities else 0
|
| 369 |
shimmer = np.mean(np.abs(np.diff(intensities))) / intensity_mean if len(intensities) > 1 and intensity_mean > 0 else 0
|
| 370 |
anxiety_score = 0.6 * (pitch_std / pitch_mean) + 0.4 * (jitter + shimmer) if pitch_mean > 0 else 0
|
| 371 |
+
confidence_score = 0.7 * (1 / (1 + intensity_std)) + 0.3 * (1 - filler_ratio)
|
| 372 |
hesitation_score = filler_ratio + repetition_score
|
| 373 |
anxiety_level = 'High' if anxiety_score > 0.15 else 'Moderate' if anxiety_score > 0.07 else 'Low'
|
| 374 |
+
confidence_level = 'High' if confidence_score > 0.75 else 'Moderate' if confidence_score > 0.5 else 'Low'
|
| 375 |
fluency_level = 'Fluent' if (filler_ratio < 0.05 and repetition_score < 0.1) else 'Moderate' if (filler_ratio < 0.1 and repetition_score < 0.2) else 'Disfluent'
|
| 376 |
return {
|
| 377 |
'speaking_rate': float(round(speaking_rate, 2)),
|
| 378 |
+
'filler_ratio': float(round(filler_ratio, 3)),
|
| 379 |
+
'repetition_score': float(round(repetition_score, 3)),
|
| 380 |
'pitch_analysis': {'mean': float(round(pitch_mean, 2)), 'std_dev': float(round(pitch_std, 2)), 'jitter': float(round(jitter, 4))},
|
| 381 |
+
'intensity_analysis': {'mean': float(round(intensity_mean, 2)), 'std_dev': float(round(float(intensity_std), 2)), 'shimmer': float(round(shimmer, 4))},
|
| 382 |
+
'composite_scores': {'anxiety': float(round(anxiety_score, 3)), 'confidence': float(round(confidence_score, 3)), 'hesitation': float(round(hesitation_score, 3))},
|
| 383 |
'interpretation': {'anxiety_level': anxiety_level, 'confidence_level': confidence_level, 'fluency_level': fluency_level}
|
| 384 |
}
|
| 385 |
except Exception as e:
|
|
|
|
| 390 |
if 'error' in analysis:
|
| 391 |
return f"Voice analysis unavailable: {analysis['error']}"
|
| 392 |
interpretation_lines = [
|
| 393 |
+
f"- Speaking rate: {analysis.get('speaking_rate', 0):.2f} words/sec (Benchmark: 2.0-3.0; affects clarity)",
|
| 394 |
+
f"- Filler words: {analysis['filler_ratio'] * 100:.1f}% (High usage reduces credibility)",
|
| 395 |
f"- Anxiety: {analysis['interpretation']['anxiety_level']} (Score: {analysis['composite_scores']['anxiety']:.3f}; stress response)",
|
| 396 |
f"- Confidence: {analysis['interpretation']['confidence_level']} (Score: {analysis['composite_scores']['confidence']:.3f}; vocal strength)",
|
| 397 |
f"- Fluency: {analysis['interpretation']['fluency_level']} (Drives engagement)",
|
| 398 |
"",
|
| 399 |
"HR Insights:",
|
| 400 |
"- Rapid speech (>3.0 wps) may reduce clarity; slower pacing enhances professionalism.",
|
| 401 |
+
"- High filler word usage undermines perceived credibility.",
|
| 402 |
"- Elevated anxiety suggests pressure; training can improve resilience.",
|
| 403 |
"- Strong confidence supports leadership presence.",
|
| 404 |
"- Fluent speech enhances engagement in team settings."
|
|
|
|
| 413 |
bars = ax.bar(labels, scores, color=['#FF5252', '#26A69A'], edgecolor='black', width=0.45)
|
| 414 |
ax.set_ylabel('Score', fontsize=12)
|
| 415 |
ax.set_title('Vocal Dynamics: Anxiety vs. Confidence', fontsize=14, pad=15)
|
| 416 |
+
ax.set_ylim(0, 1.2)
|
| 417 |
for bar in bars:
|
| 418 |
height = bar.get_height()
|
| 419 |
ax.text(bar.get_x() + bar.get_width()/2, height + 0.05, f"{height:.2f}",
|
| 420 |
+
ha='center', va='bottom', color='black', fontweight='bold', fontsize=10)
|
| 421 |
ax.grid(True, axis='y', linestyle='--', alpha=0.7)
|
| 422 |
plt.tight_layout()
|
| 423 |
plt.savefig(chart_buffer, format='png', bbox_inches='tight', dpi=300)
|
|
|
|
| 454 |
try:
|
| 455 |
voice = analysis_data.get('voice_analysis', {})
|
| 456 |
voice_interpretation = generate_voice_interpretation(voice)
|
| 457 |
+
interviewee_responses = [u['text'] for u in analysis_data['transcript'] if u['role'] == 'Interviewee']
|
| 458 |
+
if not interviewee_responses:
|
| 459 |
+
logger.warning("No interviewee responses found for report generation")
|
| 460 |
+
return f"""**1. Executive Summary**
|
| 461 |
+
- Insufficient interviewee content to generate a summary.
|
| 462 |
+
|
| 463 |
+
**2. Communication and Vocal Dynamics**
|
| 464 |
+
{voice_interpretation}
|
| 465 |
+
|
| 466 |
+
**3. Competency and Content**
|
| 467 |
+
- Strengths: Unable to identify strengths due to limited content.
|
| 468 |
+
- Growth Areas: Recommend further interview to assess competencies.
|
| 469 |
+
|
| 470 |
+
**4. Role Fit and Potential**
|
| 471 |
+
- Unable to assess role fit due to insufficient content.
|
| 472 |
+
|
| 473 |
+
**5. Recommendations**
|
| 474 |
+
- Development: Schedule additional interview to gather more data.
|
| 475 |
+
- Next Steps: Conduct a follow-up interview with targeted questions."""
|
| 476 |
acceptance_prob = analysis_data.get('acceptance_probability', 50.0)
|
| 477 |
acceptance_line = f"\n**Suitability Score: {acceptance_prob:.2f}%**\n"
|
| 478 |
if acceptance_prob >= 80:
|
|
|
|
| 483 |
acceptance_line += "HR Verdict: Moderate potential, needs additional assessment."
|
| 484 |
else:
|
| 485 |
acceptance_line += "HR Verdict: Limited fit, significant improvement required."
|
| 486 |
+
transcript_text = "\n".join([f"- {u['speaker']}: {u['text']}" for u in analysis_data['transcript']])
|
| 487 |
prompt = f"""
|
| 488 |
+
You are EvalBot, a senior HR consultant delivering a professional interview analysis report. Use clear headings with '**', bullet points ('-'), complete sentences, and formal language. Avoid redundancy, vague terms, and special characters that could break formatting (e.g., parentheses). Ensure each section is unique, actionable, and at least 2-3 bullet points long. If content is limited, provide reasonable inferences based on available data.
|
| 489 |
+
|
| 490 |
+
**Input Data**
|
| 491 |
+
- Suitability Score: {acceptance_prob:.2f}%
|
| 492 |
+
- Interview Duration: {analysis_data['text_analysis']['total_duration']:.2f} seconds
|
| 493 |
+
- Speaker Turns: {analysis_data['text_analysis']['speaker_turns']}
|
| 494 |
+
- Participants: {', '.join(sorted(set(u['speaker'] for u in analysis_data['transcript'])))}
|
| 495 |
+
- Voice Analysis: {voice_interpretation}
|
| 496 |
+
- Transcript Sample:
|
| 497 |
+
{transcript_text[:1000]}...
|
| 498 |
+
|
| 499 |
+
**Report Structure**
|
| 500 |
+
{acceptance_line}
|
| 501 |
+
|
| 502 |
+
**1. Executive Summary**
|
| 503 |
+
- Provide a narrative overview of the candidate’s performance, focusing on key strengths and role fit.
|
| 504 |
+
- Highlight communication style and engagement based on voice analysis and transcript.
|
| 505 |
+
- Note interview duration and participant dynamics.
|
| 506 |
+
|
| 507 |
+
**2. Communication and Vocal Dynamics**
|
| 508 |
+
- Evaluate vocal delivery (rate, fluency, confidence) with specific insights.
|
| 509 |
+
{voice_interpretation}
|
| 510 |
+
|
| 511 |
+
**3. Competency and Content**
|
| 512 |
+
- Assess leadership, problem-solving, communication, and adaptability with examples from the transcript.
|
| 513 |
+
- List strengths with quantifiable achievements where possible.
|
| 514 |
+
- Identify growth areas with constructive feedback.
|
| 515 |
+
|
| 516 |
+
**4. Role Fit and Potential**
|
| 517 |
+
- Analyze cultural fit, role readiness, and long-term growth potential.
|
| 518 |
+
- Align findings with typical role requirements (e.g., teamwork, technical skills).
|
| 519 |
+
|
| 520 |
+
**5. Recommendations**
|
| 521 |
+
- Provide prioritized development strategies (e.g., communication training, technical assessments).
|
| 522 |
+
- Suggest specific next steps for hiring managers (e.g., advance to next round, schedule tests).
|
| 523 |
+
"""
|
| 524 |
response = gemini_model.generate_content(prompt)
|
| 525 |
+
report_text = re.sub(r'[^\x00-\x7F]+|[()]+', '', response.text)
|
| 526 |
+
logger.info(f"Generated Gemini report: {report_text[:500]}...") # Log first 500 chars for debugging
|
| 527 |
+
return report_text
|
| 528 |
except Exception as e:
|
| 529 |
+
logger.error(f"Report generation failed: {str(e)}", exc_info=True)
|
| 530 |
+
return f"""**1. Executive Summary**
|
| 531 |
+
- Report generation failed due to processing error.
|
| 532 |
+
|
| 533 |
+
**2. Communication and Vocal Dynamics**
|
| 534 |
+
{generate_voice_interpretation(analysis_data.get('voice_analysis', {}))}
|
| 535 |
+
|
| 536 |
+
**3. Competency and Content**
|
| 537 |
+
- Strengths: Unable to assess due to error.
|
| 538 |
+
- Growth Areas: Recommend reprocessing the audio.
|
| 539 |
+
|
| 540 |
+
**4. Role Fit and Potential**
|
| 541 |
+
- Unable to assess due to error.
|
| 542 |
+
|
| 543 |
+
**5. Recommendations**
|
| 544 |
+
- Development: Investigate processing error.
|
| 545 |
+
- Next Steps: Retry analysis with corrected audio."""
|
| 546 |
|
| 547 |
def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text: str) -> bool:
|
| 548 |
try:
|
|
|
|
| 590 |
story.append(Paragraph("<b>HR Verdict:</b> Limited fit, significant improvement required.", body_text))
|
| 591 |
story.append(Spacer(1, 0.2*inch))
|
| 592 |
participants = sorted([p for p in set(u['speaker'] for u in analysis_data['transcript']) if p != 'Unknown'])
|
| 593 |
+
participants_str = ', '.join(participants)
|
| 594 |
table_data = [
|
| 595 |
['Metric', 'Value'],
|
| 596 |
['Interview Duration', f"{analysis_data['text_analysis']['total_duration']:.2f} seconds"],
|
| 597 |
['Speaker Turns', f"{analysis_data['text_analysis']['speaker_turns']}"],
|
| 598 |
+
['Participants', participants_str],
|
| 599 |
]
|
| 600 |
+
table = Table(table_data, colWidths=[2.0*inch, 4.0*inch])
|
| 601 |
table.setStyle(TableStyle([
|
| 602 |
('BACKGROUND', (0,0), (-1,0), colors.HexColor('#0050BC')),
|
| 603 |
('TEXTCOLOR', (0,0), (-1,0), colors.white),
|
|
|
|
| 609 |
('TOPPADDING', (0,0), (-1,0), 6),
|
| 610 |
('BACKGROUND', (0,1), (-1,-1), colors.HexColor('#F5F6FA')),
|
| 611 |
('GRID', (0,0), (-1,-1), 0.4, colors.HexColor('#DDE4EB')),
|
| 612 |
+
('LEFTPADDING', (1,3), (1,3), 10), # Add padding for Participants
|
| 613 |
+
('WORDWRAP', (1,3), (1,3), 'CJK'), # Enable word wrapping
|
| 614 |
]))
|
| 615 |
story.append(table)
|
| 616 |
story.append(Spacer(1, 0.3*inch))
|
|
|
|
| 695 |
clean_line = line.lstrip('-').strip()
|
| 696 |
if not clean_line:
|
| 697 |
continue
|
| 698 |
+
clean_line = re.sub(r'[^\w\s.,;:-]', '', clean_line) # Enhanced sanitization
|
| 699 |
if current_section == 'Competency':
|
| 700 |
+
if any(k in clean_line.lower() for k in ['leader', 'problem', 'commun', 'adapt', 'achieve', 'skill', 'success']):
|
| 701 |
current_subsection = 'Strengths'
|
| 702 |
+
elif any(k in clean_line.lower() for k in ['improv', 'grow', 'develop', 'weak', 'area']):
|
| 703 |
current_subsection = 'Growth Areas'
|
| 704 |
if current_subsection:
|
| 705 |
sections[current_section][current_subsection].append(clean_line)
|
| 706 |
elif current_section == 'Recommendations':
|
| 707 |
+
if any(k in clean_line.lower() for k in ['commun', 'tech', 'train', 'skill', 'pres']):
|
| 708 |
current_subsection = 'Development'
|
| 709 |
+
elif any(k in clean_line.lower() for k in ['adv', 'assess', 'next', 'schedule', 'mentor']):
|
| 710 |
current_subsection = 'Next Steps'
|
| 711 |
if current_subsection:
|
| 712 |
sections[current_section][current_subsection].append(clean_line)
|
| 713 |
else:
|
| 714 |
sections[current_section].append(clean_line)
|
| 715 |
+
elif current_section and line: # Handle non-bulleted lines
|
| 716 |
+
clean_line = re.sub(r'[^\w\s.,;:-]', '', line)
|
| 717 |
+
sections[current_section].append(clean_line)
|
| 718 |
|
| 719 |
# Executive Summary
|
| 720 |
story.append(Paragraph("2. Executive Summary", h2))
|
|
|
|
| 722 |
for line in sections['Executive Summary']:
|
| 723 |
story.append(Paragraph(line, bullet_style))
|
| 724 |
else:
|
| 725 |
+
story.append(Paragraph("Candidate showed moderate engagement; further data needed for full assessment.", bullet_style))
|
| 726 |
+
story.append(Paragraph(f"Interview lasted {analysis_data['text_analysis']['total_duration']:.2f} seconds with {analysis_data['text_analysis']['speaker_turns']} turns.", bullet_style))
|
| 727 |
story.append(Spacer(1, 0.15*inch))
|
| 728 |
|
| 729 |
# Competency and Content
|
|
|
|
| 733 |
for line in sections['Competency']['Strengths']:
|
| 734 |
story.append(Paragraph(line, bullet_style))
|
| 735 |
else:
|
| 736 |
+
story.append(Paragraph("Strengths not fully assessed; candidate demonstrated consistent communication.", bullet_style))
|
| 737 |
story.append(Spacer(1, 0.1*inch))
|
| 738 |
story.append(Paragraph("Growth Areas", h3))
|
| 739 |
if sections['Competency']['Growth Areas']:
|
| 740 |
for line in sections['Competency']['Growth Areas']:
|
| 741 |
story.append(Paragraph(line, bullet_style))
|
| 742 |
else:
|
| 743 |
+
story.append(Paragraph("Consider enhancing specificity in responses to highlight expertise.", bullet_style))
|
| 744 |
story.append(Spacer(1, 0.15*inch))
|
| 745 |
|
| 746 |
# Role Fit
|
|
|
|
| 749 |
for line in sections['Role Fit']:
|
| 750 |
story.append(Paragraph(line, bullet_style))
|
| 751 |
else:
|
| 752 |
+
story.append(Paragraph("Potential for role fit exists; further evaluation needed to confirm alignment.", bullet_style))
|
| 753 |
story.append(Spacer(1, 0.15*inch))
|
| 754 |
|
| 755 |
# Recommendations
|
|
|
|
| 759 |
for line in sections['Recommendations']['Development']:
|
| 760 |
story.append(Paragraph(line, bullet_style))
|
| 761 |
else:
|
| 762 |
+
story.append(Paragraph("Enroll in communication training to reduce filler words.", bullet_style))
|
| 763 |
story.append(Spacer(1, 0.1*inch))
|
| 764 |
story.append(Paragraph("Next Steps for Hiring Managers", h3))
|
| 765 |
if sections['Recommendations']['Next Steps']:
|
| 766 |
for line in sections['Recommendations']['Next Steps']:
|
| 767 |
story.append(Paragraph(line, bullet_style))
|
| 768 |
else:
|
| 769 |
+
story.append(Paragraph("Schedule a technical assessment to evaluate role-specific skills.", bullet_style))
|
| 770 |
story.append(Spacer(1, 0.15*inch))
|
| 771 |
story.append(Paragraph("This report provides actionable insights to support hiring and candidate development.", body_text))
|
| 772 |
|
|
|
|
| 809 |
for utterance in transcript['utterances']:
|
| 810 |
utterance['prosodic_features'] = extract_prosodic_features(wav_file, utterance['start'], utterance['end'])
|
| 811 |
utterances_with_speakers = identify_speakers(transcript, wav_file)
|
| 812 |
+
if not utterances_with_speakers:
|
| 813 |
+
raise ValueError("No utterances identified in the audio")
|
| 814 |
clf, vectorizer, scaler = None, None, None
|
| 815 |
if os.path.exists(os.path.join(OUTPUT_DIR, 'role_classifier.pkl')):
|
| 816 |
clf = joblib.load(os.path.join(OUTPUT_DIR, 'role_classifier.pkl'))
|
|
|
|
| 822 |
voice_analysis = analyze_interviewee_voice(wav_file, classified_utterances)
|
| 823 |
analysis_data = {
|
| 824 |
'transcript': classified_utterances,
|
| 825 |
+
'speakers': list(set(u['speaker'] for u in classified_utterances if u['speaker'] != 'Unknown')),
|
| 826 |
'voice_analysis': voice_analysis,
|
| 827 |
'text_analysis': {
|
| 828 |
'total_duration': sum(u['prosodic_features']['duration'] for u in classified_utterances),
|