norhan12 commited on
Commit
87066d1
·
verified ·
1 Parent(s): dda086c

Update process_interview.py

Browse files
Files changed (1) hide show
  1. process_interview.py +541 -535
process_interview.py CHANGED
@@ -10,34 +10,35 @@ import wave
10
  from nemo.collections.asr.models import EncDecSpeakerLabelModel
11
  from pinecone import Pinecone, ServerlessSpec
12
  import librosa
 
 
 
 
13
  import re
14
- from typing import Dict, List
15
  import logging
16
- import tempfile
17
  from reportlab.lib.pagesizes import letter
18
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
19
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
20
  from reportlab.lib.units import inch
21
  from reportlab.lib import colors
22
- import matplotlib.pyplot as plt
23
- import matplotlib
24
- matplotlib.use('Agg')
25
- from reportlab.platypus import Image
26
- import io
27
  import spacy
28
  import google.generativeai as genai
 
29
  from concurrent.futures import ThreadPoolExecutor
30
- import urllib3 # <-- تم الإصلاح: إضافة استيراد urllib3
31
 
32
- # إعدادات التسجيل (Logging)
33
  logging.basicConfig(level=logging.INFO)
34
  logger = logging.getLogger(__name__)
35
- # تقليل verbosity من مكتبة NeMo
36
- logging.getLogger("nemo_logging").setLevel(logging.WARNING)
37
- logging.getLogger("nemo").setLevel(logging.WARNING)
38
-
39
 
40
  # Configuration
 
41
  OUTPUT_DIR = "./processed_audio"
42
  os.makedirs(OUTPUT_DIR, exist_ok=True)
43
 
@@ -46,34 +47,9 @@ PINECONE_KEY = os.getenv("PINECONE_KEY")
46
  ASSEMBLYAI_KEY = os.getenv("ASSEMBLYAI_KEY")
47
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
48
 
49
# Resilient download helper with retry + exponential backoff.
def download_audio_from_url(url: str, retries=3) -> str:
    """Download an audio file from *url* to a temporary local path.

    Retries up to *retries* times on network errors, sleeping
    2**attempt seconds between attempts (exponential backoff).

    Returns:
        Path of the downloaded temporary file.

    Raises:
        requests.exceptions.RequestException / urllib3.exceptions.ProtocolError:
            re-raised after the final failed attempt.
    """
    temp_dir = tempfile.gettempdir()
    temp_path = os.path.join(temp_dir, f"{uuid.uuid4()}.tmp_audio")
    logger.info(f"Downloading audio from {url} to {temp_path}")

    for attempt in range(retries):
        try:
            # Stream the response to disk in chunks so large recordings
            # never have to fit in memory.
            with requests.get(url, stream=True, timeout=60) as r:
                r.raise_for_status()
                with open(temp_path, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)
            logger.info("Download completed successfully.")
            return temp_path
        except (requests.exceptions.RequestException, urllib3.exceptions.ProtocolError) as e:
            logger.warning(f"Attempt {attempt + 1}/{retries} failed: {e}. Retrying...")
            if attempt < retries - 1:
                time.sleep(2 ** attempt)  # backoff: 1s, 2s, 4s, ...
            else:
                logger.error(f"Failed to download audio after {retries} attempts.")
                # Fix: don't leak a partially written temp file on final failure.
                try:
                    os.remove(temp_path)
                except OSError:
                    pass
                raise
    # Defensive fallback; unreachable because the last attempt re-raises.
    raise Exception(f"Failed to download audio from URL {url}")
73
-
74
 
 
75
  def initialize_services():
76
- """Initializes Pinecone and Gemini services."""
77
  try:
78
  pc = Pinecone(api_key=PINECONE_KEY)
79
  index_name = "interview-speaker-embeddings"
@@ -85,23 +61,30 @@ def initialize_services():
85
  spec=ServerlessSpec(cloud="aws", region="us-east-1")
86
  )
87
  index = pc.Index(index_name)
88
-
89
  genai.configure(api_key=GEMINI_API_KEY)
90
  gemini_model = genai.GenerativeModel('gemini-1.5-flash')
 
91
  return index, gemini_model
92
  except Exception as e:
93
  logger.error(f"Error initializing services: {str(e)}")
94
  raise
95
 
 
96
  index, gemini_model = initialize_services()
 
 
97
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
98
  logger.info(f"Using device: {device}")
99
 
 
100
  def load_speaker_model():
101
- """Loads the speaker verification model."""
102
  try:
103
- # يضمن عدم استخدام عدد كبير جدًا من الخيوط
104
- torch.set_num_threads(1)
 
 
 
105
  model = EncDecSpeakerLabelModel.from_pretrained(
106
  "nvidia/speakerverification_en_titanet_large",
107
  map_location=torch.device('cpu')
@@ -112,19 +95,30 @@ def load_speaker_model():
112
  logger.error(f"Model loading failed: {str(e)}")
113
  raise RuntimeError("Could not load speaker verification model")
114
 
 
 
115
def load_models():
    """Load every model the pipeline needs.

    Returns a (speaker_model, nlp) tuple: the NeMo speaker-verification
    model and the small English spaCy pipeline.
    """
    return load_speaker_model(), spacy.load("en_core_web_sm")
120
 
121
- speaker_model, nlp = load_models()
 
 
 
 
122
 
 
 
 
 
 
123
def convert_to_wav(audio_path: str, output_dir: str = OUTPUT_DIR) -> str:
    """Converts any audio file to a 16kHz mono WAV file.

    Writes the result under *output_dir* with a random UUID name and
    returns its path; re-raises on any conversion failure.
    """
    try:
        audio = AudioSegment.from_file(audio_path)
        # Normalize to 16 kHz mono — the format the downstream speaker
        # model and librosa analysis expect.
        audio = audio.set_frame_rate(16000).set_channels(1)
        wav_file = os.path.join(output_dir, f"{uuid.uuid4()}.wav")
        audio.export(wav_file, format="wav")
        return wav_file
    except Exception as e:
        logger.error(f"Audio conversion failed: {str(e)}")
        raise
134
 
 
135
def extract_prosodic_features(audio_path: str, start_ms: int, end_ms: int) -> Dict:
    """Extracts prosodic features from an audio segment.

    Loads only the [start_ms, end_ms] window of *audio_path* (times in
    milliseconds) at 16 kHz and returns duration plus pitch/intensity
    summary statistics.  On failure a dict of zeros with the same keys
    is returned so callers never see an exception.
    """
    try:
        y, sr = librosa.load(audio_path, sr=16000, offset=start_ms/1000.0, duration=(end_ms-start_ms)/1000.0)

        pitches, _ = librosa.piptrack(y=y, sr=sr)
        # piptrack reports 0 for unvoiced bins — keep voiced estimates only.
        pitches = pitches[pitches > 0]

        # Frame-wise RMS energy as an intensity proxy.
        rms = librosa.feature.rms(y=y)[0]

        return {
            'duration': (end_ms - start_ms) / 1000,
            'mean_pitch': float(np.mean(pitches)) if len(pitches) > 0 else 0.0,
            'pitch_sd': float(np.std(pitches)) if len(pitches) > 0 else 0.0,
            'intensityMean': float(np.mean(rms)),
            'intensitySD': float(np.std(rms)),
        }
    except Exception as e:
        logger.error(f"Feature extraction failed: {str(e)}")
        return {'duration': 0, 'mean_pitch': 0, 'pitch_sd': 0, 'intensityMean': 0, 'intensitySD': 0}
 
 
 
 
 
 
 
 
 
 
 
155
 
156
def transcribe(audio_path: str) -> Dict:
    """Transcribes audio using AssemblyAI and enables speaker labels.

    Uploads the file, starts a transcription job with diarization
    ("speaker_labels"), then polls every 5 seconds until completion.

    Returns:
        The raw transcript JSON from AssemblyAI.

    Raises:
        ValueError: job completed but produced no utterances.
        Exception: job ended in 'error' status, or any network failure.
    """
    try:
        headers = {"authorization": ASSEMBLYAI_KEY}

        # Step 1: upload the raw audio bytes.
        with open(audio_path, 'rb') as f:
            upload_response = requests.post("https://api.assemblyai.com/v2/upload", headers=headers, data=f)
        audio_url = upload_response.json()['upload_url']

        # Step 2: create the transcription job with diarization enabled.
        transcript_request = {
            "audio_url": audio_url,
            "speaker_labels": True,
        }
        transcript_response = requests.post("https://api.assemblyai.com/v2/transcript", json=transcript_request, headers=headers)
        transcript_id = transcript_response.json()['id']

        # Step 3: poll until the job completes or errors.
        while True:
            result = requests.get(f"https://api.assemblyai.com/v2/transcript/{transcript_id}", headers=headers).json()
            if result['status'] == 'completed':
                # Guard: downstream code indexes result['utterances'].
                if not result.get('utterances'):
                    raise ValueError("Transcription completed but no utterances were returned. The audio may be too short or silent.")
                return result
            elif result['status'] == 'error':
                raise Exception(f"Transcription failed: {result['error']}")
            time.sleep(5)
    except Exception as e:
        logger.error(f"Transcription failed: {str(e)}")
        raise
185
 
186
def process_utterance(utterance, full_audio):
    """Processes a single utterance to get a speaker embedding.

    Slices [start, end] (milliseconds) out of *full_audio* (a pydub
    AudioSegment), exports it to a temporary WAV and runs the speaker
    model on it under no_grad (inference only).

    Returns the utterance dict with an added 'embedding' key; on any
    failure a zero vector is substituted so downstream code keeps working.
    """
    try:
        start, end = utterance['start'], utterance['end']
        segment = full_audio[start:end]

        with tempfile.NamedTemporaryFile(suffix=".wav", delete=True) as temp_f:
            segment.export(temp_f.name, format="wav")
            with torch.no_grad():
                embedding = speaker_model.get_embedding(temp_f.name).cpu().numpy().flatten()

        return {**utterance, 'embedding': embedding}
    except Exception as e:
        logger.error(f"Utterance processing failed: {str(e)}")
        # Fallback assumes the model's embedding dim is 192 (TitaNet-large)
        # — TODO confirm against the loaded model's output size.
        return {**utterance, 'embedding': np.zeros(192)}  # Return zero vector on failure
 
 
 
 
 
 
201
 
202
def identify_speakers(transcript: Dict, wav_file: str) -> List[Dict]:
    """Identifies unique speakers from utterances.

    Computes an embedding for every utterance in parallel, then maps
    AssemblyAI's letter labels (A, B, C...) to stable names
    'Speaker_1', 'Speaker_2', ... in order of first appearance.
    Re-raises on failure.
    """
    try:
        full_audio = AudioSegment.from_wav(wav_file)
        utterances = transcript['utterances']

        # Embed all utterances concurrently; futures list preserves
        # the original utterance order.
        with ThreadPoolExecutor(max_workers=4) as executor:
            futures = [executor.submit(process_utterance, u, full_audio) for u in utterances]
            processed_utterances = [f.result() for f in futures]

        # Map AssemblyAI speaker labels (A, B, C...) to unique speaker names
        speaker_map = {}
        unique_speaker_count = 0

        for u in processed_utterances:
            assembly_speaker = u['speaker']
            if assembly_speaker not in speaker_map:
                unique_speaker_count += 1
                speaker_map[assembly_speaker] = f"Speaker_{unique_speaker_count}"
            u['speaker_name'] = speaker_map[assembly_speaker]

        return processed_utterances
    except Exception as e:
        logger.error(f"Speaker identification failed: {str(e)}")
        raise
228
 
229
# Heuristic role classification: the interviewer is whoever asks the
# most questions.
def classify_roles(utterances: List[Dict]) -> List[Dict]:
    """Label each utterance as 'Interviewer' or 'Interviewee'.

    Each speaker accumulates a "question score": +1 for every utterance
    ending in '?' and +1 for every interrogative word it contains.  The
    speaker with the highest score is tagged 'Interviewer', everyone
    else 'Interviewee'.  If classification fails, every utterance gets
    the role 'Unknown'.
    """
    log = logging.getLogger(__name__)
    try:
        question_words = {'what', 'why', 'how', 'when', 'where', 'who', 'which', 'tell', 'describe', 'explain'}
        speaker_stats: Dict[str, Dict[str, int]] = {}

        for utt in utterances:
            name = utt['speaker_name']
            stats = speaker_stats.setdefault(name, {'question_score': 0, 'utterance_count': 0})
            stats['utterance_count'] += 1

            lowered = utt['text'].lower()
            # Ending in a question mark is the strongest signal.
            if lowered.endswith('?'):
                stats['question_score'] += 1
            # One point per interrogative word present.
            tokens = lowered.split()
            for qw in question_words:
                if qw in tokens:
                    stats['question_score'] += 1

        if not speaker_stats:
            # No speakers found — nothing to classify.
            return utterances

        interviewer_speaker = max(speaker_stats, key=lambda s: speaker_stats[s]['question_score'])

        log.info(f"Speaker stats for role classification: {speaker_stats}")
        log.info(f"Identified Interviewer: {interviewer_speaker}")

        for utt in utterances:
            utt['role'] = 'Interviewer' if utt['speaker_name'] == interviewer_speaker else 'Interviewee'

        return utterances
    except Exception as e:
        log.error(f"Role classification failed: {str(e)}")
        # Fall back to a default role so downstream code still runs.
        for utt in utterances:
            utt['role'] = 'Unknown'
        return utterances
279
 
280
 
281
def analyze_interviewee_voice(audio_path: str, utterances: List[Dict]) -> Dict:
    """Analyzes the voice characteristics of the interviewee.

    Aggregates speaking rate, filler-word usage, word repetition, pitch
    and intensity over every utterance tagged role == 'Interviewee',
    then derives heuristic anxiety/confidence/hesitation scores and
    coarse interpretations.  Returns {'error': ...} when analysis is
    impossible instead of raising.
    """
    try:
        interviewee_utterances = [u for u in utterances if u.get('role') == 'Interviewee']
        if not interviewee_utterances:
            return {'error': 'No interviewee utterances found'}

        y, sr = librosa.load(audio_path, sr=16000)

        # Extract the candidate's audio segments (utterance times are ms).
        segments = [y[int(u['start']*sr/1000):int(u['end']*sr/1000)] for u in interviewee_utterances]

        total_duration = sum(u['prosodic_features']['duration'] for u in interviewee_utterances)
        total_words = sum(len(u['text'].split()) for u in interviewee_utterances)
        speaking_rate = total_words / (total_duration / 60) if total_duration > 0 else 0  # Words per minute

        # Filler-word analysis.  NOTE(review): multi-word fillers like
        # 'you know' / 'i mean' can never match a single split() token.
        filler_words = {'um', 'uh', 'like', 'you know', 'so', 'i mean', 'actually'}
        filler_count = sum(1 for u in interviewee_utterances for word in u['text'].lower().split() if word in filler_words)
        filler_ratio = filler_count / total_words if total_words > 0 else 0

        # Word-repetition analysis: share of non-unique tokens.
        all_words = ' '.join(u['text'].lower() for u in interviewee_utterances).split()
        repetition_score = (len(all_words) - len(set(all_words))) / len(all_words) if all_words else 0

        # Pitch (pyin; NaN for unvoiced frames) and intensity (RMS)
        # pooled across all interviewee segments.
        pitches = np.concatenate([librosa.pyin(s, fmin=librosa.note_to_hz('C2'), fmax=librosa.note_to_hz('C7'))[0] for s in segments if len(s)>0])
        pitches = pitches[~np.isnan(pitches)]

        intensities = np.concatenate([librosa.feature.rms(y=s)[0] for s in segments if len(s)>0])

        pitch_mean = np.mean(pitches) if len(pitches) > 0 else 0
        pitch_std = np.std(pitches) if len(pitches) > 0 else 0
        intensity_mean = np.mean(intensities) if len(intensities) > 0 else 0
        intensity_std = np.std(intensities) if len(intensities) > 0 else 0

        # Composite scores (simple normalizations against ad-hoc constants).
        anxiety_score = (pitch_std / 150) if pitch_std > 0 else 0  # simple normalization
        confidence_score = 1 - (intensity_std * 5) if intensity_std > 0 else 1  # simple normalization
        hesitation_score = (filler_ratio + repetition_score) / 2

        # Clamp scores to [0, 1] (hesitation is left unclamped).
        anxiety_score = max(0, min(1, anxiety_score))
        confidence_score = max(0, min(1, confidence_score))

        return {
            'speaking_rate': float(round(speaking_rate, 2)),
            'filler_ratio': float(round(filler_ratio, 4)),
            'repetition_score': float(round(repetition_score, 4)),
            'pitch_analysis': {'mean': float(round(pitch_mean, 2)), 'std_dev': float(round(pitch_std, 2))},
            'intensity_analysis': {'mean': float(round(intensity_mean, 4)), 'std_dev': float(round(intensity_std, 4))},
            'composite_scores': {
                'anxiety': float(round(anxiety_score, 4)),
                'confidence': float(round(confidence_score, 4)),
                'hesitation': float(round(hesitation_score, 4))
            },
            'interpretation': {
                'anxiety_level': 'high' if anxiety_score > 0.6 else 'moderate' if anxiety_score > 0.3 else 'low',
                'confidence_level': 'high' if confidence_score > 0.7 else 'moderate' if confidence_score > 0.4 else 'low',
                'fluency_level': 'disfluent' if hesitation_score > 0.1 else 'moderate' if hesitation_score > 0.05 else 'fluent'
            }
        }
    except Exception as e:
        logger.error(f"Voice analysis failed: {str(e)}", exc_info=True)
        return {'error': str(e)}
346
 
347
def generate_anxiety_confidence_chart(composite_scores: Dict, chart_path_or_buffer):
    """Generates a bar chart for anxiety and confidence scores.

    *chart_path_or_buffer* may be a filesystem path or any file-like
    object accepted by matplotlib's savefig (e.g. io.BytesIO).
    Errors are logged, never raised.
    """
    try:
        labels = ['Anxiety', 'Confidence']
        scores = [composite_scores.get('anxiety', 0), composite_scores.get('confidence', 0)]

        fig, ax = plt.subplots(figsize=(5, 3.5))
        bars = ax.bar(labels, scores, color=['#FF5252', '#26A69A'], edgecolor='black', width=0.45)

        ax.set_ylabel('Score (0 to 1)', fontsize=12)
        ax.set_title('Vocal Dynamics: Anxiety vs. Confidence', fontsize=14, pad=15)
        ax.set_ylim(0, 1.1)

        # Annotate each bar with its value just above the top edge.
        for bar in bars:
            height = bar.get_height()
            ax.text(bar.get_x() + bar.get_width()/2, height + 0.02, f"{height:.2f}",
                    ha='center', va='bottom', color='black', fontweight='bold', fontsize=11)

        ax.grid(True, axis='y', linestyle='--', alpha=0.7)
        plt.tight_layout()
        plt.savefig(chart_path_or_buffer, format='png', bbox_inches='tight', dpi=300)
        # Close explicitly so repeated calls don't accumulate figures.
        plt.close(fig)
    except Exception as e:
        logger.error(f"Error generating chart: {str(e)}")
371
-
372
def calculate_acceptance_probability(analysis_data: Dict) -> float:
    """Return a 0-100 hiring-suitability score from the voice analysis.

    Combines confidence, inverted anxiety, fluency (1 - hesitation) and
    a banded speaking-rate score using fixed weights, normalizes the
    weighted sum, clamps it to [0, 1] and scales to a percentage with
    two decimals.  Returns 0.0 when the voice analysis carries an error.
    """
    voice = analysis_data.get('voice_analysis', {})
    if 'error' in voice:
        return 0.0

    # Fixed weights; anxiety contributes negatively, hence abs() below.
    w_confidence, w_anxiety, w_fluency, w_speaking_rate = 0.4, -0.2, 0.2, 0.2

    composite = voice.get('composite_scores', {})
    confidence_score = composite.get('confidence', 0.5)
    anxiety_score = composite.get('anxiety', 0.5)
    fluency_score = 1 - composite.get('hesitation', 0.5)

    # Speaking-rate banding: 120-180 WPM ideal, 100-200 acceptable, else poor.
    rate = voice.get('speaking_rate', 150)
    if 120 <= rate <= 180:
        speaking_rate_score = 1.0
    elif 100 <= rate < 120 or 180 < rate <= 200:
        speaking_rate_score = 0.7
    else:
        speaking_rate_score = 0.4

    raw_score = (confidence_score * w_confidence
                 + (1 - anxiety_score) * abs(w_anxiety)
                 + fluency_score * w_fluency
                 + speaking_rate_score * w_speaking_rate)

    max_possible_score = w_confidence + abs(w_anxiety) + w_fluency + w_speaking_rate
    normalized_score = raw_score / max_possible_score if max_possible_score != 0 else 0

    acceptance_probability = max(0.0, min(1.0, normalized_score))
    return float(f"{acceptance_probability * 100:.2f}")
405
-
406
# Turns the numeric voice analysis into HR-readable markdown bullets.
def generate_voice_interpretation(voice: Dict) -> str:
    """Render the voice-analysis dict as human-readable markdown bullets.

    Returns a single fallback line when *voice* is empty/falsy or
    carries an 'error' key.
    """
    if not voice or 'error' in voice:
        return "- Vocal analysis could not be performed as no interviewee was identified."

    interp = voice.get('interpretation', {})
    scores = voice.get('composite_scores', {})

    confidence = interp.get('confidence_level', 'N/A').capitalize()
    anxiety = interp.get('anxiety_level', 'N/A').capitalize()
    fluency = interp.get('fluency_level', 'N/A').capitalize()
    rate = voice.get('speaking_rate', 0)

    return "\n".join([
        f"- **Confidence:** {confidence} (Score: {scores.get('confidence', 0):.2f}). The candidate's vocal tone suggests their level of assurance.",
        f"- **Anxiety:** {anxiety} (Score: {scores.get('anxiety', 0):.2f}). Vocal stress indicators point to their comfort level during the interview.",
        f"- **Fluency & Hesitation:** {fluency} (Hesitation Score: {scores.get('hesitation', 0):.2f}). Reflects the smoothness of speech and use of filler words.",
        f"- **Speaking Rate:** {rate:.0f} words per minute. A normal conversational pace is typically between 120-180 WPM."
    ])
427
 
428
 
429
def generate_report(analysis_data: Dict) -> str:
    """Generates a comprehensive report using Gemini AI.

    Builds a structured HR prompt from the analysis results (suitability
    score, voice interpretation, sample candidate responses) and asks the
    Gemini model for the narrative.  Returns the model text, or an error
    string — this function never raises.

    NOTE(review): the exact indentation of the prompt literal was lost in
    the diff rendering; whitespace below is best-effort.
    """
    try:
        voice_interpretation = generate_voice_interpretation(analysis_data.get('voice_analysis', {}))

        # At most four sample answers keep the prompt short.
        interviewee_responses = [f"- {u['text']}" for u in analysis_data['transcript'] if u.get('role') == 'Interviewee'][:4]

        acceptance_prob = analysis_data.get('acceptance_probability')
        acceptance_line = ""
        if acceptance_prob is not None:
            acceptance_line = f"\n**Hiring Suitability Score: {acceptance_prob:.2f}%**\n"
            if acceptance_prob >= 80: acceptance_line += "HR Verdict: Outstanding candidate. Highly recommended for advancement."
            elif acceptance_prob >= 60: acceptance_line += "HR Verdict: Strong candidate. Suitable for further evaluation."
            elif acceptance_prob >= 40: acceptance_line += "HR Verdict: Moderate potential. Requires additional assessment."
            else: acceptance_line += "HR Verdict: Limited fit for the role at this time."

        prompt = f"""
You are EvalBot, a senior HR consultant. Generate a polished, concise, and engaging interview analysis report. Use a professional tone, clear headings, and bullet points.

{acceptance_line}

**1. Executive Summary**
- Provide a concise overview of the candidate's performance, key metrics, and hiring potential.
- Interview length: {analysis_data['text_analysis']['total_duration']:.2f} seconds
- Participants: {', '.join(analysis_data['speakers'])}

**2. Communication and Vocal Dynamics**
- Evaluate vocal delivery based on the following analysis. Offer HR insights on its impact.
{voice_interpretation}

**3. Competency and Content Evaluation**
- Based on the sample responses below, assess competencies like leadership, problem-solving, and self-awareness.
- List strengths and growth areas separately, with specific examples.
- Sample Responses from Candidate:
{' '.join(interviewee_responses) if interviewee_responses else "No responses from interviewee were identified."}

**4. Strategic HR Recommendations**
- Provide prioritized strategies for the candidate's growth.
- List clear next steps for hiring managers (e.g., advance, further technical assessment, reject).
"""
        response = gemini_model.generate_content(prompt)
        return response.text
    except Exception as e:
        logger.error(f"Report generation failed: {str(e)}")
        return f"Error generating report: {str(e)}"
474
 
 
 
475
  def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text: str):
476
  try:
477
- doc = SimpleDocTemplate(output_path, pagesize=letter,
478
- rightMargin=0.7*inch, leftMargin=0.7*inch,
479
- topMargin=0.9*inch, bottomMargin=0.9*inch)
480
  styles = getSampleStyleSheet()
481
- h1 = ParagraphStyle(name='Heading1', fontSize=22, leading=26, spaceAfter=20, alignment=1, textColor=colors.HexColor('#003087'), fontName='Helvetica-Bold')
482
- h2 = ParagraphStyle(name='Heading2', fontSize=15, leading=18, spaceBefore=14, spaceAfter=8, textColor=colors.HexColor('#0050BC'), fontName='Helvetica-Bold')
483
- h3 = ParagraphStyle(name='Heading3', fontSize=11, leading=14, spaceBefore=10, spaceAfter=6, textColor=colors.HexColor('#3F7CFF'), fontName='Helvetica')
484
- body_text = ParagraphStyle(name='BodyText', fontSize=10, leading=13, spaceAfter=8, fontName='Helvetica', textColor=colors.HexColor('#333333'))
485
- bullet_style = ParagraphStyle(name='Bullet', parent=body_text, leftIndent=20, bulletIndent=10, fontName='Helvetica', bulletFontName='Helvetica', bulletFontSize=10)
486
-
 
 
 
 
 
487
  story = []
488
 
489
- def header_footer(canvas, doc):
490
- canvas.saveState()
491
- canvas.setFont('Helvetica', 8)
492
- canvas.setFillColor(colors.HexColor('#666666'))
493
- canvas.drawString(doc.leftMargin, 0.4 * inch, f"Page {doc.page} | EvalBot HR Interview Report | Confidential")
494
- canvas.setStrokeColor(colors.HexColor('#0050BC'))
495
- canvas.setLineWidth(1)
496
- canvas.line(doc.leftMargin, doc.height + 0.85*inch, doc.width + doc.leftMargin, doc.height + 0.85*inch)
497
- canvas.setFont('Helvetica-Bold', 10)
498
- canvas.drawString(doc.leftMargin, doc.height + 0.9*inch, "Candidate Interview Analysis")
499
- canvas.drawRightString(doc.width + doc.leftMargin, doc.height + 0.9*inch, time.strftime('%B %d, %Y'))
500
- canvas.restoreState()
501
-
502
- # Title Page
503
- story.append(Paragraph("Candidate Interview Analysis", h1))
504
- story.append(Paragraph(f"Generated: {time.strftime('%B %d, %Y')}", ParagraphStyle(name='Date', alignment=1, fontSize=10, textColor=colors.HexColor('#666666'), fontName='Helvetica')))
505
- story.append(Spacer(1, 0.5 * inch))
506
- acceptance_prob = analysis_data.get('acceptance_probability')
507
- if acceptance_prob is not None:
508
- story.append(Paragraph("Hiring Suitability Snapshot", h2))
509
- prob_color = colors.HexColor('#2E7D32') if acceptance_prob >= 80 else (colors.HexColor('#F57C00') if acceptance_prob >= 60 else colors.HexColor('#D32F2F'))
510
- story.append(Paragraph(f"Suitability Score: <font size=16 color='{prob_color.hexval()}'><b>{acceptance_prob:.2f}%</b></font>",
511
- ParagraphStyle(name='Prob', fontSize=12, spaceAfter=12, alignment=1, fontName='Helvetica-Bold')))
512
- if acceptance_prob >= 80:
513
- story.append(Paragraph("<b>HR Verdict:</b> Outstanding candidate, highly recommended for immediate advancement.", body_text))
514
- elif acceptance_prob >= 60:
515
- story.append(Paragraph("<b>HR Verdict:</b> Strong candidate, suitable for further evaluation with targeted development.", body_text))
516
- elif acceptance_prob >= 40:
517
- story.append(Paragraph("<b>HR Verdict:</b> Moderate potential, requires additional assessment and skill-building.", body_text))
518
- else:
519
- story.append(Paragraph("<b>HR Verdict:</b> Limited fit, significant improvement needed for role alignment.", body_text))
520
- story.append(Spacer(1, 0.3 * inch))
521
- table_data = [
522
- ['Metric', 'Value'],
523
- ['Interview Duration', f"{analysis_data['text_analysis']['total_duration']:.2f} seconds"],
524
- ['Speaker Turns', f"{analysis_data['text_analysis']['speaker_turns']}"],
525
- ['Participants', ', '.join(sorted(analysis_data['speakers']))]
526
- ]
527
- table = Table(table_data, colWidths=[2.2*inch, 3.8*inch])
528
- table.setStyle(TableStyle([
529
- ('BACKGROUND', (0,0), (-1,0), colors.HexColor('#0050BC')),
530
- ('TEXTCOLOR', (0,0), (-1,0), colors.white),
531
- ('ALIGN', (0,0), (-1,-1), 'LEFT'),
532
- ('VALIGN', (0,0), (-1,-1), 'MIDDLE'),
533
- ('FONTNAME', (0,0), (-1,0), 'Helvetica-Bold'),
534
- ('FONTSIZE', (0,0), (-1,-1), 9),
535
- ('BOTTOMPADDING', (0,0), (-1,0), 10),
536
- ('TOPPADDING', (0,0), (-1,0), 10),
537
- ('BACKGROUND', (0,1), (-1,-1), colors.HexColor('#F5F6FA')),
538
- ('GRID', (0,0), (-1,-1), 0.5, colors.HexColor('#DDE4EB'))
539
- ]))
540
- story.append(table)
541
- story.append(Spacer(1, 0.4 * inch))
542
- story.append(Paragraph("Prepared by: EvalBot - AI-Powered HR Analysis", body_text))
543
- story.append(PageBreak())
544
-
545
- # Detailed Analysis
546
- story.append(Paragraph("Detailed Candidate Evaluation", h1))
547
-
548
- # Communication and Vocal Dynamics
549
- story.append(Paragraph("1. Communication & Vocal Dynamics", h2))
550
  voice_analysis = analysis_data.get('voice_analysis', {})
 
551
  if voice_analysis and 'error' not in voice_analysis:
 
552
  table_data = [
553
- ['Metric', 'Value', 'HR Insight'],
554
- ['Speaking Rate', f"{voice_analysis.get('speaking_rate', 0):.2f} words/sec", 'Benchmark: 2.0-3.0 wps; impacts clarity'],
555
- ['Filler Words', f"{voice_analysis.get('filler_ratio', 0) * 100:.1f}%", 'High usage reduces credibility'],
556
- ['Anxiety', voice_analysis.get('interpretation', {}).get('anxiety_level', 'N/A'), f"Score: {voice_analysis.get('composite_scores', {}).get('anxiety', 0):.3f}; stress response"],
557
- ['Confidence', voice_analysis.get('interpretation', {}).get('confidence_level', 'N/A'), f"Score: {voice_analysis.get('composite_scores', {}).get('confidence', 0):.3f}; vocal strength"],
558
- ['Fluency', voice_analysis.get('interpretation', {}).get('fluency_level', 'N/A'), 'Drives engagement']
 
 
 
559
  ]
560
- table = Table(table_data, colWidths=[1.7*inch, 1.2*inch, 3.1*inch])
561
- table.setStyle(TableStyle([
562
- ('BACKGROUND', (0,0), (-1,0), colors.HexColor('#0050BC')),
563
- ('TEXTCOLOR', (0,0), (-1,0), colors.white),
564
- ('ALIGN', (0,0), (-1,-1), 'LEFT'),
565
- ('VALIGN', (0,0), (-1,-1), 'MIDDLE'),
566
- ('FONTNAME', (0,0), (-1,0), 'Helvetica-Bold'),
567
- ('FONTSIZE', (0,0), (-1,-1), 9),
568
- ('BOTTOMPADDING', (0,0), (-1,0), 10),
569
- ('TOPPADDING', (0,0), (-1,0), 10),
570
- ('BACKGROUND', (0,1), (-1,-1), colors.HexColor('#F5F6FA')),
571
- ('GRID', (0,0), (-1,-1), 0.5, colors.HexColor('#DDE4EB'))
572
- ]))
 
 
 
 
573
  story.append(table)
574
  story.append(Spacer(1, 0.2 * inch))
575
- chart_buffer = io.BytesIO()
576
- generate_anxiety_confidence_chart(voice_analysis.get('composite_scores', {}), chart_buffer)
577
- chart_buffer.seek(0)
578
- img = Image(chart_buffer, width=4.8*inch, height=3.2*inch)
579
- img.hAlign = 'CENTER'
580
- story.append(img)
581
- else:
582
- story.append(Paragraph("Vocal analysis unavailable.", body_text))
583
- story.append(Spacer(1, 0.3 * inch))
584
 
585
- # Parse Gemini Report
586
- sections = {
587
- "Executive Summary": [],
588
- "Communication and Vocal Dynamics": [],
589
- "Competency and Content Evaluation": {"Strengths": [], "Growth Areas": []},
590
- "Role Fit and Growth Potential": [],
591
- "Strategic HR Recommendations": {"Development Priorities": [], "Next Steps": []}
592
- }
593
- report_parts = re.split(r'(\s*\\\s*\d\.\s*.?\s\\)', gemini_report_text)
594
- current_section = None
595
- for part in report_parts:
596
- if not part.strip(): continue
597
- is_heading = False
598
- for title in sections.keys():
599
- if title.lower() in part.lower():
600
- current_section = title
601
- is_heading = True
602
- break
603
- if not is_heading and current_section:
604
- if current_section == "Competency and Content Evaluation":
605
- if 'strength' in part.lower() or any(k in part.lower() for k in ['leadership', 'problem-solving', 'communication', 'adaptability']):
606
- sections[current_section]["Strengths"].append(part.strip())
607
- elif 'improve' in part.lower() or 'grow' in part.lower() or 'challenge' in part.lower():
608
- sections[current_section]["Growth Areas"].append(part.strip())
609
- elif current_section == "Strategic HR Recommendations":
610
- if any(k in part.lower() for k in ['communication', 'depth', 'presence', 'improve']):
611
- sections[current_section]["Development Priorities"].append(part.strip())
612
- elif any(k in part.lower() for k in ['advance', 'train', 'assess', 'next step']):
613
- sections[current_section]["Next Steps"].append(part.strip())
614
- else:
615
- sections[current_section].append(part.strip())
616
-
617
- # Executive Summary
618
- story.append(Paragraph("2. Executive Summary", h2))
619
- if sections['Executive Summary']:
620
- for line in sections['Executive Summary']:
621
- if line.startswith(('-', '•', '*')):
622
- story.append(Paragraph(line.lstrip('-•* ').strip(), bullet_style))
623
- else:
624
- story.append(Paragraph(line, body_text))
625
- else:
626
- story.append(Paragraph("Summary unavailable.", body_text))
627
- story.append(Spacer(1, 0.3 * inch))
628
 
629
- # Competency and Content
630
- story.append(Paragraph("3. Competency & Content", h2))
631
- story.append(Paragraph("Strengths", h3))
632
- if sections['Competency and Content Evaluation']['Strengths']:
633
- for line in sections['Competency and Content Evaluation']['Strengths']:
634
- story.append(Paragraph(line.lstrip('-•* ').strip(), bullet_style))
635
  else:
636
- story.append(Paragraph("No strengths identified.", body_text))
637
- story.append(Spacer(1, 0.2 * inch))
638
- story.append(Paragraph("Growth Areas", h3))
639
- if sections['Competency and Content Evaluation']['Growth Areas']:
640
- for line in sections['Competency and Content Evaluation']['Growth Areas']:
641
- story.append(Paragraph(line.lstrip('-•* ').strip(), bullet_style))
642
- else:
643
- story.append(Paragraph("No growth areas identified.", body_text))
644
  story.append(Spacer(1, 0.3 * inch))
645
 
646
- # Role Fit
647
- story.append(Paragraph("4. Role Fit & Potential", h2))
648
- if sections['Role Fit and Growth Potential']:
649
- for line in sections['Role Fit and Growth Potential']:
650
- if line.startswith(('-', '•', '*')):
651
- story.append(Paragraph(line.lstrip('-•* ').strip(), bullet_style))
652
- else:
653
- story.append(Paragraph(line, body_text))
654
- else:
655
- story.append(Paragraph("Fit and potential analysis unavailable.", body_text))
656
- story.append(Spacer(1, 0.3 * inch))
657
 
658
- # Strategic Recommendations
659
- story.append(Paragraph("5. Strategic Recommendations", h2))
660
- story.append(Paragraph("Development Priorities", h3))
661
- if sections['Strategic HR Recommendations']['Development Priorities']:
662
- for line in sections['Strategic HR Recommendations']['Development Priorities']:
663
- story.append(Paragraph(line.lstrip('-•* ').strip(), bullet_style))
664
- else:
665
- story.append(Paragraph("No development priorities specified.", body_text))
666
- story.append(Spacer(1, 0.2 * inch))
667
- story.append(Paragraph("Next Steps for Managers", h3))
668
- if sections['Strategic HR Recommendations']['Next Steps']:
669
- for line in sections['Strategic HR Recommendations']['Next Steps']:
670
- story.append(Paragraph(line.lstrip('-•* ').strip(), bullet_style))
671
- else:
672
- story.append(Paragraph("No next steps provided.", body_text))
673
  story.append(Spacer(1, 0.3 * inch))
674
- story.append(Paragraph("This report provides a data-driven evaluation to guide hiring and development decisions.", body_text))
675
 
676
- doc.build(story, onFirstPage=header_footer, onLaterPages=header_footer)
 
 
 
 
 
 
 
 
 
 
 
677
  return True
678
  except Exception as e:
679
  logger.error(f"PDF creation failed: {str(e)}", exc_info=True)
680
  return False
681
 
682
- def convert_to_serializable(obj):
683
- if isinstance(obj, np.generic): return obj.item()
684
- if isinstance(obj, dict): return {k: convert_to_serializable(v) for k, v in obj.items()}
685
- if isinstance(obj, list): return [convert_to_serializable(i) for i in obj]
686
- if isinstance(obj, np.ndarray): return obj.tolist()
687
- return obj
688
 
689
  def convert_to_serializable(obj):
690
- """Converts numpy types to native Python types for JSON serialization."""
691
- if isinstance(obj, np.generic): return obj.item()
692
- if isinstance(obj, dict): return {k: convert_to_serializable(v) for k, v in obj.items()}
693
- if isinstance(obj, list): return [convert_to_serializable(i) for i in obj]
694
- if isinstance(obj, np.ndarray): return obj.tolist()
 
 
 
695
  return obj
696
 
697
- def process_interview(audio_path_or_url: str):
698
- """Main function to process an interview from an audio file or URL."""
699
- local_audio_path, wav_file = None, None
700
- is_downloaded = False
701
-
702
  try:
703
- logger.info(f"Starting processing for {audio_path_or_url}")
704
- if audio_path_or_url.startswith(('http://', 'https://')):
705
- local_audio_path = download_audio_from_url(audio_path_or_url)
706
- is_downloaded = True
707
- else:
708
- local_audio_path = audio_path_or_url
709
 
710
- wav_file = convert_to_wav(local_audio_path)
711
  transcript = transcribe(wav_file)
712
-
713
- for u in transcript['utterances']:
714
- u['prosodic_features'] = extract_prosodic_features(wav_file, u['start'], u['end'])
715
-
 
 
 
 
 
 
716
  utterances_with_speakers = identify_speakers(transcript, wav_file)
717
-
718
- # التصنيف باستخدام المنهجية الإرشادية
719
- classified_utterances = classify_roles(utterances_with_speakers)
720
-
 
 
 
 
 
 
 
 
 
 
 
721
  voice_analysis = analyze_interviewee_voice(wav_file, classified_utterances)
722
-
723
  analysis_data = {
724
  'transcript': classified_utterances,
725
- 'speakers': list(set(u['speaker_name'] for u in classified_utterances)),
726
  'voice_analysis': voice_analysis,
727
  'text_analysis': {
728
  'total_duration': sum(u['prosodic_features']['duration'] for u in classified_utterances),
729
  'speaker_turns': len(classified_utterances)
730
  }
731
  }
732
-
733
- analysis_data['acceptance_probability'] = calculate_acceptance_probability(analysis_data)
734
-
735
  gemini_report_text = generate_report(analysis_data)
736
-
737
- base_name = str(uuid.uuid4())
738
  pdf_path = os.path.join(OUTPUT_DIR, f"{base_name}_report.pdf")
 
 
 
739
  json_path = os.path.join(OUTPUT_DIR, f"{base_name}_analysis.json")
740
-
741
- # create_pdf_report(analysis_data, pdf_path, gemini_report_text=gemini_report_text)
742
-
743
  with open(json_path, 'w') as f:
744
  serializable_data = convert_to_serializable(analysis_data)
745
  json.dump(serializable_data, f, indent=2)
746
-
747
- logger.info(f"Processing completed. JSON report at: {json_path}")
748
- return {'pdf_path': pdf_path, 'json_path': json_path, 'report_text': gemini_report_text}
749
 
 
 
 
 
 
 
 
750
  except Exception as e:
751
- logger.error(f"Processing failed for {audio_path_or_url}: {str(e)}", exc_info=True)
752
- raise
753
- finally:
754
- # تنظيف الملفات المؤقتة
755
- if wav_file and os.path.exists(wav_file):
756
  os.remove(wav_file)
757
- if is_downloaded and local_audio_path and os.path.exists(local_audio_path):
758
- os.remove(local_audio_path)
759
- logger.info(f"Cleaned up temporary downloaded file: {local_audio_path}")
 
10
  from nemo.collections.asr.models import EncDecSpeakerLabelModel
11
  from pinecone import Pinecone, ServerlessSpec
12
  import librosa
13
+ import pandas as pd
14
+ from sklearn.ensemble import RandomForestClassifier
15
+ from sklearn.preprocessing import StandardScaler
16
+ from sklearn.feature_extraction.text import TfidfVectorizer
17
  import re
18
+ from typing import Dict, List, Tuple
19
  import logging
20
+ # --- Imports for enhanced PDF ---
21
  from reportlab.lib.pagesizes import letter
22
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
23
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
24
  from reportlab.lib.units import inch
25
  from reportlab.lib import colors
26
+ import matplotlib.pyplot as plt # Uncomment if you want to add charts and have matplotlib installed
27
+ from reportlab.platypus import Image # Uncomment if you want to add charts and have reportlab.platypus.Image installed
28
+ # --- End Imports for enhanced PDF ---
29
+ from transformers import AutoTokenizer, AutoModel
 
30
  import spacy
31
  import google.generativeai as genai
32
+ import joblib
33
  from concurrent.futures import ThreadPoolExecutor
 
34
 
35
+ # Setup logging
36
  logging.basicConfig(level=logging.INFO)
37
  logger = logging.getLogger(__name__)
38
+ logging.getLogger("nemo_logging").setLevel(logging.ERROR)
 
 
 
39
 
40
  # Configuration
41
+ AUDIO_DIR = "./uploads"
42
  OUTPUT_DIR = "./processed_audio"
43
  os.makedirs(OUTPUT_DIR, exist_ok=True)
44
 
 
47
  ASSEMBLYAI_KEY = os.getenv("ASSEMBLYAI_KEY")
48
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
+ # Initialize services
52
  def initialize_services():
 
53
  try:
54
  pc = Pinecone(api_key=PINECONE_KEY)
55
  index_name = "interview-speaker-embeddings"
 
61
  spec=ServerlessSpec(cloud="aws", region="us-east-1")
62
  )
63
  index = pc.Index(index_name)
64
+
65
  genai.configure(api_key=GEMINI_API_KEY)
66
  gemini_model = genai.GenerativeModel('gemini-1.5-flash')
67
+
68
  return index, gemini_model
69
  except Exception as e:
70
  logger.error(f"Error initializing services: {str(e)}")
71
  raise
72
 
73
+
74
  index, gemini_model = initialize_services()
75
+
76
+ # Device setup
77
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
78
  logger.info(f"Using device: {device}")
79
 
80
+
81
  def load_speaker_model():
 
82
  try:
83
+ import torch
84
+ torch.set_num_threads(5)
85
+ # -----------------------------------------------------------
86
+ # التعديل هنا: تحميل الموديل مباشرة من Hugging Face Hub
87
+ # -----------------------------------------------------------
88
  model = EncDecSpeakerLabelModel.from_pretrained(
89
  "nvidia/speakerverification_en_titanet_large",
90
  map_location=torch.device('cpu')
 
95
  logger.error(f"Model loading failed: {str(e)}")
96
  raise RuntimeError("Could not load speaker verification model")
97
 
98
+
99
+ # Load ML models
100
def load_models():
    """Load all ML components the pipeline depends on.

    Returns a 4-tuple: (speaker-verification model, spaCy pipeline,
    DistilBERT tokenizer, DistilBERT encoder in eval mode on `device`).
    """
    verifier = load_speaker_model()
    spacy_pipeline = spacy.load("en_core_web_sm")

    bert_tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
    bert_encoder = AutoModel.from_pretrained("distilbert-base-uncased").to(device)
    bert_encoder.eval()  # inference only — disable dropout etc.

    return verifier, spacy_pipeline, bert_tokenizer, bert_encoder
109
 
110
+
111
+ speaker_model, nlp, tokenizer, llm_model = load_models()
112
+
113
+
114
+ # Audio processing functions
115
def convert_to_wav(audio_path: str, output_dir: str = OUTPUT_DIR) -> str:
    """Convert any supported audio file to a mono 16 kHz WAV.

    Returns the path of the newly written WAV file (named with a fresh UUID
    inside `output_dir`). Errors are logged and re-raised.
    """
    try:
        clip = AudioSegment.from_file(audio_path)
        # The speaker/ASR models expect single-channel 16 kHz input.
        if clip.channels > 1:
            clip = clip.set_channels(1)
        clip = clip.set_frame_rate(16000)

        target = os.path.join(output_dir, f"{uuid.uuid4()}.wav")
        clip.export(target, format="wav")
        return target
    except Exception as e:
        logger.error(f"Audio conversion failed: {str(e)}")
        raise
128
 
129
+
130
def extract_prosodic_features(audio_path: str, start_ms: int, end_ms: int) -> Dict:
    """Extract duration, pitch and intensity statistics for one utterance.

    The [start_ms, end_ms) slice of `audio_path` is exported to a temp WAV,
    analysed with librosa, and the temp file removed. On any failure the same
    dict keys are returned zeroed (duration is still derived from the
    timestamps) so downstream feature vectors keep a fixed layout.
    """
    duration = (end_ms - start_ms) / 1000
    temp_path = None
    try:
        audio = AudioSegment.from_file(audio_path)
        segment = audio[start_ms:end_ms]
        temp_path = os.path.join(OUTPUT_DIR, f"temp_{uuid.uuid4()}.wav")
        segment.export(temp_path, format="wav")

        y, sr = librosa.load(temp_path, sr=16000)
        pitches = librosa.piptrack(y=y, sr=sr)[0]
        pitches = pitches[pitches > 0]  # keep only voiced pitch estimates
        # Hoisted: the original recomputed the RMS envelope for every stat.
        rms = librosa.feature.rms(y=y)[0]

        return {
            'duration': duration,
            'mean_pitch': float(np.mean(pitches)) if len(pitches) > 0 else 0.0,
            'min_pitch': float(np.min(pitches)) if len(pitches) > 0 else 0.0,
            'max_pitch': float(np.max(pitches)) if len(pitches) > 0 else 0.0,
            'pitch_sd': float(np.std(pitches)) if len(pitches) > 0 else 0.0,
            'intensityMean': float(np.mean(rms)),
            'intensityMin': float(np.min(rms)),
            'intensityMax': float(np.max(rms)),
            'intensitySD': float(np.std(rms)),
        }
    except Exception as e:
        logger.error(f"Feature extraction failed: {str(e)}")
        return {
            'duration': duration,
            'mean_pitch': 0.0,
            'min_pitch': 0.0,
            'max_pitch': 0.0,
            'pitch_sd': 0.0,
            'intensityMean': 0.0,
            'intensityMin': 0.0,
            'intensityMax': 0.0,
            'intensitySD': 0.0,
        }
    finally:
        # Fix: the original leaked the temp WAV whenever an exception fired
        # between export() and os.remove().
        if temp_path and os.path.exists(temp_path):
            os.remove(temp_path)
168
+
169
 
170
def transcribe(audio_path: str) -> Dict:
    """Upload audio to AssemblyAI and poll until the diarized transcript is ready.

    Returns the completed transcript JSON (with `speaker_labels` enabled).
    Raises on API error, HTTP failure, or if the job does not complete within
    30 minutes.
    """
    try:
        with open(audio_path, 'rb') as f:
            upload_response = requests.post(
                "https://api.assemblyai.com/v2/upload",
                headers={"authorization": ASSEMBLYAI_KEY},
                data=f
            )
        upload_response.raise_for_status()  # fail fast on bad key / HTTP error
        audio_url = upload_response.json()['upload_url']

        transcript_response = requests.post(
            "https://api.assemblyai.com/v2/transcript",
            headers={"authorization": ASSEMBLYAI_KEY},
            json={
                "audio_url": audio_url,
                "speaker_labels": True,
                "filter_profanity": True
            }
        )
        transcript_response.raise_for_status()
        transcript_id = transcript_response.json()['id']

        # Fix: the original `while True` polled forever; bound the wait so a
        # stuck job cannot hang the whole pipeline.
        deadline = time.time() + 1800  # 30 minutes
        while time.time() < deadline:
            result = requests.get(
                f"https://api.assemblyai.com/v2/transcript/{transcript_id}",
                headers={"authorization": ASSEMBLYAI_KEY}
            ).json()

            if result['status'] == 'completed':
                return result
            elif result['status'] == 'error':
                raise Exception(result['error'])

            time.sleep(5)
        raise TimeoutError(f"Transcription {transcript_id} did not finish within 30 minutes")
    except Exception as e:
        logger.error(f"Transcription failed: {str(e)}")
        raise
206
 
207
+
208
def process_utterance(utterance, full_audio, wav_file):
    """Embed one utterance's audio and resolve it to a known or new speaker.

    The utterance slice is exported to a temp WAV, embedded with the speaker
    model, and matched against Pinecone; similarity > 0.7 reuses the stored
    speaker, otherwise a new speaker id is registered. Returns the utterance
    dict extended with 'speaker', 'speaker_id' and 'embedding'; on any failure
    a best-effort 'Unknown' record is returned so the pipeline keeps going.
    """
    temp_path = None
    try:
        start = utterance['start']
        end = utterance['end']
        segment = full_audio[start:end]
        temp_path = os.path.join(OUTPUT_DIR, f"temp_{uuid.uuid4()}.wav")
        segment.export(temp_path, format="wav")

        with torch.no_grad():
            embedding = speaker_model.get_embedding(temp_path).to(device)
        # Fix: move to CPU once and reuse — the original called .tolist()
        # directly on a possibly-CUDA tensor in the upsert path.
        embedding_list = embedding.cpu().numpy().tolist()

        query_result = index.query(
            vector=embedding_list,
            top_k=1,
            include_metadata=True
        )

        if query_result['matches'] and query_result['matches'][0]['score'] > 0.7:
            speaker_id = query_result['matches'][0]['id']
            speaker_name = query_result['matches'][0]['metadata']['speaker_name']
        else:
            speaker_id = f"unknown_{uuid.uuid4().hex[:6]}"
            speaker_name = f"Speaker_{speaker_id[-4:]}"
            index.upsert([(speaker_id, embedding_list, {"speaker_name": speaker_name})])

        return {
            **utterance,
            'speaker': speaker_name,
            'speaker_id': speaker_id,
            'embedding': embedding_list
        }
    except Exception as e:
        logger.error(f"Utterance processing failed: {str(e)}")
        return {
            **utterance,
            'speaker': 'Unknown',
            'speaker_id': 'unknown',
            'embedding': None
        }
    finally:
        # Fix: the temp WAV leaked when embedding/query raised before os.remove.
        if temp_path and os.path.exists(temp_path):
            os.remove(temp_path)
249
+
250
 
251
def identify_speakers(transcript: Dict, wav_file: str) -> List[Dict]:
    """Run speaker identification over every utterance, five at a time.

    Each utterance is dispatched to `process_utterance` on a small thread
    pool; results come back in the original utterance order.
    """
    try:
        full_audio = AudioSegment.from_wav(wav_file)
        utterances = transcript['utterances']

        with ThreadPoolExecutor(max_workers=5) as pool:
            pending = [
                pool.submit(process_utterance, utt, full_audio, wav_file)
                for utt in utterances
            ]
            return [task.result() for task in pending]
    except Exception as e:
        logger.error(f"Speaker identification failed: {str(e)}")
        raise
267
 
268
+
269
def train_role_classifier(utterances: List[Dict]):
    """Fit and persist a RandomForest that separates the two speaking roles.

    Features per utterance: 9 prosodic stats + TF-IDF over the text +
    5 shallow linguistic cues. NOTE(review): training labels are a bootstrap
    heuristic (alternating turns: even -> 0, odd -> 1), not ground truth —
    confirm before trusting the classifier. Models are dumped to OUTPUT_DIR.
    Returns (classifier, vectorizer, scaler).
    """
    try:
        texts = [u['text'] for u in utterances]
        vectorizer = TfidfVectorizer(max_features=500, ngram_range=(1, 2))
        tfidf_matrix = vectorizer.fit_transform(texts)

        def featurize(utt, tfidf_row):
            # Prosody block.
            p = utt['prosodic_features']
            vec = [
                p['duration'], p['mean_pitch'], p['min_pitch'], p['max_pitch'],
                p['pitch_sd'], p['intensityMean'], p['intensityMin'],
                p['intensityMax'], p['intensitySD'],
            ]
            # Text block: TF-IDF weights.
            vec.extend(tfidf_row.toarray()[0].tolist())
            # Shallow linguistic cues (question-like turns tend to be the interviewer).
            parsed = nlp(utt['text'])
            vec.extend([
                int(utt['text'].endswith('?')),
                len(re.findall(r'\b(why|how|what|when|where|who|which)\b', utt['text'].lower())),
                len(utt['text'].split()),
                sum(1 for token in parsed if token.pos_ == 'VERB'),
                sum(1 for token in parsed if token.pos_ == 'NOUN'),
            ])
            return vec

        features = [featurize(utt, tfidf_matrix[i]) for i, utt in enumerate(utterances)]
        labels = [0 if i % 2 == 0 else 1 for i in range(len(utterances))]

        scaler = StandardScaler()
        X = scaler.fit_transform(features)

        clf = RandomForestClassifier(
            n_estimators=150,
            max_depth=10,
            random_state=42,
            class_weight='balanced'
        )
        clf.fit(X, labels)

        # Persist the whole pipeline so later runs can skip retraining.
        joblib.dump(clf, os.path.join(OUTPUT_DIR, 'role_classifier.pkl'))
        joblib.dump(vectorizer, os.path.join(OUTPUT_DIR, 'text_vectorizer.pkl'))
        joblib.dump(scaler, os.path.join(OUTPUT_DIR, 'feature_scaler.pkl'))

        return clf, vectorizer, scaler
    except Exception as e:
        logger.error(f"Classifier training failed: {str(e)}")
        raise
324
+ raise
325
+
326
+
327
def classify_roles(utterances: List[Dict], clf, vectorizer, scaler):
    """Tag each utterance as 'Interviewer' or 'Interviewee'.

    Rebuilds the same feature vector used at training time (prosody +
    TF-IDF + linguistic cues), scales it, and asks the classifier;
    prediction 0 maps to 'Interviewer', 1 to 'Interviewee'. Returns new
    utterance dicts extended with a 'role' key.
    """
    try:
        tfidf_matrix = vectorizer.transform([u['text'] for u in utterances])

        labelled = []
        for idx, utt in enumerate(utterances):
            p = utt['prosodic_features']
            vec = [
                p['duration'], p['mean_pitch'], p['min_pitch'], p['max_pitch'],
                p['pitch_sd'], p['intensityMean'], p['intensityMin'],
                p['intensityMax'], p['intensitySD'],
            ]
            vec.extend(tfidf_matrix[idx].toarray()[0].tolist())

            parsed = nlp(utt['text'])
            vec.extend([
                int(utt['text'].endswith('?')),
                len(re.findall(r'\b(why|how|what|when|where|who|which)\b', utt['text'].lower())),
                len(utt['text'].split()),
                sum(1 for tok in parsed if tok.pos_ == 'VERB'),
                sum(1 for tok in parsed if tok.pos_ == 'NOUN'),
            ])

            scaled = scaler.transform([vec])
            predicted = clf.predict(scaled)[0]
            labelled.append({**utt, 'role': 'Interviewer' if predicted == 0 else 'Interviewee'})

        return labelled
    except Exception as e:
        logger.error(f"Role classification failed: {str(e)}")
        raise
 
 
 
367
 
368
 
369
def analyze_interviewee_voice(audio_path: str, utterances: List[Dict]) -> Dict:
    """Compute delivery metrics for the interviewee's turns.

    Measures speaking rate, filler-word ratio, bigram repetition, pitch
    stability (pyin f0, jitter) and intensity stability (RMS, shimmer), then
    maps them onto anxiety/confidence/fluency levels via hand-tuned
    thresholds. Returns {'error': ...} instead of raising so the pipeline
    can continue without voice metrics.
    """
    try:
        y, sr = librosa.load(audio_path, sr=16000)

        interviewee_utterances = [u for u in utterances if u['role'] == 'Interviewee']
        if not interviewee_utterances:
            return {'error': 'No interviewee utterances found'}

        # Slice out the interviewee's audio (timestamps are in milliseconds).
        segments = []
        for u in interviewee_utterances:
            start = int(u['start'] * sr / 1000)
            end = int(u['end'] * sr / 1000)
            segments.append(y[start:end])
        # Fix: dropped the unused np.concatenate(segments) copy the original
        # built — it duplicated all interviewee audio in memory for nothing.

        total_duration = sum(u['prosodic_features']['duration'] for u in interviewee_utterances)
        total_words = sum(len(u['text'].split()) for u in interviewee_utterances)
        speaking_rate = total_words / total_duration if total_duration > 0 else 0

        filler_words = ['um', 'uh', 'like', 'you know', 'so', 'i mean']
        filler_count = sum(
            sum(u['text'].lower().count(fw) for fw in filler_words)
            for u in interviewee_utterances
        )
        filler_ratio = filler_count / total_words if total_words > 0 else 0

        # Repetition: fraction of distinct word bigrams that occur more than once.
        all_words = ' '.join(u['text'].lower() for u in interviewee_utterances).split()
        word_counts = {}
        for i in range(len(all_words) - 1):
            bigram = (all_words[i], all_words[i + 1])
            word_counts[bigram] = word_counts.get(bigram, 0) + 1
        repetition_score = sum(1 for count in word_counts.values() if count > 1) / len(
            word_counts) if word_counts else 0

        # Pitch track over voiced frames only.
        pitches = []
        for segment in segments:
            f0, voiced_flag, _ = librosa.pyin(segment, fmin=80, fmax=300, sr=sr)
            pitches.extend(f0[voiced_flag])

        pitch_mean = np.mean(pitches) if len(pitches) > 0 else 0
        pitch_std = np.std(pitches) if len(pitches) > 0 else 0
        jitter = np.mean(np.abs(np.diff(pitches))) / pitch_mean if len(pitches) > 1 and pitch_mean > 0 else 0

        intensities = []
        for segment in segments:
            rms = librosa.feature.rms(y=segment)[0]
            intensities.extend(rms)

        intensity_mean = np.mean(intensities) if intensities else 0
        intensity_std = np.std(intensities) if intensities else 0
        shimmer = np.mean(np.abs(np.diff(intensities))) / intensity_mean if len(
            intensities) > 1 and intensity_mean > 0 else 0

        # Composite heuristics; thresholds below are hand-tuned, not calibrated.
        anxiety_score = 0.6 * (pitch_std / pitch_mean) + 0.4 * (jitter + shimmer) if pitch_mean > 0 else 0
        confidence_score = 0.7 * (1 / (1 + intensity_std)) + 0.3 * (1 / (1 + filler_ratio))
        hesitation_score = filler_ratio + repetition_score

        anxiety_level = 'high' if anxiety_score > 0.15 else 'moderate' if anxiety_score > 0.07 else 'low'
        confidence_level = 'high' if confidence_score > 0.7 else 'moderate' if confidence_score > 0.5 else 'low'
        fluency_level = 'fluent' if (filler_ratio < 0.05 and repetition_score < 0.1) else 'moderate' if (
                filler_ratio < 0.1 and repetition_score < 0.2) else 'disfluent'

        return {
            'speaking_rate': float(round(speaking_rate, 2)),
            'filler_ratio': float(round(filler_ratio, 4)),
            'repetition_score': float(round(repetition_score, 4)),
            'pitch_analysis': {
                'mean': float(round(pitch_mean, 2)),
                'std_dev': float(round(pitch_std, 2)),
                'jitter': float(round(jitter, 4))
            },
            'intensity_analysis': {
                'mean': float(round(intensity_mean, 2)),
                'std_dev': float(round(intensity_std, 2)),
                'shimmer': float(round(shimmer, 4))
            },
            'composite_scores': {
                'anxiety': float(round(anxiety_score, 4)),
                'confidence': float(round(confidence_score, 4)),
                'hesitation': float(round(hesitation_score, 4))
            },
            'interpretation': {
                'anxiety_level': anxiety_level,
                'confidence_level': confidence_level,
                'fluency_level': fluency_level
            }
        }
    except Exception as e:
        logger.error(f"Voice analysis failed: {str(e)}")
        return {'error': str(e)}
460
 
461
+
462
def generate_voice_interpretation(analysis: Dict) -> str:
    """Render the voice-analysis dict as the plain-text summary that is
    embedded in Gemini's prompt. Returns a fallback line if the analysis
    carries an 'error' key."""
    if 'error' in analysis:
        return "Voice analysis not available."

    scores = analysis['composite_scores']
    levels = analysis['interpretation']
    lines = [
        "Voice Analysis Summary:",
        f"- Speaking Rate: {analysis['speaking_rate']} words/sec (average)",
        f"- Filler Words: {analysis['filler_ratio'] * 100:.1f}% of words",
        f"- Repetition Score: {analysis['repetition_score']:.3f}",
        f"- Anxiety Level: {levels['anxiety_level'].upper()} (score: {scores['anxiety']:.3f})",
        f"- Confidence Level: {levels['confidence_level'].upper()} (score: {scores['confidence']:.3f})",
        f"- Fluency: {levels['fluency_level'].upper()}",
        "",
        "Detailed Interpretation:",
        "1. A higher speaking rate indicates faster speech, which can suggest nervousness or enthusiasm.",
        "2. Filler words and repetitions reduce speech clarity and professionalism.",
        "3. Anxiety is measured through pitch variability and voice instability.",
        "4. Confidence is assessed through voice intensity and stability.",
        "5. Fluency combines filler words and repetition metrics.",
    ]
    return "\n".join(lines)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
487
 
488
 
489
def generate_report(analysis_data: Dict) -> str:
    """Build the Gemini prompt from the analysis data and return the
    generated narrative report (or an error string on failure)."""
    try:
        voice_metrics = analysis_data.get('voice_analysis', {})
        voice_interpretation = generate_voice_interpretation(voice_metrics)

        # Limit to the first 5 interviewee turns to keep the prompt short.
        interviewee_responses = []
        for u in analysis_data['transcript']:
            if u['role'] == 'Interviewee':
                interviewee_responses.append(f"Speaker {u['speaker']} ({u['role']}): {u['text']}")
        interviewee_responses = interviewee_responses[:5]

        prompt = f"""
        Generate a comprehensive interview analysis report based on the provided data.
        The report should be structured with clear headings and concise summaries.
        **1. Executive Summary**
        Provide a brief overview of the interview, its duration, number of speaker turns, and main participants.
        - Overall interview duration: {analysis_data['text_analysis']['total_duration']:.2f} seconds
        - Number of speaker turns: {analysis_data['text_analysis']['speaker_turns']}
        - Main participants: {', '.join(analysis_data['speakers'])}
        **2. Voice Analysis**
        Summarize key voice metrics and provide a detailed interpretation.
        {voice_interpretation}
        **3. Content Analysis**
        Analyze the key themes and strengths/weaknesses in the interviewee's responses.
        Key responses from interviewee:
        {chr(10).join(interviewee_responses)}
        **4. Recommendations**
        Offer specific, actionable suggestions for improvement focusing on communication skills, content delivery, and professional presentation.
        """

        response = gemini_model.generate_content(prompt)
        return response.text
    except Exception as e:
        logger.error(f"Report generation failed: {str(e)}")
        return f"Error generating report: {str(e)}"
524
 
525
+
526
+ # --- ENHANCED PDF GENERATION FUNCTION ---
527
def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text: str):
    """Render the analysis plus Gemini's narrative into a structured PDF.

    Writes the document to `output_path`. Returns True on success and False
    on any failure (errors are logged with traceback, never raised).
    """

    def _split_into_sections(report_text: str) -> Dict:
        """Group the Gemini report's lines under its four '**N. ...**' headings."""
        headers = {
            '**1. Executive Summary**': 'Executive Summary',
            '**2. Voice Analysis**': 'Voice Analysis (Gemini Interpretation)',
            '**3. Content Analysis**': 'Content Analysis',
            '**4. Recommendations**': 'Recommendations',
        }
        sections, current = {}, None
        for line in report_text.split('\n'):
            matched = next((name for marker, name in headers.items() if line.startswith(marker)), None)
            if matched:
                current = matched
                sections[current] = []
            elif current:
                sections[current].append(line)
        return sections

    try:
        doc = SimpleDocTemplate(output_path, pagesize=letter)
        styles = getSampleStyleSheet()

        # Custom paragraph styles (standalone objects; not registered in the sheet).
        h1 = ParagraphStyle(name='Heading1', parent=styles['h1'], fontSize=16, spaceAfter=14, alignment=1)
        h2 = ParagraphStyle(name='Heading2', parent=styles['h2'], fontSize=12, spaceBefore=10, spaceAfter=8,
                            textColor=colors.HexColor('#333366'))
        h3 = ParagraphStyle(name='Heading3', parent=styles['h3'], fontSize=10, spaceBefore=8, spaceAfter=4,
                            textColor=colors.HexColor('#0055AA'))
        body_text = ParagraphStyle(name='BodyText', parent=styles['Normal'], fontSize=9, leading=12, spaceAfter=4)
        bullet_style = ParagraphStyle(name='Bullet', parent=styles['Normal'], fontSize=9, leading=12, leftIndent=18,
                                      bulletIndent=9)

        story = []

        # Title / header.
        story.append(Paragraph("<b>Interview Analysis Report</b>", h1))
        story.append(Spacer(1, 0.2 * inch))
        story.append(Paragraph(f"<b>Date:</b> {time.strftime('%Y-%m-%d')}", body_text))
        story.append(Spacer(1, 0.3 * inch))

        sections = _split_into_sections(gemini_report_text)

        # 1. Executive Summary.
        story.append(Paragraph("1. Executive Summary", h2))
        story.append(Spacer(1, 0.1 * inch))
        for line in sections.get('Executive Summary', []):
            if line.strip():
                story.append(Paragraph(line.strip(), body_text))
        story.append(Spacer(1, 0.2 * inch))

        # 2. Voice Analysis: metric table plus Gemini's interpretation.
        story.append(Paragraph("2. Voice Analysis", h2))
        voice_analysis = analysis_data.get('voice_analysis', {})

        if voice_analysis and 'error' not in voice_analysis:
            table_data = [
                ['Metric', 'Value', 'Interpretation'],
                ['Speaking Rate', f"{voice_analysis['speaking_rate']:.2f} words/sec", 'Average rate'],
                ['Filler Words', f"{voice_analysis['filler_ratio'] * 100:.1f}%", 'Percentage of total words'],
                ['Repetition Score', f"{voice_analysis['repetition_score']:.3f}", 'Lower is better articulation'],
                ['Anxiety Level', voice_analysis['interpretation']['anxiety_level'].upper(),
                 f"Score: {voice_analysis['composite_scores']['anxiety']:.3f}"],
                ['Confidence Level', voice_analysis['interpretation']['confidence_level'].upper(),
                 f"Score: {voice_analysis['composite_scores']['confidence']:.3f}"],
                ['Fluency', voice_analysis['interpretation']['fluency_level'].upper(), 'Overall speech flow']
            ]

            table = Table(table_data)
            # Fix: dropped the duplicate header BOTTOMPADDING directive the
            # original carried (it was overridden by the whole-table rule).
            table.setStyle(TableStyle([
                ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#6699CC')),
                ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
                ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
                ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                ('BACKGROUND', (0, 1), (-1, -1), colors.HexColor('#EFEFEF')),
                ('GRID', (0, 0), (-1, -1), 0.5, colors.HexColor('#CCCCCC')),
                ('LEFTPADDING', (0, 0), (-1, -1), 6),
                ('RIGHTPADDING', (0, 0), (-1, -1), 6),
                ('TOPPADDING', (0, 0), (-1, -1), 6),
                ('BOTTOMPADDING', (0, 0), (-1, -1), 6),
            ]))
            story.append(table)
            story.append(Spacer(1, 0.2 * inch))

            if 'Voice Analysis (Gemini Interpretation)' in sections:
                story.append(Paragraph("Detailed Interpretation:", h3))
                for line in sections['Voice Analysis (Gemini Interpretation)']:
                    if line.strip():
                        story.append(Paragraph(line.strip(), body_text))
                story.append(Spacer(1, 0.2 * inch))
        else:
            story.append(Paragraph("Voice analysis not available or encountered an error.", body_text))
        story.append(Spacer(1, 0.3 * inch))

        # 3. Content Analysis.
        story.append(Paragraph("3. Content Analysis", h2))
        for line in sections.get('Content Analysis', []):
            text = line.strip()
            if not text:
                continue
            # Fix: the original only recognised '-' bullets; Gemini commonly
            # emits '*' and '•' bullets as well.
            style = bullet_style if text.startswith(('-', '*', '•')) else body_text
            story.append(Paragraph(text, style))
        story.append(Spacer(1, 0.2 * inch))

        story.append(Paragraph("Key Interviewee Responses:", h3))
        interviewee_responses = [
            f"Speaker {u['speaker']} ({u['role']}): {u['text']}"
            for u in analysis_data['transcript']
            if u['role'] == 'Interviewee'
        ][:5]  # keep the report short
        for res in interviewee_responses:
            story.append(Paragraph(res, bullet_style))
        story.append(Spacer(1, 0.3 * inch))

        # 4. Recommendations.
        story.append(Paragraph("4. Recommendations", h2))
        for line in sections.get('Recommendations', []):
            text = line.strip()
            if not text:
                continue
            style = bullet_style if text.startswith(('-', '*', '•')) else body_text
            story.append(Paragraph(text, style))
        story.append(Spacer(1, 0.2 * inch))

        doc.build(story)
        return True
    except Exception as e:
        logger.error(f"PDF creation failed: {str(e)}", exc_info=True)
        return False
680
 
 
 
 
 
 
 
681
 
682
def convert_to_serializable(obj):
    """Recursively convert numpy scalars/arrays into plain Python types.

    Used before json.dump; dicts and sequences are walked, numpy scalars
    become native numbers, ndarrays become nested lists, anything else is
    returned unchanged.
    """
    if isinstance(obj, np.generic):
        return obj.item()
    if isinstance(obj, dict):
        return {key: convert_to_serializable(value) for key, value in obj.items()}
    # Fix: tuples previously fell through unconverted, so a tuple holding
    # numpy values still broke json.dump.
    if isinstance(obj, (list, tuple)):
        return [convert_to_serializable(item) for item in obj]
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    return obj
692
 
693
+
694
def process_interview(audio_path: str):
    """Run the full interview-analysis pipeline on one audio recording.

    Steps: convert to WAV, transcribe, extract per-utterance prosodic
    features, identify speakers, classify interviewer/interviewee roles,
    analyze the interviewee's voice, generate a narrative report via
    Gemini, and write PDF + JSON artifacts to ``OUTPUT_DIR``.

    Args:
        audio_path: Path to the source audio file.

    Returns:
        dict with keys ``'pdf_path'`` and ``'json_path'`` pointing at the
        generated artifacts.

    Raises:
        Re-raises any exception from the pipeline after logging it; the
        intermediate WAV file is always removed (see ``finally``).
    """
    wav_file = None  # sentinel so cleanup is safe even if conversion fails
    try:
        logger.info(f"Starting processing for {audio_path}")

        wav_file = convert_to_wav(audio_path)

        logger.info("Starting transcription")
        transcript = transcribe(wav_file)

        logger.info("Extracting prosodic features")
        for utterance in transcript['utterances']:
            utterance['prosodic_features'] = extract_prosodic_features(
                wav_file,
                utterance['start'],
                utterance['end']
            )

        logger.info("Identifying speakers")
        utterances_with_speakers = identify_speakers(transcript, wav_file)

        logger.info("Classifying roles")
        # Reuse a previously trained role classifier when available;
        # otherwise train one from this interview's utterances.
        # NOTE(review): joblib.load unpickles arbitrary code — acceptable
        # only because OUTPUT_DIR is written by this app; confirm it is
        # not user-writable.
        clf_path = os.path.join(OUTPUT_DIR, 'role_classifier.pkl')
        if os.path.exists(clf_path):
            clf = joblib.load(clf_path)
            vectorizer = joblib.load(os.path.join(OUTPUT_DIR, 'text_vectorizer.pkl'))
            scaler = joblib.load(os.path.join(OUTPUT_DIR, 'feature_scaler.pkl'))
        else:
            clf, vectorizer, scaler = train_role_classifier(utterances_with_speakers)

        classified_utterances = classify_roles(utterances_with_speakers, clf, vectorizer, scaler)

        logger.info("Analyzing interviewee voice")
        voice_analysis = analyze_interviewee_voice(wav_file, classified_utterances)

        analysis_data = {
            'transcript': classified_utterances,
            'speakers': list(set(u['speaker'] for u in classified_utterances)),
            'voice_analysis': voice_analysis,
            'text_analysis': {
                'total_duration': sum(u['prosodic_features']['duration'] for u in classified_utterances),
                'speaker_turns': len(classified_utterances)
            }
        }

        logger.info("Generating report text using Gemini")
        gemini_report_text = generate_report(analysis_data)

        base_name = os.path.splitext(os.path.basename(audio_path))[0]
        pdf_path = os.path.join(OUTPUT_DIR, f"{base_name}_report.pdf")
        # Pass both the structured analysis and the Gemini narrative to the PDF
        create_pdf_report(analysis_data, pdf_path, gemini_report_text=gemini_report_text)

        json_path = os.path.join(OUTPUT_DIR, f"{base_name}_analysis.json")
        with open(json_path, 'w') as f:
            serializable_data = convert_to_serializable(analysis_data)
            json.dump(serializable_data, f, indent=2)

        logger.info(f"Processing completed for {audio_path}")
        return {
            'pdf_path': pdf_path,
            'json_path': json_path
        }
    except Exception as e:
        logger.error(f"Processing failed: {str(e)}", exc_info=True)
        raise
    finally:
        # Single cleanup point for the intermediate WAV; the original
        # duplicated this in both the success and error paths and relied
        # on the fragile `'wav_file' in locals()` check.
        if wav_file and os.path.exists(wav_file):
            os.remove(wav_file)