Update process_interview.py

process_interview.py (CHANGED): +432 -215
Removed lines, by hunk (old side of the diff; several removed lines were cut off in the page capture and are reproduced here as captured):

@@ -19,15 +19,16 @@ from typing import Dict, List, Tuple
-from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, Image
-from transformers import AutoTokenizer, AutoModel

@@ -35,10 +36,12 @@ from concurrent.futures import ThreadPoolExecutor
-logger = logging.getLogger(

@@ -47,21 +50,18 @@ PINECONE_KEY = os.getenv("PINECONE_KEY")
-# --- All your original helper functions ---
-# I am including them exactly as you last provided them.
-# --- HELPER FUNCTION to download from URL ---
-        logger.info(f"Downloading audio from {url} to {local_filename}")
-            with open(
-        return

@@ -71,7 +71,12 @@ def initialize_services():
-    pc.create_index(

@@ -87,8 +92,12 @@ logger.info(f"Using device: {device}")
-        model = EncDecSpeakerLabelModel.from_pretrained(

@@ -108,7 +117,8 @@ def convert_to_wav(audio_path: str, output_dir: str = OUTPUT_DIR) -> str:
-        if audio.channels > 1:

@@ -121,14 +131,13 @@ def extract_prosodic_features(audio_path: str, start_ms: int, end_ms: int) -> Dict:
-        pitches, _ = librosa.piptrack(y=y, sr=sr)
-            'duration': (end_ms - start_ms) / 1000

@@ -138,132 +147,277 @@ def extract_prosodic_features(audio_path: str, start_ms: int, end_ms: int) -> Dict:
-        return {
-        upload_response = requests.post(
-        transcript_response = requests.post(
-            result = requests.get(
-def process_utterance(utterance, full_audio):
-        start
-        os.remove(tmp.name)
-        query_result = index.query(
-            speaker_id = f"
-            speaker_name = f"Speaker_{speaker_id[-4:]
-        return {
-            futures = [
-        interviewee_utterances = [u for u in utterances if u
-        if not interviewee_utterances:
-            f0, voiced_flag, _ = librosa.pyin(segment, fmin=librosa.note_to_hz('C2'), fmax=librosa.note_to_hz('C7'), sr=sr)
-        intensities.extend(librosa.feature.rms(y=segment)[0])
-            'speaking_rate': float(round(speaking_rate, 2)),
-            'interpretation': {
-                'anxiety_level': 'high' if anxiety_score > 0.15 else 'moderate' if anxiety_score > 0.07 else 'low',
-                'confidence_level': 'high' if confidence_score > 0.7 else 'moderate' if confidence_score > 0.5 else 'low',
-                'fluency_level': 'fluent' if filler_ratio < 0.05 and repetition_score < 0.1 else 'disfluent'
-            }

@@ -278,55 +432,42 @@ def calculate_acceptance_probability(analysis_data: Dict) -> float:
-    # Your full, detailed function
-    fluency_level = voice.get('interpretation', {}).get('fluency_level', '
-    fluency_map = {'
-def convert_to_serializable(obj):
-    if isinstance(obj, np.generic): return obj.item()
-    if isinstance(obj, dict): return {k: convert_to_serializable(v) for k, v in obj.items()}
-    if isinstance(obj, list): return [convert_to_serializable(i) for i in obj]
-    if isinstance(obj, np.ndarray): return obj.tolist()
-    return obj
-# --- NEW: HR Persona Report Generation ---
-            f"Mentioned soft skills: {', '.join(content.get('mentioned_soft_skills', [])) or 'None'}.")
-        prob = analysis_data.get('acceptance_probability')
-        - Suggest specific questions or topics for the next interviewer to focus on.

@@ -334,63 +475,156 @@ def generate_report(analysis_data: Dict) -> str:
-# --- NEW: Polished PDF Creation ---
-def parse_gemini_report(text: str) -> list:
-    parsed_elements = []
-    patterns = {
-        'h3': r'^\s*\*\*\d\.\d\s+(.*?)\*\*:',
-        'bullet': r'^\s*[-•]\s*(.*)',
-        'bold': r'^\s*\*\*(.*?)\*\*'
-    }
-    for line in text.split('\n'):
-        line = line.strip()
-        if not line: continue
-        match_h3 = re.match(patterns['h3'], line)
-        if match_h3:
-            parsed_elements.append({'type': 'h3', 'content': match_h3.group(1)})
-            continue
-        match_bold = re.match(patterns['bold'], line)
-        if match_bold:
-            if not re.match(r'^\d\.', match_bold.group(1)):
-                parsed_elements.append({'type': 'h3', 'content': match_bold.group(1)})
-            continue
-        match_bullet = re.match(patterns['bullet'], line)
-        if match_bullet:
-            parsed_elements.append({'type': 'bullet', 'content': match_bullet.group(1)})
-            continue
-        parsed_elements.append({'type': 'body', 'content': line})
-    return parsed_elements
-        doc = SimpleDocTemplate(output_path, pagesize=letter,
-        h1 = ParagraphStyle(name='Heading1', fontSize=
-        h2 = ParagraphStyle(name='Heading2', fontSize=14, leading=18, spaceBefore=
-            canvas.drawString(doc.leftMargin, 0.5 * inch, f"Page {doc.page} | EvalBot
-            canvas.setStrokeColor(colors.HexColor('#
-            canvas.setLineWidth(
-            canvas.line(doc.leftMargin, doc.height + 0.
-            canvas.
-            canvas.drawString(doc.leftMargin, doc.height + 0.9*inch, "Interview Performance Analysis")

@@ -398,81 +632,64 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text: str):
-# --- MAIN ORCHESTRATOR FUNCTION ---
-    local_audio_path
-        user_id_from_task = "unknown_user"
-        try:
-            from celery_worker import celery_app
-            if celery_app.current_task:
-                user_id_from_task = celery_app.current_task.request.kwargs.get('item_data', {}).get('user_id', 'unknown_user')
-        except (ImportError, AttributeError):
-            pass  # Celery might not be in the context if run locally
-            u['prosodic_features'] = extract_prosodic_features(wav_file, u['start'], u['end'])
-        content_analysis = analyze_text_content(classified_utterances)
-            'user_id': user_id_from_task,
-            'advanced_content_analysis': content_analysis,
-        create_pdf_report(analysis_data, pdf_path, gemini_report_text)
-        return {
-            'pdf_path': pdf_path,
-            'json_path': json_path,
-            'pdf_filename': os.path.basename(pdf_path),
-            'json_filename': os.path.basename(json_path)
-        }
-        if wav_file and os.path.exists(wav_file):
Updated file (new side of the diff; added lines are marked "+", unchanged context lines are unmarked):

@@ -19,15 +19,16 @@ from typing import Dict, List, Tuple
 import logging
 import tempfile
 from reportlab.lib.pagesizes import letter
+from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, Image
 from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
 from reportlab.lib.units import inch
 from reportlab.lib import colors
 import matplotlib.pyplot as plt
 import matplotlib
 matplotlib.use('Agg')
+from reportlab.platypus import Image
 import io
+from transformers import AutoTokenizer, AutoModel
 import spacy
 import google.generativeai as genai
 import joblib

@@ -35,10 +36,12 @@ from concurrent.futures import ThreadPoolExecutor

 # Setup logging
 logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 logging.getLogger("nemo_logging").setLevel(logging.ERROR)
+logging.getLogger("nemo").setLevel(logging.ERROR)

 # Configuration
+AUDIO_DIR = "./uploads"
 OUTPUT_DIR = "./processed_audio"
 os.makedirs(OUTPUT_DIR, exist_ok=True)

@@ -47,21 +50,18 @@ PINECONE_KEY = os.getenv("PINECONE_KEY")
 ASSEMBLYAI_KEY = os.getenv("ASSEMBLYAI_KEY")
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

 def download_audio_from_url(url: str) -> str:
+    """Downloads an audio file from a URL to a temporary local path."""
     try:
         temp_dir = tempfile.gettempdir()
+        temp_path = os.path.join(temp_dir, f"{uuid.uuid4()}.tmp_audio")
+        logger.info(f"Downloading audio from {url} to {temp_path}")
         with requests.get(url, stream=True) as r:
             r.raise_for_status()
+            with open(temp_path, 'wb') as f:
                 for chunk in r.iter_content(chunk_size=8192):
                     f.write(chunk)
+        return temp_path
     except Exception as e:
         logger.error(f"Failed to download audio from URL {url}: {e}")
         raise
@@ -71,7 +71,12 @@ def initialize_services():
 pc = Pinecone(api_key=PINECONE_KEY)
 index_name = "interview-speaker-embeddings"
 if index_name not in pc.list_indexes().names():
+    pc.create_index(
+        name=index_name,
+        dimension=192,
+        metric="cosine",
+        spec=ServerlessSpec(cloud="aws", region="us-east-1")
+    )
 index = pc.Index(index_name)
 genai.configure(api_key=GEMINI_API_KEY)
 gemini_model = genai.GenerativeModel('gemini-1.5-flash')
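The index is created with dimension=192, which matches the 192-dimensional embeddings produced by the TitaNet-Large speaker model loaded below. A minimal sketch of the upsert/query round-trip that process_utterance later performs against this index; the speaker id, name, and zero vector are placeholders, and PINECONE_KEY is the module-level variable defined above:

    # Sketch only: round-trip against the speaker index created above.
    from pinecone import Pinecone

    pc = Pinecone(api_key=PINECONE_KEY)
    index = pc.Index("interview-speaker-embeddings")

    embedding = [0.0] * 192  # placeholder for a real TitaNet embedding
    index.upsert([("speaker-abc", embedding, {"speaker_name": "Speaker_abc"})])
    match = index.query(vector=embedding, top_k=1, include_metadata=True)
    print(match['matches'][0]['id'], match['matches'][0]['score'])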
@@ -87,8 +92,12 @@ logger.info(f"Using device: {device}")

 def load_speaker_model():
     try:
+        import torch
         torch.set_num_threads(5)
+        model = EncDecSpeakerLabelModel.from_pretrained(
+            "nvidia/speakerverification_en_titanet_large",
+            map_location=torch.device('cpu')
+        )
         model.eval()
         return model
     except Exception as e:
@@ -108,7 +117,8 @@ speaker_model, nlp, tokenizer, llm_model = load_models()
 def convert_to_wav(audio_path: str, output_dir: str = OUTPUT_DIR) -> str:
     try:
         audio = AudioSegment.from_file(audio_path)
+        if audio.channels > 1:
+            audio = audio.set_channels(1)
         audio = audio.set_frame_rate(16000)
         wav_file = os.path.join(output_dir, f"{uuid.uuid4()}.wav")
         audio.export(wav_file, format="wav")
@@ -121,14 +131,13 @@ def extract_prosodic_features(audio_path: str, start_ms: int, end_ms: int) -> Dict:
     try:
         audio = AudioSegment.from_file(audio_path)
         segment = audio[start_ms:end_ms]
+        temp_path = os.path.join(OUTPUT_DIR, f"temp_{uuid.uuid4()}.wav")
+        segment.export(temp_path, format="wav")
+        y, sr = librosa.load(temp_path, sr=16000)
+        pitches = librosa.piptrack(y=y, sr=sr)[0]
         pitches = pitches[pitches > 0]
+        features = {
+            'duration': (end_ms - start_ms) / 1000,
             'mean_pitch': float(np.mean(pitches)) if len(pitches) > 0 else 0.0,
             'min_pitch': float(np.min(pitches)) if len(pitches) > 0 else 0.0,
             'max_pitch': float(np.max(pitches)) if len(pitches) > 0 else 0.0,

@@ -138,132 +147,277 @@ def extract_prosodic_features(audio_path: str, start_ms: int, end_ms: int) -> Dict:
             'intensityMax': float(np.max(librosa.feature.rms(y=y)[0])),
             'intensitySD': float(np.std(librosa.feature.rms(y=y)[0])),
         }
+        os.remove(temp_path)
+        return features
     except Exception as e:
         logger.error(f"Feature extraction failed: {str(e)}")
+        return {
+            'duration': 0.0, 'mean_pitch': 0.0, 'min_pitch': 0.0, 'max_pitch': 0.0,
+            'pitch_sd': 0.0, 'intensityMean': 0.0, 'intensityMin': 0.0,
+            'intensityMax': 0.0, 'intensitySD': 0.0
+        }
def transcribe(audio_path: str) -> Dict:
|
| 161 |
try:
|
| 162 |
with open(audio_path, 'rb') as f:
|
| 163 |
+
upload_response = requests.post(
|
| 164 |
+
"https://api.assemblyai.com/v2/upload",
|
| 165 |
+
headers={"authorization": ASSEMBLYAI_KEY},
|
| 166 |
+
data=f
|
| 167 |
+
)
|
| 168 |
audio_url = upload_response.json()['upload_url']
|
| 169 |
+
transcript_response = requests.post(
|
| 170 |
+
"https://api.assemblyai.com/v2/transcript",
|
| 171 |
+
headers={"authorization": ASSEMBLYAI_KEY},
|
| 172 |
+
json={
|
| 173 |
+
"audio_url": audio_url,
|
| 174 |
+
"speaker_labels": True,
|
| 175 |
+
"filter_profanity": True
|
| 176 |
+
}
|
| 177 |
+
)
|
| 178 |
transcript_id = transcript_response.json()['id']
|
| 179 |
while True:
|
| 180 |
+
result = requests.get(
|
| 181 |
+
f"https://api.assemblyai.com/v2/transcript/{transcript_id}",
|
| 182 |
+
headers={"authorization": ASSEMBLYAI_KEY}
|
| 183 |
+
).json()
|
| 184 |
+
if result['status'] == 'completed':
|
| 185 |
+
return result
|
| 186 |
+
elif result['status'] == 'error':
|
| 187 |
+
raise Exception(result['error'])
|
| 188 |
time.sleep(5)
|
| 189 |
except Exception as e:
|
| 190 |
logger.error(f"Transcription failed: {str(e)}")
|
| 191 |
raise
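Downstream code relies on only a few fields of the AssemblyAI result: status and error during polling, and the utterances list, whose entries carry start, end (milliseconds, as pydub slicing expects), and text. A hypothetical stand-in like this can replace transcribe() when exercising the rest of the pipeline offline; the utterance contents are invented:

    # Hypothetical offline stub; field names mirror what the pipeline reads.
    fake_transcript = {
        'status': 'completed',
        'utterances': [
            {'start': 0, 'end': 4200, 'text': "Tell me about yourself."},
            {'start': 4300, 'end': 12800, 'text': "Sure, I have five years of experience..."},
        ],
    }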

+def process_utterance(utterance, full_audio, wav_file):
     try:
+        start = utterance['start']
+        end = utterance['end']
         segment = full_audio[start:end]
+        temp_path = os.path.join(OUTPUT_DIR, f"temp_{uuid.uuid4()}.wav")
+        segment.export(temp_path, format="wav")
+        with torch.no_grad():
+            embedding = speaker_model.get_embedding(temp_path).cpu().numpy()
         embedding_list = embedding.flatten().tolist()
+        query_result = index.query(
+            vector=embedding_list,
+            top_k=1,
+            include_metadata=True
+        )
+        if query_result['matches'] and query_result['matches'][0]['score'] > 0.7:
             speaker_id = query_result['matches'][0]['id']
             speaker_name = query_result['matches'][0]['metadata']['speaker_name']
         else:
+            speaker_id = f"unknown_{uuid.uuid4().hex[:6]}"
+            speaker_name = f"Speaker_{speaker_id[-4:]}"
             index.upsert([(speaker_id, embedding_list, {"speaker_name": speaker_name})])
+        os.remove(temp_path)
+        return {
+            **utterance,
+            'speaker': speaker_name,
+            'speaker_id': speaker_id,
+            'embedding': embedding_list
+        }
     except Exception as e:
         logger.error(f"Utterance processing failed: {str(e)}", exc_info=True)
+        return {
+            **utterance,
+            'speaker': 'Unknown',
+            'speaker_id': 'unknown',
+            'embedding': None
+        }

def identify_speakers(transcript: Dict, wav_file: str) -> List[Dict]:
|
| 232 |
try:
|
| 233 |
full_audio = AudioSegment.from_wav(wav_file)
|
| 234 |
+
utterances = transcript['utterances']
|
| 235 |
with ThreadPoolExecutor(max_workers=5) as executor:
|
| 236 |
+
futures = [
|
| 237 |
+
executor.submit(process_utterance, utterance, full_audio, wav_file)
|
| 238 |
+
for utterance in utterances
|
| 239 |
+
]
|
| 240 |
results = [f.result() for f in futures]
|
| 241 |
return results
|
| 242 |
except Exception as e:
|
| 243 |
logger.error(f"Speaker identification failed: {str(e)}")
|
| 244 |
raise
|
| 245 |
|
| 246 |
+
def train_role_classifier(utterances: List[Dict]):
|
| 247 |
+
try:
|
| 248 |
+
texts = [u['text'] for u in utterances]
|
| 249 |
+
vectorizer = TfidfVectorizer(max_features=500, ngram_range=(1, 2))
|
| 250 |
+
X_text = vectorizer.fit_transform(texts)
|
| 251 |
+
features = []
|
| 252 |
+
labels = []
|
| 253 |
+
for i, utterance in enumerate(utterances):
|
| 254 |
+
prosodic = utterance['prosodic_features']
|
| 255 |
+
feat = [
|
| 256 |
+
prosodic['duration'], prosodic['mean_pitch'], prosodic['min_pitch'],
|
| 257 |
+
prosodic['max_pitch'], prosodic['pitch_sd'], prosodic['intensityMean'],
|
| 258 |
+
prosodic['intensityMin'], prosodic['intensityMax'], prosodic['intensitySD'],
|
| 259 |
+
]
|
| 260 |
+
feat.extend(X_text[i].toarray()[0].tolist())
|
| 261 |
+
doc = nlp(utterance['text'])
|
| 262 |
+
feat.extend([
|
| 263 |
+
int(utterance['text'].endswith('?')),
|
| 264 |
+
len(re.findall(r'\b(why|how|what|when|where|who|which)\b', utterance['text'].lower())),
|
| 265 |
+
len(utterance['text'].split()),
|
| 266 |
+
sum(1 for token in doc if token.pos_ == 'VERB'),
|
| 267 |
+
sum(1 for token in doc if token.pos_ == 'NOUN')
|
| 268 |
+
])
|
| 269 |
+
features.append(feat)
|
| 270 |
+
labels.append(0 if i % 2 == 0 else 1)
|
| 271 |
+
scaler = StandardScaler()
|
| 272 |
+
X = scaler.fit_transform(features)
|
| 273 |
+
clf = RandomForestClassifier(
|
| 274 |
+
n_estimators=150, max_depth=10, random_state=42, class_weight='balanced'
|
| 275 |
+
)
|
| 276 |
+
clf.fit(X, labels)
|
| 277 |
+
joblib.dump(clf, os.path.join(OUTPUT_DIR, 'role_classifier.pkl'))
|
| 278 |
+
joblib.dump(vectorizer, os.path.join(OUTPUT_DIR, 'text_vectorizer.pkl'))
|
| 279 |
+
joblib.dump(scaler, os.path.join(OUTPUT_DIR, 'feature_scaler.pkl'))
|
| 280 |
+
return clf, vectorizer, scaler
|
| 281 |
+
except Exception as e:
|
| 282 |
+
logger.error(f"Classifier training failed: {str(e)}")
|
| 283 |
+
raise
|
| 284 |
+
|
| 285 |
+
def classify_roles(utterances: List[Dict], clf, vectorizer, scaler):
|
| 286 |
+
try:
|
| 287 |
+
texts = [u['text'] for u in utterances]
|
| 288 |
+
X_text = vectorizer.transform(texts)
|
| 289 |
+
results = []
|
| 290 |
+
for i, utterance in enumerate(utterances):
|
| 291 |
+
prosodic = utterance['prosodic_features']
|
| 292 |
+
feat = [
|
| 293 |
+
prosodic['duration'], prosodic['mean_pitch'], prosodic['min_pitch'],
|
| 294 |
+
prosodic['max_pitch'], prosodic['pitch_sd'], prosodic['intensityMean'],
|
| 295 |
+
prosodic['intensityMin'], prosodic['intensityMax'], prosodic['intensitySD'],
|
| 296 |
+
]
|
| 297 |
+
feat.extend(X_text[i].toarray()[0].tolist())
|
| 298 |
+
doc = nlp(utterance['text'])
|
| 299 |
+
feat.extend([
|
| 300 |
+
int(utterance['text'].endswith('?')),
|
| 301 |
+
len(re.findall(r'\b(why|how|what|when|where|who|which)\b', utterance['text'].lower())),
|
| 302 |
+
len(utterance['text'].split()),
|
| 303 |
+
sum(1 for token in doc if token.pos_ == 'VERB'),
|
| 304 |
+
sum(1 for token in doc if token.pos_ == 'NOUN')
|
| 305 |
+
])
|
| 306 |
+
X = scaler.transform([feat])
|
| 307 |
+
role = 'Interviewer' if clf.predict(X)[0] == 0 else 'Interviewee'
|
| 308 |
+
results.append({**utterance, 'role': role})
|
| 309 |
+
return results
|
| 310 |
+
except Exception as e:
|
| 311 |
+
logger.error(f"Role classification failed: {str(e)}")
|
| 312 |
+
raise
|
| 313 |
+
|
| 314 |
def analyze_interviewee_voice(audio_path: str, utterances: List[Dict]) -> Dict:
|
| 315 |
try:
|
| 316 |
y, sr = librosa.load(audio_path, sr=16000)
|
| 317 |
+
interviewee_utterances = [u for u in utterances if u['role'] == 'Interviewee']
|
| 318 |
+
if not interviewee_utterances:
|
| 319 |
+
return {'error': 'No interviewee utterances found'}
|
| 320 |
+
segments = []
|
| 321 |
+
for u in interviewee_utterances:
|
| 322 |
+
start = int(u['start'] * sr / 1000)
|
| 323 |
+
end = int(u['end'] * sr / 1000)
|
| 324 |
+
segments.append(y[start:end])
|
| 325 |
total_duration = sum(u['prosodic_features']['duration'] for u in interviewee_utterances)
|
| 326 |
total_words = sum(len(u['text'].split()) for u in interviewee_utterances)
|
| 327 |
speaking_rate = total_words / total_duration if total_duration > 0 else 0
|
| 328 |
filler_words = ['um', 'uh', 'like', 'you know', 'so', 'i mean']
|
| 329 |
filler_count = sum(sum(u['text'].lower().count(fw) for fw in filler_words) for u in interviewee_utterances)
|
| 330 |
filler_ratio = filler_count / total_words if total_words > 0 else 0
|
| 331 |
+
all_words = ' '.join(u['text'].lower() for u in interviewee_utterances).split()
|
| 332 |
+
word_counts = {}
|
| 333 |
+
for i in range(len(all_words) - 1):
|
| 334 |
+
bigram = (all_words[i], all_words[i + 1])
|
| 335 |
+
word_counts[bigram] = word_counts.get(bigram, 0) + 1
|
| 336 |
+
repetition_score = sum(1 for count in word_counts.values() if count > 1) / len(word_counts) if word_counts else 0
|
| 337 |
+
pitches = []
|
| 338 |
for segment in segments:
|
| 339 |
+
f0, voiced_flag, _ = librosa.pyin(segment, fmin=80, fmax=300, sr=sr)
|
|
|
|
| 340 |
pitches.extend(f0[voiced_flag])
|
|
|
|
| 341 |
pitch_mean = np.mean(pitches) if len(pitches) > 0 else 0
|
| 342 |
+
pitch_std = np.std(pitches) if len(pitches) > 0 else 0
|
| 343 |
jitter = np.mean(np.abs(np.diff(pitches))) / pitch_mean if len(pitches) > 1 and pitch_mean > 0 else 0
|
| 344 |
+
intensities = []
|
| 345 |
+
for segment in segments:
|
| 346 |
+
rms = librosa.feature.rms(y=segment)[0]
|
| 347 |
+
intensities.extend(rms)
|
| 348 |
+
intensity_mean = np.mean(intensities) if intensities else 0
|
| 349 |
+
intensity_std = np.std(intensities) if intensities else 0
|
| 350 |
+
shimmer = np.mean(np.abs(np.diff(intensities))) / intensity_mean if len(intensities) > 1 and intensity_mean > 0 else 0
|
| 351 |
+
anxiety_score = 0.6 * (pitch_std / pitch_mean) + 0.4 * (jitter + shimmer) if pitch_mean > 0 else 0
|
| 352 |
+
confidence_score = 0.7 * (1 / (1 + intensity_std)) + 0.3 * (1 / (1 + filler_ratio))
|
| 353 |
hesitation_score = filler_ratio + repetition_score
|
| 354 |
+
anxiety_level = 'High' if anxiety_score > 0.15 else 'Moderate' if anxiety_score > 0.07 else 'Low'
|
| 355 |
+
confidence_level = 'High' if confidence_score > 0.7 else 'Moderate' if confidence_score > 0.5 else 'Low'
|
| 356 |
+
fluency_level = 'Fluent' if (filler_ratio < 0.05 and repetition_score < 0.1) else 'Moderate' if (filler_ratio < 0.1 and repetition_score < 0.2) else 'Disfluent'
|
| 357 |
return {
|
| 358 |
+
'speaking_rate': float(round(speaking_rate, 2)),
|
| 359 |
+
'filler_ratio': float(round(filler_ratio, 4)),
|
| 360 |
+
'repetition_score': float(round(repetition_score, 4)),
|
| 361 |
+
'pitch_analysis': {'mean': float(round(pitch_mean, 2)), 'std_dev': float(round(pitch_std, 2)), 'jitter': float(round(jitter, 4))},
|
| 362 |
+
'intensity_analysis': {'mean': float(round(intensity_mean, 2)), 'std_dev': float(round(intensity_std, 2)), 'shimmer': float(round(shimmer, 4))},
|
| 363 |
'composite_scores': {'anxiety': float(round(anxiety_score, 4)), 'confidence': float(round(confidence_score, 4)), 'hesitation': float(round(hesitation_score, 4))},
|
| 364 |
+
'interpretation': {'anxiety_level': anxiety_level, 'confidence_level': confidence_level, 'fluency_level': fluency_level}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 365 |
}
|
| 366 |
except Exception as e:
|
| 367 |
logger.error(f"Voice analysis failed: {str(e)}")
|
| 368 |
return {'error': str(e)}
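To make the composite scores concrete, here is the arithmetic of the two formulas above with illustrative numbers (not from any real interview):

    # Worked example of anxiety_score and confidence_score (made-up inputs).
    pitch_mean, pitch_std = 180.0, 20.0      # Hz
    jitter, shimmer = 0.03, 0.05
    intensity_std, filler_ratio = 0.4, 0.06

    anxiety = 0.6 * (pitch_std / pitch_mean) + 0.4 * (jitter + shimmer)
    confidence = 0.7 * (1 / (1 + intensity_std)) + 0.3 * (1 / (1 + filler_ratio))
    print(round(anxiety, 4), round(confidence, 4))  # 0.0987 0.783

With these inputs the thresholds above classify the speaker as 'Moderate' anxiety (0.07 < 0.0987 <= 0.15) and 'High' confidence (0.783 > 0.7).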

+def generate_voice_interpretation(analysis: Dict) -> str:
+    if 'error' in analysis:
+        return "Voice analysis not available due to processing error."
+    interpretation_lines = [
+        "Voice and Speech Profile:",
+        f"- Speaking Rate: {analysis['speaking_rate']} words/sec - Compared to optimal range (2.0-3.0 words/sec)",
+        f"- Filler Word Usage: {analysis['filler_ratio'] * 100:.1f}% - Frequency of non-content words (e.g., 'um', 'like')",
+        f"- Repetition Tendency: {analysis['repetition_score']:.3f} - Measure of repeated phrases",
+        f"- Anxiety Indicator: {analysis['interpretation']['anxiety_level']} (Score: {analysis['composite_scores']['anxiety']:.3f}) - Based on pitch and voice stability",
+        f"- Confidence Indicator: {analysis['interpretation']['confidence_level']} (Score: {analysis['composite_scores']['confidence']:.3f}) - Derived from vocal consistency",
+        f"- Fluency Assessment: {analysis['interpretation']['fluency_level']} - Reflects speech flow and coherence",
+        "",
+        "HR Insights:",
+        "- Faster speaking rates may indicate confidence but can suggest nervousness if excessive.",
+        "- High filler word usage often reduces perceived professionalism and clarity.",
+        "- Elevated anxiety indicators (pitch variability, jitter) may reflect interview pressure.",
+        "- Strong confidence scores suggest effective vocal presence and control.",
+        "- Fluency impacts listener engagement; disfluency may hinder communication effectiveness."
+    ]
+    return "\n".join(interpretation_lines)
+
+def generate_anxiety_confidence_chart(composite_scores: Dict, chart_path_or_buffer):
+    try:
+        labels = ['Anxiety', 'Confidence']
+        scores = [composite_scores.get('anxiety', 0), composite_scores.get('confidence', 0)]
+        fig, ax = plt.subplots(figsize=(4, 2.5))
+        bars = ax.bar(labels, scores, color=['#FF6B6B', '#4ECDC4'], edgecolor='black')
+        ax.set_ylabel('Score (Normalized)')
+        ax.set_title('Vocal Dynamics: Anxiety vs. Confidence')
+        ax.set_ylim(0, 1.2)
+        for bar in bars:
+            height = bar.get_height()
+            ax.text(bar.get_x() + bar.get_width()/2, height + 0.05, f"{height:.2f}",
+                    ha='center', color='black', fontweight='bold', fontsize=10)
+        plt.tight_layout()
+        plt.savefig(chart_path_or_buffer, format='png', bbox_inches='tight', dpi=150)
+        plt.close(fig)
+    except Exception as e:
+        logger.error(f"Error generating chart: {str(e)}")

def calculate_acceptance_probability(analysis_data: Dict) -> float:
|
|
|
|
| 411 |
voice = analysis_data.get('voice_analysis', {})
|
| 412 |
if 'error' in voice: return 0.0
|
| 413 |
w_confidence, w_anxiety, w_fluency, w_speaking_rate, w_filler_repetition, w_content_strengths = 0.4, -0.3, 0.2, 0.1, -0.1, 0.2
|
| 414 |
confidence_score = voice.get('composite_scores', {}).get('confidence', 0.0)
|
| 415 |
anxiety_score = voice.get('composite_scores', {}).get('anxiety', 0.0)
|
| 416 |
+
fluency_level = voice.get('interpretation', {}).get('fluency_level', 'Disfluent')
|
| 417 |
speaking_rate = voice.get('speaking_rate', 0.0)
|
| 418 |
filler_ratio = voice.get('filler_ratio', 0.0)
|
| 419 |
repetition_score = voice.get('repetition_score', 0.0)
|
| 420 |
+
fluency_map = {'Fluent': 1.0, 'Moderate': 0.5, 'Disfluent': 0.0}
|
| 421 |
fluency_val = fluency_map.get(fluency_level, 0.0)
|
| 422 |
ideal_speaking_rate = 2.5
|
| 423 |
speaking_rate_deviation = abs(speaking_rate - ideal_speaking_rate)
|
|
|
|
| 432 |
acceptance_probability = max(0.0, min(1.0, normalized_score))
|
| 433 |
return float(f"{acceptance_probability * 100:.2f}")
|
| 434 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
 def generate_report(analysis_data: Dict) -> str:
     try:
         voice = analysis_data.get('voice_analysis', {})
+        voice_interpretation = generate_voice_interpretation(voice)
+        interviewee_responses = [f"Speaker {u['speaker']} ({u['role']}): {u['text']}" for u in analysis_data['transcript'] if u['role'] == 'Interviewee'][:5]
+        acceptance_prob = analysis_data.get('acceptance_probability', None)
+        acceptance_line = ""
+        if acceptance_prob is not None:
+            acceptance_line = f"\n**Hiring Potential Score: {acceptance_prob:.2f}%**\n"
+            if acceptance_prob >= 80: acceptance_line += "Assessment: Exceptional candidate, strongly recommended for advancement."
+            elif acceptance_prob >= 50: acceptance_line += "Assessment: Promising candidate with moderate strengths; consider for further evaluation."
+            else: acceptance_line += "Assessment: Limited alignment with role expectations; significant development needed."
         prompt = f"""
+        You are an expert HR consultant, EvalBot, tasked with producing a professional, concise, and actionable interview analysis report. Structure the report with clear headings, subheadings, and bullet points (use '- ' for bullets). Adopt a formal, HR-professional tone, focusing on candidate evaluation, fit for role, and development insights.
+        {acceptance_line}
+        **1. Executive Summary**
+        - Provide a concise overview of the interview, highlighting key metrics and overall candidate performance.
+        - Interview duration: {analysis_data['text_analysis']['total_duration']:.2f} seconds
+        - Total speaker turns: {analysis_data['text_analysis']['speaker_turns']}
+        - Participants: {', '.join(analysis_data['speakers'])}
+        **2. Communication and Vocal Analysis**
+        - Evaluate the candidate's vocal delivery, including speaking rate, fluency, and confidence indicators.
+        - Provide HR-relevant insights into how these metrics impact perceived professionalism and role suitability.
+        {voice_interpretation}
+        **3. Content Analysis and Competency Assessment**
+        - Analyze key themes in the candidate's responses to assess alignment with job competencies (e.g., problem-solving, communication, leadership).
+        - Identify strengths and areas for improvement, supported by specific examples.
+        - Sample responses for context:
+        {chr(10).join(interviewee_responses)}
+        **4. Fit and Potential Evaluation**
+        - Assess the candidate's overall fit for a typical professional role based on communication, content, and vocal dynamics.
+        - Consider cultural fit, adaptability, and readiness for the role.
+        **5. Actionable HR Recommendations**
+        - Provide specific, prioritized recommendations for the candidate's development.
+        - Focus areas: Effective Communication, Content Clarity and Depth, Professional Presence.
+        - Suggest next steps for hiring managers (e.g., advance to next round, additional assessments, training focus).
         """
         response = gemini_model.generate_content(prompt)
         return response.text

@@ -334,63 +475,156 @@ def generate_report(analysis_data: Dict) -> str:
         logger.error(f"Report generation failed: {str(e)}")
         return f"Error generating report: {str(e)}"

 def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text: str):
     try:
+        doc = SimpleDocTemplate(output_path, pagesize=letter,
+                                rightMargin=0.75*inch, leftMargin=0.75*inch,
+                                topMargin=1*inch, bottomMargin=1*inch)
         styles = getSampleStyleSheet()
+        h1 = ParagraphStyle(name='Heading1', fontSize=22, leading=26, spaceAfter=20, alignment=1, textColor=colors.HexColor('#1A3C5E'))
+        h2 = ParagraphStyle(name='Heading2', fontSize=14, leading=18, spaceBefore=14, spaceAfter=8, textColor=colors.HexColor('#2E5A87'))
+        body_text = ParagraphStyle(name='BodyText', parent=styles['Normal'], fontSize=10, leading=14, spaceAfter=8, fontName='Helvetica')
+        bullet_style = ParagraphStyle(name='Bullet', parent=body_text, leftIndent=20, bulletIndent=10, fontName='Helvetica')
+
         story = []
+
         def header_footer(canvas, doc):
             canvas.saveState()
             canvas.setFont('Helvetica', 9)
             canvas.setFillColor(colors.grey)
+            canvas.drawString(doc.leftMargin, 0.5 * inch, f"Page {doc.page} | EvalBot HR Interview Report | Confidential")
+            canvas.setStrokeColor(colors.HexColor('#2E5A87'))
+            canvas.setLineWidth(1)
+            canvas.line(doc.leftMargin, doc.height + 0.85*inch, doc.width + doc.leftMargin, doc.height + 0.85*inch)
             canvas.setFont('Helvetica-Bold', 10)
+            canvas.drawString(doc.leftMargin, doc.height + 0.9*inch, "Candidate Interview Analysis Report")
             canvas.restoreState()

+        # Title Page
+        story.append(Paragraph("Candidate Interview Analysis Report", h1))
+        story.append(Paragraph(f"Generated on: {time.strftime('%B %d, %Y')}", ParagraphStyle(name='Date', alignment=1, fontSize=10, textColor=colors.grey)))
+        story.append(Spacer(1, 0.5 * inch))
+        acceptance_prob = analysis_data.get('acceptance_probability')
+        if acceptance_prob is not None:
+            story.append(Paragraph("Hiring Potential Snapshot", h2))
+            prob_color = colors.HexColor('#2E7D32') if acceptance_prob >= 70 else (colors.HexColor('#F57C00') if acceptance_prob >= 40 else colors.HexColor('#D32F2F'))
+            story.append(Paragraph(f"Hiring Potential Score: <font size=16 color='{prob_color.hexval()}'><b>{acceptance_prob:.2f}%</b></font>",
+                                   ParagraphStyle(name='Prob', fontSize=12, spaceAfter=12, alignment=1)))
+            if acceptance_prob >= 80:
+                story.append(Paragraph("<b>HR Assessment:</b> Exceptional candidate, strongly recommended for advancement to the next stage.", body_text))
+            elif acceptance_prob >= 50:
+                story.append(Paragraph("<b>HR Assessment:</b> Promising candidate with moderate strengths; consider for further evaluation.", body_text))
+            else:
+                story.append(Paragraph("<b>HR Assessment:</b> Limited alignment with role expectations; significant development needed.", body_text))
+        story.append(Spacer(1, 0.3 * inch))
+        story.append(Paragraph("Prepared by: EvalBot - AI-Powered HR Interview Analysis System", body_text))
+        story.append(PageBreak())
+
+        # Detailed Analysis
+        story.append(Paragraph("Detailed Candidate Evaluation", h1))
+
+        story.append(Paragraph("1. Communication and Vocal Profile", h2))
+        voice_analysis = analysis_data.get('voice_analysis', {})
+        if voice_analysis and 'error' not in voice_analysis:
+            table_data = [
+                ['Metric', 'Value', 'HR Insight'],
+                ['Speaking Rate', f"{voice_analysis.get('speaking_rate', 0):.2f} words/sec", 'Optimal: 2.0-3.0 wps; impacts clarity and confidence'],
+                ['Filler Word Usage', f"{voice_analysis.get('filler_ratio', 0) * 100:.1f}%", 'High usage may reduce perceived professionalism'],
+                ['Anxiety Indicator', voice_analysis.get('interpretation', {}).get('anxiety_level', 'N/A'), f"Score: {voice_analysis.get('composite_scores', {}).get('anxiety', 0):.3f}; reflects pressure response"],
+                ['Confidence Indicator', voice_analysis.get('interpretation', {}).get('confidence_level', 'N/A'), f"Score: {voice_analysis.get('composite_scores', {}).get('confidence', 0):.3f}; indicates vocal authority"],
+                ['Fluency Assessment', voice_analysis.get('interpretation', {}).get('fluency_level', 'N/A'), 'Affects engagement and message delivery']
+            ]
+            table = Table(table_data, colWidths=[1.8*inch, 1.2*inch, 3.5*inch])
+            table.setStyle(TableStyle([
+                ('BACKGROUND', (0,0), (-1,0), colors.HexColor('#2E5A87')),
+                ('TEXTCOLOR', (0,0), (-1,0), colors.whitesmoke),
+                ('ALIGN', (0,0), (-1,-1), 'LEFT'),
+                ('VALIGN', (0,0), (-1,-1), 'MIDDLE'),
+                ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
+                ('FONTSIZE', (0, 0), (-1, -1), 9),
+                ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
+                ('TOPPADDING', (0, 0), (-1, 0), 12),
+                ('BACKGROUND', (0, 1), (-1, -1), colors.HexColor('#F5F7FA')),
+                ('GRID', (0,0), (-1,-1), 1, colors.HexColor('#DDE4EB'))
+            ]))
+            story.append(table)
+            story.append(Spacer(1, 0.25 * inch))
+            chart_buffer = io.BytesIO()
+            generate_anxiety_confidence_chart(voice_analysis.get('composite_scores', {}), chart_buffer)
+            chart_buffer.seek(0)
+            img = Image(chart_buffer, width=4.5*inch, height=2.8*inch)
+            img.hAlign = 'CENTER'
+            story.append(img)
+        else:
+            story.append(Paragraph("Voice analysis unavailable due to processing limitations.", body_text))
+        story.append(Spacer(1, 0.3 * inch))
+
+        # Parse Gemini Report
+        sections = {}
+        section_titles = ["Executive Summary", "Communication and Vocal Analysis",
+                          "Content Analysis and Competency Assessment",
+                          "Fit and Potential Evaluation", "Actionable HR Recommendations"]
+        for title in section_titles:
+            sections[title] = []
+        report_parts = re.split(r'(\*\*\s*\d\.\s*.*?\s*\*\*)', gemini_report_text)
+        current_section = None
+        for part in report_parts:
+            if not part.strip(): continue
+            is_heading = False
+            for title in section_titles:
+                if title.lower() in part.lower():
+                    current_section = title
+                    is_heading = True
+                    break
+            if not is_heading and current_section:
+                sections[current_section].append(part.strip())
+
+        # Executive Summary
+        story.append(Paragraph("2. Executive Summary", h2))
+        if sections['Executive Summary']:
+            for line in sections['Executive Summary']:
+                if line.startswith(('-', '•', '*')):
+                    story.append(Paragraph(line.lstrip('-•* ').strip(), bullet_style))
+                else:
+                    story.append(Paragraph(line, body_text))
+        else:
+            story.append(Paragraph("Summary not available from analysis.", body_text))
+        story.append(Spacer(1, 0.3 * inch))
+
+        # Content and Competency
+        story.append(Paragraph("3. Content and Competency Assessment", h2))
+        if sections['Content Analysis and Competency Assessment']:
+            for line in sections['Content Analysis and Competency Assessment']:
+                if line.startswith(('-', '•', '*')):
+                    story.append(Paragraph(line.lstrip('-•* ').strip(), bullet_style))
+                else:
+                    story.append(Paragraph(line, body_text))
+        else:
+            story.append(Paragraph("Content and competency analysis not provided.", body_text))
+        story.append(PageBreak())
+
+        # Fit and Potential
+        story.append(Paragraph("4. Fit and Potential Evaluation", h2))
+        if sections['Fit and Potential Evaluation']:
+            for line in sections['Fit and Potential Evaluation']:
+                if line.startswith(('-', '•', '*')):
+                    story.append(Paragraph(line.lstrip('-•* ').strip(), bullet_style))
+                else:
+                    story.append(Paragraph(line, body_text))
+        else:
+            story.append(Paragraph("Fit and potential evaluation not available.", body_text))
+        story.append(Spacer(1, 0.3 * inch))
+
+        # HR Recommendations
+        story.append(Paragraph("5. Actionable HR Recommendations", h2))
+        if sections['Actionable HR Recommendations']:
+            for line in sections['Actionable HR Recommendations']:
+                if line.startswith(('-', '•', '*')):
+                    story.append(Paragraph(line.lstrip('-•* ').strip(), bullet_style))
+                else:
+                    story.append(Paragraph(line, body_text))
+        else:
+            story.append(Paragraph("HR recommendations not provided.", body_text))

         doc.build(story, onFirstPage=header_footer, onLaterPages=header_footer)
         return True

@@ -398,81 +632,64 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text: str):
         logger.error(f"Enhanced PDF creation failed: {str(e)}", exc_info=True)
         return False

+
def convert_to_serializable(obj):
|
| 636 |
+
if isinstance(obj, np.generic): return obj.item()
|
| 637 |
+
if isinstance(obj, dict): return {k: convert_to_serializable(v) for k, v in obj.items()}
|
| 638 |
+
if isinstance(obj, list): return [convert_to_serializable(i) for i in obj]
|
| 639 |
+
if isinstance(obj, np.ndarray): return obj.tolist()
|
| 640 |
+
return obj
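This helper exists because json.dump cannot serialize the NumPy scalars and arrays that the librosa-based analysis scatters throughout analysis_data; a quick illustration with made-up values:

    # Sketch: NumPy types are recursively converted to plain Python for JSON.
    import json
    import numpy as np

    payload = {'pitch': np.float32(182.4), 'embedding': np.zeros(3)}
    print(json.dumps(convert_to_serializable(payload)))  # plain floats and lists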

 def process_interview(audio_path_or_url: str):
+    local_audio_path = None
+    wav_file = None
+    is_downloaded = False
     try:
         logger.info(f"Starting processing for {audio_path_or_url}")
         if audio_path_or_url.startswith(('http://', 'https://')):
             local_audio_path = download_audio_from_url(audio_path_or_url)
             is_downloaded = True
         else:
             local_audio_path = audio_path_or_url
         wav_file = convert_to_wav(local_audio_path)
         transcript = transcribe(wav_file)
+        for utterance in transcript['utterances']:
+            utterance['prosodic_features'] = extract_prosodic_features(wav_file, utterance['start'], utterance['end'])
         utterances_with_speakers = identify_speakers(transcript, wav_file)
+        clf, vectorizer, scaler = None, None, None
+        if os.path.exists(os.path.join(OUTPUT_DIR, 'role_classifier.pkl')):
+            clf = joblib.load(os.path.join(OUTPUT_DIR, 'role_classifier.pkl'))
+            vectorizer = joblib.load(os.path.join(OUTPUT_DIR, 'text_vectorizer.pkl'))
+            scaler = joblib.load(os.path.join(OUTPUT_DIR, 'feature_scaler.pkl'))
+        else:
+            clf, vectorizer, scaler = train_role_classifier(utterances_with_speakers)
+        classified_utterances = classify_roles(utterances_with_speakers, clf, vectorizer, scaler)
         voice_analysis = analyze_interviewee_voice(wav_file, classified_utterances)
         analysis_data = {
             'transcript': classified_utterances,
             'speakers': list(set(u['speaker'] for u in classified_utterances)),
             'voice_analysis': voice_analysis,
             'text_analysis': {
                 'total_duration': sum(u['prosodic_features']['duration'] for u in classified_utterances),
                 'speaker_turns': len(classified_utterances)
             }
         }
         analysis_data['acceptance_probability'] = calculate_acceptance_probability(analysis_data)
         gemini_report_text = generate_report(analysis_data)
         base_name = str(uuid.uuid4())
         pdf_path = os.path.join(OUTPUT_DIR, f"{base_name}_report.pdf")
         json_path = os.path.join(OUTPUT_DIR, f"{base_name}_analysis.json")
+        create_pdf_report(analysis_data, pdf_path, gemini_report_text=gemini_report_text)
         with open(json_path, 'w') as f:
+            serializable_data = convert_to_serializable(analysis_data)
+            json.dump(serializable_data, f, indent=2)
         logger.info(f"Processing completed for {audio_path_or_url}")
+        return {'pdf_path': pdf_path, 'json_path': json_path}
     except Exception as e:
         logger.error(f"Processing failed for {audio_path_or_url}: {str(e)}", exc_info=True)
         raise
     finally:
+        if wav_file and os.path.exists(wav_file):
+            os.remove(wav_file)
         if is_downloaded and local_audio_path and os.path.exists(local_audio_path):
             os.remove(local_audio_path)
             logger.info(f"Cleaned up temporary downloaded file: {local_audio_path}")
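End to end, the module expects PINECONE_KEY, ASSEMBLYAI_KEY, and GEMINI_API_KEY in the environment and is driven through a single call. A minimal driver sketch; the audio path is hypothetical, and remote URLs work equally well since downloaded files are removed in the finally block:

    # Sketch only: assumes the three API keys above are exported and that
    # ./interview.mp3 (hypothetical) exists locally.
    result = process_interview("./interview.mp3")
    print(result['pdf_path'], result['json_path'])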