Update process_interview.py

process_interview.py  CHANGED  (+261 −423)
@@ -17,60 +17,56 @@ from sklearn.feature_extraction.text import TfidfVectorizer
import re
from typing import Dict, List, Tuple
import logging
-
from reportlab.lib.pagesizes import letter
-from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.lib import colors
-
-
-matplotlib.use('Agg')
-from reportlab.platypus import Image
-import io
from transformers import AutoTokenizer, AutoModel
import spacy
import google.generativeai as genai
import joblib
from concurrent.futures import ThreadPoolExecutor

# Setup logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(
logging.getLogger("nemo_logging").setLevel(logging.ERROR)
-logging.getLogger("nemo").setLevel(logging.ERROR)

# Configuration
AUDIO_DIR = "./uploads"
OUTPUT_DIR = "./processed_audio"
os.makedirs(OUTPUT_DIR, exist_ok=True)

-# API Keys
PINECONE_KEY = os.getenv("PINECONE_KEY")
ASSEMBLYAI_KEY = os.getenv("ASSEMBLYAI_KEY")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

-
-"
-
-
-
-
-
-        r.raise_for_status()
-        with open(temp_path, 'wb') as f:
-            for chunk in r.iter_content(chunk_size=8192):
-                f.write(chunk)
-        return temp_path
-    except Exception as e:
-        logger.error(f"Failed to download audio from URL {url}: {e}")
-        raise

def initialize_services():
    try:
        pc = Pinecone(api_key=PINECONE_KEY)
        index_name = "interview-speaker-embeddings"
        if index_name not in pc.list_indexes().names():
            pc.create_index(
                name=index_name,
                dimension=192,
@@ -80,9 +76,10 @@ def initialize_services():
        index = pc.Index(index_name)
        genai.configure(api_key=GEMINI_API_KEY)
        gemini_model = genai.GenerativeModel('gemini-1.5-flash')
        return index, gemini_model
    except Exception as e:
-        logger.error(f"Error initializing services: {str(e)}")
        raise

index, gemini_model = initialize_services()
@@ -90,29 +87,31 @@ index, gemini_model = initialize_services()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
logger.info(f"Using device: {device}")

-def
    try:
-
-
-
            "nvidia/speakerverification_en_titanet_large",
            map_location=torch.device('cpu')
        )
-
-
    except Exception as e:
-        logger.error(f"Model loading failed: {str(e)}")
-        raise RuntimeError("Could not load

-def load_models():
-    speaker_model = load_speaker_model()
-    nlp = spacy.load("en_core_web_sm")
-    tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
-    llm_model = AutoModel.from_pretrained("distilbert-base-uncased").to(device)
-    llm_model.eval()
-    return speaker_model, nlp, tokenizer, llm_model

-

def convert_to_wav(audio_path: str, output_dir: str = OUTPUT_DIR) -> str:
    try:
@@ -124,7 +123,7 @@ def convert_to_wav(audio_path: str, output_dir: str = OUTPUT_DIR) -> str:
        audio.export(wav_file, format="wav")
        return wav_file
    except Exception as e:
-        logger.error(f"Audio conversion failed: {str(e)}")
        raise

def extract_prosodic_features(audio_path: str, start_ms: int, end_ms: int) -> Dict:
@@ -150,11 +149,10 @@ def extract_prosodic_features(audio_path: str, start_ms: int, end_ms: int) -> Dict:
        os.remove(temp_path)
        return features
    except Exception as e:
-        logger.
        return {
-            'duration':
-            'pitch_sd': 0.0, 'intensityMean': 0.0, 'intensityMin': 0.0,
-            'intensityMax': 0.0, 'intensitySD': 0.0
        }

def transcribe(audio_path: str) -> Dict:
@@ -162,127 +160,138 @@ def transcribe(audio_path: str) -> Dict:
        with open(audio_path, 'rb') as f:
            upload_response = requests.post(
                "https://api.assemblyai.com/v2/upload",
-                headers={"authorization": ASSEMBLYAI_KEY},
-                data=f
            )
-
        transcript_response = requests.post(
            "https://api.assemblyai.com/v2/transcript",
            headers={"authorization": ASSEMBLYAI_KEY},
-            json={
-                "audio_url": audio_url,
-                "speaker_labels": True,
-                "filter_profanity": True
-            }
        )
        transcript_id = transcript_response.json()['id']
        while True:
-
                f"https://api.assemblyai.com/v2/transcript/{transcript_id}",
                headers={"authorization": ASSEMBLYAI_KEY}
-            )
            if result['status'] == 'completed':
                return result
            elif result['status'] == 'error':
-                raise Exception(result['error'])
            time.sleep(5)
    except Exception as e:
-        logger.error(f"Transcription failed: {str(e)}")
        raise

-def process_utterance(utterance, full_audio
    try:
        start = utterance['start']
        end = utterance['end']
        segment = full_audio[start:end]
-        temp_path = os.path.join(OUTPUT_DIR, f"
        segment.export(temp_path, format="wav")
        with torch.no_grad():
-            embedding = speaker_model.get_embedding(temp_path).
-
        query_result = index.query(
-            vector=
-            top_k=1,
-            include_metadata=True
        )
        if query_result['matches'] and query_result['matches'][0]['score'] > 0.7:
            speaker_id = query_result['matches'][0]['id']
            speaker_name = query_result['matches'][0]['metadata']['speaker_name']
        else:
            speaker_id = f"unknown_{uuid.uuid4().hex[:6]}"
            speaker_name = f"Speaker_{speaker_id[-4:]}"
-            index.upsert([(speaker_id,
        os.remove(temp_path)
        return {
-            **utterance,
-            'speaker': speaker_name,
-            'speaker_id': speaker_id,
-            'embedding': embedding_list
        }
    except Exception as e:
-        logger.
-        return {
-            **utterance,
-            'speaker': 'Unknown',
-            'speaker_id': 'unknown',
-            'embedding': None
-        }

def identify_speakers(transcript: Dict, wav_file: str) -> List[Dict]:
    try:
        full_audio = AudioSegment.from_wav(wav_file)
        utterances = transcript['utterances']
-
-
-
-            for utterance in utterances
-        ]
        results = [f.result() for f in futures]
        return results
    except Exception as e:
-        logger.error(f"Speaker identification failed: {str(e)}")
        raise

def train_role_classifier(utterances: List[Dict]):
    try:
        texts = [u['text'] for u in utterances]
        vectorizer = TfidfVectorizer(max_features=500, ngram_range=(1, 2))
        X_text = vectorizer.fit_transform(texts)
-        features = []
-
        for i, utterance in enumerate(utterances):
            prosodic = utterance['prosodic_features']
            feat = [
                prosodic['duration'], prosodic['mean_pitch'], prosodic['min_pitch'],
-                prosodic['
-                prosodic['intensityMin'], prosodic['intensityMax'], prosodic['intensitySD'],
            ]
            feat.extend(X_text[i].toarray()[0].tolist())
            doc = nlp(utterance['text'])
            feat.extend([
                int(utterance['text'].endswith('?')),
-                len(re.findall(r'\b(why|how|what|when|where
                len(utterance['text'].split()),
                sum(1 for token in doc if token.pos_ == 'VERB'),
-                sum(1 for token in doc if token.pos_ == 'NOUN')
            ])
            features.append(feat)
-            labels.append(
        scaler = StandardScaler()
        X = scaler.fit_transform(features)
-        clf = RandomForestClassifier(
-            n_estimators=150, max_depth=10, random_state=42, class_weight='balanced'
-        )
        clf.fit(X, labels)
        joblib.dump(clf, os.path.join(OUTPUT_DIR, 'role_classifier.pkl'))
        joblib.dump(vectorizer, os.path.join(OUTPUT_DIR, 'text_vectorizer.pkl'))
        joblib.dump(scaler, os.path.join(OUTPUT_DIR, 'feature_scaler.pkl'))
        return clf, vectorizer, scaler
    except Exception as e:
-        logger.error(f"Classifier training failed: {str(e)}")
        raise

def classify_roles(utterances: List[Dict], clf, vectorizer, scaler):
    try:
        texts = [u['text'] for u in utterances]
        X_text = vectorizer.transform(texts)
@@ -291,405 +300,234 @@ def classify_roles(utterances: List[Dict], clf, vectorizer, scaler):
|
|
| 291 |
prosodic = utterance['prosodic_features']
|
| 292 |
feat = [
|
| 293 |
prosodic['duration'], prosodic['mean_pitch'], prosodic['min_pitch'],
|
| 294 |
-
prosodic['
|
| 295 |
-
prosodic['intensityMin'], prosodic['intensityMax'], prosodic['intensitySD'],
|
| 296 |
]
|
| 297 |
feat.extend(X_text[i].toarray()[0].tolist())
|
| 298 |
doc = nlp(utterance['text'])
|
| 299 |
feat.extend([
|
| 300 |
int(utterance['text'].endswith('?')),
|
| 301 |
-
len(re.findall(r'\b(why|how|what|when|where
|
| 302 |
len(utterance['text'].split()),
|
| 303 |
sum(1 for token in doc if token.pos_ == 'VERB'),
|
| 304 |
-
sum(1 for token in doc if token.pos_ == 'NOUN')
|
| 305 |
])
|
| 306 |
X = scaler.transform([feat])
|
| 307 |
role = 'Interviewer' if clf.predict(X)[0] == 0 else 'Interviewee'
|
| 308 |
results.append({**utterance, 'role': role})
|
| 309 |
return results
|
| 310 |
except Exception as e:
|
| 311 |
-
logger.error(f"Role classification failed: {str(e)}")
|
| 312 |
-
|
|
|
|
| 313 |
|
| 314 |
-
def analyze_interviewee_voice(
|
|
|
|
| 315 |
try:
|
| 316 |
-
|
| 317 |
-
interviewee_utterances = [u for u in utterances if u['role'] == 'Interviewee']
|
| 318 |
if not interviewee_utterances:
|
| 319 |
-
return {'error': 'No interviewee utterances found'}
|
| 320 |
-
|
| 321 |
-
for u in interviewee_utterances:
|
| 322 |
-
start = int(u['start'] * sr / 1000)
|
| 323 |
-
end = int(u['end'] * sr / 1000)
|
| 324 |
-
segments.append(y[start:end])
|
| 325 |
total_duration = sum(u['prosodic_features']['duration'] for u in interviewee_utterances)
|
| 326 |
total_words = sum(len(u['text'].split()) for u in interviewee_utterances)
|
| 327 |
speaking_rate = total_words / total_duration if total_duration > 0 else 0
|
|
|
|
| 328 |
filler_words = ['um', 'uh', 'like', 'you know', 'so', 'i mean']
|
| 329 |
-
filler_count = sum(
|
| 330 |
filler_ratio = filler_count / total_words if total_words > 0 else 0
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
f0, voiced_flag, _ = librosa.pyin(segment, fmin=80, fmax=300, sr=sr)
|
| 340 |
-
pitches.extend(f0[voiced_flag])
|
| 341 |
-
pitch_mean = np.mean(pitches) if len(pitches) > 0 else 0
|
| 342 |
-
pitch_std = np.std(pitches) if len(pitches) > 0 else 0
|
| 343 |
-
jitter = np.mean(np.abs(np.diff(pitches))) / pitch_mean if len(pitches) > 1 and pitch_mean > 0 else 0
|
| 344 |
-
intensities = []
|
| 345 |
-
for segment in segments:
|
| 346 |
-
rms = librosa.feature.rms(y=segment)[0]
|
| 347 |
-
intensities.extend(rms)
|
| 348 |
-
intensity_mean = np.mean(intensities) if intensities else 0
|
| 349 |
-
intensity_std = np.std(intensities) if intensities else 0
|
| 350 |
-
shimmer = np.mean(np.abs(np.diff(intensities))) / intensity_mean if len(intensities) > 1 and intensity_mean > 0 else 0
|
| 351 |
-
anxiety_score = 0.6 * (pitch_std / pitch_mean) + 0.4 * (jitter + shimmer) if pitch_mean > 0 else 0
|
| 352 |
-
confidence_score = 0.7 * (1 / (1 + intensity_std)) + 0.3 * (1 / (1 + filler_ratio))
|
| 353 |
-
hesitation_score = filler_ratio + repetition_score
|
| 354 |
-
anxiety_level = 'High' if anxiety_score > 0.15 else 'Moderate' if anxiety_score > 0.07 else 'Low'
|
| 355 |
-
confidence_level = 'High' if confidence_score > 0.7 else 'Moderate' if confidence_score > 0.5 else 'Low'
|
| 356 |
-
fluency_level = 'Fluent' if (filler_ratio < 0.05 and repetition_score < 0.1) else 'Moderate' if (filler_ratio < 0.1 and repetition_score < 0.2) else 'Disfluent'
|
| 357 |
return {
|
| 358 |
'speaking_rate': float(round(speaking_rate, 2)),
|
| 359 |
'filler_ratio': float(round(filler_ratio, 4)),
|
| 360 |
-
'
|
| 361 |
-
'
|
| 362 |
-
'
|
| 363 |
-
|
| 364 |
-
|
|
|
|
| 365 |
}
|
| 366 |
except Exception as e:
|
| 367 |
-
logger.error(f"Voice analysis failed: {str(e)}")
|
| 368 |
return {'error': str(e)}
|
| 369 |
|
| 370 |
-
def
|
| 371 |
-
|
| 372 |
-
return "Voice analysis not available due to processing error."
|
| 373 |
-
interpretation_lines = [
|
| 374 |
-
"Voice and Speech Profile:",
|
| 375 |
-
f"- Speaking Rate: {analysis['speaking_rate']} words/sec - Compared to optimal range (2.0-3.0 words/sec)",
|
| 376 |
-
f"- Filler Word Usage: {analysis['filler_ratio'] * 100:.1f}% - Frequency of non-content words (e.g., 'um', 'like')",
|
| 377 |
-
f"- Repetition Tendency: {analysis['repetition_score']:.3f} - Measure of repeated phrases",
|
| 378 |
-
f"- Anxiety Indicator: {analysis['interpretation']['anxiety_level']} (Score: {analysis['composite_scores']['anxiety']:.3f}) - Based on pitch and voice stability",
|
| 379 |
-
f"- Confidence Indicator: {analysis['interpretation']['confidence_level']} (Score: {analysis['composite_scores']['confidence']:.3f}) - Derived from vocal consistency",
|
| 380 |
-
f"- Fluency Assessment: {analysis['interpretation']['fluency_level']} - Reflects speech flow and coherence",
|
| 381 |
-
"",
|
| 382 |
-
"HR Insights:",
|
| 383 |
-
"- Faster speaking rates may indicate confidence but can suggest nervousness if excessive.",
|
| 384 |
-
"- High filler word usage often reduces perceived professionalism and clarity.",
|
| 385 |
-
"- Elevated anxiety indicators (pitch variability, jitter) may reflect interview pressure.",
|
| 386 |
-
"- Strong confidence scores suggest effective vocal presence and control.",
|
| 387 |
-
"- Fluency impacts listener engagement; disfluency may hinder communication effectiveness."
|
| 388 |
-
]
|
| 389 |
-
return "\n".join(interpretation_lines)
|
| 390 |
-
|
| 391 |
-
def generate_anxiety_confidence_chart(composite_scores: Dict, chart_path_or_buffer):
|
| 392 |
-
try:
|
| 393 |
-
labels = ['Anxiety', 'Confidence']
|
| 394 |
-
scores = [composite_scores.get('anxiety', 0), composite_scores.get('confidence', 0)]
|
| 395 |
-
fig, ax = plt.subplots(figsize=(4, 2.5))
|
| 396 |
-
bars = ax.bar(labels, scores, color=['#FF6B6B', '#4ECDC4'], edgecolor='black')
|
| 397 |
-
ax.set_ylabel('Score (Normalized)')
|
| 398 |
-
ax.set_title('Vocal Dynamics: Anxiety vs. Confidence')
|
| 399 |
-
ax.set_ylim(0, 1.2)
|
| 400 |
-
for bar in bars:
|
| 401 |
-
height = bar.get_height()
|
| 402 |
-
ax.text(bar.get_x() + bar.get_width()/2, height + 0.05, f"{height:.2f}",
|
| 403 |
-
ha='center', color='black', fontweight='bold', fontsize=10)
|
| 404 |
-
plt.tight_layout()
|
| 405 |
-
plt.savefig(chart_path_or_buffer, format='png', bbox_inches='tight', dpi=150)
|
| 406 |
-
plt.close(fig)
|
| 407 |
-
except Exception as e:
|
| 408 |
-
logger.error(f"Error generating chart: {str(e)}")
|
| 409 |
-
|
| 410 |
-
def calculate_acceptance_probability(analysis_data: Dict) -> float:
|
| 411 |
-
voice = analysis_data.get('voice_analysis', {})
|
| 412 |
-
if 'error' in voice: return 0.0
|
| 413 |
-
w_confidence, w_anxiety, w_fluency, w_speaking_rate, w_filler_repetition, w_content_strengths = 0.4, -0.3, 0.2, 0.1, -0.1, 0.2
|
| 414 |
-
confidence_score = voice.get('composite_scores', {}).get('confidence', 0.0)
|
| 415 |
-
anxiety_score = voice.get('composite_scores', {}).get('anxiety', 0.0)
|
| 416 |
-
fluency_level = voice.get('interpretation', {}).get('fluency_level', 'Disfluent')
|
| 417 |
-
speaking_rate = voice.get('speaking_rate', 0.0)
|
| 418 |
-
filler_ratio = voice.get('filler_ratio', 0.0)
|
| 419 |
-
repetition_score = voice.get('repetition_score', 0.0)
|
| 420 |
-
fluency_map = {'Fluent': 1.0, 'Moderate': 0.5, 'Disfluent': 0.0}
|
| 421 |
-
fluency_val = fluency_map.get(fluency_level, 0.0)
|
| 422 |
-
ideal_speaking_rate = 2.5
|
| 423 |
-
speaking_rate_deviation = abs(speaking_rate - ideal_speaking_rate)
|
| 424 |
-
speaking_rate_score = max(0, 1 - (speaking_rate_deviation / ideal_speaking_rate))
|
| 425 |
-
filler_repetition_composite = (filler_ratio + repetition_score) / 2
|
| 426 |
-
filler_repetition_score = max(0, 1 - filler_repetition_composite)
|
| 427 |
-
content_strength_val = 0.8 if analysis_data.get('text_analysis', {}).get('total_duration', 0) > 0 else 0.0
|
| 428 |
-
raw_score = (confidence_score * w_confidence + (1 - anxiety_score) * abs(w_anxiety) + fluency_val * w_fluency + speaking_rate_score * w_speaking_rate + filler_repetition_score * abs(w_filler_repetition) + content_strength_val * w_content_strengths)
|
| 429 |
-
max_possible_score = (w_confidence + abs(w_anxiety) + w_fluency + w_speaking_rate + abs(w_filler_repetition) + w_content_strengths)
|
| 430 |
-
if max_possible_score == 0: return 50.0
|
| 431 |
-
normalized_score = raw_score / max_possible_score
|
| 432 |
-
acceptance_probability = max(0.0, min(1.0, normalized_score))
|
| 433 |
-
return float(f"{acceptance_probability * 100:.2f}")
|
| 434 |
-
|
| 435 |
-
def generate_report(analysis_data: Dict) -> str:
|
| 436 |
try:
|
| 437 |
voice = analysis_data.get('voice_analysis', {})
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
acceptance_prob = analysis_data.get('acceptance_probability', None)
|
| 441 |
-
acceptance_line = ""
|
| 442 |
-
if acceptance_prob is not None:
|
| 443 |
-
acceptance_line = f"\n*Hiring Potential Score: {acceptance_prob:.2f}%*\n"
|
| 444 |
-
if acceptance_prob >= 80: acceptance_line += "Assessment: Exceptional candidate, strongly recommended for advancement."
|
| 445 |
-
elif acceptance_prob >= 50: acceptance_line += "Assessment: Promising candidate with moderate strengths; consider for further evaluation."
|
| 446 |
-
else: acceptance_line += "Assessment: Limited alignment with role expectations; significant development needed."
|
| 447 |
prompt = f"""
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
-
|
| 452 |
-
-
|
| 453 |
-
-
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
-
|
| 457 |
-
-
|
| 458 |
-
{
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
-
|
| 469 |
-
- Focus areas: Effective Communication, Content Clarity and Depth, Professional Presence.
|
| 470 |
-
- Suggest next steps for hiring managers (e.g., advance to next round, additional assessments, training focus).
|
| 471 |
"""
|
| 472 |
response = gemini_model.generate_content(prompt)
|
| 473 |
return response.text
|
| 474 |
except Exception as e:
|
| 475 |
-
logger.error(f"Report generation failed: {str(e)}")
|
| 476 |
return f"Error generating report: {str(e)}"
|
| 477 |
|
| 478 |
def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text: str):
|
|
|
|
| 479 |
try:
|
| 480 |
-
doc = SimpleDocTemplate(output_path, pagesize=letter
|
| 481 |
-
rightMargin=0.75*inch, leftMargin=0.75*inch,
|
| 482 |
-
topMargin=1*inch, bottomMargin=1*inch)
|
| 483 |
styles = getSampleStyleSheet()
|
| 484 |
-
h1 = ParagraphStyle(name='Heading1', fontSize=22, leading=26, spaceAfter=20, alignment=1, textColor=colors.HexColor('#1A3C5E'))
|
| 485 |
-
h2 = ParagraphStyle(name='Heading2', fontSize=14, leading=18, spaceBefore=14, spaceAfter=8, textColor=colors.HexColor('#2E5A87'))
|
| 486 |
-
body_text = ParagraphStyle(name='BodyText', parent=styles['Normal'], fontSize=10, leading=14, spaceAfter=8, fontName='Helvetica')
|
| 487 |
-
bullet_style = ParagraphStyle(name='Bullet', parent=body_text, leftIndent=20, bulletIndent=10, fontName='Helvetica')
|
| 488 |
-
|
| 489 |
story = []
|
| 490 |
-
|
| 491 |
-
def header_footer(canvas, doc):
|
| 492 |
-
canvas.saveState()
|
| 493 |
-
canvas.setFont('Helvetica', 9)
|
| 494 |
-
canvas.setFillColor(colors.grey)
|
| 495 |
-
canvas.drawString(doc.leftMargin, 0.5 * inch, f"Page {doc.page} | EvalBot HR Interview Report | Confidential")
|
| 496 |
-
canvas.setStrokeColor(colors.HexColor('#2E5A87'))
|
| 497 |
-
canvas.setLineWidth(1)
|
| 498 |
-
canvas.line(doc.leftMargin, doc.height + 0.85*inch, doc.width + doc.leftMargin, doc.height + 0.85*inch)
|
| 499 |
-
canvas.setFont('Helvetica-Bold', 10)
|
| 500 |
-
canvas.drawString(doc.leftMargin, doc.height + 0.9*inch, "Candidate Interview Analysis Report")
|
| 501 |
-
canvas.restoreState()
|
| 502 |
-
|
| 503 |
-
# Title Page
|
| 504 |
-
story.append(Paragraph("Candidate Interview Analysis Report", h1))
|
| 505 |
-
story.append(Paragraph(f"Generated on: {time.strftime('%B %d, %Y')}", ParagraphStyle(name='Date', alignment=1, fontSize=10, textColor=colors.grey)))
|
| 506 |
-
story.append(Spacer(1, 0.5 * inch))
|
| 507 |
-
acceptance_prob = analysis_data.get('acceptance_probability')
|
| 508 |
-
if acceptance_prob is not None:
|
| 509 |
-
story.append(Paragraph("Hiring Potential Snapshot", h2))
|
| 510 |
-
prob_color = colors.HexColor('#2E7D32') if acceptance_prob >= 70 else (colors.HexColor('#F57C00') if acceptance_prob >= 40 else colors.HexColor('#D32F2F'))
|
| 511 |
-
story.append(Paragraph(f"Hiring Potential Score: <font size=16 color='{prob_color.hexval()}'><b>{acceptance_prob:.2f}%</b></font>",
|
| 512 |
-
ParagraphStyle(name='Prob', fontSize=12, spaceAfter=12, alignment=1)))
|
| 513 |
-
if acceptance_prob >= 80:
|
| 514 |
-
story.append(Paragraph("<b>HR Assessment:</b> Exceptional candidate, strongly recommended for advancement to the next stage.", body_text))
|
| 515 |
-
elif acceptance_prob >= 50:
|
| 516 |
-
story.append(Paragraph("<b>HR Assessment:</b> Promising candidate with moderate strengths; consider for further evaluation.", body_text))
|
| 517 |
-
else:
|
| 518 |
-
story.append(Paragraph("<b>HR Assessment:</b> Limited alignment with role expectations; significant development needed.", body_text))
|
| 519 |
-
story.append(Spacer(1, 0.3 * inch))
|
| 520 |
-
story.append(Paragraph("Prepared by: EvalBot - AI-Powered HR Interview Analysis System", body_text))
|
| 521 |
-
story.append(PageBreak())
|
| 522 |
-
|
| 523 |
-
# Detailed Analysis
|
| 524 |
-
story.append(Paragraph("Detailed Candidate Evaluation", h1))
|
| 525 |
|
| 526 |
-
story.append(Paragraph("
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
|
| 530 |
-
|
| 531 |
-
['Speaking Rate', f"{voice_analysis.get('speaking_rate', 0):.2f} words/sec", 'Optimal: 2.0-3.0 wps; impacts clarity and confidence'],
|
| 532 |
-
['Filler Word Usage', f"{voice_analysis.get('filler_ratio', 0) * 100:.1f}%", 'High usage may reduce perceived professionalism'],
|
| 533 |
-
['Anxiety Indicator', voice_analysis.get('interpretation', {}).get('anxiety_level', 'N/A'), f"Score: {voice_analysis.get('composite_scores', {}).get('anxiety', 0):.3f}; reflects pressure response"],
|
| 534 |
-
['Confidence Indicator', voice_analysis.get('interpretation', {}).get('confidence_level', 'N/A'), f"Score: {voice_analysis.get('composite_scores', {}).get('confidence', 0):.3f}; indicates vocal authority"],
|
| 535 |
-
['Fluency Assessment', voice_analysis.get('interpretation', {}).get('fluency_level', 'N/A'), 'Affects engagement and message delivery']
|
| 536 |
-
]
|
| 537 |
-
table = Table(table_data, colWidths=[1.8*inch, 1.2*inch, 3.5*inch])
|
| 538 |
-
table.setStyle(TableStyle([
|
| 539 |
-
('BACKGROUND', (0,0), (-1,0), colors.HexColor('#2E5A87')),
|
| 540 |
-
('TEXTCOLOR', (0,0), (-1,0), colors.whitesmoke),
|
| 541 |
-
('ALIGN', (0,0), (-1,-1), 'LEFT'),
|
| 542 |
-
('VALIGN', (0,0), (-1,-1), 'MIDDLE'),
|
| 543 |
-
('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
|
| 544 |
-
('FONTSIZE', (0, 0), (-1, -1), 9),
|
| 545 |
-
('BOTTOMPADDING', (0, 0), (-1, 0), 12),
|
| 546 |
-
('TOPPADDING', (0, 0), (-1, 0), 12),
|
| 547 |
-
('BACKGROUND', (0, 1), (-1, -1), colors.HexColor('#F5F7FA')),
|
| 548 |
-
('GRID', (0,0), (-1,-1), 1, colors.HexColor('#DDE4EB'))
|
| 549 |
-
]))
|
| 550 |
-
story.append(table)
|
| 551 |
-
story.append(Spacer(1, 0.25 * inch))
|
| 552 |
-
chart_buffer = io.BytesIO()
|
| 553 |
-
generate_anxiety_confidence_chart(voice_analysis.get('composite_scores', {}), chart_buffer)
|
| 554 |
-
chart_buffer.seek(0)
|
| 555 |
-
img = Image(chart_buffer, width=4.5*inch, height=2.8*inch)
|
| 556 |
-
img.hAlign = 'CENTER'
|
| 557 |
-
story.append(img)
|
| 558 |
-
else:
|
| 559 |
-
story.append(Paragraph("Voice analysis unavailable due to processing limitations.", body_text))
|
| 560 |
-
story.append(Spacer(1, 0.3 * inch))
|
| 561 |
-
|
| 562 |
-
# Parse Gemini Report
|
| 563 |
-
sections = {}
|
| 564 |
-
section_titles = ["Executive Summary", "Communication and Vocal Analysis",
|
| 565 |
-
"Content Analysis and Competency Assessment",
|
| 566 |
-
"Fit and Potential Evaluation", "Actionable HR Recommendations"]
|
| 567 |
-
for title in section_titles:
|
| 568 |
-
sections[title] = []
|
| 569 |
-
report_parts = re.split(r'(\s*\\\s*\d\.\s*.?\s\\)', gemini_report_text)
|
| 570 |
-
current_section = None
|
| 571 |
for part in report_parts:
|
| 572 |
-
if
|
| 573 |
-
|
| 574 |
-
|
| 575 |
-
if title.lower() in part.lower():
|
| 576 |
-
current_section = title
|
| 577 |
-
is_heading = True
|
| 578 |
-
break
|
| 579 |
-
if not is_heading and current_section:
|
| 580 |
-
sections[current_section].append(part.strip())
|
| 581 |
-
|
| 582 |
-
# Executive Summary
|
| 583 |
-
story.append(Paragraph("2. Executive Summary", h2))
|
| 584 |
-
if sections['Executive Summary']:
|
| 585 |
-
for line in sections['Executive Summary']:
|
| 586 |
-
if line.startswith(('-', '•', '*')):
|
| 587 |
-
story.append(Paragraph(line.lstrip('-•* ').strip(), bullet_style))
|
| 588 |
-
else:
|
| 589 |
-
story.append(Paragraph(line, body_text))
|
| 590 |
-
else:
|
| 591 |
-
story.append(Paragraph("Summary not available from analysis.", body_text))
|
| 592 |
-
story.append(Spacer(1, 0.3 * inch))
|
| 593 |
-
|
| 594 |
-
# Content and Competency
|
| 595 |
-
story.append(Paragraph("3. Content and Competency Assessment", h2))
|
| 596 |
-
if sections['Content Analysis and Competency Assessment']:
|
| 597 |
-
for line in sections['Content Analysis and Competency Assessment']:
|
| 598 |
-
if line.startswith(('-', '•', '*')):
|
| 599 |
-
story.append(Paragraph(line.lstrip('-•* ').strip(), bullet_style))
|
| 600 |
-
else:
|
| 601 |
-
story.append(Paragraph(line, body_text))
|
| 602 |
-
else:
|
| 603 |
-
story.append(Paragraph("Content and competency analysis not provided.", body_text))
|
| 604 |
-
story.append(PageBreak())
|
| 605 |
-
|
| 606 |
-
# Fit and Potential
|
| 607 |
-
story.append(Paragraph("4. Fit and Potential Evaluation", h2))
|
| 608 |
-
if sections['Fit and Potential Evaluation']:
|
| 609 |
-
for line in sections['Fit and Potential Evaluation']:
|
| 610 |
-
if line.startswith(('-', '•', '*')):
|
| 611 |
-
story.append(Paragraph(line.lstrip('-•* ').strip(), bullet_style))
|
| 612 |
else:
|
| 613 |
-
story.append(Paragraph(
|
| 614 |
-
|
| 615 |
-
|
| 616 |
-
story.append(Spacer(1, 0.3 * inch))
|
| 617 |
-
|
| 618 |
-
# HR Recommendations
|
| 619 |
-
story.append(Paragraph("5. Actionable HR Recommendations", h2))
|
| 620 |
-
if sections['Actionable HR Recommendations']:
|
| 621 |
-
for line in sections['Actionable HR Recommendations']:
|
| 622 |
-
if line.startswith(('-', '•', '*')):
|
| 623 |
-
story.append(Paragraph(line.lstrip('-•* ').strip(), bullet_style))
|
| 624 |
-
else:
|
| 625 |
-
story.append(Paragraph(line, body_text))
|
| 626 |
-
else:
|
| 627 |
-
story.append(Paragraph("HR recommendations not provided.", body_text))
|
| 628 |
-
|
| 629 |
-
doc.build(story, onFirstPage=header_footer, onLaterPages=header_footer)
|
| 630 |
-
return True
|
| 631 |
except Exception as e:
|
| 632 |
-
logger.error(f"
|
| 633 |
-
|
|
|
|
|
|
|
| 634 |
|
| 635 |
def convert_to_serializable(obj):
|
|
|
|
| 636 |
if isinstance(obj, np.generic): return obj.item()
|
| 637 |
-
if isinstance(obj, dict): return {
|
| 638 |
-
if isinstance(obj, list): return [convert_to_serializable(
|
| 639 |
if isinstance(obj, np.ndarray): return obj.tolist()
|
| 640 |
return obj
|
| 641 |
|
| 642 |
-
|
| 643 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 644 |
wav_file = None
|
| 645 |
-
is_downloaded = False
|
| 646 |
try:
|
| 647 |
-
logger.info(f"
|
| 648 |
-
|
| 649 |
-
|
| 650 |
-
|
| 651 |
-
else:
|
| 652 |
-
local_audio_path = audio_path_or_url
|
| 653 |
-
wav_file = convert_to_wav(local_audio_path)
|
| 654 |
transcript = transcribe(wav_file)
|
|
|
|
|
|
|
| 655 |
for utterance in transcript['utterances']:
|
| 656 |
-
utterance['prosodic_features'] = extract_prosodic_features(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 657 |
utterances_with_speakers = identify_speakers(transcript, wav_file)
|
| 658 |
-
|
| 659 |
-
|
| 660 |
-
|
| 661 |
-
|
| 662 |
-
|
| 663 |
-
else:
|
| 664 |
clf, vectorizer, scaler = train_role_classifier(utterances_with_speakers)
|
| 665 |
classified_utterances = classify_roles(utterances_with_speakers, clf, vectorizer, scaler)
|
| 666 |
-
|
|
|
|
|
|
|
|
|
|
| 667 |
analysis_data = {
|
| 668 |
'transcript': classified_utterances,
|
| 669 |
'speakers': list(set(u['speaker'] for u in classified_utterances)),
|
| 670 |
'voice_analysis': voice_analysis,
|
| 671 |
'text_analysis': {
|
| 672 |
-
'total_duration':
|
| 673 |
'speaker_turns': len(classified_utterances)
|
| 674 |
}
|
| 675 |
}
|
| 676 |
-
|
| 677 |
-
|
| 678 |
-
|
|
|
|
| 679 |
pdf_path = os.path.join(OUTPUT_DIR, f"{base_name}_report.pdf")
|
| 680 |
json_path = os.path.join(OUTPUT_DIR, f"{base_name}_analysis.json")
|
| 681 |
-
|
|
|
|
|
|
|
|
|
|
| 682 |
with open(json_path, 'w') as f:
|
| 683 |
serializable_data = convert_to_serializable(analysis_data)
|
| 684 |
json.dump(serializable_data, f, indent=2)
|
| 685 |
-
|
|
|
|
| 686 |
return {'pdf_path': pdf_path, 'json_path': json_path}
|
| 687 |
-
|
| 688 |
-
logger.error(f"Processing failed for {audio_path_or_url}: {str(e)}", exc_info=True)
|
| 689 |
-
raise
|
| 690 |
finally:
|
| 691 |
if wav_file and os.path.exists(wav_file):
|
| 692 |
os.remove(wav_file)
|
| 693 |
-
|
| 694 |
-
|
| 695 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -17,60 +17,56 @@ from sklearn.feature_extraction.text import TfidfVectorizer
import re
from typing import Dict, List, Tuple
import logging
+
+# --- Imports for enhanced PDF ---
from reportlab.lib.pagesizes import letter
+from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.lib import colors
+
+# --- Imports for NLP and models ---
from transformers import AutoTokenizer, AutoModel
import spacy
import google.generativeai as genai
import joblib
from concurrent.futures import ThreadPoolExecutor

+# ==============================================================================
+# 1. SETUP & CONFIGURATION
+# ==============================================================================
+
# Setup logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
logging.getLogger("nemo_logging").setLevel(logging.ERROR)

# Configuration
AUDIO_DIR = "./uploads"
OUTPUT_DIR = "./processed_audio"
+os.makedirs(AUDIO_DIR, exist_ok=True)
os.makedirs(OUTPUT_DIR, exist_ok=True)

+# API Keys from environment variables
PINECONE_KEY = os.getenv("PINECONE_KEY")
ASSEMBLYAI_KEY = os.getenv("ASSEMBLYAI_KEY")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

+if not all([PINECONE_KEY, ASSEMBLYAI_KEY, GEMINI_API_KEY]):
+    logger.error("CRITICAL: API keys (PINECONE_KEY, ASSEMBLYAI_KEY, GEMINI_API_KEY) must be set as environment variables.")
+    raise EnvironmentError("API keys must be set for the application to run.")
+
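Since the new key check runs at import time, the environment has to be populated before the module is first imported. A minimal sketch (the placeholder values are illustrative, and it assumes the file is importable as `process_interview`):

```python
# Sketch: set the required environment variables before importing the module.
import os

os.environ.setdefault("PINECONE_KEY", "<your-pinecone-key>")
os.environ.setdefault("ASSEMBLYAI_KEY", "<your-assemblyai-key>")
os.environ.setdefault("GEMINI_API_KEY", "<your-gemini-key>")

import process_interview  # raises EnvironmentError if any key is still unset
```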
+# ==============================================================================
+# 2. INITIALIZE MODELS AND SERVICES (Executed once on import)
+# ==============================================================================

def initialize_services():
    try:
+        logger.info("Initializing Pinecone and Gemini services...")
        pc = Pinecone(api_key=PINECONE_KEY)
        index_name = "interview-speaker-embeddings"
        if index_name not in pc.list_indexes().names():
+            logger.info(f"Creating Pinecone index: {index_name}")
            pc.create_index(
                name=index_name,
                dimension=192,

@@ -80,9 +76,10 @@ def initialize_services():
        index = pc.Index(index_name)
        genai.configure(api_key=GEMINI_API_KEY)
        gemini_model = genai.GenerativeModel('gemini-1.5-flash')
+        logger.info("Services initialized successfully.")
        return index, gemini_model
    except Exception as e:
+        logger.error(f"Error initializing services: {str(e)}", exc_info=True)
        raise

index, gemini_model = initialize_services()

@@ -90,29 +87,31 @@ index, gemini_model = initialize_services()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
logger.info(f"Using device: {device}")

+def load_models():
    try:
+        logger.info("Loading ML models...")
+        # Speaker model
+        speaker_model = EncDecSpeakerLabelModel.from_pretrained(
            "nvidia/speakerverification_en_titanet_large",
            map_location=torch.device('cpu')
        )
+        speaker_model.eval()
+
+        # NLP model
+        nlp = spacy.load("en_core_web_sm")
+
+        logger.info("All models loaded successfully.")
+        return speaker_model, nlp
    except Exception as e:
+        logger.error(f"Model loading failed: {str(e)}", exc_info=True)
+        raise RuntimeError("Could not load machine learning models.")
+
+speaker_model, nlp = load_models()

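TitaNet-large emits 192-dimensional speaker embeddings, which is what the `dimension=192` on the Pinecone index above corresponds to. A quick consistency check (a sketch; `sample.wav` is a placeholder path):

```python
import torch

with torch.no_grad():
    emb = speaker_model.get_embedding("sample.wav")
assert emb.shape[-1] == 192  # must match the Pinecone index dimension
```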
+# ==============================================================================
+# 3. HELPER FUNCTIONS (The core logic for each step of the pipeline)
+# ==============================================================================

def convert_to_wav(audio_path: str, output_dir: str = OUTPUT_DIR) -> str:
    try:

@@ -124,7 +123,7 @@ def convert_to_wav(audio_path: str, output_dir: str = OUTPUT_DIR) -> str:
        audio.export(wav_file, format="wav")
        return wav_file
    except Exception as e:
+        logger.error(f"Audio conversion failed for {audio_path}: {str(e)}")
        raise

def extract_prosodic_features(audio_path: str, start_ms: int, end_ms: int) -> Dict:

@@ -150,11 +149,10 @@ def extract_prosodic_features(audio_path: str, start_ms: int, end_ms: int) -> Dict:
        os.remove(temp_path)
        return features
    except Exception as e:
+        logger.warning(f"Feature extraction failed, returning zeros: {str(e)}")
        return {
+            'duration': (end_ms - start_ms) / 1000, 'mean_pitch': 0.0, 'min_pitch': 0.0, 'max_pitch': 0.0,
+            'pitch_sd': 0.0, 'intensityMean': 0.0, 'intensityMin': 0.0, 'intensityMax': 0.0, 'intensitySD': 0.0,
        }

def transcribe(audio_path: str) -> Dict:

@@ -162,127 +160,138 @@ def transcribe(audio_path: str) -> Dict:
        with open(audio_path, 'rb') as f:
            upload_response = requests.post(
                "https://api.assemblyai.com/v2/upload",
+                headers={"authorization": ASSEMBLYAI_KEY}, data=f
            )
+        upload_response.raise_for_status()
+        audio_url = upload_response.json()['upload_url']
+
        transcript_response = requests.post(
            "https://api.assemblyai.com/v2/transcript",
            headers={"authorization": ASSEMBLYAI_KEY},
+            json={"audio_url": audio_url, "speaker_labels": True, "filter_profanity": True}
        )
+        transcript_response.raise_for_status()
        transcript_id = transcript_response.json()['id']
+
        while True:
+            result_response = requests.get(
                f"https://api.assemblyai.com/v2/transcript/{transcript_id}",
                headers={"authorization": ASSEMBLYAI_KEY}
+            )
+            result_response.raise_for_status()
+            result = result_response.json()
+
            if result['status'] == 'completed':
+                if 'utterances' not in result or result['utterances'] is None:
+                    result['utterances'] = []
+                    logger.warning("Transcription completed but no utterances found.")
                return result
            elif result['status'] == 'error':
+                raise Exception(f"Transcription failed: {result['error']}")
            time.sleep(5)
    except Exception as e:
+        logger.error(f"Transcription process failed: {str(e)}", exc_info=True)
        raise

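One caveat with `transcribe`: the loop polls until AssemblyAI reports a terminal status, so a stuck job would block forever. If that matters, a bounded variant of the same polling pattern is straightforward (a sketch; `max_wait_s` is a hypothetical parameter, not part of this commit):

```python
import time
import requests

def poll_transcript(transcript_id: str, api_key: str, max_wait_s: int = 1800) -> dict:
    """Poll AssemblyAI until the transcript completes, errors, or times out."""
    deadline = time.time() + max_wait_s
    while time.time() < deadline:
        resp = requests.get(
            f"https://api.assemblyai.com/v2/transcript/{transcript_id}",
            headers={"authorization": api_key},
        )
        resp.raise_for_status()
        result = resp.json()
        if result["status"] == "completed":
            return result
        if result["status"] == "error":
            raise RuntimeError(f"Transcription failed: {result['error']}")
        time.sleep(5)
    raise TimeoutError(f"Transcript {transcript_id} not ready after {max_wait_s}s")
```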
+def process_utterance(utterance, full_audio):
    try:
        start = utterance['start']
        end = utterance['end']
        segment = full_audio[start:end]
+        temp_path = os.path.join(OUTPUT_DIR, f"temp_utterance_{uuid.uuid4()}.wav")
        segment.export(temp_path, format="wav")
+
        with torch.no_grad():
+            embedding = speaker_model.get_embedding(temp_path).to(device)
+
        query_result = index.query(
+            vector=embedding.cpu().numpy().tolist(), top_k=1, include_metadata=True
        )
+
        if query_result['matches'] and query_result['matches'][0]['score'] > 0.7:
            speaker_id = query_result['matches'][0]['id']
            speaker_name = query_result['matches'][0]['metadata']['speaker_name']
        else:
            speaker_id = f"unknown_{uuid.uuid4().hex[:6]}"
            speaker_name = f"Speaker_{speaker_id[-4:]}"
+            index.upsert([(speaker_id, embedding.cpu().numpy().tolist(), {"speaker_name": speaker_name})])
+
        os.remove(temp_path)
        return {
+            **utterance, 'speaker': speaker_name, 'speaker_id': speaker_id
        }
    except Exception as e:
+        logger.warning(f"Utterance processing failed: {str(e)}")
+        return {**utterance, 'speaker': 'Unknown', 'speaker_id': 'unknown'}

def identify_speakers(transcript: Dict, wav_file: str) -> List[Dict]:
    try:
+        if not transcript.get('utterances'):
+            return []
        full_audio = AudioSegment.from_wav(wav_file)
        utterances = transcript['utterances']
+
+        with ThreadPoolExecutor(max_workers=4) as executor:
+            futures = [executor.submit(process_utterance, utterance, full_audio) for utterance in utterances]
        results = [f.result() for f in futures]
        return results
    except Exception as e:
+        logger.error(f"Speaker identification failed: {str(e)}", exc_info=True)
        raise

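All four pool workers call `get_embedding` on the one shared NeMo model. If that ever proves non-thread-safe, serializing just the inference step is a cheap guard (a sketch; `safe_get_embedding` is a hypothetical helper, not part of this commit):

```python
import threading

_model_lock = threading.Lock()

def safe_get_embedding(wav_path: str):
    """Serialize access to the shared speaker model across worker threads."""
    # File export and the Pinecone query in process_utterance would still
    # overlap across threads; only model inference is serialized here.
    with _model_lock:
        return speaker_model.get_embedding(wav_path)
```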
+def get_role_classification_models():
+    """Loads role classification models if they exist, otherwise returns None."""
+    clf_path = os.path.join(OUTPUT_DIR, 'role_classifier.pkl')
+    vec_path = os.path.join(OUTPUT_DIR, 'text_vectorizer.pkl')
+    scl_path = os.path.join(OUTPUT_DIR, 'feature_scaler.pkl')
+
+    if all(os.path.exists(p) for p in [clf_path, vec_path, scl_path]):
+        clf = joblib.load(clf_path)
+        vectorizer = joblib.load(vec_path)
+        scaler = joblib.load(scl_path)
+        return clf, vectorizer, scaler
+    return None, None, None
+
def train_role_classifier(utterances: List[Dict]):
+    """Trains and saves a role classifier based on utterance features."""
    try:
        texts = [u['text'] for u in utterances]
        vectorizer = TfidfVectorizer(max_features=500, ngram_range=(1, 2))
        X_text = vectorizer.fit_transform(texts)
+        features, labels = [], []
+        # Simple heuristic: assume alternating speakers are interviewer/interviewee
        for i, utterance in enumerate(utterances):
            prosodic = utterance['prosodic_features']
            feat = [
                prosodic['duration'], prosodic['mean_pitch'], prosodic['min_pitch'],
+                prosodic['pitch_sd'], prosodic['intensityMean'],
            ]
            feat.extend(X_text[i].toarray()[0].tolist())
            doc = nlp(utterance['text'])
            feat.extend([
                int(utterance['text'].endswith('?')),
+                len(re.findall(r'\b(why|how|what|when|where)\b', utterance['text'].lower())),
                len(utterance['text'].split()),
                sum(1 for token in doc if token.pos_ == 'VERB'),
            ])
            features.append(feat)
+            labels.append(i % 2)  # 0 for interviewer, 1 for interviewee
+
        scaler = StandardScaler()
        X = scaler.fit_transform(features)
+        clf = RandomForestClassifier(n_estimators=100, random_state=42, class_weight='balanced')
        clf.fit(X, labels)
+
        joblib.dump(clf, os.path.join(OUTPUT_DIR, 'role_classifier.pkl'))
        joblib.dump(vectorizer, os.path.join(OUTPUT_DIR, 'text_vectorizer.pkl'))
        joblib.dump(scaler, os.path.join(OUTPUT_DIR, 'feature_scaler.pkl'))
        return clf, vectorizer, scaler
    except Exception as e:
+        logger.error(f"Classifier training failed: {str(e)}", exc_info=True)
        raise

def classify_roles(utterances: List[Dict], clf, vectorizer, scaler):
+    """Classifies roles for each utterance using a pre-trained model."""
    try:
        texts = [u['text'] for u in utterances]
        X_text = vectorizer.transform(texts)

@@ -291,405 +300,234 @@ def classify_roles(utterances: List[Dict], clf, vectorizer, scaler):
            prosodic = utterance['prosodic_features']
            feat = [
                prosodic['duration'], prosodic['mean_pitch'], prosodic['min_pitch'],
+                prosodic['pitch_sd'], prosodic['intensityMean'],
            ]
            feat.extend(X_text[i].toarray()[0].tolist())
            doc = nlp(utterance['text'])
            feat.extend([
                int(utterance['text'].endswith('?')),
+                len(re.findall(r'\b(why|how|what|when|where)\b', utterance['text'].lower())),
                len(utterance['text'].split()),
                sum(1 for token in doc if token.pos_ == 'VERB'),
            ])
            X = scaler.transform([feat])
            role = 'Interviewer' if clf.predict(X)[0] == 0 else 'Interviewee'
            results.append({**utterance, 'role': role})
        return results
    except Exception as e:
+        logger.error(f"Role classification failed: {str(e)}", exc_info=True)
+        # Fallback if classification fails
+        return [dict(u, role='Unknown') for u in utterances]

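`train_role_classifier` derives its labels from the `i % 2` alternation heuristic, so the classifier is only as good as the assumption that turns strictly alternate interviewer/interviewee. A minimal round-trip smoke test might look like this (hypothetical utterances; assumes the module's functions and its loaded `nlp` are available):

```python
fake_utterances = [
    {"text": "Why did you choose this field?", "prosodic_features": {
        "duration": 2.1, "mean_pitch": 180.0, "min_pitch": 120.0,
        "pitch_sd": 15.0, "intensityMean": 0.05}},
    {"text": "I enjoy solving hard problems.", "prosodic_features": {
        "duration": 3.4, "mean_pitch": 140.0, "min_pitch": 100.0,
        "pitch_sd": 10.0, "intensityMean": 0.04}},
] * 4  # a few strictly alternating turns

clf, vectorizer, scaler = train_role_classifier(fake_utterances)
labeled = classify_roles(fake_utterances, clf, vectorizer, scaler)
print([u["role"] for u in labeled])  # expect alternating Interviewer/Interviewee
```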
+def analyze_interviewee_voice(utterances: List[Dict]) -> Dict:
+    # (This function is complex, including it fully)
    try:
+        interviewee_utterances = [u for u in utterances if u.get('role') == 'Interviewee']
        if not interviewee_utterances:
+            return {'error': 'No interviewee utterances found to analyze.'}
+
        total_duration = sum(u['prosodic_features']['duration'] for u in interviewee_utterances)
        total_words = sum(len(u['text'].split()) for u in interviewee_utterances)
        speaking_rate = total_words / total_duration if total_duration > 0 else 0
+
        filler_words = ['um', 'uh', 'like', 'you know', 'so', 'i mean']
+        filler_count = sum(u['text'].lower().count(fw) for u in interviewee_utterances for fw in filler_words)
        filler_ratio = filler_count / total_words if total_words > 0 else 0
+
+        all_pitches = [u['prosodic_features']['mean_pitch'] for u in interviewee_utterances if u['prosodic_features']['mean_pitch'] > 0]
+        pitch_mean = np.mean(all_pitches) if all_pitches else 0
+        pitch_std = np.std(all_pitches) if all_pitches else 0
+
+        anxiety_score = (pitch_std / 100) + (filler_ratio * 2)
+        confidence_score = 1 - anxiety_score if anxiety_score < 1 else 0
+
        return {
            'speaking_rate': float(round(speaking_rate, 2)),
            'filler_ratio': float(round(filler_ratio, 4)),
+            'pitch_mean': float(round(pitch_mean, 2)),
+            'pitch_std_dev': float(round(pitch_std, 2)),
+            'composite_scores': {
+                'anxiety': float(round(anxiety_score, 4)),
+                'confidence': float(round(confidence_score, 4)),
+            }
        }
    except Exception as e:
+        logger.error(f"Voice analysis failed: {str(e)}", exc_info=True)
        return {'error': str(e)}

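For a concrete feel of the composite scores above (illustrative numbers only):

```python
# Worked example of the new anxiety/confidence formulas:
pitch_std = 25.0      # std dev of utterance-level mean pitches, in Hz
filler_ratio = 0.06   # 6% of the interviewee's words are fillers

anxiety = (pitch_std / 100) + (filler_ratio * 2)   # 0.25 + 0.12 = 0.37
confidence = 1 - anxiety if anxiety < 1 else 0     # 0.63
```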
+def generate_report_text(analysis_data: Dict) -> str:
+    """Generates the text for the final report using Gemini."""
    try:
        voice = analysis_data.get('voice_analysis', {})
+        interviewee_responses = [u['text'] for u in analysis_data['transcript'] if u.get('role') == 'Interviewee']
+
        prompt = f"""
+        Analyze the following interview data and generate a concise, professional report.
+
+        **Interview Data:**
+        - Total Duration: {analysis_data['text_analysis']['total_duration']:.2f} seconds
+        - Speaker Turns: {analysis_data['text_analysis']['speaker_turns']}
+        - Speakers: {', '.join(analysis_data['speakers'])}
+
+        **Voice Analysis of Interviewee:**
+        - Speaking Rate: {voice.get('speaking_rate', 'N/A')} words/sec
+        - Filler Word Ratio: {voice.get('filler_ratio', 'N/A')}
+        - Anxiety Score (lower is better): {voice.get('composite_scores', {}).get('anxiety', 'N/A')}
+        - Confidence Score (higher is better): {voice.get('composite_scores', {}).get('confidence', 'N/A')}
+
+        **Interviewee's Key Responses:**
+        - {"- ".join(interviewee_responses[:3])}
+
+        **Task:**
+        Based on all the data above, provide:
+        1. **Executive Summary:** A brief paragraph summarizing the candidate's performance.
+        2. **Strengths:** 2-3 bullet points on what the candidate did well (e.g., clear articulation, confidence).
+        3. **Areas for Improvement:** 2-3 bullet points on specific, actionable feedback (e.g., reduce filler words, elaborate on answers).
        """
        response = gemini_model.generate_content(prompt)
        return response.text
    except Exception as e:
+        logger.error(f"Report generation with Gemini failed: {str(e)}", exc_info=True)
        return f"Error generating report: {str(e)}"

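One detail worth double-checking in the prompt: `"- ".join(interviewee_responses[:3])` renders all three responses on a single line. If one bullet per line is intended, joining on a newline would do it:

```python
responses = ["Answer one.", "Answer two.", "Answer three."]
print("- " + "- ".join(responses))    # - Answer one.- Answer two.- Answer three.
print("- " + "\n- ".join(responses))  # one "- " bullet per line, likely the intent
```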
def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text: str):
+    """Creates a PDF report from the analysis data."""
    try:
+        doc = SimpleDocTemplate(output_path, pagesize=letter)
        styles = getSampleStyleSheet()
        story = []

+        story.append(Paragraph("Interview Analysis Report", styles['h1']))
+        story.append(Spacer(1, 0.2 * inch))
+
+        # Split Gemini text into paragraphs for cleaner formatting
+        report_parts = gemini_report_text.split('\n')
        for part in report_parts:
+            if part.strip():
+                if part.startswith('**'):
+                    story.append(Paragraph(part.replace('**', ''), styles['h2']))
                else:
+                    story.append(Paragraph(part, styles['BodyText']))
+
+        doc.build(story)
    except Exception as e:
+        logger.error(f"PDF creation failed: {str(e)}", exc_info=True)
+        # Create a fallback text file if PDF fails
+        with open(output_path.replace('.pdf', '.txt'), 'w') as f:
+            f.write(gemini_report_text)

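The parser above keys headings off a leading `**`, which matches the bold section titles the prompt in `generate_report_text` asks Gemini to produce. A tiny standalone demo of the mapping (the sample text is illustrative):

```python
sample = "**Executive Summary**\nThe candidate communicated clearly.\n**Strengths**\n- Confident delivery"
for part in sample.split("\n"):
    if part.strip():
        kind = "h2" if part.startswith("**") else "body"
        print(kind, "->", part.replace("**", ""))
```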
def convert_to_serializable(obj):
+    """Converts numpy types to native Python types for JSON serialization."""
    if isinstance(obj, np.generic): return obj.item()
+    if isinstance(obj, dict): return {key: convert_to_serializable(value) for key, value in obj.items()}
+    if isinstance(obj, list): return [convert_to_serializable(item) for item in obj]
    if isinstance(obj, np.ndarray): return obj.tolist()
    return obj

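`convert_to_serializable` exists because numpy scalars and arrays are not JSON-serializable. A quick demonstration (assumes this module's function):

```python
import json
import numpy as np

data = {"score": np.float32(0.37), "vec": np.array([1, 2, 3]), "turns": [np.int64(7)]}
clean = convert_to_serializable(data)
json.dumps(clean)  # serializes without TypeError; dumping raw `data` would raise
```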
+
+# ==============================================================================
+# 4. ORCHESTRATION FUNCTIONS
+# ==============================================================================
+
+def _process_local_audio_file(local_audio_path: str, base_name: str) -> dict:
+    """
+    Internal function to process a local audio file.
+    This contains the main pipeline logic.
+    """
    wav_file = None
    try:
+        logger.info(f"Step 1/8: Converting to WAV: {local_audio_path}")
+        wav_file = convert_to_wav(local_audio_path, OUTPUT_DIR)
+
+        logger.info("Step 2/8: Transcribing audio...")
        transcript = transcribe(wav_file)
+
+        logger.info("Step 3/8: Extracting prosodic features...")
        for utterance in transcript['utterances']:
+            utterance['prosodic_features'] = extract_prosodic_features(
+                wav_file, utterance['start'], utterance['end']
+            )
+
+        logger.info("Step 4/8: Identifying speakers...")
        utterances_with_speakers = identify_speakers(transcript, wav_file)
+
+        logger.info("Step 5/8: Classifying speaker roles...")
+        clf, vectorizer, scaler = get_role_classification_models()
+        if not clf:
+            logger.info("No role classifier found, training a new one...")
            clf, vectorizer, scaler = train_role_classifier(utterances_with_speakers)
        classified_utterances = classify_roles(utterances_with_speakers, clf, vectorizer, scaler)
+
+        logger.info("Step 6/8: Analyzing interviewee voice...")
+        voice_analysis = analyze_interviewee_voice(classified_utterances)
+
        analysis_data = {
            'transcript': classified_utterances,
            'speakers': list(set(u['speaker'] for u in classified_utterances)),
            'voice_analysis': voice_analysis,
            'text_analysis': {
+                'total_duration': transcript.get('audio_duration', 0),
                'speaker_turns': len(classified_utterances)
            }
        }
+
+        logger.info("Step 7/8: Generating report text with Gemini...")
+        gemini_report_text = generate_report_text(analysis_data)
+
        pdf_path = os.path.join(OUTPUT_DIR, f"{base_name}_report.pdf")
        json_path = os.path.join(OUTPUT_DIR, f"{base_name}_analysis.json")
+
+        logger.info(f"Step 8/8: Creating output files (PDF and JSON)...")
+        create_pdf_report(analysis_data, pdf_path, gemini_report_text)
+
        with open(json_path, 'w') as f:
            serializable_data = convert_to_serializable(analysis_data)
            json.dump(serializable_data, f, indent=2)
+
+        logger.info("Processing completed successfully.")
        return {'pdf_path': pdf_path, 'json_path': json_path}
+
    finally:
        if wav_file and os.path.exists(wav_file):
            os.remove(wav_file)
+            logger.info(f"Cleaned up temporary WAV file: {wav_file}")
+
+def process_interview(audio_url: str) -> dict:
+    """
+    Main public function called by the API. It downloads a file from a URL,
+    processes it using the internal pipeline, and returns the output file paths.
+    """
+    temp_audio_path = None
+    try:
+        # 1. Download the audio file from the URL
+        logger.info(f"Downloading audio from URL: {audio_url}")
+        response = requests.get(audio_url, stream=True, timeout=60)  # 60 second timeout
+        response.raise_for_status()  # Raise an exception for bad status codes
+
+        # Generate a unique name for the temporary file
+        original_filename = audio_url.split('/')[-1]
+        file_extension = os.path.splitext(original_filename)[1] or '.tmp'
+        base_name = f"{uuid.uuid4()}"
+        temp_audio_path = os.path.join(AUDIO_DIR, f"{base_name}{file_extension}")
+
+        with open(temp_audio_path, 'wb') as f:
+            for chunk in response.iter_content(chunk_size=8192):
+                f.write(chunk)
+
+        logger.info(f"Audio downloaded and saved to: {temp_audio_path}")
+
+        # 2. Process the downloaded local file using the main pipeline
+        result = _process_local_audio_file(temp_audio_path, base_name)
+        return result
+
+    except requests.exceptions.RequestException as e:
+        logger.error(f"Failed to download or access URL {audio_url}: {e}")
+        raise RuntimeError(f"Could not download file from URL: {audio_url}") from e
+    except Exception as e:
+        logger.error(f"An unexpected error occurred during processing for URL {audio_url}: {e}", exc_info=True)
+        raise
+    finally:
+        # 3. Clean up the downloaded audio file
+        if temp_audio_path and os.path.exists(temp_audio_path):
+            os.remove(temp_audio_path)
+            logger.info(f"Cleaned up temporary downloaded file: {temp_audio_path}")
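End-to-end usage, as the API would call it (the URL is illustrative):

```python
if __name__ == "__main__":
    paths = process_interview("https://example.com/recordings/interview_01.mp3")
    print(paths["pdf_path"], paths["json_path"])
```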