Spaces:

akpande2
/

Aurator_Coaching

Paused

App Files Files Community

akpande2 committed on Dec 16, 2025

Commit

9e0d500

verified ·

1 Parent(s): e08bd5b

Upload 6 files

Browse files

Files changed (6) hide show

Dockerfile +47 -0
env.example +20 -0
kid_coach_pipeline.py +1194 -0
main.py +335 -0
requirements1.txt +13 -0
test_api.py +71 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,47 @@

+# Production Dockerfile for Public Speaking Coach API
+# Optimized for Hugging Face Spaces or any cloud deployment
+FROM python:3.11-slim
+# Set environment variables
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PIP_NO_CACHE_DIR=1 \
+    PIP_DISABLE_PIP_VERSION_CHECK=1
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    ffmpeg \
+    libsndfile1 \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+# Limit OpenMP-backed libraries to a single thread
+ENV OMP_NUM_THREADS=1
+# Set working directory
+WORKDIR /app
+# Copy requirements first (for better caching)
+# NOTE(review): this commit uploads "requirements1.txt"; confirm that a
+# "requirements.txt" exists in the build context, otherwise this COPY fails.
+COPY requirements.txt .
+# Install Python dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+RUN pip install uvicorn
+# Copy application code
+COPY kid_coach_pipeline.py .
+COPY main.py .
+# Create directory for temporary files
+RUN mkdir -p /tmp/uploads
+# Expose port
+EXPOSE 7860
+# Health check
+# Uses only the standard library: urlopen raises (non-zero exit) on connection
+# errors AND on HTTP 4xx/5xx responses, so an erroring app is reported unhealthy.
+HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
+    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:7860/health', timeout=5)"
+# Run the application
+# Use port 7860 for Hugging Face Spaces compatibility
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

env.example ADDED Viewed

	@@ -0,0 +1,20 @@

+# ===========================================
+# ENVIRONMENT VARIABLES
+# ===========================================
+# OpenAI API Key (optional - for better tips)
+# Get from: https://platform.openai.com/api-keys
+OPENAI_API_KEY=sk-proj-xxxxx
+# AWS S3 Configuration (only for production on AWS)
+# NOTE: comments are kept on their own lines because some env-file loaders
+# (e.g. `docker run --env-file`) treat trailing "# ..." text as part of the value.
+# Set to "true" on AWS
+USE_S3=false
+# Your S3 bucket name
+S3_BUCKET_NAME=aurator-audio-outputs
+# Your AWS region
+AWS_REGION=us-east-1
+# AWS credentials
+AWS_ACCESS_KEY_ID=AKIAxxxxx
+AWS_SECRET_ACCESS_KEY=xxxxx
+# ===========================================
+# FOR HUGGING FACE TESTING:
+# Just add OPENAI_API_KEY in Settings > Variables
+# Leave USE_S3=false (will use local storage)
+# ===========================================

kid_coach_pipeline.py ADDED Viewed

	@@ -0,0 +1,1194 @@

+"""
+Enhanced Public Speaking Coach with PERSONALIZED LLM Tips and Avatar Voice
+Includes: Speech Analysis + OpenAI-Powered Personalized Tips + Text-to-Speech Avatar
+"""
+import os
+import io
+import json
+import logging
+import warnings
+import re
+import uuid
+from typing import Dict, List, Any, Optional, Tuple
+from dataclasses import dataclass, asdict
+from pathlib import Path
+import torch
+import librosa
+import numpy as np
+import soundfile as sf
+from scipy.signal import medfilt
+from scipy.stats import zscore
+import textstat
+from TTS.api import TTS
+# Suppress warnings
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+logging.getLogger("whisper").setLevel(logging.ERROR)
+logging.getLogger("transformers").setLevel(logging.ERROR)
+warnings.filterwarnings("ignore")
+# Validate Whisper installation
+try:
+    import whisper
+    if not hasattr(whisper, "load_model"):
+        raise ImportError("Wrong whisper library installed")
+except ImportError:
+    print("\n❌ CRITICAL: Install correct whisper library:")
+    print("   pip uninstall -y whisper && pip install openai-whisper")
+    exit(1)
+# Import transformers for LLM
+try:
+    from transformers import (
+        pipeline,
+        AutoTokenizer,
+        AutoModel,
+        AutoModelForSequenceClassification,
+        AutoModelForCausalLM
+    )
+    from sentence_transformers import SentenceTransformer
+except ImportError:
+    print("\n❌ CRITICAL: Install required libraries:")
+    print("   pip install transformers sentence-transformers torch")
+    exit(1)
+# Import OpenAI for better tips generation
+try:
+    import openai
+    OPENAI_AVAILABLE = True
+except ImportError:
+    print("\n⚠️  WARNING: OpenAI not installed. Using fallback tips.")
+    print("   To enable better tips: pip install openai")
+    OPENAI_AVAILABLE = False
+# Import TTS
+try:
+    from TTS.api import TTS as CoquiTTS
+except ImportError:
+    print("\n⚠️  WARNING: TTS not installed. Avatar voice will be disabled.")
+    print("   To enable: pip install TTS")
+    CoquiTTS = None
+# JSON Serialization Helper
+class NumpyEncoder(json.JSONEncoder):
+    """Handles numpy types in JSON serialization"""
+    def default(self, obj):
+        if isinstance(obj, (np.integer, np.int64)):
+            return int(obj)
+        if isinstance(obj, (np.floating, np.float32, np.float64)):
+            return float(obj)
+        if isinstance(obj, np.ndarray):
+            return obj.tolist()
+        return super().default(obj)
+class EnhancedPublicSpeakingCoach:
+    """
+    Complete speech analysis engine with LLM tips and avatar voice
+    """
+    def __init__(self, whisper_model_size: str = "base", enable_tts: bool = True, openai_api_key: Optional[str] = None):
+        """
+        Initialize the enhanced coach engine
+        Picks "cuda" when torch reports it available (otherwise "cpu"), then loads,
+        in order: Whisper, a sentiment pipeline (RoBERTa, falling back to DistilBERT
+        if loading fails), a sentence transformer and, optionally, a TTS model.
+        Finally sets up the filler-word regex table and the power-word set.
+        Args:
+            whisper_model_size: Whisper model size (tiny/base/small/medium)
+            enable_tts: Enable text-to-speech avatar voice generation
+            openai_api_key: OpenAI API key for better tips (optional). When given
+                and the openai package is importable, it is stored on the
+                module-level ``openai.api_key``.
+        """
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        print(f"🚀 Initializing Enhanced Coach on {self.device}...")
+        # Set up OpenAI if available
+        self.use_openai = False
+        if OPENAI_AVAILABLE and openai_api_key:
+            openai.api_key = openai_api_key
+            self.use_openai = True
+            print("   ✅ OpenAI enabled for personalized tips")
+        # Load Whisper for transcription
+        print(f"   Loading Whisper ({whisper_model_size})...")
+        self.whisper = whisper.load_model(whisper_model_size, device=self.device)
+        # Load sentiment analysis model (using a more reliable one)
+        print("   Loading Sentiment Model...")
+        try:
+            # Using cardiffnlp/twitter-roberta-base-sentiment-latest - more accurate
+            self.sentiment_analyzer = pipeline(
+                "sentiment-analysis",
+                model="cardiffnlp/twitter-roberta-base-sentiment-latest",
+                device=0 if self.device == "cuda" else -1
+            )
+            print("   ✅ Using RoBERTa sentiment model")
+        except Exception as e:
+            # Any load failure switches to the DistilBERT SST-2 model below
+            print(f"   ⚠️  Failed to load RoBERTa model, falling back to DistilBERT: {e}")
+            self.sentiment_analyzer = pipeline(
+                "sentiment-analysis",
+                model="distilbert-base-uncased-finetuned-sst-2-english",
+                device=0 if self.device == "cuda" else -1
+            )
+        # Load sentence transformer for semantic analysis
+        print("   Loading Sentence Transformer...")
+        self.sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
+        # Load TTS for avatar voice
+        # tts_enabled stays False unless the model below loads successfully
+        self.tts_enabled = False
+        self.tts_model = None
+        if enable_tts and CoquiTTS:
+            try:
+                print("   Loading TTS for Avatar Voice...")
+                # Using lightweight TTS model
+                self.tts_model = CoquiTTS(model_name="tts_models/en/ljspeech/tacotron2-DDC")
+                self.tts_enabled = True
+                print("   ✅ TTS enabled")
+            except Exception as e:
+                # TTS is optional: report the failure and continue without avatar audio
+                print(f"   ⚠️  TTS initialization failed: {e}")
+                self.tts_enabled = False
+        # Linguistic patterns
+        # Maps a display name to the regex used to count that filler in lowercased text
+        self.filler_patterns = {
+            "um": r"\bum+h*\b",
+            "uh": r"\buh+h*\b",
+            "like": r"\blike\b",
+            "you know": r"\byou know\b",
+            "so": r"\bso+\b",
+            "actually": r"\bactually\b",
+            "basically": r"\bbasically\b",
+            "literally": r"\bliterally\b",
+            "i mean": r"\bi mean\b",
+            "kind of": r"\bkind of\b",
+            "sort of": r"\bsort of\b",
+            "right": r"\bright\b",
+            "okay": r"\bokay\b",
+            "well": r"\bwell\b"
+        }
+        # Words credited as "good words" by the vocabulary analysis
+        self.power_words = {
+            "significant", "critical", "essential", "vital", "crucial",
+            "important", "remarkable", "extraordinary", "exceptional",
+            "achieve", "accomplish", "create", "develop", "innovate",
+            "transform", "revolutionize", "enhance", "optimize",
+            "evidence", "data", "research", "proven", "demonstrate",
+            "validate", "verify", "confirm", "establish",
+            "believe", "imagine", "discover", "realize", "understand",
+            "recognize", "appreciate", "consider", "envision",
+            "opportunity", "benefit", "advantage", "solution", "success",
+            "excellence", "quality", "value", "impact", "results",
+            "together", "collaborate", "participate", "engage", "contribute"
+        }
+        print("✅ Enhanced Coach Engine Ready!")
+    def _calculate_overall_score(
+        self,
+        pacing: Dict,
+        prosody: Dict,
+        fillers: Dict,
+        silences: Dict,
+        sentiment: Dict,
+        vocabulary: Dict,
+        logical_flow: Dict,
+        coherence: Dict,
+        persuasion: Dict
+    ) -> float:
+        """
+        Calculate overall score (0-10 scale) based on all metrics
+        Weighted scoring system:
+        - Pacing: 10%
+        - Prosody: 10%
+        - Fillers: 15% (fewer is better)
+        - Silences: 10%
+        - Sentiment: 10%
+        - Vocabulary: 15%
+        - Logical Flow: 10%
+        - Coherence: 10%
+        - Persuasion: 10%
+        """
+        total_score = 0.0
+        # 1. Pacing Score (10%) - 120-160 WPM is ideal
+        wpm = pacing['words_per_minute']
+        if 120 <= wpm <= 160:
+            pacing_score = 10.0
+        elif 100 <= wpm < 120 or 160 < wpm <= 180:
+            pacing_score = 7.0
+        elif 80 <= wpm < 100 or 180 < wpm <= 200:
+            pacing_score = 5.0
+        else:
+            pacing_score = 3.0
+        total_score += pacing_score * 0.10
+        # 2. Prosody Score (10%) - dynamic is good
+        if prosody['category'].lower() == 'dynamic':
+            prosody_score = 10.0
+        elif prosody['category'].lower() == 'monotone':
+            prosody_score = 4.0
+        else:
+            prosody_score = 7.0
+        total_score += prosody_score * 0.10
+        # 3. Filler Words Score (15%) - fewer is better
+        total_fillers = sum(fillers.values())
+        if total_fillers == 0:
+            filler_score = 10.0
+        elif total_fillers <= 3:
+            filler_score = 9.0
+        elif total_fillers <= 5:
+            filler_score = 7.0
+        elif total_fillers <= 10:
+            filler_score = 5.0
+        else:
+            filler_score = max(2.0, 10.0 - (total_fillers * 0.3))
+        total_score += filler_score * 0.15
+        # 4. Silences Score (10%) - 2-5 pauses is ideal
+        silence_count = silences['count']
+        if 2 <= silence_count <= 5:
+            silence_score = 10.0
+        elif silence_count <= 8:
+            silence_score = 8.0
+        elif silence_count == 0 or silence_count == 1:
+            silence_score = 6.0
+        else:
+            silence_score = max(3.0, 10.0 - (silence_count * 0.5))
+        total_score += silence_score * 0.10
+        # 5. Sentiment Score (10%) - positive is best
+        sentiment_type = sentiment['dominant_sentiment'].lower()
+        confidence = sentiment['confidence']
+        if sentiment_type == 'positive':
+            sentiment_score = 8.0 + (confidence * 2.0)
+        elif sentiment_type == 'neutral':
+            sentiment_score = 6.0 + (confidence * 1.0)
+        else:  # negative
+            sentiment_score = max(3.0, 7.0 - (confidence * 3.0))
+        total_score += sentiment_score * 0.10
+        # 6. Vocabulary Score (15%) - convert 0-100 to 0-10
+        vocab_score = vocabulary['score'] / 10.0
+        total_score += vocab_score * 0.15
+        # 7. Logical Flow Score (10%) - convert 0-100 to 0-10
+        flow_score = logical_flow['score'] / 10.0
+        total_score += flow_score * 0.10
+        # 8. Coherence Score (10%) - convert 0-100 to 0-10
+        coherence_score = coherence['score'] / 10.0
+        total_score += coherence_score * 0.10
+        # 9. Persuasion Score (10%) - convert 0-100 to 0-10
+        persuasion_score = persuasion['score'] / 10.0
+        total_score += persuasion_score * 0.10
+        # Ensure score is in 0-10 range
+        final_score = max(0.0, min(10.0, total_score))
+        print(f"   📊 Overall Score Calculation:")
+        print(f"      Pacing: {pacing_score:.1f} (weight: 10%)")
+        print(f"      Prosody: {prosody_score:.1f} (weight: 10%)")
+        print(f"      Fillers: {filler_score:.1f} (weight: 15%)")
+        print(f"      Silences: {silence_score:.1f} (weight: 10%)")
+        print(f"      Sentiment: {sentiment_score:.1f} (weight: 10%)")
+        print(f"      Vocabulary: {vocab_score:.1f} (weight: 15%)")
+        print(f"      Flow: {flow_score:.1f} (weight: 10%)")
+        print(f"      Coherence: {coherence_score:.1f} (weight: 10%)")
+        print(f"      Persuasion: {persuasion_score:.1f} (weight: 10%)")
+        print(f"      ⭐ FINAL OVERALL SCORE: {final_score:.2f}/10")
+        return round(final_score, 2)
+    def analyze_speech(self, audio_path: str, output_dir: str = "/tmp/audio_outputs", enable_tts: bool = True, avatar_gender: str = 'male') -> Dict[str, Any]:
+        """
+        Main analysis pipeline with LLM tips and avatar voice
+        Args:
+            audio_path: Path to audio file
+            output_dir: Directory to save generated audio files (created if missing)
+            enable_tts: Per-call switch; avatar audio is generated only when this is
+                true AND the TTS model was loaded successfully at construction time
+            avatar_gender: Forwarded as ``gender`` to the avatar voice generator and
+                to the tips-to-audio formatter
+        Returns:
+            Complete analysis as JSON-serializable dictionary with avatar audio.
+            Failures (missing file, audio under one second, no speech, or any
+            exception during analysis) are reported as a dictionary with a
+            single "error" key instead of being raised.
+        """
+        # Validation
+        if not os.path.exists(audio_path):
+            return {"error": "Audio file not found"}
+        # Create output directory
+        os.makedirs(output_dir, exist_ok=True)
+        print(f"\n🎤 Analyzing: {os.path.basename(audio_path)}")
+        try:
+            # Load audio
+            audio, sr = self._load_audio(audio_path)
+            duration = len(audio) / sr
+            if duration < 1.0:
+                return {"error": "Audio too short (minimum 1 second)"}
+            print(f"   Duration: {duration:.1f}s")
+            # Step 1: Transcription
+            print("   📝 Transcribing...")
+            transcript_data = self._transcribe_with_timestamps(audio)
+            # An empty transcript means nothing downstream can be analyzed
+            if not transcript_data['text'].strip():
+                return {"error": "No speech detected"}
+            full_transcription = transcript_data['text']
+            words = transcript_data['words']
+            # Step 2-10: All analysis
+            print("   ⚡ Running analysis...")
+            pacing_result = self._analyze_pacing(words, duration)
+            prosody_result = self._analyze_prosody(audio, sr)
+            filler_result = self._detect_fillers_detailed(full_transcription)
+            silence_result = self._detect_silences(words)
+            sentiment_result = self._analyze_sentiment(full_transcription)
+            vocabulary_result = self._analyze_vocabulary(full_transcription, words)
+            logical_flow_result = self._analyze_logical_flow(full_transcription)
+            coherence_result = self._analyze_coherence(full_transcription)
+            persuasion_result = self._analyze_persuasion(full_transcription)
+            # ⭐ NEW: Calculate overall score
+            print("   🎯 Calculating overall score...")
+            overall_score = self._calculate_overall_score(
+                pacing_result,
+                prosody_result,
+                filler_result,
+                silence_result,
+                sentiment_result,
+                vocabulary_result,
+                logical_flow_result,
+                coherence_result,
+                persuasion_result
+            )
+            # Step 11: Generate personalized tips using LLM
+            print("   🤖 Generating personalized tips...")
+            personalized_tips = self._generate_personalized_tips(
+                full_transcription,
+                pacing_result,
+                prosody_result,
+                filler_result,
+                silence_result,
+                sentiment_result,
+                vocabulary_result,
+                logical_flow_result,
+                coherence_result,
+                persuasion_result,
+                overall_score
+            )
+            # Step 12: Create improved version of transcript
+            print("   ✨ Creating improved transcript...")
+            improved_transcript = self._create_improved_transcript(
+                full_transcription,
+                filler_result
+            )
+            # Step 13: Generate avatar voice (if enabled) - TWO SEPARATE AUDIOS
+            # Both URLs stay None when TTS is unavailable or disabled for this call
+            avatar_audio_url = None
+            tips_audio_url = None
+            if self.tts_enabled and self.tts_model and enable_tts:
+                # Generate audio for improved transcript
+                print("   🎙️ Generating avatar voice for improved transcript...")
+                avatar_audio_url = self._generate_avatar_voice(
+                    improved_transcript,
+                    output_dir,
+                    gender=avatar_gender,
+                    prefix="improved"
+                )
+                # Generate audio for coaching tips
+                print("   🎙️ Generating avatar voice for coaching tips...")
+                tips_text = self._format_tips_for_audio(personalized_tips, avatar_gender)
+                tips_audio_url = self._generate_avatar_voice(
+                    tips_text,
+                    output_dir,
+                    gender=avatar_gender,
+                    prefix="tips"
+                )
+            # Compile final result
+            result = {
+                "transcription": full_transcription,
+                "duration_seconds": round(duration, 2),
+                "word_count": len(words),
+                # ⭐ NEW: Overall score (0-10 scale)
+                "overall_score": overall_score,
+                "pacing": pacing_result,
+                "prosody": prosody_result,
+                "filler_words": filler_result,
+                "silence_detection": silence_result,
+                "sentiment_analysis": sentiment_result,
+                "vocabulary": vocabulary_result,
+                "logical_flow": logical_flow_result,
+                "coherence": coherence_result,
+                "persuasion": persuasion_result,
+                # NEW: LLM-generated content
+                "personalized_tips": personalized_tips,
+                "improved_transcript": improved_transcript,
+                # NEW: Separate audio URLs
+                "avatar_audio_url": avatar_audio_url,  # For improved transcript
+                "tips_audio_url": tips_audio_url        # For coaching tips
+            }
+            print("✅ Analysis complete!")
+            return result
+        except Exception as e:
+            # Top-level boundary: log the traceback and convert to an error payload
+            import traceback
+            traceback.print_exc()
+            return {"error": f"Analysis failed: {str(e)}"}
+    def _load_audio(self, path: str) -> tuple:
+        """Load and normalize audio to 16kHz mono"""
+        try:
+            audio, sr = librosa.load(path, sr=16000, mono=True)
+            audio = librosa.util.normalize(audio)
+            return audio, sr
+        except Exception as e:
+            raise ValueError(f"Failed to load audio: {e}")
+    def _transcribe_with_timestamps(self, audio: np.ndarray) -> Dict:
+        """Transcribe with word-level timestamps"""
+        result = self.whisper.transcribe(
+            audio,
+            language='en',
+            word_timestamps=True,
+            fp16=(self.device == "cuda")
+        )
+        words = []
+        for segment in result['segments']:
+            if 'words' in segment:
+                for word_info in segment['words']:
+                    words.append({
+                        'word': word_info['word'].strip(),
+                        'start': word_info['start'],
+                        'end': word_info['end']
+                    })
+        return {
+            'text': result['text'].strip(),
+            'words': words
+        }
+    def _analyze_pacing(self, words: List[Dict], duration: float) -> Dict:
+        """Analyze speaking pace"""
+        word_count = len(words)
+        wpm = (word_count / duration * 60) if duration > 0 else 0
+        if wpm < 120:
+            category = "slow"
+        elif wpm <= 160:
+            category = "good"
+        else:
+            category = "fast"
+        return {
+            "category": category,
+            "words_per_minute": round(wpm, 1)
+        }
+    def _analyze_prosody(self, audio: np.ndarray, sr: int) -> Dict:
+        """Analyze prosody (pitch variation)"""
+        try:
+            f0 = librosa.yin(audio.astype(np.float64), fmin=80, fmax=400)
+            f0_clean = f0[f0 > 0]
+            if len(f0_clean) > 10:
+                pitch_std = np.std(f0_clean)
+                category = "monotone" if pitch_std < 25 else "dynamic"
+                return {
+                    "category": category,
+                    "pitch_variation_hz": round(float(pitch_std), 1)
+                }
+            else:
+                return {"category": "unknown", "pitch_variation_hz": 0.0}
+        except Exception as e:
+            logging.warning(f"Prosody analysis failed: {e}")
+            return {"category": "unknown", "pitch_variation_hz": 0.0}
+    def _detect_fillers_detailed(self, text: str) -> Dict:
+        """Detect filler words with counts"""
+        text_lower = text.lower()
+        filler_counts = {}
+        for filler_name, pattern in self.filler_patterns.items():
+            matches = re.findall(pattern, text_lower, re.IGNORECASE)
+            count = len(matches)
+            if count > 0:
+                filler_counts[filler_name] = count
+        return filler_counts
+    def _detect_silences(self, words: List[Dict]) -> Dict:
+        """Detect long pauses/silences"""
+        if len(words) < 2:
+            return {"count": 0, "total_silence_duration_seconds": 0.0}
+        silence_threshold = 2.0
+        silence_count = 0
+        total_silence_duration = 0.0
+        for i in range(len(words) - 1):
+            pause_duration = words[i+1]['start'] - words[i]['end']
+            if pause_duration >= silence_threshold:
+                silence_count += 1
+                total_silence_duration += pause_duration
+        return {
+            "count": silence_count,
+            "total_silence_duration_seconds": round(total_silence_duration, 2)
+        }
+    def _analyze_sentiment(self, text: str) -> Dict:
+        """Analyze dominant sentiment with improved accuracy"""
+        try:
+            # Clean the text
+            text = text.strip()
+            if not text:
+                return {"dominant_sentiment": "neutral", "confidence": 0.0}
+            print(f"   🔍 Analyzing sentiment for text length: {len(text)} chars")
+            # Split into sentences for better analysis
+            sentences = re.split(r'[.!?]+', text)
+            sentences = [s.strip() for s in sentences if len(s.strip()) > 5]
+            if not sentences:
+                return {"dominant_sentiment": "neutral", "confidence": 0.0}
+            print(f"   📊 Processing {len(sentences)} sentences")
+            # Analyze each sentence
+            sentiment_scores = {"positive": 0, "neutral": 0, "negative": 0}
+            for sentence in sentences:
+                if len(sentence) < 5:
+                    continue
+                try:
+                    # Truncate to model's max length
+                    sentence_truncated = sentence[:512]
+                    result = self.sentiment_analyzer(sentence_truncated)[0]
+                    label = result['label'].lower()
+                    score = result['score']
+                    # Handle different model output formats
+                    if 'positive' in label or label == 'pos':
+                        sentiment_scores['positive'] += score
+                    elif 'negative' in label or label == 'neg':
+                        sentiment_scores['negative'] += score
+                    elif 'neutral' in label or label == 'neu':
+                        sentiment_scores['neutral'] += score
+                    else:
+                        # If label doesn't match expected format, treat as neutral
+                        sentiment_scores['neutral'] += 0.5
+                    print(f"      Sentence: '{sentence[:50]}...' -> {label} ({score:.3f})")
+                except Exception as e:
+                    print(f"      ⚠️  Failed to analyze sentence: {e}")
+                    sentiment_scores['neutral'] += 0.5
+            # Determine dominant sentiment
+            dominant = max(sentiment_scores, key=sentiment_scores.get)
+            total_score = sum(sentiment_scores.values())
+            confidence = sentiment_scores[dominant] / total_score if total_score > 0 else 0.0
+            print(f"   📈 Sentiment scores: {sentiment_scores}")
+            print(f"   🎯 Dominant: {dominant} with confidence {confidence:.3f}")
+            return {
+                "dominant_sentiment": dominant,
+                "confidence": round(confidence, 3)
+            }
+        except Exception as e:
+            logging.error(f"Sentiment analysis failed: {e}")
+            import traceback
+            traceback.print_exc()
+            return {"dominant_sentiment": "neutral", "confidence": 0.0}
+    def _analyze_vocabulary(self, text: str, words: List[Dict]) -> Dict:
+        """Analyze vocabulary quality"""
+        word_list = [w['word'].lower().strip('.,!?;:') for w in words]
+        good_words_found = []
+        for word in word_list:
+            if word in self.power_words and word not in good_words_found:
+                good_words_found.append(word)
+        unique_words = len(set(word_list))
+        total_words = len(word_list)
+        diversity_ratio = (unique_words / total_words) if total_words > 0 else 0
+        score = 0
+        score += min(40, len(good_words_found) * 5)
+        score += min(40, diversity_ratio * 80)
+        if unique_words >= 100:
+            score += 20
+        elif unique_words >= 50:
+            score += 15
+        elif unique_words >= 25:
+            score += 10
+        else:
+            score += 5
+        return {
+            "score": round(score),
+            "good_words_used": sorted(good_words_found)
+        }
+    def _analyze_logical_flow(self, text: str) -> Dict:
+        """Analyze logical flow"""
+        try:
+            sentences = re.split(r'[.!?]+', text)
+            sentences = [s.strip() for s in sentences if len(s.strip()) > 10]
+            if len(sentences) < 2:
+                return {"score": 50, "flow_quality": "insufficient_data"}
+            embeddings = self.sentence_model.encode(sentences)
+            similarities = []
+            for i in range(len(embeddings) - 1):
+                similarity = np.dot(embeddings[i], embeddings[i + 1]) / (
+                    np.linalg.norm(embeddings[i]) * np.linalg.norm(embeddings[i + 1])
+                )
+                similarities.append(similarity)
+            avg_similarity = np.mean(similarities)
+            if 0.3 <= avg_similarity <= 0.7:
+                score = 70 + (30 * (1 - abs(avg_similarity - 0.5) / 0.2))
+            elif avg_similarity < 0.3:
+                score = 40 + (avg_similarity / 0.3) * 30
+            else:
+                score = 70 - ((avg_similarity - 0.7) / 0.3) * 30
+            score = max(0, min(100, score))
+            if score >= 80:
+                quality = "excellent"
+            elif score >= 65:
+                quality = "good"
+            elif score >= 50:
+                quality = "moderate"
+            else:
+                quality = "needs_improvement"
+            return {"score": round(score), "flow_quality": quality}
+        except Exception as e:
+            logging.warning(f"Logical flow analysis failed: {e}")
+            return {"score": 50, "flow_quality": "error"}
+    def _analyze_coherence(self, text: str) -> Dict:
+        """Analyze coherence"""
+        try:
+            sentences = re.split(r'[.!?]+', text)
+            sentences = [s.strip() for s in sentences if len(s.strip()) > 10]
+            if len(sentences) < 2:
+                return {"score": 50, "coherence_quality": "insufficient_data"}
+            discourse_markers = [
+                "however", "therefore", "moreover", "furthermore", "additionally",
+                "consequently", "nevertheless", "thus", "hence", "meanwhile",
+                "first", "second", "third", "finally", "in conclusion",
+                "for example", "for instance", "in particular", "specifically"
+            ]
+            text_lower = text.lower()
+            marker_count = sum(1 for marker in discourse_markers if marker in text_lower)
+            embeddings = self.sentence_model.encode(sentences)
+            coherence_scores = []
+            for i in range(len(embeddings)):
+                for j in range(i + 1, min(i + 3, len(embeddings))):
+                    similarity = np.dot(embeddings[i], embeddings[j]) / (
+                        np.linalg.norm(embeddings[i]) * np.linalg.norm(embeddings[j])
+                    )
+                    coherence_scores.append(similarity)
+            avg_coherence = np.mean(coherence_scores) if coherence_scores else 0.5
+            score = 0
+            score += min(60, avg_coherence * 100)
+            score += min(40, marker_count * 5)
+            score = max(0, min(100, score))
+            if score >= 80:
+                quality = "excellent"
+            elif score >= 65:
+                quality = "good"
+            elif score >= 50:
+                quality = "moderate"
+            else:
+                quality = "needs_improvement"
+            return {"score": round(score), "coherence_quality": quality}
+        except Exception as e:
+            logging.warning(f"Coherence analysis failed: {e}")
+            return {"score": 50, "coherence_quality": "error"}
+    def _analyze_persuasion(self, text: str) -> Dict:
+        """Analyze persuasive elements"""
+        try:
+            text_lower = text.lower()
+            score = 0
+            logical_connectors = [
+                "therefore", "thus", "consequently", "hence", "accordingly",
+                "because", "since", "as a result", "for this reason"
+            ]
+            evidence_markers = [
+                "research shows", "studies indicate", "data suggests",
+                "according to", "evidence demonstrates", "proven by"
+            ]
+            appeal_markers = [
+                "imagine", "consider", "think about", "what if",
+                "picture this", "envision"
+            ]
+            credibility_markers = [
+                "expert", "research", "study", "proven", "validated",
+                "established", "recognized"
+            ]
+            score += min(25, sum(1 for c in logical_connectors if c in text_lower) * 5)
+            score += min(25, sum(1 for m in evidence_markers if m in text_lower) * 8)
+            score += min(25, sum(1 for m in appeal_markers if m in text_lower) * 6)
+            score += min(25, sum(1 for m in credibility_markers if m in text_lower) * 5)
+            score = max(0, min(100, score))
+            if score >= 80:
+                level = "highly_persuasive"
+            elif score >= 60:
+                level = "persuasive"
+            elif score >= 40:
+                level = "moderately_persuasive"
+            else:
+                level = "needs_improvement"
+            return {"score": round(score), "persuasion_level": level}
+        except Exception as e:
+            logging.warning(f"Persuasion analysis failed: {e}")
+            return {"score": 50, "persuasion_level": "error"}
+    def _generate_personalized_tips(
+        self,
+        transcript: str,
+        pacing: Dict,
+        prosody: Dict,
+        fillers: Dict,
+        silences: Dict,
+        sentiment: Dict,
+        vocabulary: Dict,
+        logical_flow: Dict,
+        coherence: Dict,
+        persuasion: Dict,
+        overall_score: float
+    ) -> List[str]:
+        """Generate truly personalized tips using OpenAI or enhanced fallback"""
+        # Try OpenAI first if available
+        if self.use_openai:
+            try:
+                tips = self._generate_openai_tips(
+                    transcript, pacing, prosody, fillers, silences,
+                    sentiment, vocabulary, logical_flow, coherence, persuasion, overall_score
+                )
+                if tips and len(tips) >= 3:
+                    return tips
+            except Exception as e:
+                logging.warning(f"OpenAI tip generation failed: {e}")
+        # Use enhanced fallback tips
+        return self._generate_enhanced_fallback_tips(
+            transcript, pacing, prosody, fillers, silences,
+            sentiment, vocabulary, logical_flow, coherence, persuasion, overall_score
+        )
+    def _generate_openai_tips(
+        self,
+        transcript: str,
+        pacing: Dict,
+        prosody: Dict,
+        fillers: Dict,
+        silences: Dict,
+        sentiment: Dict,
+        vocabulary: Dict,
+        logical_flow: Dict,
+        coherence: Dict,
+        persuasion: Dict,
+        overall_score: float
+    ) -> List[str]:
+        """Generate personalized tips using OpenAI API"""
+        # Build detailed analysis summary
+        analysis_summary = f"""Speech Performance Analysis:
+Overall Score: {overall_score}/10
+Detailed Metrics:
+- Pacing: {pacing['category']} at {pacing['words_per_minute']} words per minute
+- Voice Variation: {prosody['category']} (pitch variation: {prosody['pitch_variation_hz']} Hz)
+- Filler Words: {sum(fillers.values())} total ({', '.join([f'{k}: {v}' for k, v in fillers.items()]) if fillers else 'none'})
+- Pauses: {silences['count']} long pauses
+- Tone: {sentiment['dominant_sentiment']} ({sentiment['confidence']:.0%} confidence)
+- Vocabulary: {vocabulary['score']}/100 (used {len(vocabulary['good_words_used'])} power words)
+- Logical Flow: {logical_flow['flow_quality']} ({logical_flow['score']}/100)
+- Coherence: {coherence['coherence_quality']} ({coherence['score']}/100)
+- Persuasiveness: {persuasion['persuasion_level']} ({persuasion['score']}/100)
+Speech excerpt: "{transcript[:200]}..."
+"""
+        # Create personalized prompt
+        prompt = f"""{analysis_summary}
+You are a friendly, encouraging public speaking coach. Based on this person's speech analysis, provide 5 specific, actionable coaching tips.
+Requirements:
+1. Be warm, supportive, and encouraging
+2. Focus on the 2-3 weakest areas that need improvement
+3. Give concrete examples for each tip (e.g., "Instead of saying 'um,' try pausing silently for 1-2 seconds")
+4. Use conversational, friendly language as if speaking to a friend
+5. Celebrate what they're doing well while gently addressing areas to improve
+6. Make tips practical and easy to implement immediately
+Format each tip as a complete, friendly sentence. Number them 1-5."""
+        try:
+            response = openai.ChatCompletion.create(
+                model="gpt-4o-mini",
+                messages=[
+                    {"role": "system", "content": "You are an expert public speaking coach who gives personalized, friendly, actionable advice."},
+                    {"role": "user", "content": prompt}
+                ],
+                max_tokens=500,
+                temperature=0.8
+            )
+            content = response.choices[0].message.content.strip()
+            # Parse tips
+            tips = []
+            for line in content.split('\n'):
+                line = line.strip()
+                # Remove numbering
+                line = re.sub(r'^\d+[\.\):\-]\s*', '', line)
+                if len(line) > 20:  # Valid tip
+                    tips.append(line)
+            return tips[:5]
+        except Exception as e:
+            logging.error(f"OpenAI API error: {e}")
+            return []
+    def _generate_enhanced_fallback_tips(
+        self,
+        transcript: str,
+        pacing: Dict,
+        prosody: Dict,
+        fillers: Dict,
+        silences: Dict,
+        sentiment: Dict,
+        vocabulary: Dict,
+        logical_flow: Dict,
+        coherence: Dict,
+        persuasion: Dict,
+        overall_score: float
+    ) -> List[str]:
+        """Generate personalized, friendly tips with examples (fallback)"""
+        tips = []
+        # Calculate what needs improvement most
+        scores = {
+            'pacing': self._get_pacing_score(pacing),
+            'prosody': self._get_prosody_score(prosody),
+            'fillers': self._get_filler_score(fillers),
+            'silences': self._get_silence_score(silences),
+            'vocabulary': vocabulary['score'] / 10.0,
+            'flow': logical_flow['score'] / 10.0,
+            'coherence': coherence['score'] / 10.0,
+            'persuasion': persuasion['score'] / 10.0
+        }
+        # Sort by score (lowest first = needs most improvement)
+        improvement_areas = sorted(scores.items(), key=lambda x: x[1])
+        # Generate tips for weakest areas
+        wpm = pacing['words_per_minute']
+        total_fillers = sum(fillers.values())
+        for area, score in improvement_areas[:5]:  # Top 5 areas needing improvement
+            if area == 'pacing':
+                if pacing['category'] == 'slow':
+                    tips.append(f"Your pace is currently {wpm} words per minute. Try speeding up to 130-140 WPM - imagine you're telling an exciting story to a friend! Practice by reading aloud with a timer.")
+                elif pacing['category'] == 'fast':
+                    tips.append(f"You're speaking at {wpm} words per minute, which is pretty fast! Slow down to about 140-150 WPM. Take a breath between sentences - your audience needs time to absorb your ideas.")
+            elif area == 'prosody':
+                if prosody['category'] == 'monotone':
+                    tips.append(f"Add more vocal variety to keep your audience engaged! Try emphasizing key words - for example, if you say 'This is REALLY important,' make 'really' louder and higher pitched. Practice reading children's books out loud to build this skill.")
+            elif area == 'fillers':
+                if total_fillers > 5:
+                    most_used = max(fillers.items(), key=lambda x: x[1])
+                    tips.append(f"You said '{most_used[0]}' {most_used[1]} times. When you feel the urge to say it, pause silently instead - it makes you sound more confident! Try counting to 2 in your head during pauses.")
+            elif area == 'silences':
+                if silences['count'] > 5:
+                    tips.append(f"You had {silences['count']} long pauses. That's okay! But try to keep pauses to 1-2 seconds. If you need to think, it's better to say 'Let me think about that...' than to go silent for too long.")
+                elif silences['count'] < 2:
+                    tips.append(f"Don't be afraid to pause! Strategic 2-second pauses after important points give your audience time to process. Try pausing after questions like 'Why does this matter?' - it creates anticipation.")
+            elif area == 'vocabulary':
+                if vocabulary['score'] < 60:
+                    good_words = vocabulary['good_words_used']
+                    if good_words:
+                        tips.append(f"Great job using power words like '{', '.join(good_words[:3])}'! Try adding more impact words like 'crucial,' 'remarkable,' or 'transform' to make your speech more memorable.")
+                    else:
+                        tips.append(f"Spice up your vocabulary! Instead of 'very good,' try 'excellent' or 'outstanding.' Instead of 'big problem,' say 'significant challenge.' Keep a list of power words on your phone!")
+            elif area == 'flow':
+                if logical_flow['score'] < 65:
+                    tips.append(f"Connect your ideas more smoothly! Use transition phrases like 'Building on that...', 'Here's why this matters...', or 'Let me give you an example...' - they're like road signs that guide your audience through your speech.")
+            elif area == 'coherence':
+                if coherence['score'] < 65:
+                    tips.append(f"Make your main message crystal clear! Try using signpost phrases: 'There are three reasons why...' or 'My main point is...' Then at the end, say 'To sum up...' and restate your key idea.")
+            elif area == 'persuasion':
+                if persuasion['score'] < 60:
+                    tips.append(f"Make your speech more convincing! Add phrases like 'Research shows that...' or 'Imagine if we could...' or 'The evidence is clear...' These make your points more compelling and credible.")
+        # If we don't have 5 tips yet, add some positive encouragement
+        if len(tips) < 5 and overall_score >= 7.0:
+            tips.append(f"You're doing great with a {overall_score:.1f}/10 score! Keep practicing regularly - even 5 minutes a day of reading aloud can make a huge difference in your confidence and delivery.")
+        # Always add one encouraging tip at the end
+        if len(tips) < 5:
+            if overall_score < 5.0:
+                tips.append("Remember, every great speaker started somewhere! Focus on improving one thing at a time, and you'll see amazing progress. Record yourself weekly to track your improvement!")
+            else:
+                tips.append("You're making good progress! Keep recording yourself and listening back - you'll be surprised how quickly you improve. Consider joining a speaking group like Toastmasters to practice regularly!")
+        return tips[:5]
+    def _get_pacing_score(self, pacing: Dict) -> float:
+        """Convert pacing to 0-10 score"""
+        wpm = pacing['words_per_minute']
+        if 120 <= wpm <= 160:
+            return 10.0
+        elif 100 <= wpm < 120 or 160 < wpm <= 180:
+            return 7.0
+        else:
+            return 4.0
+    def _get_prosody_score(self, prosody: Dict) -> float:
+        """Convert prosody to 0-10 score"""
+        return 10.0 if prosody['category'] == 'dynamic' else 4.0
+    def _get_filler_score(self, fillers: Dict) -> float:
+        """Convert filler count to 0-10 score"""
+        total = sum(fillers.values())
+        if total == 0:
+            return 10.0
+        elif total <= 3:
+            return 9.0
+        elif total <= 5:
+            return 7.0
+        else:
+            return max(2.0, 10.0 - (total * 0.3))
+    def _get_silence_score(self, silences: Dict) -> float:
+        """Convert silence count to 0-10 score"""
+        count = silences['count']
+        if 2 <= count <= 5:
+            return 10.0
+        elif count <= 8:
+            return 8.0
+        else:
+            return max(3.0, 10.0 - (count * 0.5))
+    def _format_tips_for_audio(self, tips: List[str], gender: str) -> str:
+        """Format tips in a natural, conversational way for audio"""
+        avatar_name = "Alex" if gender == "male" else "Maya"
+        # Create a friendly introduction
+        intro = f"Hey there! I'm {avatar_name}, your speaking coach. I've analyzed your speech, and I have some personalized tips to help you shine even brighter!"
+        # Add natural transitions between tips
+        transitions = [
+            "First,",
+            "Next up,",
+            "Here's another tip:",
+            "Also, I noticed that",
+            "And finally,"
+        ]
+        # Build the audio script
+        audio_parts = [intro]
+        for i, tip in enumerate(tips[:5]):
+            if i < len(transitions):
+                audio_parts.append(f"{transitions[i]} {tip}")
+            else:
+                audio_parts.append(tip)
+        # Add encouraging conclusion
+        conclusion = "You're making great progress! Keep practicing these tips, and you'll see amazing results. I'm cheering for you!"
+        audio_parts.append(conclusion)
+        return " ".join(audio_parts)
+    def _create_improved_transcript(self, original: str, fillers: Dict) -> str:
+        """Create improved version of transcript (remove fillers, clean up)"""
+        improved = original
+        # Remove filler words
+        for filler_name, pattern in self.filler_patterns.items():
+            if filler_name in fillers:
+                # Replace fillers with nothing or appropriate punctuation
+                improved = re.sub(pattern, '', improved, flags=re.IGNORECASE)
+        # Clean up multiple spaces
+        improved = re.sub(r'\s+', ' ', improved)
+        # Fix punctuation
+        improved = re.sub(r'\s+([,.!?])', r'\1', improved)
+        # Capitalize first letter of sentences
+        improved = re.sub(r'(^|[.!?]\s+)([a-z])', lambda m: m.group(1) + m.group(2).upper(), improved)
+        return improved.strip()
+    def _generate_avatar_voice(self, text: str, output_dir: str, gender: str = "male", prefix: str = "avatar") -> Optional[str]:
+        """
+        Generate avatar voice audio using TTS
+        Args:
+            text: Text to synthesize
+            output_dir: Directory to save audio file
+            gender: Avatar gender ("male" or "female")
+            prefix: Filename prefix (e.g., "improved", "tips")
+        Returns:
+            Relative path to generated audio file or None if generation fails
+        """
+        try:
+            if not self.tts_enabled or not self.tts_model:
+                print("   ⚠️  TTS not enabled, skipping avatar voice generation")
+                return None
+            # Generate unique filename with prefix
+            audio_filename = f"{prefix}_{uuid.uuid4()}.wav"
+            audio_path = os.path.join(output_dir, audio_filename)
+            # Truncate text if too long (TTS models have limits)
+            max_length = 1000  # characters
+            if len(text) > max_length:
+                text = text[:max_length] + "..."
+                print(f"   ⚠️  Text truncated to {max_length} characters for TTS")
+            # Generate audio using TTS
+            print(f"   🎙️ Generating {gender} {prefix} audio...")
+            self.tts_model.tts_to_file(text=text, file_path=audio_path)
+            # Return relative path (assuming output_dir is served)
+            return f"/audio/{audio_filename}"
+        except Exception as e:
+            logging.error(f"Avatar voice generation failed: {e}")
+            import traceback
+            traceback.print_exc()
+            return None
+# ================= MAIN =================
+if __name__ == "__main__":
+    print("\n" + "="*70)
+    print("ENHANCED PUBLIC SPEAKING COACH - TEST")
+    print("="*70 + "\n")
+    test_file = "test_speech.wav"
+    if not os.path.exists(test_file):
+        print("⚠️  No test file found. Generating dummy audio...")
+        sr = 16000
+        duration = 10
+        t = np.linspace(0, duration, sr * duration)
+        audio = 0.3 * np.sin(2 * np.pi * 200 * t) + 0.2 * np.sin(2 * np.pi * 300 * t)
+        audio += 0.1 * np.random.randn(len(audio))
+        sf.write(test_file, audio, sr)
+        print(f"✅ Created {test_file}\n")
+    try:
+        # Get OpenAI API key from environment variable if available
+        openai_key = os.getenv('OPENAI_API_KEY')
+        coach = EnhancedPublicSpeakingCoach(
+            whisper_model_size="base",
+            enable_tts=True,
+            openai_api_key=openai_key
+        )
+        result = coach.analyze_speech(test_file)
+        print("\n" + "="*70)
+        print("ANALYSIS RESULTS (JSON)")
+        print("="*70)
+        print(json.dumps(result, indent=2, cls=NumpyEncoder))
+        output_file = "speech_analysis_result.json"
+        with open(output_file, 'w') as f:
+            json.dump(result, f, indent=2, cls=NumpyEncoder)
+        print(f"\n✅ Results saved to: {output_file}")
+        print("✅ Test completed successfully!")
+    except Exception as e:
+        print(f"\n❌ ERROR: {e}")
+        import traceback
+        traceback.print_exc()

main.py ADDED Viewed

	@@ -0,0 +1,335 @@

+"""
+Production FastAPI Server with S3 Support
+Works on: Hugging Face Space (testing) → AWS (production)
+"""
+import os
+import tempfile
+from pathlib import Path
+from typing import Optional
+import time
+import uuid
+import uvicorn
+from fastapi import FastAPI, UploadFile, File, HTTPException, status, Form
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse, FileResponse
+from fastapi.staticfiles import StaticFiles
+from kid_coach_pipeline import EnhancedPublicSpeakingCoach
+# Try to import boto3 (for AWS S3)
+try:
+    import boto3
+    S3_AVAILABLE = True
+except ImportError:
+    S3_AVAILABLE = False
+    print("⚠️  boto3 not available - S3 uploads disabled")
+# ================= CONFIGURATION =================
+# Application object; title, description and version feed the generated API docs.
+app = FastAPI(
+    title="Aurator Speech Coach API",
+    description="AI-powered speech analysis with personalized coaching",
+    version="4.0.0"
+)
+# CORS is fully open: every origin, method and header is accepted, with credentials allowed.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Audio output directory (local fallback)
+AUDIO_OUTPUT_DIR = "/tmp/audio_outputs"
+os.makedirs(AUDIO_OUTPUT_DIR, exist_ok=True)
+# Mount for local testing
+# NOTE(review): a GET /audio/{filename} route is also declared further down in this
+# file; this mount is registered first - confirm which of the two should serve requests.
+app.mount("/audio", StaticFiles(directory=AUDIO_OUTPUT_DIR), name="audio")
+# AWS S3 Configuration (optional - for production)
+# S3 is active only when USE_S3 is "true" (case-insensitive) AND boto3 was importable.
+USE_S3 = os.getenv("USE_S3", "false").lower() == "true" and S3_AVAILABLE
+S3_BUCKET = os.getenv("S3_BUCKET_NAME", "aurator-audio-outputs")
+S3_REGION = os.getenv("AWS_REGION", "us-east-1")
+if USE_S3:
+    s3_client = boto3.client('s3', region_name=S3_REGION)
+    print(f"✅ S3 enabled: {S3_BUCKET}")
+else:
+    print("📁 Using local file storage")
+# Set by the startup hook; remains None when model loading fails.
+coach_engine: Optional[EnhancedPublicSpeakingCoach] = None
+# Upload file extensions (lowercase, with dot) accepted by the analyze endpoint.
+SUPPORTED_FORMATS = {'.wav', '.mp3', '.m4a', '.flac', '.ogg', '.aac', '.webm'}
+# Upload size limit in bytes (50 MiB).
+MAX_FILE_SIZE = 50 * 1024 * 1024
+# ================= S3 HELPERS =================
+def upload_to_s3(file_path: str, file_type: str) -> str:
+    """Upload file to S3 and return public URL"""
+    if not USE_S3:
+        # Return local URL for HF testing
+        filename = os.path.basename(file_path)
+        return f"/audio/{filename}"
+    try:
+        # Generate S3 key with date structure
+        from datetime import datetime
+        now = datetime.now()
+        file_uuid = str(uuid.uuid4())
+        s3_key = f"{now.year}/{now.month:02d}/{now.day:02d}/{file_type}_{file_uuid}.wav"
+        # Upload to S3
+        s3_client.upload_file(
+            file_path,
+            S3_BUCKET,
+            s3_key,
+            ExtraArgs={'ContentType': 'audio/wav', 'ACL': 'public-read'}
+        )
+        # Return public URL
+        return f"https://{S3_BUCKET}.s3.{S3_REGION}.amazonaws.com/{s3_key}"
+    except Exception as e:
+        print(f"❌ S3 upload failed: {e}")
+        # Fallback to local URL
+        filename = os.path.basename(file_path)
+        return f"/audio/{filename}"
+# ================= STARTUP =================
+@app.on_event("startup")
+async def startup_event():
+    global coach_engine
+    print("\n" + "="*60)
+    print("🚀 AURATOR SPEECH COACH API")
+    print("="*60)
+    try:
+        openai_key = os.getenv("OPENAI_API_KEY")
+        print("\n📦 Loading models...")
+        coach_engine = EnhancedPublicSpeakingCoach(
+            whisper_model_size="base",
+            enable_tts=True,
+            openai_api_key=openai_key
+        )
+        print("✅ Engine ready!")
+        print(f"   Storage: {'S3' if USE_S3 else 'Local'}")
+        print(f"   OpenAI: {'Enabled' if openai_key else 'Fallback mode'}")
+        print("\n" + "="*60 + "\n")
+    except Exception as e:
+        print(f"\n❌ STARTUP FAILED: {e}")
+        coach_engine = None
+# ================= ENDPOINTS =================
+@app.get("/")
+async def root():
+    """API info"""
+    return {
+        "service": "Aurator Speech Coach API",
+        "version": "4.0.0",
+        "status": "online" if coach_engine else "degraded",
+        "storage": "s3" if USE_S3 else "local",
+        "endpoints": {
+            "analyze": "POST /api/analyze",
+            "health": "GET /api/health"
+        }
+    }
+@app.get("/api/health")
+async def health_check():
+    """Health check for AWS load balancer"""
+    return {
+        "status": "healthy" if coach_engine else "unhealthy",
+        "engine_loaded": coach_engine is not None,
+        "timestamp": time.time()
+    }
+@app.post("/api/analyze")
+async def analyze_speech(
+    audio_file: UploadFile = File(...),
+    avatar_gender: str = Form('male')
+):
+    """
+    Main endpoint: Analyze speech and return results
+    Request:
+    - audio_file: Audio file (wav/mp3/m4a/flac/ogg/aac/webm)
+    - avatar_gender: "male" or "female" (default: male)
+    Response:
+    {
+        "success": true,
+        "data": {
+            "overall_score": 8.6,
+            "transcription": {...},
+            "analysis": {...},
+            "coaching": {
+                "tips": [...],
+                "tips_audio_url": "https://...",
+                "improved_audio_url": "https://..."
+            }
+        },
+        "processing_time_ms": 3250,
+        "timestamp": "2025-12-16T..."
+    }
+    """
+    start_time = time.time()
+    # Validate engine
+    if coach_engine is None:
+        raise HTTPException(
+            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
+            detail="Engine not initialized"
+        )
+    # Validate file
+    if not audio_file or not audio_file.filename:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="No audio file provided"
+        )
+    file_ext = Path(audio_file.filename).suffix.lower()
+    if file_ext not in SUPPORTED_FORMATS:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=f"Unsupported format: {file_ext}"
+        )
+    temp_file = None
+    try:
+        # Save uploaded file temporarily
+        content = await audio_file.read()
+        if len(content) > MAX_FILE_SIZE:
+            raise HTTPException(
+                status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
+                detail="File too large (max 50MB)"
+            )
+        with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as temp:
+            temp.write(content)
+            temp_file = temp.name
+        print(f"\n🎤 Analyzing: {audio_file.filename} ({len(content)/1024:.1f} KB)")
+        # Run analysis
+        result = coach_engine.analyze_speech(
+            temp_file,
+            output_dir=AUDIO_OUTPUT_DIR,
+            enable_tts=True,
+            avatar_gender=avatar_gender
+        )
+        if "error" in result:
+            raise HTTPException(
+                status_code=status.HTTP_400_BAD_REQUEST,
+                detail=result["error"]
+            )
+        # Upload audio files to S3 (if enabled) or use local URLs
+        tips_audio_path = None
+        improved_audio_path = None
+        if result.get("tips_audio_url"):
+            local_path = os.path.join(AUDIO_OUTPUT_DIR, os.path.basename(result["tips_audio_url"]))
+            if os.path.exists(local_path):
+                tips_audio_url = upload_to_s3(local_path, "tips")
+                result["tips_audio_url"] = tips_audio_url
+        if result.get("avatar_audio_url"):
+            local_path = os.path.join(AUDIO_OUTPUT_DIR, os.path.basename(result["avatar_audio_url"]))
+            if os.path.exists(local_path):
+                improved_audio_url = upload_to_s3(local_path, "improved")
+                result["avatar_audio_url"] = improved_audio_url
+        processing_time = int((time.time() - start_time) * 1000)
+        print(f"✅ Complete in {processing_time}ms")
+        # Format response for React Native
+        response = {
+            "success": True,
+            "data": {
+                "overall_score": result.get("overall_score", 0),
+                "duration_seconds": result.get("duration_seconds", 0),
+                "word_count": result.get("word_count", 0),
+                "transcription": {
+                    "text": result.get("transcription", ""),
+                    "improved_text": result.get("improved_transcript", "")
+                },
+                "analysis": {
+                    "pacing": result.get("pacing", {}),
+                    "prosody": result.get("prosody", {}),
+                    "fillers": result.get("filler_words", {}),
+                    "silences": result.get("silence_detection", {}),
+                    "sentiment": result.get("sentiment_analysis", {}),
+                    "vocabulary": result.get("vocabulary", {}),
+                    "flow": result.get("logical_flow", {}),
+                    "coherence": result.get("coherence", {}),
+                    "persuasion": result.get("persuasion", {})
+                },
+                "coaching": {
+                    "tips": result.get("personalized_tips", []),
+                    "tips_audio_url": result.get("tips_audio_url"),
+                    "improved_audio_url": result.get("avatar_audio_url")
+                }
+            },
+            "processing_time_ms": processing_time,
+            "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
+        }
+        return JSONResponse(content=response)
+    except HTTPException:
+        raise
+    except Exception as e:
+        import traceback
+        traceback.print_exc()
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail=f"Analysis failed: {str(e)}"
+        )
+    finally:
+        # Cleanup
+        if temp_file and os.path.exists(temp_file):
+            try:
+                os.remove(temp_file)
+            except:
+                pass
+@app.get("/audio/{filename}")
+async def serve_audio(filename: str):
+    """Serve audio files (for local/HF testing)"""
+    file_path = os.path.join(AUDIO_OUTPUT_DIR, filename)
+    if not os.path.exists(file_path):
+        raise HTTPException(404, "Audio file not found")
+    return FileResponse(file_path, media_type="audio/wav")
+# ================= RUN =================
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=8000)

requirements1.txt ADDED Viewed

	@@ -0,0 +1,13 @@

+torch
+transformers
+sentence-transformers
+openai-whisper
+librosa
+soundfile
+textstat
+TTS
+fastapi
+uvicorn
+python-multipart
+boto3
+openai

test_api.py ADDED Viewed

	@@ -0,0 +1,71 @@

+"""
+Quick API Test Script
+Test the FastAPI server locally or on HF
+"""
+import requests
+import sys
+# Change this to your HF Space URL or local
+# Base URL without a trailing slash; endpoint paths such as /api/health are appended to it.
+API_URL = "http://localhost:8000"  # For local testing
+# API_URL = "https://your-space.hf.space"  # For HF testing
+def test_health():
+    """Test health endpoint"""
+    print("Testing /api/health...")
+    response = requests.get(f"{API_URL}/api/health")
+    print(f"Status: {response.status_code}")
+    print(f"Response: {response.json()}\n")
+    return response.status_code == 200
+def test_analyze(audio_file_path):
+    """Test analyze endpoint"""
+    print(f"Testing /api/analyze with {audio_file_path}...")
+    with open(audio_file_path, 'rb') as f:
+        files = {'audio_file': f}
+        data = {'avatar_gender': 'male'}
+        response = requests.post(
+            f"{API_URL}/api/analyze",
+            files=files,
+            data=data,
+            timeout=60
+        )
+    print(f"Status: {response.status_code}")
+    if response.status_code == 200:
+        result = response.json()
+        print(f"Success: {result['success']}")
+        print(f"Overall Score: {result['data']['overall_score']}")
+        print(f"Processing Time: {result['processing_time_ms']}ms")
+        print(f"Tips Count: {len(result['data']['coaching']['tips'])}")
+        print(f"Tips Audio: {result['data']['coaching']['tips_audio_url']}")
+        print(f"Improved Audio: {result['data']['coaching']['improved_audio_url']}")
+    else:
+        print(f"Error: {response.text}")
+    return response.status_code == 200
+if __name__ == "__main__":
+    print("="*60)
+    print("API TEST SCRIPT")
+    print("="*60 + "\n")
+    # Test health
+    if not test_health():
+        print("❌ Health check failed!")
+        sys.exit(1)
+    print("✅ Health check passed!\n")
+    # Test analyze (provide your audio file)
+    if len(sys.argv) > 1:
+        audio_file = sys.argv[1]
+        if test_analyze(audio_file):
+            print("\n✅ Analysis test passed!")
+        else:
+            print("\n❌ Analysis test failed!")
+    else:
+        print("ℹ️  To test analysis: python test_api.py your_audio.wav")