Spaces:

akpande2
/

kid-coach-api

Sleeping

App Files Files Community

akpande2 committed on Dec 16, 2025

Commit

aa39a05

verified ·

1 Parent(s): 97c6a4d

Update kid_coach_pipeline.py

Browse files

Files changed (1) hide show

kid_coach_pipeline.py +264 -92

kid_coach_pipeline.py CHANGED Viewed

@@ -1,6 +1,6 @@
 """
-Enhanced Public Speaking Coach with LLM Tips and Avatar Voice + Overall Score
-Includes: Speech Analysis + LLM-Generated Tips + Text-to-Speech Avatar + Overall Score Calculation
 """
 import os
@@ -54,6 +54,15 @@ except ImportError:
     print("   pip install transformers sentence-transformers torch")
     exit(1)
 # Import TTS
 try:
     from TTS.api import TTS as CoquiTTS
@@ -81,17 +90,25 @@ class EnhancedPublicSpeakingCoach:
     Complete speech analysis engine with LLM tips and avatar voice
     """
-    def __init__(self, whisper_model_size: str = "base", enable_tts: bool = True):
         """
         Initialize the enhanced coach engine
         Args:
             whisper_model_size: Whisper model size (tiny/base/small/medium)
             enable_tts: Enable text-to-speech avatar voice generation
         """
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         print(f"🚀 Initializing Enhanced Coach on {self.device}...")
         # Load Whisper for transcription
         print(f"   Loading Whisper ({whisper_model_size})...")
         self.whisper = whisper.load_model(whisper_model_size, device=self.device)
@@ -118,15 +135,6 @@ class EnhancedPublicSpeakingCoach:
         print("   Loading Sentence Transformer...")
         self.sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
-        # Load LLM for tips generation (using Flan-T5 - lightweight and effective)
-        print("   Loading LLM for Tips Generation...")
-        self.tips_generator = pipeline(
-            "text2text-generation",
-            model="google/flan-t5-base",
-            device=0 if self.device == "cuda" else -1,
-            max_length=512
-        )
         # Load TTS for avatar voice
         self.tts_enabled = False
         self.tts_model = None
@@ -374,7 +382,8 @@ class EnhancedPublicSpeakingCoach:
                 vocabulary_result,
                 logical_flow_result,
                 coherence_result,
-                persuasion_result
             )
             # Step 12: Create improved version of transcript
@@ -400,7 +409,7 @@ class EnhancedPublicSpeakingCoach:
                 # Generate audio for coaching tips
                 print("   🎙️ Generating avatar voice for coaching tips...")
-                tips_text = "Here are your personalized coaching tips. " + " ".join(personalized_tips)
                 tips_audio_url = self._generate_avatar_voice(
                     tips_text,
                     output_dir,
@@ -811,111 +820,268 @@ class EnhancedPublicSpeakingCoach:
         vocabulary: Dict,
         logical_flow: Dict,
         coherence: Dict,
-        persuasion: Dict
     ) -> List[str]:
-        """Generate personalized tips using LLM"""
         try:
-            # Prepare analysis summary for LLM
-            analysis_summary = f"""
-            Speech Analysis Summary:
-            - Pacing: {pacing['category']} ({pacing['words_per_minute']} WPM)
-            - Prosody: {prosody['category']}
-            - Filler words: {sum(fillers.values())} total
-            - Long pauses: {silences['count']}
-            - Sentiment: {sentiment['dominant_sentiment']}
-            - Vocabulary score: {vocabulary['score']}/100
-            - Logical flow: {logical_flow['flow_quality']}
-            - Coherence: {coherence['coherence_quality']}
-            - Persuasion: {persuasion['persuasion_level']}
-            Generate 5 specific, actionable tips to improve this speech. Focus on the weakest areas.
-            """
-            # Generate tips using LLM
-            prompt = f"Based on this speech analysis, provide 5 specific improvement tips:\n{analysis_summary}\n\nTips:"
-            response = self.tips_generator(
-                prompt,
-                max_length=300,
-                num_return_sequences=1,
-                temperature=0.7
-            )[0]['generated_text']
-            # Parse tips (split by newlines or numbers)
             tips = []
-            for line in response.split('\n'):
                 line = line.strip()
-                if line and len(line) > 10:
-                    # Remove numbering if present
-                    line = re.sub(r'^\d+[\.\)]\s*', '', line)
-                    if line:
-                        tips.append(line)
-            # Fallback to rule-based tips if LLM fails
-            if len(tips) < 3:
-                tips = self._generate_fallback_tips(
-                    pacing, prosody, fillers, silences, vocabulary,
-                    logical_flow, coherence, persuasion
-                )
-            return tips[:5]  # Return top 5
         except Exception as e:
-            logging.warning(f"LLM tip generation failed: {e}")
-            return self._generate_fallback_tips(
-                pacing, prosody, fillers, silences, vocabulary,
-                logical_flow, coherence, persuasion
-            )
-    def _generate_fallback_tips(
         self,
         pacing: Dict,
         prosody: Dict,
         fillers: Dict,
         silences: Dict,
         vocabulary: Dict,
         logical_flow: Dict,
         coherence: Dict,
-        persuasion: Dict
     ) -> List[str]:
-        """Generate rule-based tips as fallback"""
         tips = []
-        # Pacing tips
-        if pacing['category'] == 'slow':
-            tips.append("Try speaking 10-15% faster to maintain audience engagement and energy.")
-        elif pacing['category'] == 'fast':
-            tips.append("Slow down slightly to ensure clarity and give your audience time to process your message.")
-        # Prosody tips
-        if prosody['category'] == 'monotone':
-            tips.append("Add vocal variety by emphasizing key words and varying your pitch throughout the speech.")
-        # Filler word tips
-        if sum(fillers.values()) > 5:
-            tips.append("Reduce filler words by pausing silently instead of saying 'um' or 'uh'. Practice makes perfect!")
-        # Silence tips
-        if silences['count'] > 3:
-            tips.append("Work on smoother transitions between ideas to reduce long pauses.")
-        # Vocabulary tips
-        if vocabulary['score'] < 60:
-            tips.append("Expand your vocabulary by incorporating more power words and descriptive language.")
-        # Flow tips
-        if logical_flow['score'] < 60:
-            tips.append("Improve logical flow by using transition phrases like 'furthermore,' 'however,' and 'in conclusion.'")
-        # Coherence tips
-        if coherence['score'] < 60:
-            tips.append("Strengthen coherence by making sure each point clearly connects to your main message.")
-        # Persuasion tips
-        if persuasion['score'] < 60:
-            tips.append("Make your speech more persuasive by adding evidence, examples, and emotional appeals.")
-        return tips[:5]
     def _create_improved_transcript(self, original: str, fillers: Dict) -> str:
@@ -1001,7 +1167,13 @@ if __name__ == "__main__":
         print(f"✅ Created {test_file}\n")
     try:
-        coach = EnhancedPublicSpeakingCoach(whisper_model_size="base", enable_tts=True)
         result = coach.analyze_speech(test_file)
         print("\n" + "="*70)

 """
+Enhanced Public Speaking Coach with PERSONALIZED LLM Tips and Avatar Voice
+Includes: Speech Analysis + OpenAI-Powered Personalized Tips + Text-to-Speech Avatar
 """
 import os
     print("   pip install transformers sentence-transformers torch")
     exit(1)
+# Import OpenAI for better tips generation
+try:
+    import openai
+    OPENAI_AVAILABLE = True
+except ImportError:
+    print("\n⚠️  WARNING: OpenAI not installed. Using fallback tips.")
+    print("   To enable better tips: pip install openai")
+    OPENAI_AVAILABLE = False
 # Import TTS
 try:
     from TTS.api import TTS as CoquiTTS
     Complete speech analysis engine with LLM tips and avatar voice
     """
+    def __init__(self, whisper_model_size: str = "base", enable_tts: bool = True, openai_api_key: Optional[str] = None):
         """
         Initialize the enhanced coach engine
         Args:
             whisper_model_size: Whisper model size (tiny/base/small/medium)
             enable_tts: Enable text-to-speech avatar voice generation
+            openai_api_key: OpenAI API key for better tips (optional)
         """
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         print(f"🚀 Initializing Enhanced Coach on {self.device}...")
+        # Set up OpenAI if available
+        self.use_openai = False
+        if OPENAI_AVAILABLE and openai_api_key:
+            openai.api_key = openai_api_key
+            self.use_openai = True
+            print("   ✅ OpenAI enabled for personalized tips")
         # Load Whisper for transcription
         print(f"   Loading Whisper ({whisper_model_size})...")
         self.whisper = whisper.load_model(whisper_model_size, device=self.device)
         print("   Loading Sentence Transformer...")
         self.sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
         # Load TTS for avatar voice
         self.tts_enabled = False
         self.tts_model = None
                 vocabulary_result,
                 logical_flow_result,
                 coherence_result,
+                persuasion_result,
+                overall_score
             )
             # Step 12: Create improved version of transcript
                 # Generate audio for coaching tips
                 print("   🎙️ Generating avatar voice for coaching tips...")
+                tips_text = self._format_tips_for_audio(personalized_tips, avatar_gender)
                 tips_audio_url = self._generate_avatar_voice(
                     tips_text,
                     output_dir,
         vocabulary: Dict,
         logical_flow: Dict,
         coherence: Dict,
+        persuasion: Dict,
+        overall_score: float
     ) -> List[str]:
+        """Generate truly personalized tips using OpenAI or enhanced fallback"""
+        # Try OpenAI first if available
+        if self.use_openai:
+            try:
+                tips = self._generate_openai_tips(
+                    transcript, pacing, prosody, fillers, silences,
+                    sentiment, vocabulary, logical_flow, coherence, persuasion, overall_score
+                )
+                if tips and len(tips) >= 3:
+                    return tips
+            except Exception as e:
+                logging.warning(f"OpenAI tip generation failed: {e}")
+        # Use enhanced fallback tips
+        return self._generate_enhanced_fallback_tips(
+            transcript, pacing, prosody, fillers, silences,
+            sentiment, vocabulary, logical_flow, coherence, persuasion, overall_score
+        )
+    def _generate_openai_tips(
+        self,
+        transcript: str,
+        pacing: Dict,
+        prosody: Dict,
+        fillers: Dict,
+        silences: Dict,
+        sentiment: Dict,
+        vocabulary: Dict,
+        logical_flow: Dict,
+        coherence: Dict,
+        persuasion: Dict,
+        overall_score: float
+    ) -> List[str]:
+        """Generate personalized tips using OpenAI API"""
+        # Build detailed analysis summary
+        analysis_summary = f"""Speech Performance Analysis:
+Overall Score: {overall_score}/10
+Detailed Metrics:
+- Pacing: {pacing['category']} at {pacing['words_per_minute']} words per minute
+- Voice Variation: {prosody['category']} (pitch variation: {prosody['pitch_variation_hz']} Hz)
+- Filler Words: {sum(fillers.values())} total ({', '.join([f'{k}: {v}' for k, v in fillers.items()]) if fillers else 'none'})
+- Pauses: {silences['count']} long pauses
+- Tone: {sentiment['dominant_sentiment']} ({sentiment['confidence']:.0%} confidence)
+- Vocabulary: {vocabulary['score']}/100 (used {len(vocabulary['good_words_used'])} power words)
+- Logical Flow: {logical_flow['flow_quality']} ({logical_flow['score']}/100)
+- Coherence: {coherence['coherence_quality']} ({coherence['score']}/100)
+- Persuasiveness: {persuasion['persuasion_level']} ({persuasion['score']}/100)
+Speech excerpt: "{transcript[:200]}..."
+"""
+        # Create personalized prompt
+        prompt = f"""{analysis_summary}
+You are a friendly, encouraging public speaking coach. Based on this person's speech analysis, provide 5 specific, actionable coaching tips.
+Requirements:
+1. Be warm, supportive, and encouraging
+2. Focus on the 2-3 weakest areas that need improvement
+3. Give concrete examples for each tip (e.g., "Instead of saying 'um,' try pausing silently for 1-2 seconds")
+4. Use conversational, friendly language as if speaking to a friend
+5. Celebrate what they're doing well while gently addressing areas to improve
+6. Make tips practical and easy to implement immediately
+Format each tip as a complete, friendly sentence. Number them 1-5."""
         try:
+            response = openai.ChatCompletion.create(
+                model="gpt-4o-mini",
+                messages=[
+                    {"role": "system", "content": "You are an expert public speaking coach who gives personalized, friendly, actionable advice."},
+                    {"role": "user", "content": prompt}
+                ],
+                max_tokens=500,
+                temperature=0.8
+            )
+            content = response.choices[0].message.content.strip()
+            # Parse tips
             tips = []
+            for line in content.split('\n'):
                 line = line.strip()
+                # Remove numbering
+                line = re.sub(r'^\d+[\.\):\-]\s*', '', line)
+                if len(line) > 20:  # Valid tip
+                    tips.append(line)
+            return tips[:5]
         except Exception as e:
+            logging.error(f"OpenAI API error: {e}")
+            return []
+    def _generate_enhanced_fallback_tips(
         self,
+        transcript: str,
         pacing: Dict,
         prosody: Dict,
         fillers: Dict,
         silences: Dict,
+        sentiment: Dict,
         vocabulary: Dict,
         logical_flow: Dict,
         coherence: Dict,
+        persuasion: Dict,
+        overall_score: float
     ) -> List[str]:
+        """Generate personalized, friendly tips with examples (fallback)"""
         tips = []
+        # Calculate what needs improvement most
+        scores = {
+            'pacing': self._get_pacing_score(pacing),
+            'prosody': self._get_prosody_score(prosody),
+            'fillers': self._get_filler_score(fillers),
+            'silences': self._get_silence_score(silences),
+            'vocabulary': vocabulary['score'] / 10.0,
+            'flow': logical_flow['score'] / 10.0,
+            'coherence': coherence['score'] / 10.0,
+            'persuasion': persuasion['score'] / 10.0
+        }
+        # Sort by score (lowest first = needs most improvement)
+        improvement_areas = sorted(scores.items(), key=lambda x: x[1])
+        # Generate tips for weakest areas
+        wpm = pacing['words_per_minute']
+        total_fillers = sum(fillers.values())
+        for area, score in improvement_areas[:5]:  # Top 5 areas needing improvement
+            if area == 'pacing':
+                if pacing['category'] == 'slow':
+                    tips.append(f"Your pace is currently {wpm} words per minute. Try speeding up to 130-140 WPM - imagine you're telling an exciting story to a friend! Practice by reading aloud with a timer.")
+                elif pacing['category'] == 'fast':
+                    tips.append(f"You're speaking at {wpm} words per minute, which is pretty fast! Slow down to about 140-150 WPM. Take a breath between sentences - your audience needs time to absorb your ideas.")
+            elif area == 'prosody':
+                if prosody['category'] == 'monotone':
+                    tips.append(f"Add more vocal variety to keep your audience engaged! Try emphasizing key words - for example, if you say 'This is REALLY important,' make 'really' louder and higher pitched. Practice reading children's books out loud to build this skill.")
+            elif area == 'fillers':
+                if total_fillers > 5:
+                    most_used = max(fillers.items(), key=lambda x: x[1])
+                    tips.append(f"You said '{most_used[0]}' {most_used[1]} times. When you feel the urge to say it, pause silently instead - it makes you sound more confident! Try counting to 2 in your head during pauses.")
+            elif area == 'silences':
+                if silences['count'] > 5:
+                    tips.append(f"You had {silences['count']} long pauses. That's okay! But try to keep pauses to 1-2 seconds. If you need to think, it's better to say 'Let me think about that...' than to go silent for too long.")
+                elif silences['count'] < 2:
+                    tips.append(f"Don't be afraid to pause! Strategic 2-second pauses after important points give your audience time to process. Try pausing after questions like 'Why does this matter?' - it creates anticipation.")
+            elif area == 'vocabulary':
+                if vocabulary['score'] < 60:
+                    good_words = vocabulary['good_words_used']
+                    if good_words:
+                        tips.append(f"Great job using power words like '{', '.join(good_words[:3])}'! Try adding more impact words like 'crucial,' 'remarkable,' or 'transform' to make your speech more memorable.")
+                    else:
+                        tips.append(f"Spice up your vocabulary! Instead of 'very good,' try 'excellent' or 'outstanding.' Instead of 'big problem,' say 'significant challenge.' Keep a list of power words on your phone!")
+            elif area == 'flow':
+                if logical_flow['score'] < 65:
+                    tips.append(f"Connect your ideas more smoothly! Use transition phrases like 'Building on that...', 'Here's why this matters...', or 'Let me give you an example...' - they're like road signs that guide your audience through your speech.")
+            elif area == 'coherence':
+                if coherence['score'] < 65:
+                    tips.append(f"Make your main message crystal clear! Try using signpost phrases: 'There are three reasons why...' or 'My main point is...' Then at the end, say 'To sum up...' and restate your key idea.")
+            elif area == 'persuasion':
+                if persuasion['score'] < 60:
+                    tips.append(f"Make your speech more convincing! Add phrases like 'Research shows that...' or 'Imagine if we could...' or 'The evidence is clear...' These make your points more compelling and credible.")
+        # If we don't have 5 tips yet, add some positive encouragement
+        if len(tips) < 5 and overall_score >= 7.0:
+            tips.append(f"You're doing great with a {overall_score:.1f}/10 score! Keep practicing regularly - even 5 minutes a day of reading aloud can make a huge difference in your confidence and delivery.")
+        # Always add one encouraging tip at the end
+        if len(tips) < 5:
+            if overall_score < 5.0:
+                tips.append("Remember, every great speaker started somewhere! Focus on improving one thing at a time, and you'll see amazing progress. Record yourself weekly to track your improvement!")
+            else:
+                tips.append("You're making good progress! Keep recording yourself and listening back - you'll be surprised how quickly you improve. Consider joining a speaking group like Toastmasters to practice regularly!")
+        return tips[:5]
+    def _get_pacing_score(self, pacing: Dict) -> float:
+        """Convert pacing to 0-10 score"""
+        wpm = pacing['words_per_minute']
+        if 120 <= wpm <= 160:
+            return 10.0
+        elif 100 <= wpm < 120 or 160 < wpm <= 180:
+            return 7.0
+        else:
+            return 4.0
+    def _get_prosody_score(self, prosody: Dict) -> float:
+        """Convert prosody to 0-10 score"""
+        return 10.0 if prosody['category'] == 'dynamic' else 4.0
+    def _get_filler_score(self, fillers: Dict) -> float:
+        """Convert filler count to 0-10 score"""
+        total = sum(fillers.values())
+        if total == 0:
+            return 10.0
+        elif total <= 3:
+            return 9.0
+        elif total <= 5:
+            return 7.0
+        else:
+            return max(2.0, 10.0 - (total * 0.3))
+    def _get_silence_score(self, silences: Dict) -> float:
+        """Convert silence count to 0-10 score"""
+        count = silences['count']
+        if 2 <= count <= 5:
+            return 10.0
+        elif count <= 8:
+            return 8.0
+        else:
+            return max(3.0, 10.0 - (count * 0.5))
+    def _format_tips_for_audio(self, tips: List[str], gender: str) -> str:
+        """Format tips in a natural, conversational way for audio"""
+        avatar_name = "Alex" if gender == "male" else "Maya"
+        # Create a friendly introduction
+        intro = f"Hey there! I'm {avatar_name}, your speaking coach. I've analyzed your speech, and I have some personalized tips to help you shine even brighter!"
+        # Add natural transitions between tips
+        transitions = [
+            "First,",
+            "Next up,",
+            "Here's another tip:",
+            "Also, I noticed that",
+            "And finally,"
+        ]
+        # Build the audio script
+        audio_parts = [intro]
+        for i, tip in enumerate(tips[:5]):
+            if i < len(transitions):
+                audio_parts.append(f"{transitions[i]} {tip}")
+            else:
+                audio_parts.append(tip)
+        # Add encouraging conclusion
+        conclusion = "You're making great progress! Keep practicing these tips, and you'll see amazing results. I'm cheering for you!"
+        audio_parts.append(conclusion)
+        return " ".join(audio_parts)
     def _create_improved_transcript(self, original: str, fillers: Dict) -> str:
         print(f"✅ Created {test_file}\n")
     try:
+        # Get OpenAI API key from environment variable if available
+        openai_key = os.getenv('OPENAI_API_KEY')
+        coach = EnhancedPublicSpeakingCoach(
+            whisper_model_size="base",
+            enable_tts=True,
+            openai_api_key=openai_key
+        )
         result = coach.analyze_speech(test_file)
         print("\n" + "="*70)