akpande2 committed on
Commit
aa39a05
·
verified ·
1 Parent(s): 97c6a4d

Update kid_coach_pipeline.py

Browse files
Files changed (1) hide show
  1. kid_coach_pipeline.py +264 -92
kid_coach_pipeline.py CHANGED
@@ -1,6 +1,6 @@
1
  """
2
- Enhanced Public Speaking Coach with LLM Tips and Avatar Voice + Overall Score
3
- Includes: Speech Analysis + LLM-Generated Tips + Text-to-Speech Avatar + Overall Score Calculation
4
  """
5
 
6
  import os
@@ -54,6 +54,15 @@ except ImportError:
54
  print(" pip install transformers sentence-transformers torch")
55
  exit(1)
56
 
 
 
 
 
 
 
 
 
 
57
  # Import TTS
58
  try:
59
  from TTS.api import TTS as CoquiTTS
@@ -81,17 +90,25 @@ class EnhancedPublicSpeakingCoach:
81
  Complete speech analysis engine with LLM tips and avatar voice
82
  """
83
 
84
- def __init__(self, whisper_model_size: str = "base", enable_tts: bool = True):
85
  """
86
  Initialize the enhanced coach engine
87
 
88
  Args:
89
  whisper_model_size: Whisper model size (tiny/base/small/medium)
90
  enable_tts: Enable text-to-speech avatar voice generation
 
91
  """
92
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
93
  print(f"🚀 Initializing Enhanced Coach on {self.device}...")
94
 
 
 
 
 
 
 
 
95
  # Load Whisper for transcription
96
  print(f" Loading Whisper ({whisper_model_size})...")
97
  self.whisper = whisper.load_model(whisper_model_size, device=self.device)
@@ -118,15 +135,6 @@ class EnhancedPublicSpeakingCoach:
118
  print(" Loading Sentence Transformer...")
119
  self.sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
120
 
121
- # Load LLM for tips generation (using Flan-T5 - lightweight and effective)
122
- print(" Loading LLM for Tips Generation...")
123
- self.tips_generator = pipeline(
124
- "text2text-generation",
125
- model="google/flan-t5-base",
126
- device=0 if self.device == "cuda" else -1,
127
- max_length=512
128
- )
129
-
130
  # Load TTS for avatar voice
131
  self.tts_enabled = False
132
  self.tts_model = None
@@ -374,7 +382,8 @@ class EnhancedPublicSpeakingCoach:
374
  vocabulary_result,
375
  logical_flow_result,
376
  coherence_result,
377
- persuasion_result
 
378
  )
379
 
380
  # Step 12: Create improved version of transcript
@@ -400,7 +409,7 @@ class EnhancedPublicSpeakingCoach:
400
 
401
  # Generate audio for coaching tips
402
  print(" 🎙️ Generating avatar voice for coaching tips...")
403
- tips_text = "Here are your personalized coaching tips. " + " ".join(personalized_tips)
404
  tips_audio_url = self._generate_avatar_voice(
405
  tips_text,
406
  output_dir,
@@ -811,111 +820,268 @@ class EnhancedPublicSpeakingCoach:
811
  vocabulary: Dict,
812
  logical_flow: Dict,
813
  coherence: Dict,
814
- persuasion: Dict
 
815
  ) -> List[str]:
816
- """Generate personalized tips using LLM"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
817
  try:
818
- # Prepare analysis summary for LLM
819
- analysis_summary = f"""
820
- Speech Analysis Summary:
821
- - Pacing: {pacing['category']} ({pacing['words_per_minute']} WPM)
822
- - Prosody: {prosody['category']}
823
- - Filler words: {sum(fillers.values())} total
824
- - Long pauses: {silences['count']}
825
- - Sentiment: {sentiment['dominant_sentiment']}
826
- - Vocabulary score: {vocabulary['score']}/100
827
- - Logical flow: {logical_flow['flow_quality']}
828
- - Coherence: {coherence['coherence_quality']}
829
- - Persuasion: {persuasion['persuasion_level']}
830
- Generate 5 specific, actionable tips to improve this speech. Focus on the weakest areas.
831
- """
832
-
833
- # Generate tips using LLM
834
- prompt = f"Based on this speech analysis, provide 5 specific improvement tips:\n{analysis_summary}\n\nTips:"
835
-
836
- response = self.tips_generator(
837
- prompt,
838
- max_length=300,
839
- num_return_sequences=1,
840
- temperature=0.7
841
- )[0]['generated_text']
842
-
843
- # Parse tips (split by newlines or numbers)
844
  tips = []
845
- for line in response.split('\n'):
846
  line = line.strip()
847
- if line and len(line) > 10:
848
- # Remove numbering if present
849
- line = re.sub(r'^\d+[\.\)]\s*', '', line)
850
- if line:
851
- tips.append(line)
852
-
853
- # Fallback to rule-based tips if LLM fails
854
- if len(tips) < 3:
855
- tips = self._generate_fallback_tips(
856
- pacing, prosody, fillers, silences, vocabulary,
857
- logical_flow, coherence, persuasion
858
- )
859
 
860
- return tips[:5] # Return top 5
861
 
862
  except Exception as e:
863
- logging.warning(f"LLM tip generation failed: {e}")
864
- return self._generate_fallback_tips(
865
- pacing, prosody, fillers, silences, vocabulary,
866
- logical_flow, coherence, persuasion
867
- )
868
 
869
 
870
- def _generate_fallback_tips(
871
  self,
 
872
  pacing: Dict,
873
  prosody: Dict,
874
  fillers: Dict,
875
  silences: Dict,
 
876
  vocabulary: Dict,
877
  logical_flow: Dict,
878
  coherence: Dict,
879
- persuasion: Dict
 
880
  ) -> List[str]:
881
- """Generate rule-based tips as fallback"""
882
  tips = []
883
 
884
- # Pacing tips
885
- if pacing['category'] == 'slow':
886
- tips.append("Try speaking 10-15% faster to maintain audience engagement and energy.")
887
- elif pacing['category'] == 'fast':
888
- tips.append("Slow down slightly to ensure clarity and give your audience time to process your message.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
889
 
890
- # Prosody tips
891
- if prosody['category'] == 'monotone':
892
- tips.append("Add vocal variety by emphasizing key words and varying your pitch throughout the speech.")
 
 
 
893
 
894
- # Filler word tips
895
- if sum(fillers.values()) > 5:
896
- tips.append("Reduce filler words by pausing silently instead of saying 'um' or 'uh'. Practice makes perfect!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
897
 
898
- # Silence tips
899
- if silences['count'] > 3:
900
- tips.append("Work on smoother transitions between ideas to reduce long pauses.")
901
 
902
- # Vocabulary tips
903
- if vocabulary['score'] < 60:
904
- tips.append("Expand your vocabulary by incorporating more power words and descriptive language.")
 
 
 
 
 
905
 
906
- # Flow tips
907
- if logical_flow['score'] < 60:
908
- tips.append("Improve logical flow by using transition phrases like 'furthermore,' 'however,' and 'in conclusion.'")
909
 
910
- # Coherence tips
911
- if coherence['score'] < 60:
912
- tips.append("Strengthen coherence by making sure each point clearly connects to your main message.")
 
 
913
 
914
- # Persuasion tips
915
- if persuasion['score'] < 60:
916
- tips.append("Make your speech more persuasive by adding evidence, examples, and emotional appeals.")
917
 
918
- return tips[:5]
919
 
920
 
921
  def _create_improved_transcript(self, original: str, fillers: Dict) -> str:
@@ -1001,7 +1167,13 @@ if __name__ == "__main__":
1001
  print(f"✅ Created {test_file}\n")
1002
 
1003
  try:
1004
- coach = EnhancedPublicSpeakingCoach(whisper_model_size="base", enable_tts=True)
 
 
 
 
 
 
1005
  result = coach.analyze_speech(test_file)
1006
 
1007
  print("\n" + "="*70)
 
1
  """
2
+ Enhanced Public Speaking Coach with PERSONALIZED LLM Tips and Avatar Voice
3
+ Includes: Speech Analysis + OpenAI-Powered Personalized Tips + Text-to-Speech Avatar
4
  """
5
 
6
  import os
 
54
  print(" pip install transformers sentence-transformers torch")
55
  exit(1)
56
 
57
+ # Import OpenAI for better tips generation
58
+ try:
59
+ import openai
60
+ OPENAI_AVAILABLE = True
61
+ except ImportError:
62
+ print("\n⚠️ WARNING: OpenAI not installed. Using fallback tips.")
63
+ print(" To enable better tips: pip install openai")
64
+ OPENAI_AVAILABLE = False
65
+
66
  # Import TTS
67
  try:
68
  from TTS.api import TTS as CoquiTTS
 
90
  Complete speech analysis engine with LLM tips and avatar voice
91
  """
92
 
93
+ def __init__(self, whisper_model_size: str = "base", enable_tts: bool = True, openai_api_key: Optional[str] = None):
94
  """
95
  Initialize the enhanced coach engine
96
 
97
  Args:
98
  whisper_model_size: Whisper model size (tiny/base/small/medium)
99
  enable_tts: Enable text-to-speech avatar voice generation
100
+ openai_api_key: OpenAI API key for better tips (optional)
101
  """
102
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
103
  print(f"🚀 Initializing Enhanced Coach on {self.device}...")
104
 
105
+ # Set up OpenAI if available
106
+ self.use_openai = False
107
+ if OPENAI_AVAILABLE and openai_api_key:
108
+ openai.api_key = openai_api_key
109
+ self.use_openai = True
110
+ print(" ✅ OpenAI enabled for personalized tips")
111
+
112
  # Load Whisper for transcription
113
  print(f" Loading Whisper ({whisper_model_size})...")
114
  self.whisper = whisper.load_model(whisper_model_size, device=self.device)
 
135
  print(" Loading Sentence Transformer...")
136
  self.sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
137
 
 
 
 
 
 
 
 
 
 
138
  # Load TTS for avatar voice
139
  self.tts_enabled = False
140
  self.tts_model = None
 
382
  vocabulary_result,
383
  logical_flow_result,
384
  coherence_result,
385
+ persuasion_result,
386
+ overall_score
387
  )
388
 
389
  # Step 12: Create improved version of transcript
 
409
 
410
  # Generate audio for coaching tips
411
  print(" 🎙️ Generating avatar voice for coaching tips...")
412
+ tips_text = self._format_tips_for_audio(personalized_tips, avatar_gender)
413
  tips_audio_url = self._generate_avatar_voice(
414
  tips_text,
415
  output_dir,
 
820
  vocabulary: Dict,
821
  logical_flow: Dict,
822
  coherence: Dict,
823
+ persuasion: Dict,
824
+ overall_score: float
825
  ) -> List[str]:
826
+ """Generate truly personalized tips using OpenAI or enhanced fallback"""
827
+
828
+ # Try OpenAI first if available
829
+ if self.use_openai:
830
+ try:
831
+ tips = self._generate_openai_tips(
832
+ transcript, pacing, prosody, fillers, silences,
833
+ sentiment, vocabulary, logical_flow, coherence, persuasion, overall_score
834
+ )
835
+ if tips and len(tips) >= 3:
836
+ return tips
837
+ except Exception as e:
838
+ logging.warning(f"OpenAI tip generation failed: {e}")
839
+
840
+ # Use enhanced fallback tips
841
+ return self._generate_enhanced_fallback_tips(
842
+ transcript, pacing, prosody, fillers, silences,
843
+ sentiment, vocabulary, logical_flow, coherence, persuasion, overall_score
844
+ )
845
+
846
+
847
def _generate_openai_tips(
    self,
    transcript: str,
    pacing: Dict,
    prosody: Dict,
    fillers: Dict,
    silences: Dict,
    sentiment: Dict,
    vocabulary: Dict,
    logical_flow: Dict,
    coherence: Dict,
    persuasion: Dict,
    overall_score: float
) -> List[str]:
    """Ask the OpenAI chat API for up to five personalized coaching tips.

    A prompt is built from the analysis metrics plus the first 200
    characters of the transcript, sent to ``gpt-4o-mini``, and the reply is
    split into individual tips.

    Args:
        transcript: Full transcript; only ``transcript[:200]`` is sent.
        pacing: Reads ``'category'`` and ``'words_per_minute'``.
        prosody: Reads ``'category'`` and ``'pitch_variation_hz'``.
        fillers: Mapping of filler word to count (values are summed).
        silences: Reads ``'count'``.
        sentiment: Reads ``'dominant_sentiment'`` and ``'confidence'``
            (formatted as a percentage).
        vocabulary: Reads ``'score'`` and ``'good_words_used'``.
        logical_flow: Reads ``'flow_quality'`` and ``'score'``.
        coherence: Reads ``'coherence_quality'`` and ``'score'``.
        persuasion: Reads ``'persuasion_level'`` and ``'score'``.
        overall_score: Overall score, reported to the model as ``x/10``.

    Returns:
        Up to five tip strings, or an empty list when the request fails
        (the error is logged, never raised).
    """
    filler_total = sum(fillers.values())
    filler_breakdown = (
        ', '.join(f'{word}: {count}' for word, count in fillers.items())
        if fillers else 'none'
    )

    # Build detailed analysis summary
    analysis_summary = f"""Speech Performance Analysis:

Overall Score: {overall_score}/10

Detailed Metrics:
- Pacing: {pacing['category']} at {pacing['words_per_minute']} words per minute
- Voice Variation: {prosody['category']} (pitch variation: {prosody['pitch_variation_hz']} Hz)
- Filler Words: {filler_total} total ({filler_breakdown})
- Pauses: {silences['count']} long pauses
- Tone: {sentiment['dominant_sentiment']} ({sentiment['confidence']:.0%} confidence)
- Vocabulary: {vocabulary['score']}/100 (used {len(vocabulary['good_words_used'])} power words)
- Logical Flow: {logical_flow['flow_quality']} ({logical_flow['score']}/100)
- Coherence: {coherence['coherence_quality']} ({coherence['score']}/100)
- Persuasiveness: {persuasion['persuasion_level']} ({persuasion['score']}/100)

Speech excerpt: "{transcript[:200]}..."
"""

    # Create personalized prompt
    prompt = f"""{analysis_summary}

You are a friendly, encouraging public speaking coach. Based on this person's speech analysis, provide 5 specific, actionable coaching tips.

Requirements:
1. Be warm, supportive, and encouraging
2. Focus on the 2-3 weakest areas that need improvement
3. Give concrete examples for each tip (e.g., "Instead of saying 'um,' try pausing silently for 1-2 seconds")
4. Use conversational, friendly language as if speaking to a friend
5. Celebrate what they're doing well while gently addressing areas to improve
6. Make tips practical and easy to implement immediately

Format each tip as a complete, friendly sentence. Number them 1-5."""

    request = {
        "model": "gpt-4o-mini",
        "messages": [
            {"role": "system", "content": "You are an expert public speaking coach who gives personalized, friendly, actionable advice."},
            {"role": "user", "content": prompt},
        ],
        "max_tokens": 500,
        "temperature": 0.8,
    }

    # Best-effort boundary: any API/network failure must degrade to the
    # rule-based fallback in the caller, so the broad catch is deliberate.
    try:
        if hasattr(openai, "OpenAI"):
            # openai>=1.0 removed openai.ChatCompletion; use the client API.
            client = openai.OpenAI(api_key=openai.api_key)
            response = client.chat.completions.create(**request)
        else:
            # Legacy (<1.0) module-level interface.
            response = openai.ChatCompletion.create(**request)

        # content may be None (e.g. filtered/empty completion).
        content = (response.choices[0].message.content or "").strip()
    except Exception as e:
        logging.error(f"OpenAI API error: {e}")
        return []

    # Parse tips. Lines that carried a "1." / "2)" style prefix are the
    # actual tips; if the model ignored the numbering request, fall back to
    # every sufficiently long line so we still return something.
    numbering = re.compile(r'^\d+[\.\):\-]\s*')
    numbered: List[str] = []
    candidates: List[str] = []
    for raw_line in content.split('\n'):
        line = raw_line.strip()
        text = numbering.sub('', line)
        if len(text) <= 20:  # too short to be a real tip
            continue
        candidates.append(text)
        if text != line:
            numbered.append(text)

    tips = numbered if numbered else candidates
    return tips[:5]
 
 
 
924
 
925
 
926
+ def _generate_enhanced_fallback_tips(
927
  self,
928
+ transcript: str,
929
  pacing: Dict,
930
  prosody: Dict,
931
  fillers: Dict,
932
  silences: Dict,
933
+ sentiment: Dict,
934
  vocabulary: Dict,
935
  logical_flow: Dict,
936
  coherence: Dict,
937
+ persuasion: Dict,
938
+ overall_score: float
939
  ) -> List[str]:
940
+ """Generate personalized, friendly tips with examples (fallback)"""
941
  tips = []
942
 
943
+ # Calculate what needs improvement most
944
+ scores = {
945
+ 'pacing': self._get_pacing_score(pacing),
946
+ 'prosody': self._get_prosody_score(prosody),
947
+ 'fillers': self._get_filler_score(fillers),
948
+ 'silences': self._get_silence_score(silences),
949
+ 'vocabulary': vocabulary['score'] / 10.0,
950
+ 'flow': logical_flow['score'] / 10.0,
951
+ 'coherence': coherence['score'] / 10.0,
952
+ 'persuasion': persuasion['score'] / 10.0
953
+ }
954
+
955
+ # Sort by score (lowest first = needs most improvement)
956
+ improvement_areas = sorted(scores.items(), key=lambda x: x[1])
957
+
958
+ # Generate tips for weakest areas
959
+ wpm = pacing['words_per_minute']
960
+ total_fillers = sum(fillers.values())
961
+
962
+ for area, score in improvement_areas[:5]: # Top 5 areas needing improvement
963
+ if area == 'pacing':
964
+ if pacing['category'] == 'slow':
965
+ tips.append(f"Your pace is currently {wpm} words per minute. Try speeding up to 130-140 WPM - imagine you're telling an exciting story to a friend! Practice by reading aloud with a timer.")
966
+ elif pacing['category'] == 'fast':
967
+ tips.append(f"You're speaking at {wpm} words per minute, which is pretty fast! Slow down to about 140-150 WPM. Take a breath between sentences - your audience needs time to absorb your ideas.")
968
+
969
+ elif area == 'prosody':
970
+ if prosody['category'] == 'monotone':
971
+ tips.append(f"Add more vocal variety to keep your audience engaged! Try emphasizing key words - for example, if you say 'This is REALLY important,' make 'really' louder and higher pitched. Practice reading children's books out loud to build this skill.")
972
+
973
+ elif area == 'fillers':
974
+ if total_fillers > 5:
975
+ most_used = max(fillers.items(), key=lambda x: x[1])
976
+ tips.append(f"You said '{most_used[0]}' {most_used[1]} times. When you feel the urge to say it, pause silently instead - it makes you sound more confident! Try counting to 2 in your head during pauses.")
977
+
978
+ elif area == 'silences':
979
+ if silences['count'] > 5:
980
+ tips.append(f"You had {silences['count']} long pauses. That's okay! But try to keep pauses to 1-2 seconds. If you need to think, it's better to say 'Let me think about that...' than to go silent for too long.")
981
+ elif silences['count'] < 2:
982
+ tips.append(f"Don't be afraid to pause! Strategic 2-second pauses after important points give your audience time to process. Try pausing after questions like 'Why does this matter?' - it creates anticipation.")
983
+
984
+ elif area == 'vocabulary':
985
+ if vocabulary['score'] < 60:
986
+ good_words = vocabulary['good_words_used']
987
+ if good_words:
988
+ tips.append(f"Great job using power words like '{', '.join(good_words[:3])}'! Try adding more impact words like 'crucial,' 'remarkable,' or 'transform' to make your speech more memorable.")
989
+ else:
990
+ tips.append(f"Spice up your vocabulary! Instead of 'very good,' try 'excellent' or 'outstanding.' Instead of 'big problem,' say 'significant challenge.' Keep a list of power words on your phone!")
991
+
992
+ elif area == 'flow':
993
+ if logical_flow['score'] < 65:
994
+ tips.append(f"Connect your ideas more smoothly! Use transition phrases like 'Building on that...', 'Here's why this matters...', or 'Let me give you an example...' - they're like road signs that guide your audience through your speech.")
995
+
996
+ elif area == 'coherence':
997
+ if coherence['score'] < 65:
998
+ tips.append(f"Make your main message crystal clear! Try using signpost phrases: 'There are three reasons why...' or 'My main point is...' Then at the end, say 'To sum up...' and restate your key idea.")
999
+
1000
+ elif area == 'persuasion':
1001
+ if persuasion['score'] < 60:
1002
+ tips.append(f"Make your speech more convincing! Add phrases like 'Research shows that...' or 'Imagine if we could...' or 'The evidence is clear...' These make your points more compelling and credible.")
1003
+
1004
+ # If we don't have 5 tips yet, add some positive encouragement
1005
+ if len(tips) < 5 and overall_score >= 7.0:
1006
+ tips.append(f"You're doing great with a {overall_score:.1f}/10 score! Keep practicing regularly - even 5 minutes a day of reading aloud can make a huge difference in your confidence and delivery.")
1007
 
1008
+ # Always add one encouraging tip at the end
1009
+ if len(tips) < 5:
1010
+ if overall_score < 5.0:
1011
+ tips.append("Remember, every great speaker started somewhere! Focus on improving one thing at a time, and you'll see amazing progress. Record yourself weekly to track your improvement!")
1012
+ else:
1013
+ tips.append("You're making good progress! Keep recording yourself and listening back - you'll be surprised how quickly you improve. Consider joining a speaking group like Toastmasters to practice regularly!")
1014
 
1015
+ return tips[:5]
1016
+
1017
+
1018
+ def _get_pacing_score(self, pacing: Dict) -> float:
1019
+ """Convert pacing to 0-10 score"""
1020
+ wpm = pacing['words_per_minute']
1021
+ if 120 <= wpm <= 160:
1022
+ return 10.0
1023
+ elif 100 <= wpm < 120 or 160 < wpm <= 180:
1024
+ return 7.0
1025
+ else:
1026
+ return 4.0
1027
+
1028
+ def _get_prosody_score(self, prosody: Dict) -> float:
1029
+ """Convert prosody to 0-10 score"""
1030
+ return 10.0 if prosody['category'] == 'dynamic' else 4.0
1031
+
1032
+ def _get_filler_score(self, fillers: Dict) -> float:
1033
+ """Convert filler count to 0-10 score"""
1034
+ total = sum(fillers.values())
1035
+ if total == 0:
1036
+ return 10.0
1037
+ elif total <= 3:
1038
+ return 9.0
1039
+ elif total <= 5:
1040
+ return 7.0
1041
+ else:
1042
+ return max(2.0, 10.0 - (total * 0.3))
1043
+
1044
+ def _get_silence_score(self, silences: Dict) -> float:
1045
+ """Convert silence count to 0-10 score"""
1046
+ count = silences['count']
1047
+ if 2 <= count <= 5:
1048
+ return 10.0
1049
+ elif count <= 8:
1050
+ return 8.0
1051
+ else:
1052
+ return max(3.0, 10.0 - (count * 0.5))
1053
+
1054
+
1055
+ def _format_tips_for_audio(self, tips: List[str], gender: str) -> str:
1056
+ """Format tips in a natural, conversational way for audio"""
1057
+ avatar_name = "Alex" if gender == "male" else "Maya"
1058
 
1059
+ # Create a friendly introduction
1060
+ intro = f"Hey there! I'm {avatar_name}, your speaking coach. I've analyzed your speech, and I have some personalized tips to help you shine even brighter!"
 
1061
 
1062
+ # Add natural transitions between tips
1063
+ transitions = [
1064
+ "First,",
1065
+ "Next up,",
1066
+ "Here's another tip:",
1067
+ "Also, I noticed that",
1068
+ "And finally,"
1069
+ ]
1070
 
1071
+ # Build the audio script
1072
+ audio_parts = [intro]
 
1073
 
1074
+ for i, tip in enumerate(tips[:5]):
1075
+ if i < len(transitions):
1076
+ audio_parts.append(f"{transitions[i]} {tip}")
1077
+ else:
1078
+ audio_parts.append(tip)
1079
 
1080
+ # Add encouraging conclusion
1081
+ conclusion = "You're making great progress! Keep practicing these tips, and you'll see amazing results. I'm cheering for you!"
1082
+ audio_parts.append(conclusion)
1083
 
1084
+ return " ".join(audio_parts)
1085
 
1086
 
1087
  def _create_improved_transcript(self, original: str, fillers: Dict) -> str:
 
1167
  print(f"✅ Created {test_file}\n")
1168
 
1169
  try:
1170
+ # Get OpenAI API key from environment variable if available
1171
+ openai_key = os.getenv('OPENAI_API_KEY')
1172
+ coach = EnhancedPublicSpeakingCoach(
1173
+ whisper_model_size="base",
1174
+ enable_tts=True,
1175
+ openai_api_key=openai_key
1176
+ )
1177
  result = coach.analyze_speech(test_file)
1178
 
1179
  print("\n" + "="*70)