Spaces:
Sleeping
Sleeping
Update kid_coach_pipeline.py
Browse files- kid_coach_pipeline.py +264 -92
kid_coach_pipeline.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
"""
|
| 2 |
-
Enhanced Public Speaking Coach with LLM Tips and Avatar Voice
|
| 3 |
-
Includes: Speech Analysis +
|
| 4 |
"""
|
| 5 |
|
| 6 |
import os
|
|
@@ -54,6 +54,15 @@ except ImportError:
|
|
| 54 |
print(" pip install transformers sentence-transformers torch")
|
| 55 |
exit(1)
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
# Import TTS
|
| 58 |
try:
|
| 59 |
from TTS.api import TTS as CoquiTTS
|
|
@@ -81,17 +90,25 @@ class EnhancedPublicSpeakingCoach:
|
|
| 81 |
Complete speech analysis engine with LLM tips and avatar voice
|
| 82 |
"""
|
| 83 |
|
| 84 |
-
def __init__(self, whisper_model_size: str = "base", enable_tts: bool = True):
|
| 85 |
"""
|
| 86 |
Initialize the enhanced coach engine
|
| 87 |
|
| 88 |
Args:
|
| 89 |
whisper_model_size: Whisper model size (tiny/base/small/medium)
|
| 90 |
enable_tts: Enable text-to-speech avatar voice generation
|
|
|
|
| 91 |
"""
|
| 92 |
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 93 |
print(f"🚀 Initializing Enhanced Coach on {self.device}...")
|
| 94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
# Load Whisper for transcription
|
| 96 |
print(f" Loading Whisper ({whisper_model_size})...")
|
| 97 |
self.whisper = whisper.load_model(whisper_model_size, device=self.device)
|
|
@@ -118,15 +135,6 @@ class EnhancedPublicSpeakingCoach:
|
|
| 118 |
print(" Loading Sentence Transformer...")
|
| 119 |
self.sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
|
| 120 |
|
| 121 |
-
# Load LLM for tips generation (using Flan-T5 - lightweight and effective)
|
| 122 |
-
print(" Loading LLM for Tips Generation...")
|
| 123 |
-
self.tips_generator = pipeline(
|
| 124 |
-
"text2text-generation",
|
| 125 |
-
model="google/flan-t5-base",
|
| 126 |
-
device=0 if self.device == "cuda" else -1,
|
| 127 |
-
max_length=512
|
| 128 |
-
)
|
| 129 |
-
|
| 130 |
# Load TTS for avatar voice
|
| 131 |
self.tts_enabled = False
|
| 132 |
self.tts_model = None
|
|
@@ -374,7 +382,8 @@ class EnhancedPublicSpeakingCoach:
|
|
| 374 |
vocabulary_result,
|
| 375 |
logical_flow_result,
|
| 376 |
coherence_result,
|
| 377 |
-
persuasion_result
|
|
|
|
| 378 |
)
|
| 379 |
|
| 380 |
# Step 12: Create improved version of transcript
|
|
@@ -400,7 +409,7 @@ class EnhancedPublicSpeakingCoach:
|
|
| 400 |
|
| 401 |
# Generate audio for coaching tips
|
| 402 |
print(" 🎙️ Generating avatar voice for coaching tips...")
|
| 403 |
-
tips_text =
|
| 404 |
tips_audio_url = self._generate_avatar_voice(
|
| 405 |
tips_text,
|
| 406 |
output_dir,
|
|
@@ -811,111 +820,268 @@ class EnhancedPublicSpeakingCoach:
|
|
| 811 |
vocabulary: Dict,
|
| 812 |
logical_flow: Dict,
|
| 813 |
coherence: Dict,
|
| 814 |
-
persuasion: Dict
|
|
|
|
| 815 |
) -> List[str]:
|
| 816 |
-
"""Generate personalized tips using
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 817 |
try:
|
| 818 |
-
|
| 819 |
-
|
| 820 |
-
|
| 821 |
-
|
| 822 |
-
|
| 823 |
-
|
| 824 |
-
|
| 825 |
-
|
| 826 |
-
|
| 827 |
-
|
| 828 |
-
|
| 829 |
-
|
| 830 |
-
|
| 831 |
-
"""
|
| 832 |
-
|
| 833 |
-
# Generate tips using LLM
|
| 834 |
-
prompt = f"Based on this speech analysis, provide 5 specific improvement tips:\n{analysis_summary}\n\nTips:"
|
| 835 |
-
|
| 836 |
-
response = self.tips_generator(
|
| 837 |
-
prompt,
|
| 838 |
-
max_length=300,
|
| 839 |
-
num_return_sequences=1,
|
| 840 |
-
temperature=0.7
|
| 841 |
-
)[0]['generated_text']
|
| 842 |
-
|
| 843 |
-
# Parse tips (split by newlines or numbers)
|
| 844 |
tips = []
|
| 845 |
-
for line in
|
| 846 |
line = line.strip()
|
| 847 |
-
|
| 848 |
-
|
| 849 |
-
|
| 850 |
-
|
| 851 |
-
tips.append(line)
|
| 852 |
-
|
| 853 |
-
# Fallback to rule-based tips if LLM fails
|
| 854 |
-
if len(tips) < 3:
|
| 855 |
-
tips = self._generate_fallback_tips(
|
| 856 |
-
pacing, prosody, fillers, silences, vocabulary,
|
| 857 |
-
logical_flow, coherence, persuasion
|
| 858 |
-
)
|
| 859 |
|
| 860 |
-
return tips[:5]
|
| 861 |
|
| 862 |
except Exception as e:
|
| 863 |
-
logging.
|
| 864 |
-
return
|
| 865 |
-
pacing, prosody, fillers, silences, vocabulary,
|
| 866 |
-
logical_flow, coherence, persuasion
|
| 867 |
-
)
|
| 868 |
|
| 869 |
|
| 870 |
-
def
|
| 871 |
self,
|
|
|
|
| 872 |
pacing: Dict,
|
| 873 |
prosody: Dict,
|
| 874 |
fillers: Dict,
|
| 875 |
silences: Dict,
|
|
|
|
| 876 |
vocabulary: Dict,
|
| 877 |
logical_flow: Dict,
|
| 878 |
coherence: Dict,
|
| 879 |
-
persuasion: Dict
|
|
|
|
| 880 |
) -> List[str]:
|
| 881 |
-
"""Generate
|
| 882 |
tips = []
|
| 883 |
|
| 884 |
-
#
|
| 885 |
-
|
| 886 |
-
|
| 887 |
-
|
| 888 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 889 |
|
| 890 |
-
#
|
| 891 |
-
if
|
| 892 |
-
|
|
|
|
|
|
|
|
|
|
| 893 |
|
| 894 |
-
|
| 895 |
-
|
| 896 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 897 |
|
| 898 |
-
#
|
| 899 |
-
|
| 900 |
-
tips.append("Work on smoother transitions between ideas to reduce long pauses.")
|
| 901 |
|
| 902 |
-
#
|
| 903 |
-
|
| 904 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 905 |
|
| 906 |
-
#
|
| 907 |
-
|
| 908 |
-
tips.append("Improve logical flow by using transition phrases like 'furthermore,' 'however,' and 'in conclusion.'")
|
| 909 |
|
| 910 |
-
|
| 911 |
-
|
| 912 |
-
|
|
|
|
|
|
|
| 913 |
|
| 914 |
-
#
|
| 915 |
-
|
| 916 |
-
|
| 917 |
|
| 918 |
-
return
|
| 919 |
|
| 920 |
|
| 921 |
def _create_improved_transcript(self, original: str, fillers: Dict) -> str:
|
|
@@ -1001,7 +1167,13 @@ if __name__ == "__main__":
|
|
| 1001 |
print(f"✅ Created {test_file}\n")
|
| 1002 |
|
| 1003 |
try:
|
| 1004 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1005 |
result = coach.analyze_speech(test_file)
|
| 1006 |
|
| 1007 |
print("\n" + "="*70)
|
|
|
|
| 1 |
"""
|
| 2 |
+
Enhanced Public Speaking Coach with PERSONALIZED LLM Tips and Avatar Voice
|
| 3 |
+
Includes: Speech Analysis + OpenAI-Powered Personalized Tips + Text-to-Speech Avatar
|
| 4 |
"""
|
| 5 |
|
| 6 |
import os
|
|
|
|
| 54 |
print(" pip install transformers sentence-transformers torch")
|
| 55 |
exit(1)
|
| 56 |
|
| 57 |
+
# Import OpenAI for better tips generation
|
| 58 |
+
try:
|
| 59 |
+
import openai
|
| 60 |
+
OPENAI_AVAILABLE = True
|
| 61 |
+
except ImportError:
|
| 62 |
+
print("\n⚠️ WARNING: OpenAI not installed. Using fallback tips.")
|
| 63 |
+
print(" To enable better tips: pip install openai")
|
| 64 |
+
OPENAI_AVAILABLE = False
|
| 65 |
+
|
| 66 |
# Import TTS
|
| 67 |
try:
|
| 68 |
from TTS.api import TTS as CoquiTTS
|
|
|
|
| 90 |
Complete speech analysis engine with LLM tips and avatar voice
|
| 91 |
"""
|
| 92 |
|
| 93 |
+
def __init__(self, whisper_model_size: str = "base", enable_tts: bool = True, openai_api_key: Optional[str] = None):
|
| 94 |
"""
|
| 95 |
Initialize the enhanced coach engine
|
| 96 |
|
| 97 |
Args:
|
| 98 |
whisper_model_size: Whisper model size (tiny/base/small/medium)
|
| 99 |
enable_tts: Enable text-to-speech avatar voice generation
|
| 100 |
+
openai_api_key: OpenAI API key for better tips (optional)
|
| 101 |
"""
|
| 102 |
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 103 |
print(f"🚀 Initializing Enhanced Coach on {self.device}...")
|
| 104 |
|
| 105 |
+
# Set up OpenAI if available
|
| 106 |
+
self.use_openai = False
|
| 107 |
+
if OPENAI_AVAILABLE and openai_api_key:
|
| 108 |
+
openai.api_key = openai_api_key
|
| 109 |
+
self.use_openai = True
|
| 110 |
+
print(" ✅ OpenAI enabled for personalized tips")
|
| 111 |
+
|
| 112 |
# Load Whisper for transcription
|
| 113 |
print(f" Loading Whisper ({whisper_model_size})...")
|
| 114 |
self.whisper = whisper.load_model(whisper_model_size, device=self.device)
|
|
|
|
| 135 |
print(" Loading Sentence Transformer...")
|
| 136 |
self.sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
|
| 137 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
# Load TTS for avatar voice
|
| 139 |
self.tts_enabled = False
|
| 140 |
self.tts_model = None
|
|
|
|
| 382 |
vocabulary_result,
|
| 383 |
logical_flow_result,
|
| 384 |
coherence_result,
|
| 385 |
+
persuasion_result,
|
| 386 |
+
overall_score
|
| 387 |
)
|
| 388 |
|
| 389 |
# Step 12: Create improved version of transcript
|
|
|
|
| 409 |
|
| 410 |
# Generate audio for coaching tips
|
| 411 |
print(" 🎙️ Generating avatar voice for coaching tips...")
|
| 412 |
+
tips_text = self._format_tips_for_audio(personalized_tips, avatar_gender)
|
| 413 |
tips_audio_url = self._generate_avatar_voice(
|
| 414 |
tips_text,
|
| 415 |
output_dir,
|
|
|
|
| 820 |
vocabulary: Dict,
|
| 821 |
logical_flow: Dict,
|
| 822 |
coherence: Dict,
|
| 823 |
+
persuasion: Dict,
|
| 824 |
+
overall_score: float
|
| 825 |
) -> List[str]:
|
| 826 |
+
"""Generate truly personalized tips using OpenAI or enhanced fallback"""
|
| 827 |
+
|
| 828 |
+
# Try OpenAI first if available
|
| 829 |
+
if self.use_openai:
|
| 830 |
+
try:
|
| 831 |
+
tips = self._generate_openai_tips(
|
| 832 |
+
transcript, pacing, prosody, fillers, silences,
|
| 833 |
+
sentiment, vocabulary, logical_flow, coherence, persuasion, overall_score
|
| 834 |
+
)
|
| 835 |
+
if tips and len(tips) >= 3:
|
| 836 |
+
return tips
|
| 837 |
+
except Exception as e:
|
| 838 |
+
logging.warning(f"OpenAI tip generation failed: {e}")
|
| 839 |
+
|
| 840 |
+
# Use enhanced fallback tips
|
| 841 |
+
return self._generate_enhanced_fallback_tips(
|
| 842 |
+
transcript, pacing, prosody, fillers, silences,
|
| 843 |
+
sentiment, vocabulary, logical_flow, coherence, persuasion, overall_score
|
| 844 |
+
)
|
| 845 |
+
|
| 846 |
+
|
| 847 |
+
def _generate_openai_tips(
    self,
    transcript: str,
    pacing: Dict,
    prosody: Dict,
    fillers: Dict,
    silences: Dict,
    sentiment: Dict,
    vocabulary: Dict,
    logical_flow: Dict,
    coherence: Dict,
    persuasion: Dict,
    overall_score: float
) -> List[str]:
    """Request up to five personalized coaching tips from the OpenAI chat API.

    A plain-text summary of the analysis metrics (plus the first 200
    characters of the transcript) is embedded in a prompt asking for five
    numbered tips. The reply is split into lines, leading numbering is
    stripped, and lines longer than 20 characters are kept as tips.

    Args:
        transcript: Transcribed speech; only the first 200 characters are sent.
        pacing: Reads ``category`` and ``words_per_minute``.
        prosody: Reads ``category`` and ``pitch_variation_hz``.
        fillers: Mapping of filler word to occurrence count.
        silences: Reads ``count``.
        sentiment: Reads ``dominant_sentiment`` and ``confidence``.
        vocabulary: Reads ``score`` and ``good_words_used``.
        logical_flow: Reads ``flow_quality`` and ``score``.
        coherence: Reads ``coherence_quality`` and ``score``.
        persuasion: Reads ``persuasion_level`` and ``score``.
        overall_score: Overall score, reported to the model as "out of 10".

    Returns:
        At most five tip strings, or an empty list if the API call or the
        response handling fails (the error is logged, not raised).

    Raises:
        KeyError: If one of the metric dicts lacks a key read while building
            the summary (this happens before the guarded API call).
    """

    # Build detailed analysis summary
    analysis_summary = f"""Speech Performance Analysis:

Overall Score: {overall_score}/10

Detailed Metrics:
- Pacing: {pacing['category']} at {pacing['words_per_minute']} words per minute
- Voice Variation: {prosody['category']} (pitch variation: {prosody['pitch_variation_hz']} Hz)
- Filler Words: {sum(fillers.values())} total ({', '.join([f'{k}: {v}' for k, v in fillers.items()]) if fillers else 'none'})
- Pauses: {silences['count']} long pauses
- Tone: {sentiment['dominant_sentiment']} ({sentiment['confidence']:.0%} confidence)
- Vocabulary: {vocabulary['score']}/100 (used {len(vocabulary['good_words_used'])} power words)
- Logical Flow: {logical_flow['flow_quality']} ({logical_flow['score']}/100)
- Coherence: {coherence['coherence_quality']} ({coherence['score']}/100)
- Persuasiveness: {persuasion['persuasion_level']} ({persuasion['score']}/100)

Speech excerpt: "{transcript[:200]}..."
"""

    # Create personalized prompt
    prompt = f"""{analysis_summary}

You are a friendly, encouraging public speaking coach. Based on this person's speech analysis, provide 5 specific, actionable coaching tips.

Requirements:
1. Be warm, supportive, and encouraging
2. Focus on the 2-3 weakest areas that need improvement
3. Give concrete examples for each tip (e.g., "Instead of saying 'um,' try pausing silently for 1-2 seconds")
4. Use conversational, friendly language as if speaking to a friend
5. Celebrate what they're doing well while gently addressing areas to improve
6. Make tips practical and easy to implement immediately

Format each tip as a complete, friendly sentence. Number them 1-5."""

    try:
        # openai>=1.0 removed ``openai.ChatCompletion`` (calling it raises),
        # which previously made this method always return []. Prefer the v1
        # module-level client, which still honours ``openai.api_key``, and
        # keep the legacy entry point for pre-1.0 installs.
        if hasattr(openai, "chat"):
            create_completion = openai.chat.completions.create
        else:
            create_completion = openai.ChatCompletion.create

        response = create_completion(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are an expert public speaking coach who gives personalized, friendly, actionable advice."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=500,
            temperature=0.8
        )

        # The message content may be None; treat that as an empty reply.
        content = (response.choices[0].message.content or "").strip()

        # Parse tips
        tips = []
        for line in content.split('\n'):
            line = line.strip()
            # Remove numbering
            line = re.sub(r'^\d+[\.\):\-]\s*', '', line)
            if len(line) > 20:  # Valid tip
                tips.append(line)

        return tips[:5]

    except Exception as e:
        # Best-effort: the caller falls back to rule-based tips on [].
        logging.error(f"OpenAI API error: {e}")
        return []
|
|
|
|
|
|
|
|
|
|
| 924 |
|
| 925 |
|
| 926 |
+
def _generate_enhanced_fallback_tips(
    self,
    transcript: str,
    pacing: Dict,
    prosody: Dict,
    fillers: Dict,
    silences: Dict,
    sentiment: Dict,
    vocabulary: Dict,
    logical_flow: Dict,
    coherence: Dict,
    persuasion: Dict,
    overall_score: float
) -> List[str]:
    """Build up to five rule-based coaching tips without calling an LLM.

    Each of eight areas is scored on a 0-10 scale, the five lowest-scoring
    areas are visited in ascending order, and an area contributes a tip only
    if its own threshold check fires. Encouragement is appended while fewer
    than five tips exist, so the result always has at least one entry.

    ``transcript`` and ``sentiment`` are accepted for signature parity with
    the OpenAI-based generator; this method does not read them.

    Returns:
        Between one and five tip strings.
    """
    # Put every area on a common 0-10 scale (the dict-based scores are /100).
    area_scores = {
        'pacing': self._get_pacing_score(pacing),
        'prosody': self._get_prosody_score(prosody),
        'fillers': self._get_filler_score(fillers),
        'silences': self._get_silence_score(silences),
        'vocabulary': vocabulary['score'] / 10.0,
        'flow': logical_flow['score'] / 10.0,
        'coherence': coherence['score'] / 10.0,
        'persuasion': persuasion['score'] / 10.0
    }

    # Weakest first; sorted() is stable, so ties keep the order above.
    ranked_areas = sorted(area_scores, key=area_scores.get)

    wpm = pacing['words_per_minute']
    total_fillers = sum(fillers.values())

    def tip_for(area):
        """Return the tip for one area, or None when its threshold is not met."""
        if area == 'pacing':
            if pacing['category'] == 'slow':
                return f"Your pace is currently {wpm} words per minute. Try speeding up to 130-140 WPM - imagine you're telling an exciting story to a friend! Practice by reading aloud with a timer."
            if pacing['category'] == 'fast':
                return f"You're speaking at {wpm} words per minute, which is pretty fast! Slow down to about 140-150 WPM. Take a breath between sentences - your audience needs time to absorb your ideas."
            return None

        if area == 'prosody':
            if prosody['category'] == 'monotone':
                return f"Add more vocal variety to keep your audience engaged! Try emphasizing key words - for example, if you say 'This is REALLY important,' make 'really' louder and higher pitched. Practice reading children's books out loud to build this skill."
            return None

        if area == 'fillers':
            if total_fillers > 5:
                # First filler with the highest count, as (word, count).
                most_used = max(fillers.items(), key=lambda pair: pair[1])
                return f"You said '{most_used[0]}' {most_used[1]} times. When you feel the urge to say it, pause silently instead - it makes you sound more confident! Try counting to 2 in your head during pauses."
            return None

        if area == 'silences':
            if silences['count'] > 5:
                return f"You had {silences['count']} long pauses. That's okay! But try to keep pauses to 1-2 seconds. If you need to think, it's better to say 'Let me think about that...' than to go silent for too long."
            if silences['count'] < 2:
                return f"Don't be afraid to pause! Strategic 2-second pauses after important points give your audience time to process. Try pausing after questions like 'Why does this matter?' - it creates anticipation."
            return None

        if area == 'vocabulary':
            if vocabulary['score'] < 60:
                power_words = vocabulary['good_words_used']
                if power_words:
                    return f"Great job using power words like '{', '.join(power_words[:3])}'! Try adding more impact words like 'crucial,' 'remarkable,' or 'transform' to make your speech more memorable."
                return f"Spice up your vocabulary! Instead of 'very good,' try 'excellent' or 'outstanding.' Instead of 'big problem,' say 'significant challenge.' Keep a list of power words on your phone!"
            return None

        if area == 'flow':
            if logical_flow['score'] < 65:
                return f"Connect your ideas more smoothly! Use transition phrases like 'Building on that...', 'Here's why this matters...', or 'Let me give you an example...' - they're like road signs that guide your audience through your speech."
            return None

        if area == 'coherence':
            if coherence['score'] < 65:
                return f"Make your main message crystal clear! Try using signpost phrases: 'There are three reasons why...' or 'My main point is...' Then at the end, say 'To sum up...' and restate your key idea."
            return None

        if area == 'persuasion':
            if persuasion['score'] < 60:
                return f"Make your speech more convincing! Add phrases like 'Research shows that...' or 'Imagine if we could...' or 'The evidence is clear...' These make your points more compelling and credible."
            return None

        return None

    # Only the five weakest areas are considered; areas above their
    # threshold yield nothing.
    tips = [tip for tip in map(tip_for, ranked_areas[:5]) if tip is not None]

    # Strong overall result and room left: add praise with the score.
    if len(tips) < 5 and overall_score >= 7.0:
        tips.append(f"You're doing great with a {overall_score:.1f}/10 score! Keep practicing regularly - even 5 minutes a day of reading aloud can make a huge difference in your confidence and delivery.")

    # Still room left: close with one encouraging note chosen by score.
    if len(tips) < 5:
        if overall_score < 5.0:
            tips.append("Remember, every great speaker started somewhere! Focus on improving one thing at a time, and you'll see amazing progress. Record yourself weekly to track your improvement!")
        else:
            tips.append("You're making good progress! Keep recording yourself and listening back - you'll be surprised how quickly you improve. Consider joining a speaking group like Toastmasters to practice regularly!")

    return tips[:5]
|
| 1016 |
+
|
| 1017 |
+
|
| 1018 |
+
def _get_pacing_score(self, pacing: Dict) -> float:
|
| 1019 |
+
"""Convert pacing to 0-10 score"""
|
| 1020 |
+
wpm = pacing['words_per_minute']
|
| 1021 |
+
if 120 <= wpm <= 160:
|
| 1022 |
+
return 10.0
|
| 1023 |
+
elif 100 <= wpm < 120 or 160 < wpm <= 180:
|
| 1024 |
+
return 7.0
|
| 1025 |
+
else:
|
| 1026 |
+
return 4.0
|
| 1027 |
+
|
| 1028 |
+
def _get_prosody_score(self, prosody: Dict) -> float:
|
| 1029 |
+
"""Convert prosody to 0-10 score"""
|
| 1030 |
+
return 10.0 if prosody['category'] == 'dynamic' else 4.0
|
| 1031 |
+
|
| 1032 |
+
def _get_filler_score(self, fillers: Dict) -> float:
|
| 1033 |
+
"""Convert filler count to 0-10 score"""
|
| 1034 |
+
total = sum(fillers.values())
|
| 1035 |
+
if total == 0:
|
| 1036 |
+
return 10.0
|
| 1037 |
+
elif total <= 3:
|
| 1038 |
+
return 9.0
|
| 1039 |
+
elif total <= 5:
|
| 1040 |
+
return 7.0
|
| 1041 |
+
else:
|
| 1042 |
+
return max(2.0, 10.0 - (total * 0.3))
|
| 1043 |
+
|
| 1044 |
+
def _get_silence_score(self, silences: Dict) -> float:
|
| 1045 |
+
"""Convert silence count to 0-10 score"""
|
| 1046 |
+
count = silences['count']
|
| 1047 |
+
if 2 <= count <= 5:
|
| 1048 |
+
return 10.0
|
| 1049 |
+
elif count <= 8:
|
| 1050 |
+
return 8.0
|
| 1051 |
+
else:
|
| 1052 |
+
return max(3.0, 10.0 - (count * 0.5))
|
| 1053 |
+
|
| 1054 |
+
|
| 1055 |
+
def _format_tips_for_audio(self, tips: List[str], gender: str) -> str:
|
| 1056 |
+
"""Format tips in a natural, conversational way for audio"""
|
| 1057 |
+
avatar_name = "Alex" if gender == "male" else "Maya"
|
| 1058 |
|
| 1059 |
+
# Create a friendly introduction
|
| 1060 |
+
intro = f"Hey there! I'm {avatar_name}, your speaking coach. I've analyzed your speech, and I have some personalized tips to help you shine even brighter!"
|
|
|
|
| 1061 |
|
| 1062 |
+
# Add natural transitions between tips
|
| 1063 |
+
transitions = [
|
| 1064 |
+
"First,",
|
| 1065 |
+
"Next up,",
|
| 1066 |
+
"Here's another tip:",
|
| 1067 |
+
"Also, I noticed that",
|
| 1068 |
+
"And finally,"
|
| 1069 |
+
]
|
| 1070 |
|
| 1071 |
+
# Build the audio script
|
| 1072 |
+
audio_parts = [intro]
|
|
|
|
| 1073 |
|
| 1074 |
+
for i, tip in enumerate(tips[:5]):
|
| 1075 |
+
if i < len(transitions):
|
| 1076 |
+
audio_parts.append(f"{transitions[i]} {tip}")
|
| 1077 |
+
else:
|
| 1078 |
+
audio_parts.append(tip)
|
| 1079 |
|
| 1080 |
+
# Add encouraging conclusion
|
| 1081 |
+
conclusion = "You're making great progress! Keep practicing these tips, and you'll see amazing results. I'm cheering for you!"
|
| 1082 |
+
audio_parts.append(conclusion)
|
| 1083 |
|
| 1084 |
+
return " ".join(audio_parts)
|
| 1085 |
|
| 1086 |
|
| 1087 |
def _create_improved_transcript(self, original: str, fillers: Dict) -> str:
|
|
|
|
| 1167 |
print(f"✅ Created {test_file}\n")
|
| 1168 |
|
| 1169 |
try:
|
| 1170 |
+
# Get OpenAI API key from environment variable if available
|
| 1171 |
+
openai_key = os.getenv('OPENAI_API_KEY')
|
| 1172 |
+
coach = EnhancedPublicSpeakingCoach(
|
| 1173 |
+
whisper_model_size="base",
|
| 1174 |
+
enable_tts=True,
|
| 1175 |
+
openai_api_key=openai_key
|
| 1176 |
+
)
|
| 1177 |
result = coach.analyze_speech(test_file)
|
| 1178 |
|
| 1179 |
print("\n" + "="*70)
|