akpande2 commited on
Commit
9e0d500
·
verified ·
1 Parent(s): e08bd5b

Upload 6 files

Browse files
Files changed (6) hide show
  1. Dockerfile +47 -0
  2. env.example +20 -0
  3. kid_coach_pipeline.py +1194 -0
  4. main.py +335 -0
  5. requirements1.txt +13 -0
  6. test_api.py +71 -0
Dockerfile ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Production Dockerfile for Public Speaking Coach API
# Optimized for Hugging Face Spaces or any cloud deployment
FROM python:3.11-slim

# Runtime settings: unbuffered stdout/stderr, no .pyc files, no pip cache,
# no pip self-update check, and a single OpenMP thread.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    OMP_NUM_THREADS=1

# Install system dependencies, then drop the apt package lists to keep the layer small
RUN apt-get update && apt-get install -y \
    ffmpeg \
    libsndfile1 \
    git \
    && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Copy requirements first (for better layer caching)
# NOTE(review): the commit that adds this Dockerfile uploads "requirements1.txt";
# confirm that "requirements.txt" actually exists in the build context.
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt
RUN pip install uvicorn

# Copy application code
COPY kid_coach_pipeline.py .
COPY main.py .

# Create directory for temporary files
RUN mkdir -p /tmp/uploads

# Expose port
EXPOSE 7860

# Health check.
# Uses only the standard library so it does not depend on `requests` being
# installed. urlopen() raises on connection errors, timeouts and HTTP 4xx/5xx
# responses, so the command exits non-zero whenever /health is not OK.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:7860/health', timeout=5)"

# Run the application
# Use port 7860 for Hugging Face Spaces compatibility
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
env.example ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ===========================================
2
+ # ENVIRONMENT VARIABLES
3
+ # ===========================================
4
+
5
+ # OpenAI API Key (optional - for better tips)
6
+ # Get from: https://platform.openai.com/api-keys
7
+ OPENAI_API_KEY=sk-proj-xxxxx
8
+
9
+ # AWS S3 Configuration (only for production on AWS)
10
+ USE_S3=false # Set to "true" on AWS
11
+ S3_BUCKET_NAME=aurator-audio-outputs # Your S3 bucket name
12
+ AWS_REGION=us-east-1 # Your AWS region
13
+ AWS_ACCESS_KEY_ID=AKIAxxxxx # AWS credentials
14
+ AWS_SECRET_ACCESS_KEY=xxxxx # AWS credentials
15
+
16
+ # ===========================================
17
+ # FOR HUGGING FACE TESTING:
18
+ # Just add OPENAI_API_KEY in Settings > Variables and secrets (as a Secret, so the key stays private)
19
+ # Leave USE_S3=false (will use local storage)
20
+ # ===========================================
kid_coach_pipeline.py ADDED
@@ -0,0 +1,1194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Enhanced Public Speaking Coach with PERSONALIZED LLM Tips and Avatar Voice
3
+ Includes: Speech Analysis + OpenAI-Powered Personalized Tips + Text-to-Speech Avatar
4
+ """
5
+
6
+ import os
7
+ import io
8
+ import json
9
+ import logging
10
+ import warnings
11
+ import re
12
+ import uuid
13
+ from typing import Dict, List, Any, Optional, Tuple
14
+ from dataclasses import dataclass, asdict
15
+ from pathlib import Path
16
+
17
+ import torch
18
+ import librosa
19
+ import numpy as np
20
+ import soundfile as sf
21
+ from scipy.signal import medfilt
22
+ from scipy.stats import zscore
23
+ import textstat
24
+ from TTS.api import TTS
25
+
26
+ # Suppress warnings
27
+ os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
28
+ logging.getLogger("whisper").setLevel(logging.ERROR)
29
+ logging.getLogger("transformers").setLevel(logging.ERROR)
30
+ warnings.filterwarnings("ignore")
31
+
32
+ # Validate Whisper installation
33
+ try:
34
+ import whisper
35
+ if not hasattr(whisper, "load_model"):
36
+ raise ImportError("Wrong whisper library installed")
37
+ except ImportError:
38
+ print("\n❌ CRITICAL: Install correct whisper library:")
39
+ print(" pip uninstall -y whisper && pip install openai-whisper")
40
+ exit(1)
41
+
42
+ # Import transformers for LLM
43
+ try:
44
+ from transformers import (
45
+ pipeline,
46
+ AutoTokenizer,
47
+ AutoModel,
48
+ AutoModelForSequenceClassification,
49
+ AutoModelForCausalLM
50
+ )
51
+ from sentence_transformers import SentenceTransformer
52
+ except ImportError:
53
+ print("\n❌ CRITICAL: Install required libraries:")
54
+ print(" pip install transformers sentence-transformers torch")
55
+ exit(1)
56
+
57
+ # Import OpenAI for better tips generation
58
+ try:
59
+ import openai
60
+ OPENAI_AVAILABLE = True
61
+ except ImportError:
62
+ print("\n⚠️ WARNING: OpenAI not installed. Using fallback tips.")
63
+ print(" To enable better tips: pip install openai")
64
+ OPENAI_AVAILABLE = False
65
+
66
+ # Import TTS
67
+ try:
68
+ from TTS.api import TTS as CoquiTTS
69
+ except ImportError:
70
+ print("\n⚠️ WARNING: TTS not installed. Avatar voice will be disabled.")
71
+ print(" To enable: pip install TTS")
72
+ CoquiTTS = None
73
+
74
+
75
+ # JSON Serialization Helper
76
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that converts numpy scalars and arrays to native Python values.

    Use it as ``json.dumps(data, cls=NumpyEncoder)``.
    """

    def default(self, obj):
        """Return a JSON-serializable equivalent of ``obj``.

        numpy booleans become ``bool``, numpy integers become ``int``, numpy
        floats become ``float`` and arrays become (nested) lists. Any other
        type is delegated to the base class, which raises ``TypeError``.
        """
        # np.bool_ is not a subclass of Python's bool, so the stock encoder
        # rejects it unless it is converted here.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)
86
+
87
+
88
+ class EnhancedPublicSpeakingCoach:
89
+ """
90
+ Complete speech analysis engine with LLM tips and avatar voice
91
+ """
92
+
93
def __init__(self, whisper_model_size: str = "base", enable_tts: bool = True, openai_api_key: Optional[str] = None):
    """
    Load the models used by the coach and build its word/pattern tables.

    Args:
        whisper_model_size: Model name handed to whisper.load_model (tiny/base/small/medium)
        enable_tts: When True, try to load the Coqui TTS model for the avatar voice
        openai_api_key: OpenAI API key; OpenAI tips are enabled only when this is
            provided and the openai package could be imported
    """
    self.device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"🚀 Initializing Enhanced Coach on {self.device}...")

    # OpenAI is used only when both the package and a key are present.
    self.use_openai = bool(OPENAI_AVAILABLE and openai_api_key)
    if self.use_openai:
        openai.api_key = openai_api_key
        print(" ✅ OpenAI enabled for personalized tips")

    # Speech-to-text model.
    print(f" Loading Whisper ({whisper_model_size})...")
    self.whisper = whisper.load_model(whisper_model_size, device=self.device)

    # Sentiment classifier: RoBERTa first, DistilBERT as the fallback.
    pipeline_device = 0 if self.device == "cuda" else -1

    def load_sentiment(model_name):
        return pipeline("sentiment-analysis", model=model_name, device=pipeline_device)

    print(" Loading Sentiment Model...")
    try:
        self.sentiment_analyzer = load_sentiment("cardiffnlp/twitter-roberta-base-sentiment-latest")
        print(" ✅ Using RoBERTa sentiment model")
    except Exception as e:
        print(f" ⚠️ Failed to load RoBERTa model, falling back to DistilBERT: {e}")
        self.sentiment_analyzer = load_sentiment("distilbert-base-uncased-finetuned-sst-2-english")

    # Sentence embeddings for the flow and coherence metrics.
    print(" Loading Sentence Transformer...")
    self.sentence_model = SentenceTransformer('all-MiniLM-L6-v2')

    # Optional text-to-speech model; any load failure just leaves TTS disabled.
    self.tts_enabled = False
    self.tts_model = None
    if enable_tts and CoquiTTS:
        try:
            print(" Loading TTS for Avatar Voice...")
            self.tts_model = CoquiTTS(model_name="tts_models/en/ljspeech/tacotron2-DDC")
            self.tts_enabled = True
            print(" ✅ TTS enabled")
        except Exception as e:
            print(f" ⚠️ TTS initialization failed: {e}")
            self.tts_enabled = False

    # Filler-word regexes. Three fillers allow stretched spellings ("umm", "sooo");
    # every other entry is matched as a whole word or phrase.
    stretched = {
        "um": r"\bum+h*\b",
        "uh": r"\buh+h*\b",
        "so": r"\bso+\b",
    }
    filler_names = (
        "um", "uh", "like", "you know", "so", "actually", "basically",
        "literally", "i mean", "kind of", "sort of", "right", "okay", "well",
    )
    self.filler_patterns = {
        name: stretched.get(name, rf"\b{name}\b") for name in filler_names
    }

    # Words that count towards the vocabulary score.
    self.power_words = set(
        "significant critical essential vital crucial "
        "important remarkable extraordinary exceptional "
        "achieve accomplish create develop innovate "
        "transform revolutionize enhance optimize "
        "evidence data research proven demonstrate "
        "validate verify confirm establish "
        "believe imagine discover realize understand "
        "recognize appreciate consider envision "
        "opportunity benefit advantage solution success "
        "excellence quality value impact results "
        "together collaborate participate engage contribute".split()
    )

    print("✅ Enhanced Coach Engine Ready!")
185
+
186
+
187
+ def _calculate_overall_score(
188
+ self,
189
+ pacing: Dict,
190
+ prosody: Dict,
191
+ fillers: Dict,
192
+ silences: Dict,
193
+ sentiment: Dict,
194
+ vocabulary: Dict,
195
+ logical_flow: Dict,
196
+ coherence: Dict,
197
+ persuasion: Dict
198
+ ) -> float:
199
+ """
200
+ Calculate overall score (0-10 scale) based on all metrics
201
+
202
+ Weighted scoring system:
203
+ - Pacing: 10%
204
+ - Prosody: 10%
205
+ - Fillers: 15% (fewer is better)
206
+ - Silences: 10%
207
+ - Sentiment: 10%
208
+ - Vocabulary: 15%
209
+ - Logical Flow: 10%
210
+ - Coherence: 10%
211
+ - Persuasion: 10%
212
+ """
213
+ total_score = 0.0
214
+
215
+ # 1. Pacing Score (10%) - 120-160 WPM is ideal
216
+ wpm = pacing['words_per_minute']
217
+ if 120 <= wpm <= 160:
218
+ pacing_score = 10.0
219
+ elif 100 <= wpm < 120 or 160 < wpm <= 180:
220
+ pacing_score = 7.0
221
+ elif 80 <= wpm < 100 or 180 < wpm <= 200:
222
+ pacing_score = 5.0
223
+ else:
224
+ pacing_score = 3.0
225
+ total_score += pacing_score * 0.10
226
+
227
+ # 2. Prosody Score (10%) - dynamic is good
228
+ if prosody['category'].lower() == 'dynamic':
229
+ prosody_score = 10.0
230
+ elif prosody['category'].lower() == 'monotone':
231
+ prosody_score = 4.0
232
+ else:
233
+ prosody_score = 7.0
234
+ total_score += prosody_score * 0.10
235
+
236
+ # 3. Filler Words Score (15%) - fewer is better
237
+ total_fillers = sum(fillers.values())
238
+ if total_fillers == 0:
239
+ filler_score = 10.0
240
+ elif total_fillers <= 3:
241
+ filler_score = 9.0
242
+ elif total_fillers <= 5:
243
+ filler_score = 7.0
244
+ elif total_fillers <= 10:
245
+ filler_score = 5.0
246
+ else:
247
+ filler_score = max(2.0, 10.0 - (total_fillers * 0.3))
248
+ total_score += filler_score * 0.15
249
+
250
+ # 4. Silences Score (10%) - 2-5 pauses is ideal
251
+ silence_count = silences['count']
252
+ if 2 <= silence_count <= 5:
253
+ silence_score = 10.0
254
+ elif silence_count <= 8:
255
+ silence_score = 8.0
256
+ elif silence_count == 0 or silence_count == 1:
257
+ silence_score = 6.0
258
+ else:
259
+ silence_score = max(3.0, 10.0 - (silence_count * 0.5))
260
+ total_score += silence_score * 0.10
261
+
262
+ # 5. Sentiment Score (10%) - positive is best
263
+ sentiment_type = sentiment['dominant_sentiment'].lower()
264
+ confidence = sentiment['confidence']
265
+ if sentiment_type == 'positive':
266
+ sentiment_score = 8.0 + (confidence * 2.0)
267
+ elif sentiment_type == 'neutral':
268
+ sentiment_score = 6.0 + (confidence * 1.0)
269
+ else: # negative
270
+ sentiment_score = max(3.0, 7.0 - (confidence * 3.0))
271
+ total_score += sentiment_score * 0.10
272
+
273
+ # 6. Vocabulary Score (15%) - convert 0-100 to 0-10
274
+ vocab_score = vocabulary['score'] / 10.0
275
+ total_score += vocab_score * 0.15
276
+
277
+ # 7. Logical Flow Score (10%) - convert 0-100 to 0-10
278
+ flow_score = logical_flow['score'] / 10.0
279
+ total_score += flow_score * 0.10
280
+
281
+ # 8. Coherence Score (10%) - convert 0-100 to 0-10
282
+ coherence_score = coherence['score'] / 10.0
283
+ total_score += coherence_score * 0.10
284
+
285
+ # 9. Persuasion Score (10%) - convert 0-100 to 0-10
286
+ persuasion_score = persuasion['score'] / 10.0
287
+ total_score += persuasion_score * 0.10
288
+
289
+ # Ensure score is in 0-10 range
290
+ final_score = max(0.0, min(10.0, total_score))
291
+
292
+ print(f" 📊 Overall Score Calculation:")
293
+ print(f" Pacing: {pacing_score:.1f} (weight: 10%)")
294
+ print(f" Prosody: {prosody_score:.1f} (weight: 10%)")
295
+ print(f" Fillers: {filler_score:.1f} (weight: 15%)")
296
+ print(f" Silences: {silence_score:.1f} (weight: 10%)")
297
+ print(f" Sentiment: {sentiment_score:.1f} (weight: 10%)")
298
+ print(f" Vocabulary: {vocab_score:.1f} (weight: 15%)")
299
+ print(f" Flow: {flow_score:.1f} (weight: 10%)")
300
+ print(f" Coherence: {coherence_score:.1f} (weight: 10%)")
301
+ print(f" Persuasion: {persuasion_score:.1f} (weight: 10%)")
302
+ print(f" ⭐ FINAL OVERALL SCORE: {final_score:.2f}/10")
303
+
304
+ return round(final_score, 2)
305
+
306
+
307
def analyze_speech(self, audio_path: str, output_dir: str = "/tmp/audio_outputs", enable_tts: bool = True, avatar_gender: str = 'male') -> Dict[str, Any]:
    """
    Run the full pipeline on one recording: transcription, all metrics,
    overall score, tips, improved transcript and (optionally) avatar audio.

    Args:
        audio_path: Path to audio file
        output_dir: Directory to save generated audio files (created if missing)
        enable_tts: Per-call switch; audio is generated only if this is True
            and a TTS model was loaded at construction time
        avatar_gender: Forwarded to the tips formatter and the voice generator

    Returns:
        JSON-serializable dictionary with the complete analysis, or a
        dictionary holding a single "error" key when the analysis cannot run
    """
    # Validation
    if not os.path.exists(audio_path):
        return {"error": "Audio file not found"}

    os.makedirs(output_dir, exist_ok=True)

    print(f"\n🎤 Analyzing: {os.path.basename(audio_path)}")

    try:
        samples, sample_rate = self._load_audio(audio_path)
        duration = len(samples) / sample_rate

        if duration < 1.0:
            return {"error": "Audio too short (minimum 1 second)"}

        print(f" Duration: {duration:.1f}s")

        # Step 1: Transcription
        print(" 📝 Transcribing...")
        transcript_data = self._transcribe_with_timestamps(samples)
        full_transcription = transcript_data['text']

        if not full_transcription.strip():
            return {"error": "No speech detected"}

        words = transcript_data['words']

        # Steps 2-10: individual metrics, kept in the order the scoring and
        # tip-generation helpers expect their positional arguments.
        print(" ⚡ Running analysis...")
        pacing_result = self._analyze_pacing(words, duration)
        prosody_result = self._analyze_prosody(samples, sample_rate)
        filler_result = self._detect_fillers_detailed(full_transcription)
        silence_result = self._detect_silences(words)
        sentiment_result = self._analyze_sentiment(full_transcription)
        vocabulary_result = self._analyze_vocabulary(full_transcription, words)
        logical_flow_result = self._analyze_logical_flow(full_transcription)
        coherence_result = self._analyze_coherence(full_transcription)
        persuasion_result = self._analyze_persuasion(full_transcription)
        metrics = (
            pacing_result, prosody_result, filler_result, silence_result,
            sentiment_result, vocabulary_result, logical_flow_result,
            coherence_result, persuasion_result,
        )

        print(" 🎯 Calculating overall score...")
        overall_score = self._calculate_overall_score(*metrics)

        # Step 11: Generate personalized tips using LLM
        print(" 🤖 Generating personalized tips...")
        personalized_tips = self._generate_personalized_tips(
            full_transcription, *metrics, overall_score
        )

        # Step 12: Create improved version of transcript
        print(" ✨ Creating improved transcript...")
        improved_transcript = self._create_improved_transcript(full_transcription, filler_result)

        # Step 13: Two separate audio clips, both optional
        avatar_audio_url = None
        tips_audio_url = None

        if self.tts_enabled and self.tts_model and enable_tts:
            print(" 🎙️ Generating avatar voice for improved transcript...")
            avatar_audio_url = self._generate_avatar_voice(
                improved_transcript, output_dir, gender=avatar_gender, prefix="improved"
            )

            print(" 🎙️ Generating avatar voice for coaching tips...")
            tips_text = self._format_tips_for_audio(personalized_tips, avatar_gender)
            tips_audio_url = self._generate_avatar_voice(
                tips_text, output_dir, gender=avatar_gender, prefix="tips"
            )

        # Compile final result
        result = {
            "transcription": full_transcription,
            "duration_seconds": round(duration, 2),
            "word_count": len(words),
            "overall_score": overall_score,  # 0-10 scale
            "pacing": pacing_result,
            "prosody": prosody_result,
            "filler_words": filler_result,
            "silence_detection": silence_result,
            "sentiment_analysis": sentiment_result,
            "vocabulary": vocabulary_result,
            "logical_flow": logical_flow_result,
            "coherence": coherence_result,
            "persuasion": persuasion_result,
            "personalized_tips": personalized_tips,
            "improved_transcript": improved_transcript,
            "avatar_audio_url": avatar_audio_url,  # For improved transcript
            "tips_audio_url": tips_audio_url,  # For coaching tips
        }

        print("✅ Analysis complete!")
        return result

    except Exception as e:
        # Any failure in the pipeline is reported to the caller as an error dict.
        import traceback
        traceback.print_exc()
        return {"error": f"Analysis failed: {str(e)}"}
455
+
456
+
457
def _load_audio(self, path: str) -> tuple:
    """
    Load an audio file as 16 kHz mono and normalize it.

    Args:
        path: Path to the audio file

    Returns:
        Tuple of (samples, sample_rate) as returned by librosa.load, with the
        samples passed through librosa.util.normalize

    Raises:
        ValueError: If loading or normalizing fails; the original exception
            is attached as the cause
    """
    try:
        audio, sr = librosa.load(path, sr=16000, mono=True)
        audio = librosa.util.normalize(audio)
    except Exception as exc:
        # "from exc" keeps the underlying decoder error in the traceback.
        raise ValueError(f"Failed to load audio: {exc}") from exc
    return audio, sr
465
+
466
+
467
+ def _transcribe_with_timestamps(self, audio: np.ndarray) -> Dict:
468
+ """Transcribe with word-level timestamps"""
469
+ result = self.whisper.transcribe(
470
+ audio,
471
+ language='en',
472
+ word_timestamps=True,
473
+ fp16=(self.device == "cuda")
474
+ )
475
+
476
+ words = []
477
+ for segment in result['segments']:
478
+ if 'words' in segment:
479
+ for word_info in segment['words']:
480
+ words.append({
481
+ 'word': word_info['word'].strip(),
482
+ 'start': word_info['start'],
483
+ 'end': word_info['end']
484
+ })
485
+
486
+ return {
487
+ 'text': result['text'].strip(),
488
+ 'words': words
489
+ }
490
+
491
+
492
+ def _analyze_pacing(self, words: List[Dict], duration: float) -> Dict:
493
+ """Analyze speaking pace"""
494
+ word_count = len(words)
495
+ wpm = (word_count / duration * 60) if duration > 0 else 0
496
+
497
+ if wpm < 120:
498
+ category = "slow"
499
+ elif wpm <= 160:
500
+ category = "good"
501
+ else:
502
+ category = "fast"
503
+
504
+ return {
505
+ "category": category,
506
+ "words_per_minute": round(wpm, 1)
507
+ }
508
+
509
+
510
def _analyze_prosody(self, audio: np.ndarray, sr: int) -> Dict:
    """
    Analyze prosody (pitch variation).

    Estimates the fundamental frequency with librosa.yin between 80 and
    400 Hz and classifies the speech by the standard deviation of the
    estimates.

    Args:
        audio: Audio samples
        sr: Sample rate of ``audio`` in Hz

    Returns:
        Dict with 'category' ("monotone" when the deviation is below 25 Hz,
        otherwise "dynamic"; "unknown" when there are too few estimates or
        the analysis fails) and 'pitch_variation_hz' rounded to one decimal
    """
    try:
        # The real sample rate must be passed: librosa.yin otherwise assumes
        # 22050 Hz, which mis-scales every pitch value (and therefore the
        # 25 Hz threshold below) for audio loaded at a different rate.
        f0 = librosa.yin(audio.astype(np.float64), fmin=80, fmax=400, sr=sr)
        f0_clean = f0[f0 > 0]

        # Too few frames to say anything meaningful about variation.
        if len(f0_clean) <= 10:
            return {"category": "unknown", "pitch_variation_hz": 0.0}

        pitch_std = np.std(f0_clean)
        category = "monotone" if pitch_std < 25 else "dynamic"

        return {
            "category": category,
            "pitch_variation_hz": round(float(pitch_std), 1)
        }
    except Exception as e:
        # Best effort: prosody is one metric among several, so a failure
        # here degrades to "unknown" instead of aborting the analysis.
        logging.warning(f"Prosody analysis failed: {e}")
        return {"category": "unknown", "pitch_variation_hz": 0.0}
529
+
530
+
531
+ def _detect_fillers_detailed(self, text: str) -> Dict:
532
+ """Detect filler words with counts"""
533
+ text_lower = text.lower()
534
+ filler_counts = {}
535
+
536
+ for filler_name, pattern in self.filler_patterns.items():
537
+ matches = re.findall(pattern, text_lower, re.IGNORECASE)
538
+ count = len(matches)
539
+ if count > 0:
540
+ filler_counts[filler_name] = count
541
+
542
+ return filler_counts
543
+
544
+
545
+ def _detect_silences(self, words: List[Dict]) -> Dict:
546
+ """Detect long pauses/silences"""
547
+ if len(words) < 2:
548
+ return {"count": 0, "total_silence_duration_seconds": 0.0}
549
+
550
+ silence_threshold = 2.0
551
+ silence_count = 0
552
+ total_silence_duration = 0.0
553
+
554
+ for i in range(len(words) - 1):
555
+ pause_duration = words[i+1]['start'] - words[i]['end']
556
+ if pause_duration >= silence_threshold:
557
+ silence_count += 1
558
+ total_silence_duration += pause_duration
559
+
560
+ return {
561
+ "count": silence_count,
562
+ "total_silence_duration_seconds": round(total_silence_duration, 2)
563
+ }
564
+
565
+
566
+ def _analyze_sentiment(self, text: str) -> Dict:
567
+ """Analyze dominant sentiment with improved accuracy"""
568
+ try:
569
+ # Clean the text
570
+ text = text.strip()
571
+ if not text:
572
+ return {"dominant_sentiment": "neutral", "confidence": 0.0}
573
+
574
+ print(f" 🔍 Analyzing sentiment for text length: {len(text)} chars")
575
+
576
+ # Split into sentences for better analysis
577
+ sentences = re.split(r'[.!?]+', text)
578
+ sentences = [s.strip() for s in sentences if len(s.strip()) > 5]
579
+
580
+ if not sentences:
581
+ return {"dominant_sentiment": "neutral", "confidence": 0.0}
582
+
583
+ print(f" 📊 Processing {len(sentences)} sentences")
584
+
585
+ # Analyze each sentence
586
+ sentiment_scores = {"positive": 0, "neutral": 0, "negative": 0}
587
+
588
+ for sentence in sentences:
589
+ if len(sentence) < 5:
590
+ continue
591
+
592
+ try:
593
+ # Truncate to model's max length
594
+ sentence_truncated = sentence[:512]
595
+ result = self.sentiment_analyzer(sentence_truncated)[0]
596
+
597
+ label = result['label'].lower()
598
+ score = result['score']
599
+
600
+ # Handle different model output formats
601
+ if 'positive' in label or label == 'pos':
602
+ sentiment_scores['positive'] += score
603
+ elif 'negative' in label or label == 'neg':
604
+ sentiment_scores['negative'] += score
605
+ elif 'neutral' in label or label == 'neu':
606
+ sentiment_scores['neutral'] += score
607
+ else:
608
+ # If label doesn't match expected format, treat as neutral
609
+ sentiment_scores['neutral'] += 0.5
610
+
611
+ print(f" Sentence: '{sentence[:50]}...' -> {label} ({score:.3f})")
612
+
613
+ except Exception as e:
614
+ print(f" ⚠️ Failed to analyze sentence: {e}")
615
+ sentiment_scores['neutral'] += 0.5
616
+
617
+ # Determine dominant sentiment
618
+ dominant = max(sentiment_scores, key=sentiment_scores.get)
619
+ total_score = sum(sentiment_scores.values())
620
+ confidence = sentiment_scores[dominant] / total_score if total_score > 0 else 0.0
621
+
622
+ print(f" 📈 Sentiment scores: {sentiment_scores}")
623
+ print(f" 🎯 Dominant: {dominant} with confidence {confidence:.3f}")
624
+
625
+ return {
626
+ "dominant_sentiment": dominant,
627
+ "confidence": round(confidence, 3)
628
+ }
629
+
630
+ except Exception as e:
631
+ logging.error(f"Sentiment analysis failed: {e}")
632
+ import traceback
633
+ traceback.print_exc()
634
+ return {"dominant_sentiment": "neutral", "confidence": 0.0}
635
+
636
+
637
+ def _analyze_vocabulary(self, text: str, words: List[Dict]) -> Dict:
638
+ """Analyze vocabulary quality"""
639
+ word_list = [w['word'].lower().strip('.,!?;:') for w in words]
640
+
641
+ good_words_found = []
642
+ for word in word_list:
643
+ if word in self.power_words and word not in good_words_found:
644
+ good_words_found.append(word)
645
+
646
+ unique_words = len(set(word_list))
647
+ total_words = len(word_list)
648
+ diversity_ratio = (unique_words / total_words) if total_words > 0 else 0
649
+
650
+ score = 0
651
+ score += min(40, len(good_words_found) * 5)
652
+ score += min(40, diversity_ratio * 80)
653
+
654
+ if unique_words >= 100:
655
+ score += 20
656
+ elif unique_words >= 50:
657
+ score += 15
658
+ elif unique_words >= 25:
659
+ score += 10
660
+ else:
661
+ score += 5
662
+
663
+ return {
664
+ "score": round(score),
665
+ "good_words_used": sorted(good_words_found)
666
+ }
667
+
668
+
669
+ def _analyze_logical_flow(self, text: str) -> Dict:
670
+ """Analyze logical flow"""
671
+ try:
672
+ sentences = re.split(r'[.!?]+', text)
673
+ sentences = [s.strip() for s in sentences if len(s.strip()) > 10]
674
+
675
+ if len(sentences) < 2:
676
+ return {"score": 50, "flow_quality": "insufficient_data"}
677
+
678
+ embeddings = self.sentence_model.encode(sentences)
679
+
680
+ similarities = []
681
+ for i in range(len(embeddings) - 1):
682
+ similarity = np.dot(embeddings[i], embeddings[i + 1]) / (
683
+ np.linalg.norm(embeddings[i]) * np.linalg.norm(embeddings[i + 1])
684
+ )
685
+ similarities.append(similarity)
686
+
687
+ avg_similarity = np.mean(similarities)
688
+
689
+ if 0.3 <= avg_similarity <= 0.7:
690
+ score = 70 + (30 * (1 - abs(avg_similarity - 0.5) / 0.2))
691
+ elif avg_similarity < 0.3:
692
+ score = 40 + (avg_similarity / 0.3) * 30
693
+ else:
694
+ score = 70 - ((avg_similarity - 0.7) / 0.3) * 30
695
+
696
+ score = max(0, min(100, score))
697
+
698
+ if score >= 80:
699
+ quality = "excellent"
700
+ elif score >= 65:
701
+ quality = "good"
702
+ elif score >= 50:
703
+ quality = "moderate"
704
+ else:
705
+ quality = "needs_improvement"
706
+
707
+ return {"score": round(score), "flow_quality": quality}
708
+
709
+ except Exception as e:
710
+ logging.warning(f"Logical flow analysis failed: {e}")
711
+ return {"score": 50, "flow_quality": "error"}
712
+
713
+
714
+ def _analyze_coherence(self, text: str) -> Dict:
715
+ """Analyze coherence"""
716
+ try:
717
+ sentences = re.split(r'[.!?]+', text)
718
+ sentences = [s.strip() for s in sentences if len(s.strip()) > 10]
719
+
720
+ if len(sentences) < 2:
721
+ return {"score": 50, "coherence_quality": "insufficient_data"}
722
+
723
+ discourse_markers = [
724
+ "however", "therefore", "moreover", "furthermore", "additionally",
725
+ "consequently", "nevertheless", "thus", "hence", "meanwhile",
726
+ "first", "second", "third", "finally", "in conclusion",
727
+ "for example", "for instance", "in particular", "specifically"
728
+ ]
729
+
730
+ text_lower = text.lower()
731
+ marker_count = sum(1 for marker in discourse_markers if marker in text_lower)
732
+
733
+ embeddings = self.sentence_model.encode(sentences)
734
+
735
+ coherence_scores = []
736
+ for i in range(len(embeddings)):
737
+ for j in range(i + 1, min(i + 3, len(embeddings))):
738
+ similarity = np.dot(embeddings[i], embeddings[j]) / (
739
+ np.linalg.norm(embeddings[i]) * np.linalg.norm(embeddings[j])
740
+ )
741
+ coherence_scores.append(similarity)
742
+
743
+ avg_coherence = np.mean(coherence_scores) if coherence_scores else 0.5
744
+
745
+ score = 0
746
+ score += min(60, avg_coherence * 100)
747
+ score += min(40, marker_count * 5)
748
+ score = max(0, min(100, score))
749
+
750
+ if score >= 80:
751
+ quality = "excellent"
752
+ elif score >= 65:
753
+ quality = "good"
754
+ elif score >= 50:
755
+ quality = "moderate"
756
+ else:
757
+ quality = "needs_improvement"
758
+
759
+ return {"score": round(score), "coherence_quality": quality}
760
+
761
+ except Exception as e:
762
+ logging.warning(f"Coherence analysis failed: {e}")
763
+ return {"score": 50, "coherence_quality": "error"}
764
+
765
+
766
+ def _analyze_persuasion(self, text: str) -> Dict:
767
+ """Analyze persuasive elements"""
768
+ try:
769
+ text_lower = text.lower()
770
+ score = 0
771
+
772
+ logical_connectors = [
773
+ "therefore", "thus", "consequently", "hence", "accordingly",
774
+ "because", "since", "as a result", "for this reason"
775
+ ]
776
+ evidence_markers = [
777
+ "research shows", "studies indicate", "data suggests",
778
+ "according to", "evidence demonstrates", "proven by"
779
+ ]
780
+ appeal_markers = [
781
+ "imagine", "consider", "think about", "what if",
782
+ "picture this", "envision"
783
+ ]
784
+ credibility_markers = [
785
+ "expert", "research", "study", "proven", "validated",
786
+ "established", "recognized"
787
+ ]
788
+
789
+ score += min(25, sum(1 for c in logical_connectors if c in text_lower) * 5)
790
+ score += min(25, sum(1 for m in evidence_markers if m in text_lower) * 8)
791
+ score += min(25, sum(1 for m in appeal_markers if m in text_lower) * 6)
792
+ score += min(25, sum(1 for m in credibility_markers if m in text_lower) * 5)
793
+
794
+ score = max(0, min(100, score))
795
+
796
+ if score >= 80:
797
+ level = "highly_persuasive"
798
+ elif score >= 60:
799
+ level = "persuasive"
800
+ elif score >= 40:
801
+ level = "moderately_persuasive"
802
+ else:
803
+ level = "needs_improvement"
804
+
805
+ return {"score": round(score), "persuasion_level": level}
806
+
807
+ except Exception as e:
808
+ logging.warning(f"Persuasion analysis failed: {e}")
809
+ return {"score": 50, "persuasion_level": "error"}
810
+
811
+
812
+ def _generate_personalized_tips(
813
+ self,
814
+ transcript: str,
815
+ pacing: Dict,
816
+ prosody: Dict,
817
+ fillers: Dict,
818
+ silences: Dict,
819
+ sentiment: Dict,
820
+ vocabulary: Dict,
821
+ logical_flow: Dict,
822
+ coherence: Dict,
823
+ persuasion: Dict,
824
+ overall_score: float
825
+ ) -> List[str]:
826
+ """Generate truly personalized tips using OpenAI or enhanced fallback"""
827
+
828
+ # Try OpenAI first if available
829
+ if self.use_openai:
830
+ try:
831
+ tips = self._generate_openai_tips(
832
+ transcript, pacing, prosody, fillers, silences,
833
+ sentiment, vocabulary, logical_flow, coherence, persuasion, overall_score
834
+ )
835
+ if tips and len(tips) >= 3:
836
+ return tips
837
+ except Exception as e:
838
+ logging.warning(f"OpenAI tip generation failed: {e}")
839
+
840
+ # Use enhanced fallback tips
841
+ return self._generate_enhanced_fallback_tips(
842
+ transcript, pacing, prosody, fillers, silences,
843
+ sentiment, vocabulary, logical_flow, coherence, persuasion, overall_score
844
+ )
845
+
846
+
847
def _generate_openai_tips(
    self,
    transcript: str,
    pacing: Dict,
    prosody: Dict,
    fillers: Dict,
    silences: Dict,
    sentiment: Dict,
    vocabulary: Dict,
    logical_flow: Dict,
    coherence: Dict,
    persuasion: Dict,
    overall_score: float
) -> List[str]:
    """Ask the OpenAI chat API for up to five personalized coaching tips.

    The metric dictionaries are summarised into a prompt together with the
    first 200 characters of the transcript. Works with both the legacy
    ``openai.ChatCompletion`` interface (openai<1.0) and the module-level
    ``openai.chat.completions`` interface (openai>=1.0).

    Returns:
        At most five tip strings, or ``[]`` if the API call or parsing fails.

    Raises:
        KeyError: If a metric dictionary lacks a key used in the summary
            (the summary is built before the guarded API call).
    """
    filler_breakdown = (
        ', '.join(f'{name}: {count}' for name, count in fillers.items())
        if fillers else 'none'
    )

    # Build detailed analysis summary
    analysis_summary = "\n".join([
        "Speech Performance Analysis:",
        "",
        f"Overall Score: {overall_score}/10",
        "",
        "Detailed Metrics:",
        f"- Pacing: {pacing['category']} at {pacing['words_per_minute']} words per minute",
        f"- Voice Variation: {prosody['category']} (pitch variation: {prosody['pitch_variation_hz']} Hz)",
        f"- Filler Words: {sum(fillers.values())} total ({filler_breakdown})",
        f"- Pauses: {silences['count']} long pauses",
        f"- Tone: {sentiment['dominant_sentiment']} ({sentiment['confidence']:.0%} confidence)",
        f"- Vocabulary: {vocabulary['score']}/100 (used {len(vocabulary['good_words_used'])} power words)",
        f"- Logical Flow: {logical_flow['flow_quality']} ({logical_flow['score']}/100)",
        f"- Coherence: {coherence['coherence_quality']} ({coherence['score']}/100)",
        f"- Persuasiveness: {persuasion['persuasion_level']} ({persuasion['score']}/100)",
        "",
        f'Speech excerpt: "{transcript[:200]}..."',
        "",
    ])

    # Create personalized prompt
    prompt = "\n".join([
        analysis_summary,
        "",
        "You are a friendly, encouraging public speaking coach. Based on this person's speech analysis, provide 5 specific, actionable coaching tips.",
        "",
        "Requirements:",
        "1. Be warm, supportive, and encouraging",
        "2. Focus on the 2-3 weakest areas that need improvement",
        "3. Give concrete examples for each tip (e.g., \"Instead of saying 'um,' try pausing silently for 1-2 seconds\")",
        "4. Use conversational, friendly language as if speaking to a friend",
        "5. Celebrate what they're doing well while gently addressing areas to improve",
        "6. Make tips practical and easy to implement immediately",
        "",
        "Format each tip as a complete, friendly sentence. Number them 1-5.",
    ])

    try:
        # openai>=1.0 removed openai.ChatCompletion; the module-level client
        # there is openai.chat.completions. Support whichever is installed.
        if hasattr(openai, "OpenAI"):
            create_chat_completion = openai.chat.completions.create
        else:
            create_chat_completion = openai.ChatCompletion.create

        response = create_chat_completion(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are an expert public speaking coach who gives personalized, friendly, actionable advice."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=500,
            temperature=0.8
        )

        content = response.choices[0].message.content.strip()

        # Parse tips, remembering which lines actually carried a number.
        numbering = re.compile(r'^\**\s*\d+[\.\):\-]\s*\**\s*')
        candidates = []
        for raw_line in content.split('\n'):
            line = raw_line.strip()
            stripped = numbering.sub('', line)
            if len(stripped) > 20:  # Valid tip
                candidates.append((stripped != line, stripped))

        numbered = [tip for is_numbered, tip in candidates if is_numbered]
        # Prefer numbered lines so an intro sentence such as "Here are your
        # five tips:" is not mistaken for a tip; otherwise keep every line.
        tips = numbered if len(numbered) >= 3 else [tip for _, tip in candidates]

        return tips[:5]

    except Exception as e:
        logging.error(f"OpenAI API error: {e}")
        return []
924
+
925
+
926
+ def _generate_enhanced_fallback_tips(
927
+ self,
928
+ transcript: str,
929
+ pacing: Dict,
930
+ prosody: Dict,
931
+ fillers: Dict,
932
+ silences: Dict,
933
+ sentiment: Dict,
934
+ vocabulary: Dict,
935
+ logical_flow: Dict,
936
+ coherence: Dict,
937
+ persuasion: Dict,
938
+ overall_score: float
939
+ ) -> List[str]:
940
+ """Generate personalized, friendly tips with examples (fallback)"""
941
+ tips = []
942
+
943
+ # Calculate what needs improvement most
944
+ scores = {
945
+ 'pacing': self._get_pacing_score(pacing),
946
+ 'prosody': self._get_prosody_score(prosody),
947
+ 'fillers': self._get_filler_score(fillers),
948
+ 'silences': self._get_silence_score(silences),
949
+ 'vocabulary': vocabulary['score'] / 10.0,
950
+ 'flow': logical_flow['score'] / 10.0,
951
+ 'coherence': coherence['score'] / 10.0,
952
+ 'persuasion': persuasion['score'] / 10.0
953
+ }
954
+
955
+ # Sort by score (lowest first = needs most improvement)
956
+ improvement_areas = sorted(scores.items(), key=lambda x: x[1])
957
+
958
+ # Generate tips for weakest areas
959
+ wpm = pacing['words_per_minute']
960
+ total_fillers = sum(fillers.values())
961
+
962
+ for area, score in improvement_areas[:5]: # Top 5 areas needing improvement
963
+ if area == 'pacing':
964
+ if pacing['category'] == 'slow':
965
+ tips.append(f"Your pace is currently {wpm} words per minute. Try speeding up to 130-140 WPM - imagine you're telling an exciting story to a friend! Practice by reading aloud with a timer.")
966
+ elif pacing['category'] == 'fast':
967
+ tips.append(f"You're speaking at {wpm} words per minute, which is pretty fast! Slow down to about 140-150 WPM. Take a breath between sentences - your audience needs time to absorb your ideas.")
968
+
969
+ elif area == 'prosody':
970
+ if prosody['category'] == 'monotone':
971
+ tips.append(f"Add more vocal variety to keep your audience engaged! Try emphasizing key words - for example, if you say 'This is REALLY important,' make 'really' louder and higher pitched. Practice reading children's books out loud to build this skill.")
972
+
973
+ elif area == 'fillers':
974
+ if total_fillers > 5:
975
+ most_used = max(fillers.items(), key=lambda x: x[1])
976
+ tips.append(f"You said '{most_used[0]}' {most_used[1]} times. When you feel the urge to say it, pause silently instead - it makes you sound more confident! Try counting to 2 in your head during pauses.")
977
+
978
+ elif area == 'silences':
979
+ if silences['count'] > 5:
980
+ tips.append(f"You had {silences['count']} long pauses. That's okay! But try to keep pauses to 1-2 seconds. If you need to think, it's better to say 'Let me think about that...' than to go silent for too long.")
981
+ elif silences['count'] < 2:
982
+ tips.append(f"Don't be afraid to pause! Strategic 2-second pauses after important points give your audience time to process. Try pausing after questions like 'Why does this matter?' - it creates anticipation.")
983
+
984
+ elif area == 'vocabulary':
985
+ if vocabulary['score'] < 60:
986
+ good_words = vocabulary['good_words_used']
987
+ if good_words:
988
+ tips.append(f"Great job using power words like '{', '.join(good_words[:3])}'! Try adding more impact words like 'crucial,' 'remarkable,' or 'transform' to make your speech more memorable.")
989
+ else:
990
+ tips.append(f"Spice up your vocabulary! Instead of 'very good,' try 'excellent' or 'outstanding.' Instead of 'big problem,' say 'significant challenge.' Keep a list of power words on your phone!")
991
+
992
+ elif area == 'flow':
993
+ if logical_flow['score'] < 65:
994
+ tips.append(f"Connect your ideas more smoothly! Use transition phrases like 'Building on that...', 'Here's why this matters...', or 'Let me give you an example...' - they're like road signs that guide your audience through your speech.")
995
+
996
+ elif area == 'coherence':
997
+ if coherence['score'] < 65:
998
+ tips.append(f"Make your main message crystal clear! Try using signpost phrases: 'There are three reasons why...' or 'My main point is...' Then at the end, say 'To sum up...' and restate your key idea.")
999
+
1000
+ elif area == 'persuasion':
1001
+ if persuasion['score'] < 60:
1002
+ tips.append(f"Make your speech more convincing! Add phrases like 'Research shows that...' or 'Imagine if we could...' or 'The evidence is clear...' These make your points more compelling and credible.")
1003
+
1004
+ # If we don't have 5 tips yet, add some positive encouragement
1005
+ if len(tips) < 5 and overall_score >= 7.0:
1006
+ tips.append(f"You're doing great with a {overall_score:.1f}/10 score! Keep practicing regularly - even 5 minutes a day of reading aloud can make a huge difference in your confidence and delivery.")
1007
+
1008
+ # Always add one encouraging tip at the end
1009
+ if len(tips) < 5:
1010
+ if overall_score < 5.0:
1011
+ tips.append("Remember, every great speaker started somewhere! Focus on improving one thing at a time, and you'll see amazing progress. Record yourself weekly to track your improvement!")
1012
+ else:
1013
+ tips.append("You're making good progress! Keep recording yourself and listening back - you'll be surprised how quickly you improve. Consider joining a speaking group like Toastmasters to practice regularly!")
1014
+
1015
+ return tips[:5]
1016
+
1017
+
1018
+ def _get_pacing_score(self, pacing: Dict) -> float:
1019
+ """Convert pacing to 0-10 score"""
1020
+ wpm = pacing['words_per_minute']
1021
+ if 120 <= wpm <= 160:
1022
+ return 10.0
1023
+ elif 100 <= wpm < 120 or 160 < wpm <= 180:
1024
+ return 7.0
1025
+ else:
1026
+ return 4.0
1027
+
1028
+ def _get_prosody_score(self, prosody: Dict) -> float:
1029
+ """Convert prosody to 0-10 score"""
1030
+ return 10.0 if prosody['category'] == 'dynamic' else 4.0
1031
+
1032
+ def _get_filler_score(self, fillers: Dict) -> float:
1033
+ """Convert filler count to 0-10 score"""
1034
+ total = sum(fillers.values())
1035
+ if total == 0:
1036
+ return 10.0
1037
+ elif total <= 3:
1038
+ return 9.0
1039
+ elif total <= 5:
1040
+ return 7.0
1041
+ else:
1042
+ return max(2.0, 10.0 - (total * 0.3))
1043
+
1044
+ def _get_silence_score(self, silences: Dict) -> float:
1045
+ """Convert silence count to 0-10 score"""
1046
+ count = silences['count']
1047
+ if 2 <= count <= 5:
1048
+ return 10.0
1049
+ elif count <= 8:
1050
+ return 8.0
1051
+ else:
1052
+ return max(3.0, 10.0 - (count * 0.5))
1053
+
1054
+
1055
+ def _format_tips_for_audio(self, tips: List[str], gender: str) -> str:
1056
+ """Format tips in a natural, conversational way for audio"""
1057
+ avatar_name = "Alex" if gender == "male" else "Maya"
1058
+
1059
+ # Create a friendly introduction
1060
+ intro = f"Hey there! I'm {avatar_name}, your speaking coach. I've analyzed your speech, and I have some personalized tips to help you shine even brighter!"
1061
+
1062
+ # Add natural transitions between tips
1063
+ transitions = [
1064
+ "First,",
1065
+ "Next up,",
1066
+ "Here's another tip:",
1067
+ "Also, I noticed that",
1068
+ "And finally,"
1069
+ ]
1070
+
1071
+ # Build the audio script
1072
+ audio_parts = [intro]
1073
+
1074
+ for i, tip in enumerate(tips[:5]):
1075
+ if i < len(transitions):
1076
+ audio_parts.append(f"{transitions[i]} {tip}")
1077
+ else:
1078
+ audio_parts.append(tip)
1079
+
1080
+ # Add encouraging conclusion
1081
+ conclusion = "You're making great progress! Keep practicing these tips, and you'll see amazing results. I'm cheering for you!"
1082
+ audio_parts.append(conclusion)
1083
+
1084
+ return " ".join(audio_parts)
1085
+
1086
+
1087
+ def _create_improved_transcript(self, original: str, fillers: Dict) -> str:
1088
+ """Create improved version of transcript (remove fillers, clean up)"""
1089
+ improved = original
1090
+
1091
+ # Remove filler words
1092
+ for filler_name, pattern in self.filler_patterns.items():
1093
+ if filler_name in fillers:
1094
+ # Replace fillers with nothing or appropriate punctuation
1095
+ improved = re.sub(pattern, '', improved, flags=re.IGNORECASE)
1096
+
1097
+ # Clean up multiple spaces
1098
+ improved = re.sub(r'\s+', ' ', improved)
1099
+
1100
+ # Fix punctuation
1101
+ improved = re.sub(r'\s+([,.!?])', r'\1', improved)
1102
+
1103
+ # Capitalize first letter of sentences
1104
+ improved = re.sub(r'(^|[.!?]\s+)([a-z])', lambda m: m.group(1) + m.group(2).upper(), improved)
1105
+
1106
+ return improved.strip()
1107
+
1108
+
1109
+ def _generate_avatar_voice(self, text: str, output_dir: str, gender: str = "male", prefix: str = "avatar") -> Optional[str]:
1110
+ """
1111
+ Generate avatar voice audio using TTS
1112
+
1113
+ Args:
1114
+ text: Text to synthesize
1115
+ output_dir: Directory to save audio file
1116
+ gender: Avatar gender ("male" or "female")
1117
+ prefix: Filename prefix (e.g., "improved", "tips")
1118
+
1119
+ Returns:
1120
+ Relative path to generated audio file or None if generation fails
1121
+ """
1122
+ try:
1123
+ if not self.tts_enabled or not self.tts_model:
1124
+ print(" ⚠️ TTS not enabled, skipping avatar voice generation")
1125
+ return None
1126
+
1127
+ # Generate unique filename with prefix
1128
+ audio_filename = f"{prefix}_{uuid.uuid4()}.wav"
1129
+ audio_path = os.path.join(output_dir, audio_filename)
1130
+
1131
+ # Truncate text if too long (TTS models have limits)
1132
+ max_length = 1000 # characters
1133
+ if len(text) > max_length:
1134
+ text = text[:max_length] + "..."
1135
+ print(f" ⚠️ Text truncated to {max_length} characters for TTS")
1136
+
1137
+ # Generate audio using TTS
1138
+ print(f" 🎙️ Generating {gender} {prefix} audio...")
1139
+ self.tts_model.tts_to_file(text=text, file_path=audio_path)
1140
+
1141
+ # Return relative path (assuming output_dir is served)
1142
+ return f"/audio/{audio_filename}"
1143
+
1144
+ except Exception as e:
1145
+ logging.error(f"Avatar voice generation failed: {e}")
1146
+ import traceback
1147
+ traceback.print_exc()
1148
+ return None
1149
+
1150
+
1151
+ # ================= MAIN =================
1152
if __name__ == "__main__":
    # Manual smoke test: analyse test_speech.wav (a synthetic tone is
    # generated when the file is missing) and dump the result as JSON.
    print("\n" + "="*70)
    print("ENHANCED PUBLIC SPEAKING COACH - TEST")
    print("="*70 + "\n")

    test_file = "test_speech.wav"

    if not os.path.exists(test_file):
        print("⚠️  No test file found. Generating dummy audio...")
        sr = 16000
        duration = 10
        # endpoint=False keeps the sample spacing at exactly 1/sr seconds.
        t = np.linspace(0, duration, sr * duration, endpoint=False)
        audio = 0.3 * np.sin(2 * np.pi * 200 * t) + 0.2 * np.sin(2 * np.pi * 300 * t)
        audio += 0.1 * np.random.randn(len(audio))
        sf.write(test_file, audio, sr)
        print(f"✅ Created {test_file}\n")

    try:
        # Get OpenAI API key from environment variable if available
        openai_key = os.getenv('OPENAI_API_KEY')
        coach = EnhancedPublicSpeakingCoach(
            whisper_model_size="base",
            enable_tts=True,
            openai_api_key=openai_key
        )
        result = coach.analyze_speech(test_file)

        print("\n" + "="*70)
        print("ANALYSIS RESULTS (JSON)")
        print("="*70)
        print(json.dumps(result, indent=2, cls=NumpyEncoder))

        output_file = "speech_analysis_result.json"
        with open(output_file, 'w', encoding="utf-8") as f:
            json.dump(result, f, indent=2, cls=NumpyEncoder)

        print(f"\n✅ Results saved to: {output_file}")
        print("✅ Test completed successfully!")

    except Exception as e:
        print(f"\n❌ ERROR: {e}")
        import traceback
        traceback.print_exc()
        # Signal the failure to the calling shell or CI job.
        raise SystemExit(1)
main.py ADDED
@@ -0,0 +1,335 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Production FastAPI Server with S3 Support
3
+ Works on: Hugging Face Space (testing) → AWS (production)
4
+ """
5
+
6
+ import os
7
+ import tempfile
8
+ from pathlib import Path
9
+ from typing import Optional
10
+ import time
11
+ import uuid
12
+
13
+ import uvicorn
14
+ from fastapi import FastAPI, UploadFile, File, HTTPException, status, Form
15
+ from fastapi.middleware.cors import CORSMiddleware
16
+ from fastapi.responses import JSONResponse, FileResponse
17
+ from fastapi.staticfiles import StaticFiles
18
+
19
+ from kid_coach_pipeline import EnhancedPublicSpeakingCoach
20
+
21
+ # Try to import boto3 (for AWS S3)
22
+ try:
23
+ import boto3
24
+ S3_AVAILABLE = True
25
+ except ImportError:
26
+ S3_AVAILABLE = False
27
+ print("⚠️ boto3 not available - S3 uploads disabled")
28
+
29
# ================= CONFIGURATION =================

app = FastAPI(
    title="Aurator Speech Coach API",
    description="AI-powered speech analysis with personalized coaching",
    version="4.0.0"
)

# CORS_ALLOW_ORIGINS is an optional comma-separated origin list; the default
# keeps the API open to every origin. Credentials are only allowed together
# with an explicit origin list, because a wildcard origin combined with
# credentials would let any website make authenticated cross-site requests.
CORS_ALLOW_ORIGINS = [
    origin.strip()
    for origin in os.getenv("CORS_ALLOW_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=CORS_ALLOW_ORIGINS,
    allow_credentials="*" not in CORS_ALLOW_ORIGINS,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Audio output directory (local fallback)
AUDIO_OUTPUT_DIR = "/tmp/audio_outputs"
os.makedirs(AUDIO_OUTPUT_DIR, exist_ok=True)

# Mount for local testing: serves generated audio under /audio/<filename>
app.mount("/audio", StaticFiles(directory=AUDIO_OUTPUT_DIR), name="audio")

# AWS S3 Configuration (optional - for production)
USE_S3 = os.getenv("USE_S3", "false").lower() == "true" and S3_AVAILABLE
S3_BUCKET = os.getenv("S3_BUCKET_NAME", "aurator-audio-outputs")
S3_REGION = os.getenv("AWS_REGION", "us-east-1")

if USE_S3:
    # s3_client only exists when S3 is enabled; callers must check USE_S3.
    s3_client = boto3.client('s3', region_name=S3_REGION)
    print(f"✅ S3 enabled: {S3_BUCKET}")
else:
    print("📁 Using local file storage")

# Populated by the startup handler; stays None if model loading fails.
coach_engine: Optional[EnhancedPublicSpeakingCoach] = None

SUPPORTED_FORMATS = {'.wav', '.mp3', '.m4a', '.flac', '.ogg', '.aac', '.webm'}
MAX_FILE_SIZE = 50 * 1024 * 1024  # 50 MB
67
+
68
+
69
+ # ================= S3 HELPERS =================
70
+
71
def upload_to_s3(file_path: str, file_type: str) -> str:
    """Upload a generated audio file to S3 and return a URL for it.

    When S3 is disabled, or the upload fails, the local "/audio/<filename>"
    URL is returned instead so the file is still reachable from this server.

    Args:
        file_path: Path of the local .wav file.
        file_type: Short label used as the S3 object-name prefix
            (for example "tips" or "improved").

    Returns:
        The public S3 URL, or the local "/audio/..." URL as a fallback.
    """
    local_url = f"/audio/{os.path.basename(file_path)}"

    if not USE_S3:
        # Return local URL for HF testing
        return local_url

    try:
        from datetime import datetime, timezone

        # Partition keys by UTC date so they do not depend on the server's
        # local time zone.
        today = datetime.now(timezone.utc)
        s3_key = f"{today:%Y/%m/%d}/{file_type}_{uuid.uuid4()}.wav"

        # NOTE(review): the 'public-read' ACL presumably requires the bucket
        # to have ACLs enabled; confirm the bucket configuration.
        s3_client.upload_file(
            file_path,
            S3_BUCKET,
            s3_key,
            ExtraArgs={'ContentType': 'audio/wav', 'ACL': 'public-read'}
        )

        # Return public URL
        return f"https://{S3_BUCKET}.s3.{S3_REGION}.amazonaws.com/{s3_key}"

    except Exception as e:
        print(f"❌ S3 upload failed: {e}")
        # Fallback to local URL
        return local_url
101
+
102
+
103
+ # ================= STARTUP =================
104
+
105
@app.on_event("startup")
async def startup_event():
    """Load the coaching engine once when the server starts.

    On failure the error and its traceback are printed and ``coach_engine``
    is left as None, so the server still starts and reports itself as
    degraded instead of crashing.
    """
    global coach_engine

    banner = "=" * 60
    print("\n" + banner)
    print("🚀 AURATOR SPEECH COACH API")
    print(banner)

    try:
        openai_key = os.getenv("OPENAI_API_KEY")

        print("\n📦 Loading models...")
        coach_engine = EnhancedPublicSpeakingCoach(
            whisper_model_size="base",
            enable_tts=True,
            openai_api_key=openai_key
        )
        print("✅ Engine ready!")
        print(f"   Storage: {'S3' if USE_S3 else 'Local'}")
        print(f"   OpenAI: {'Enabled' if openai_key else 'Fallback mode'}")
        print("\n" + banner + "\n")

    except Exception as e:
        import traceback

        print(f"\n❌ STARTUP FAILED: {e}")
        # The message alone rarely identifies which model failed to load.
        traceback.print_exc()
        coach_engine = None
130
+
131
+
132
+ # ================= ENDPOINTS =================
133
+
134
@app.get("/")
async def root():
    """Return basic service information and the list of endpoints.

    ``status`` is "online" when the coaching engine is loaded and
    "degraded" otherwise.
    """
    engine_status = "online" if coach_engine else "degraded"
    storage_backend = "s3" if USE_S3 else "local"

    return {
        "service": "Aurator Speech Coach API",
        "version": "4.0.0",
        "status": engine_status,
        "storage": storage_backend,
        "endpoints": {
            "analyze": "POST /api/analyze",
            "health": "GET /api/health"
        }
    }
147
+
148
+
149
@app.get("/health", include_in_schema=False)
@app.get("/api/health")
async def health_check():
    """Health check for AWS load balancer

    Responds with HTTP 200 when the coaching engine is loaded and HTTP 503
    when it is not, because health probes judge the status code, not the
    JSON body. The same handler also answers on "/health", the path probed
    by the container HEALTHCHECK.
    """
    engine_loaded = coach_engine is not None

    return JSONResponse(
        content={
            "status": "healthy" if engine_loaded else "unhealthy",
            "engine_loaded": engine_loaded,
            "timestamp": time.time()
        },
        status_code=(
            status.HTTP_200_OK
            if engine_loaded
            else status.HTTP_503_SERVICE_UNAVAILABLE
        ),
    )
157
+
158
+
159
@app.post("/api/analyze")
async def analyze_speech(
    audio_file: UploadFile = File(...),
    avatar_gender: str = Form('male')
):
    """
    Main endpoint: Analyze speech and return results

    Request:
    - audio_file: Audio file (wav/mp3/m4a/flac/ogg/aac/webm)
    - avatar_gender: "male" or "female" (default: male, case-insensitive)

    Response:
    {
        "success": true,
        "data": {
            "overall_score": 8.6,
            "transcription": {...},
            "analysis": {...},
            "coaching": {
                "tips": [...],
                "tips_audio_url": "https://...",
                "improved_audio_url": "https://..."
            }
        },
        "processing_time_ms": 3250,
        "timestamp": "2025-12-16T..."
    }

    Errors:
    - 400: missing, empty or unsupported file, or the engine reported an error
    - 413: file larger than MAX_FILE_SIZE
    - 503: engine not initialized
    - 500: any other failure during analysis
    """
    start_time = time.time()

    # Validate engine
    if coach_engine is None:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Engine not initialized"
        )

    # Validate file
    if not audio_file or not audio_file.filename:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="No audio file provided"
        )

    file_ext = Path(audio_file.filename).suffix.lower()
    if file_ext not in SUPPORTED_FORMATS:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f"Unsupported format: {file_ext}"
        )

    # Normalise case and whitespace so "Male" selects the male avatar.
    avatar_gender = (avatar_gender or "male").strip().lower()

    temp_file = None

    try:
        # Save uploaded file temporarily
        content = await audio_file.read()

        if not content:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Uploaded audio file is empty"
            )

        if len(content) > MAX_FILE_SIZE:
            raise HTTPException(
                status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
                detail="File too large (max 50MB)"
            )

        with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as temp:
            temp.write(content)
            temp_file = temp.name

        print(f"\n🎤 Analyzing: {audio_file.filename} ({len(content)/1024:.1f} KB)")

        # Run analysis
        # NOTE(review): this is a synchronous call inside an async handler,
        # so other requests wait until it returns.
        result = coach_engine.analyze_speech(
            temp_file,
            output_dir=AUDIO_OUTPUT_DIR,
            enable_tts=True,
            avatar_gender=avatar_gender
        )

        if "error" in result:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=result["error"]
            )

        # Upload audio files to S3 (if enabled) or use local URLs
        for url_key, file_type in (
            ("tips_audio_url", "tips"),
            ("avatar_audio_url", "improved"),
        ):
            if not result.get(url_key):
                continue
            local_path = os.path.join(AUDIO_OUTPUT_DIR, os.path.basename(result[url_key]))
            if not os.path.exists(local_path):
                continue
            public_url = upload_to_s3(local_path, file_type)
            result[url_key] = public_url
            # After a successful S3 upload the local copy is no longer
            # referenced; remove it so the output directory does not grow.
            if USE_S3 and not public_url.startswith("/audio/"):
                try:
                    os.remove(local_path)
                except OSError:
                    pass

        processing_time = int((time.time() - start_time) * 1000)

        print(f"✅ Complete in {processing_time}ms")

        # Format response for React Native
        response = {
            "success": True,
            "data": {
                "overall_score": result.get("overall_score", 0),
                "duration_seconds": result.get("duration_seconds", 0),
                "word_count": result.get("word_count", 0),

                "transcription": {
                    "text": result.get("transcription", ""),
                    "improved_text": result.get("improved_transcript", "")
                },

                "analysis": {
                    "pacing": result.get("pacing", {}),
                    "prosody": result.get("prosody", {}),
                    "fillers": result.get("filler_words", {}),
                    "silences": result.get("silence_detection", {}),
                    "sentiment": result.get("sentiment_analysis", {}),
                    "vocabulary": result.get("vocabulary", {}),
                    "flow": result.get("logical_flow", {}),
                    "coherence": result.get("coherence", {}),
                    "persuasion": result.get("persuasion", {})
                },

                "coaching": {
                    "tips": result.get("personalized_tips", []),
                    "tips_audio_url": result.get("tips_audio_url"),
                    "improved_audio_url": result.get("avatar_audio_url")
                }
            },
            "processing_time_ms": processing_time,
            "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        }

        return JSONResponse(content=response)

    except HTTPException:
        raise

    except Exception as e:
        import traceback
        traceback.print_exc()

        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Analysis failed: {str(e)}"
        )

    finally:
        # Cleanup: the uploaded temp file is never needed after analysis
        if temp_file and os.path.exists(temp_file):
            try:
                os.remove(temp_file)
            except OSError:
                pass
319
+
320
+
321
@app.get("/audio/{filename}")
async def serve_audio(filename: str):
    """Serve audio files (for local/HF testing)

    Only plain file names inside AUDIO_OUTPUT_DIR are served; anything that
    is not an existing regular file there yields 404.

    NOTE(review): a StaticFiles app is mounted on "/audio" earlier in this
    module and presumably takes precedence over this route; confirm whether
    this handler is still reachable.
    """
    # basename() drops any directory component so the lookup cannot leave
    # the output directory; a changed name means the request was not a
    # plain file name.
    safe_name = os.path.basename(filename)
    file_path = os.path.join(AUDIO_OUTPUT_DIR, safe_name)

    if not safe_name or safe_name != filename or not os.path.isfile(file_path):
        raise HTTPException(404, "Audio file not found")

    return FileResponse(file_path, media_type="audio/wav")
330
+
331
+
332
+ # ================= RUN =================
333
+
334
if __name__ == "__main__":
    # PORT lets the same entry point serve on the container's port (7860 in
    # the Dockerfile); the default stays 8000 for local runs.
    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", "8000")))
requirements1.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ sentence-transformers
4
+ openai-whisper
5
+ librosa
6
+ soundfile
7
+ textstat
8
+ TTS
9
+ fastapi
10
+ uvicorn
11
+ python-multipart
12
+ boto3
13
+ openai
test_api.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Quick API Test Script
3
+ Test the FastAPI server locally or on HF
4
+ """
5
+
6
+ import requests
7
+ import sys
8
+
9
+ # Change this to your HF Space URL or local
10
+ API_URL = "http://localhost:8000" # For local testing
11
+ # API_URL = "https://your-space.hf.space" # For HF testing
12
+
13
def test_health():
    """Test health endpoint

    Returns True when GET /api/health answers with HTTP 200, and False on
    any other status or when the server cannot be reached.
    """
    print("Testing /api/health...")
    try:
        # The timeout keeps the script from hanging on an unresponsive server.
        response = requests.get(f"{API_URL}/api/health", timeout=10)
    except requests.RequestException as e:
        print(f"Error: {e}\n")
        return False

    print(f"Status: {response.status_code}")
    try:
        print(f"Response: {response.json()}\n")
    except ValueError:
        # Not JSON (for example a proxy error page); show the raw body.
        print(f"Response: {response.text}\n")
    return response.status_code == 200
20
+
21
def test_analyze(audio_file_path):
    """Test analyze endpoint

    Uploads *audio_file_path* to POST /api/analyze and prints a summary of
    the response. Returns True only for an HTTP 200 answer with the
    expected JSON structure.
    """
    print(f"Testing /api/analyze with {audio_file_path}...")

    try:
        with open(audio_file_path, 'rb') as f:
            response = requests.post(
                f"{API_URL}/api/analyze",
                files={'audio_file': f},
                data={'avatar_gender': 'male'},
                timeout=60
            )
    except (OSError, requests.RequestException) as e:
        # Missing file, refused connection or timeout.
        print(f"Error: {e}")
        return False

    print(f"Status: {response.status_code}")

    if response.status_code != 200:
        print(f"Error: {response.text}")
        return False

    try:
        result = response.json()
        coaching = result['data']['coaching']
        print(f"Success: {result['success']}")
        print(f"Overall Score: {result['data']['overall_score']}")
        print(f"Processing Time: {result['processing_time_ms']}ms")
        print(f"Tips Count: {len(coaching['tips'])}")
        print(f"Tips Audio: {coaching['tips_audio_url']}")
        print(f"Improved Audio: {coaching['improved_audio_url']}")
    except (ValueError, KeyError, TypeError) as e:
        print(f"Error: unexpected response format ({e!r})")
        return False

    return True
50
+
51
if __name__ == "__main__":
    # Usage: python test_api.py [audio_file]
    # Exits with status 1 if the health check or the analysis test fails.
    print("="*60)
    print("API TEST SCRIPT")
    print("="*60 + "\n")

    # Test health
    if not test_health():
        print("❌ Health check failed!")
        sys.exit(1)

    print("✅ Health check passed!\n")

    # Test analyze (provide your audio file)
    if len(sys.argv) > 1:
        audio_file = sys.argv[1]
        if test_analyze(audio_file):
            print("\n✅ Analysis test passed!")
        else:
            print("\n❌ Analysis test failed!")
            sys.exit(1)
    else:
        print("ℹ️  To test analysis: python test_api.py your_audio.wav")