sparshmehta commited on
Commit
0178c47
·
verified ·
1 Parent(s): db6cc7f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +461 -636
app.py CHANGED
@@ -66,19 +66,27 @@ class ProgressTracker:
66
  self.status.update(label=f"{message} ({progress:.1%}) - ETA: {remaining:.0f}s")
67
 
68
  class AudioFeatureExtractor:
69
- """Handles audio feature extraction with improved memory management"""
70
  def __init__(self):
71
  self.sr = 16000
72
  self.hop_length = 512
73
  self.n_fft = 2048
74
  self.chunk_duration = 300
75
 
76
- def extract_features(self, audio_path: str, progress_callback=None) -> Dict[str, float]:
77
  """Extract audio features with chunked processing"""
78
  try:
79
  if progress_callback:
80
  progress_callback(0.1, "Loading audio file...")
81
 
 
 
 
 
 
 
 
 
82
  features = {
83
  "pitch_mean": 0.0,
84
  "pitch_std": 0.0,
@@ -87,7 +95,9 @@ class AudioFeatureExtractor:
87
  "pauses_per_minute": 0.0,
88
  "rising_patterns": 0,
89
  "falling_patterns": 0,
90
- "variations_per_minute": 0.0
 
 
91
  }
92
 
93
  # Process audio in chunks
@@ -106,7 +116,7 @@ class AudioFeatureExtractor:
106
  accumulated_features.append(chunk_features)
107
 
108
  # Combine features from all chunks
109
- features = self._combine_features(accumulated_features)
110
 
111
  if progress_callback:
112
  progress_callback(1.0, "Feature extraction complete!")
@@ -117,7 +127,7 @@ class AudioFeatureExtractor:
117
  logger.error(f"Error in feature extraction: {e}")
118
  raise AudioProcessingError(f"Feature extraction failed: {str(e)}")
119
 
120
- def _process_chunk(self, chunk: np.ndarray) -> Dict[str, float]:
121
  """Process a single chunk of audio"""
122
  D = librosa.stft(chunk, n_fft=self.n_fft, hop_length=self.hop_length)
123
  S = np.abs(D)
@@ -131,10 +141,14 @@ class AudioFeatureExtractor:
131
  frame_length=self.n_fft
132
  )
133
 
 
 
 
134
  return {
135
  "rms": rms,
136
  "f0": f0[voiced_flag == 1] if f0 is not None else np.array([]),
137
- "duration": len(chunk) / self.sr
 
138
  }
139
 
140
  def _combine_features(self, features: List[Dict[str, Any]]) -> Dict[str, float]:
@@ -142,6 +156,7 @@ class AudioFeatureExtractor:
142
  all_f0 = np.concatenate([f["f0"] for f in features if len(f["f0"]) > 0])
143
  all_rms = np.concatenate([f["rms"] for f in features])
144
  total_duration = sum(f["duration"] for f in features)
 
145
 
146
  pitch_mean = np.mean(all_f0) if len(all_f0) > 0 else 0
147
  pitch_std = np.std(all_f0) if len(all_f0) > 0 else 0
@@ -151,7 +166,7 @@ class AudioFeatureExtractor:
151
  "pitch_std": float(pitch_std),
152
  "mean_amplitude": float(np.mean(all_rms)),
153
  "amplitude_deviation": float(np.std(all_rms) / np.mean(all_rms)) if np.mean(all_rms) > 0 else 0,
154
- "pauses_per_minute": float(len(librosa.effects.split(np.concatenate([f["rms"] for f in features]), top_db=20)) / (total_duration / 60)),
155
  "rising_patterns": int(np.sum(np.diff(all_f0) > 0)) if len(all_f0) > 1 else 0,
156
  "falling_patterns": int(np.sum(np.diff(all_f0) < 0)) if len(all_f0) > 1 else 0,
157
  "variations_per_minute": float((np.sum(np.diff(all_f0) != 0) if len(all_f0) > 1 else 0) / (total_duration / 60))
@@ -165,702 +180,512 @@ class ContentAnalyzer:
165
  self.retry_delay = 1
166
 
167
  def analyze_content(self, transcript: str, progress_callback=None) -> Dict[str, Any]:
168
- """Analyze teaching content with retry logic and robust JSON handling"""
169
  for attempt in range(self.retry_count):
170
  try:
171
  if progress_callback:
172
- progress_callback(0.2, "Preparing content analysis...")
173
 
174
- prompt = self._create_analysis_prompt(transcript)
 
175
 
176
  if progress_callback:
177
- progress_callback(0.5, "Processing with AI model...")
 
 
 
178
 
179
  response = self.client.chat.completions.create(
180
- model="gpt-4o-mini",
181
  messages=[
182
- {"role": "system", "content": "You are a teaching expert providing a structured JSON analysis. Always respond with a valid JSON object."},
 
 
 
 
183
  {"role": "user", "content": prompt}
184
- ],
185
- response_format={"type": "json_object"}
186
  )
187
 
188
  if progress_callback:
189
  progress_callback(0.8, "Formatting results...")
190
 
191
- # Ensure we have valid JSON
192
- result_text = response.choices[0].message.content.strip()
193
-
194
- try:
195
- result = json.loads(result_text)
196
- except json.JSONDecodeError:
197
- # Fallback to a default structure if JSON parsing fails
198
- result = {
199
- "subjectMatterAccuracy": {"score": 0, "citations": []},
200
- "firstPrinciplesApproach": {"score": 0, "citations": []},
201
- "examplesAndContext": {"score": 0, "citations": []},
202
- "cohesiveStorytelling": {"score": 0, "citations": []},
203
- "engagement": {"score": 0, "citations": []},
204
- "professionalTone": {"score": 0, "citations": []}
205
- }
206
 
207
  if progress_callback:
208
  progress_callback(1.0, "Content analysis complete!")
209
 
210
- return result
211
 
212
  except Exception as e:
213
  logger.error(f"Content analysis attempt {attempt + 1} failed: {e}")
214
  if attempt == self.retry_count - 1:
215
- # Return a default structure on final failure
216
- return {
217
- "subjectMatterAccuracy": {"score": 0, "citations": []},
218
- "firstPrinciplesApproach": {"score": 0, "citations": []},
219
- "examplesAndContext": {"score": 0, "citations": []},
220
- "cohesiveStorytelling": {"score": 0, "citations": []},
221
- "engagement": {"score": 0, "citations": []},
222
- "professionalTone": {"score": 0, "citations": []}
223
- }
224
- time.sleep(self.retry_delay * (2 ** attempt))
225
-
226
- def _create_analysis_prompt(self, transcript: str) -> str:
227
- """Create the analysis prompt"""
228
- return f"""Analyze this teaching content and provide scores and citations:
229
- Transcript: {transcript}
230
- For each category below, provide:
231
- 1. Score (0 or 1)
232
- 2. Supporting citations with timestamps (if score is 0, cite problematic areas)
233
- Concept Assessment:
234
- 1. Subject Matter Accuracy
235
- 2. First Principles Approach
236
- 3. Examples and Business Context
237
- 4. Cohesive Storytelling
238
- 5. Engagement and Interaction
239
- 6. Professional Tone
240
- Code Assessment:
241
- 1. Depth of Explanation
242
- 2. Output Interpretation
243
- 3. Breaking down Complexity
244
- Format as JSON."""
245
-
246
- class RecommendationGenerator:
247
- """Generates teaching recommendations using OpenAI API"""
248
- def __init__(self, api_key: str):
249
- self.client = OpenAI(api_key=api_key)
250
- self.retry_count = 3
251
- self.retry_delay = 1
252
-
253
- def generate_recommendations(self,
254
- metrics: Dict[str, Any],
255
- content_analysis: Dict[str, Any],
256
- progress_callback=None) -> Dict[str, Any]:
257
- """Generate recommendations with robust JSON handling"""
258
- for attempt in range(self.retry_count):
259
- try:
260
- if progress_callback:
261
- progress_callback(0.2, "Preparing recommendation analysis...")
262
-
263
- prompt = self._create_recommendation_prompt(metrics, content_analysis)
264
-
265
- if progress_callback:
266
- progress_callback(0.5, "Generating recommendations...")
267
-
268
- response = self.client.chat.completions.create(
269
- model="gpt-4o-mini",
270
- messages=[
271
- {"role": "system", "content": "You are a teaching expert providing actionable recommendations. Always respond with a valid JSON object."},
272
- {"role": "user", "content": prompt}
273
- ],
274
- response_format={"type": "json_object"}
275
- )
276
-
277
- if progress_callback:
278
- progress_callback(0.8, "Formatting recommendations...")
279
-
280
- # Ensure we have valid JSON
281
- result_text = response.choices[0].message.content.strip()
282
-
283
- try:
284
- result = json.loads(result_text)
285
- except json.JSONDecodeError:
286
- # Fallback to a default structure if JSON parsing fails
287
- result = {
288
- "geographyFit": "Unknown",
289
- "improvements": [
290
- "Unable to generate specific recommendations"
291
- ],
292
- "rigor": "Undetermined"
293
- }
294
-
295
- if progress_callback:
296
- progress_callback(1.0, "Recommendations complete!")
297
-
298
- return result
299
-
300
- except Exception as e:
301
- logger.error(f"Recommendation generation attempt {attempt + 1} failed: {e}")
302
- if attempt == self.retry_count - 1:
303
- # Return a default structure on final failure
304
- return {
305
- "geographyFit": "Unknown",
306
- "improvements": [
307
- "Unable to generate specific recommendations"
308
- ],
309
- "rigor": "Undetermined"
310
- }
311
  time.sleep(self.retry_delay * (2 ** attempt))
312
 
313
- def _create_recommendation_prompt(self, metrics: Dict[str, Any], content_analysis: Dict[str, Any]) -> str:
314
- """Create the recommendation prompt"""
315
- return f"""Based on the following metrics and analysis, provide recommendations:
316
- Metrics: {json.dumps(metrics)}
317
- Content Analysis: {json.dumps(content_analysis)}
318
- Provide:
319
- 1. Specific improvements needed
320
- 2. Rigor assessment considering technical and teaching abilities
321
- Format as JSON with keys: geographyFit, improvements (array), rigor"""
322
-
323
- class MentorEvaluator:
324
- """Main class for video evaluation"""
325
- def __init__(self, model_cache_dir: Optional[str] = None):
326
- """Initialize with proper model caching"""
327
- self.api_key = st.secrets["OPENAI_API_KEY"]
328
- if not self.api_key:
329
- raise ValueError("OPENAI_API_KEY environment variable must be set")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
330
 
331
- # Create a specific directory for the whisper model
332
- if model_cache_dir:
333
- self.model_cache_dir = model_cache_dir
334
- else:
335
- # Create a persistent directory in the user's home directory
336
- self.model_cache_dir = os.path.join(os.path.expanduser("~"), ".cache", "whisper")
337
- os.makedirs(self.model_cache_dir, exist_ok=True)
338
 
339
- self._whisper_model = None
340
- self._feature_extractor = None
341
- self._content_analyzer = None
342
- self._recommendation_generator = None
343
-
344
- @property
345
- def whisper_model(self):
346
- """Lazy loading of whisper model with proper cache directory handling"""
347
- if self._whisper_model is None:
348
- try:
349
- # First try to load from cache
350
- self._whisper_model = WhisperModel(
351
- "small",
352
- device="cpu",
353
- compute_type="int8",
354
- download_root=self.model_cache_dir,
355
- local_files_only=True
356
- )
357
- except Exception as e:
358
- logger.info(f"Could not load model from cache, downloading: {e}")
359
- # If loading from cache fails, download the model
360
- self._whisper_model = WhisperModel(
361
- "small",
362
- device="cpu",
363
- compute_type="int8",
364
- download_root=self.model_cache_dir,
365
- local_files_only=False
366
- )
367
- logger.info("Model downloaded successfully")
368
- return self._whisper_model
369
-
370
- @property
371
- def feature_extractor(self):
372
- """Lazy loading of feature extractor"""
373
- if self._feature_extractor is None:
374
- self._feature_extractor = AudioFeatureExtractor()
375
- return self._feature_extractor
376
-
377
- @property
378
- def content_analyzer(self):
379
- """Lazy loading of content analyzer"""
380
- if self._content_analyzer is None:
381
- self._content_analyzer = ContentAnalyzer(api_key=self.api_key)
382
- return self._content_analyzer
383
-
384
- @property
385
- def recommendation_generator(self):
386
- """Lazy loading of recommendation generator"""
387
- if self._recommendation_generator is None:
388
- self._recommendation_generator = RecommendationGenerator(api_key=self.api_key)
389
- return self._recommendation_generator
390
-
391
- def evaluate_video(self, video_path: str) -> Dict[str, Any]:
392
- """Evaluate video with proper resource management"""
393
- with temporary_file(suffix=".wav") as temp_audio:
394
- try:
395
- # Extract audio
396
- with st.status("Extracting audio...") as status:
397
- progress_bar = st.progress(0)
398
- tracker = ProgressTracker(status, progress_bar)
399
- self._extract_audio(video_path, temp_audio, tracker.update)
400
-
401
- # Extract features
402
- with st.status("Extracting audio features...") as status:
403
- progress_bar = st.progress(0)
404
- tracker = ProgressTracker(status, progress_bar)
405
- audio_features = self.feature_extractor.extract_features(
406
- temp_audio,
407
- tracker.update
408
- )
409
-
410
- # Transcribe
411
- with st.status("Transcribing audio...") as status:
412
- progress_bar = st.progress(0)
413
- tracker = ProgressTracker(status, progress_bar)
414
- transcript = self._transcribe_audio(temp_audio, tracker.update)
415
 
416
- # Analyze content
417
- with st.status("Analyzing content...") as status:
418
- progress_bar = st.progress(0)
419
- tracker = ProgressTracker(status, progress_bar)
420
- content_analysis = self.content_analyzer.analyze_content(
421
- transcript,
422
- tracker.update
423
- )
424
 
425
- # Evaluate speech
426
- with st.status("Evaluating speech metrics...") as status:
427
- progress_bar = st.progress(0)
428
- tracker = ProgressTracker(status, progress_bar)
429
- speech_metrics = self._evaluate_speech_metrics(
430
- transcript,
431
- audio_features,
432
- tracker.update
433
- )
434
 
435
- # Generate recommendations
436
- with st.status("Generating recommendations...") as status:
437
- progress_bar = st.progress(0)
438
- tracker = ProgressTracker(status, progress_bar)
439
- recommendations = self.recommendation_generator.generate_recommendations(
440
- speech_metrics,
441
- content_analysis,
442
- tracker.update
443
- )
444
 
445
- return {
446
- "communication": speech_metrics,
447
- "teaching": content_analysis,
448
- "recommendations": recommendations,
449
- "transcript": transcript
450
- }
451
 
452
- except Exception as e:
453
- logger.error(f"Error in video evaluation: {e}")
454
- raise
455
 
456
- def _extract_audio(self, video_path: str, output_path: str, progress_callback=None) -> str:
457
- """Extract audio from video"""
458
  try:
459
- if progress_callback:
460
- progress_callback(0.1, "Checking dependencies...")
461
-
462
- if not shutil.which('ffmpeg'):
463
- raise AudioProcessingError("FFmpeg is not installed")
464
-
465
- if not os.path.exists(video_path):
466
- raise FileNotFoundError(f"Video file not found: {video_path}")
467
-
468
- if not os.access(os.path.dirname(output_path), os.W_OK):
469
- raise AudioProcessingError(f"No write permission for output directory: {os.path.dirname(output_path)}")
470
-
471
- if progress_callback:
472
- progress_callback(0.3, "Configuring audio extraction...")
473
-
474
- ffmpeg_cmd = [
475
- 'ffmpeg',
476
- '-i', video_path,
477
- '-ar', '16000',
478
- '-ac', '1',
479
- '-f', 'wav',
480
- '-v', 'warning',
481
- '-y',
482
- output_path
483
- ]
484
-
485
- if progress_callback:
486
- progress_callback(0.5, "Extracting audio...")
487
-
488
- result = subprocess.run(
489
- ffmpeg_cmd,
490
- capture_output=True,
491
- text=True
492
- )
493
-
494
- if result.returncode != 0:
495
- raise AudioProcessingError(f"FFmpeg Error: {result.stderr}")
496
-
497
- if progress_callback:
498
- progress_callback(1.0, "Audio extraction complete!")
499
-
500
- return output_path
501
-
 
 
 
 
 
 
502
  except Exception as e:
503
- logger.error(f"Error in audio extraction: {e}")
504
- raise AudioProcessingError(f"Audio extraction failed: {str(e)}")
505
 
506
- def _transcribe_audio(self, audio_path: str, progress_callback=None) -> str:
507
- """Transcribe audio with improved memory management"""
508
- try:
509
- if progress_callback:
510
- progress_callback(0.1, "Loading transcription model...")
511
-
512
- audio_info = sf.info(audio_path)
513
- total_duration = audio_info.duration
514
- chunk_duration = 5 * 60 # 5-minute chunks
515
- overlap_duration = 10 # 10-second overlap
516
-
517
- transcripts = []
518
- total_chunks = int(np.ceil(total_duration / (chunk_duration - overlap_duration)))
519
-
520
- with sf.SoundFile(audio_path) as f:
521
- for i in range(total_chunks):
522
- if progress_callback:
523
- progress_callback(0.4 + (i / total_chunks) * 0.4,
524
- f"Transcribing chunk {i + 1}/{total_chunks}...")
525
-
526
- # Calculate positions in samples
527
- start_sample = int(i * (chunk_duration - overlap_duration) * f.samplerate)
528
- f.seek(start_sample)
529
- chunk = f.read(frames=int(chunk_duration * f.samplerate))
530
-
531
- with temporary_file(suffix=".wav") as chunk_path:
532
- sf.write(chunk_path, chunk, f.samplerate)
533
- # The fix: properly handle the segments from faster-whisper
534
- segments, _ = self.whisper_model.transcribe(chunk_path)
535
- # Combine all segment texts
536
- chunk_text = ' '.join(segment.text for segment in segments)
537
- transcripts.append(chunk_text)
538
-
539
- if progress_callback:
540
- progress_callback(1.0, "Transcription complete!")
541
-
542
- return " ".join(transcripts)
543
-
544
- except Exception as e:
545
- logger.error(f"Error in transcription: {e}")
546
- raise
547
 
548
- def _evaluate_speech_metrics(self, transcript: str, audio_features: Dict[str, float],
 
 
 
 
 
 
 
 
549
  progress_callback=None) -> Dict[str, Any]:
550
- """Evaluate speech metrics with improved error handling"""
551
  try:
552
  if progress_callback:
553
- progress_callback(0.2, "Calculating basic metrics...")
554
-
555
- # Calculate duration based on word count and average speaking rate
556
- words = len(transcript.split())
557
- duration_minutes = len(transcript) / 500 # Approximate duration
558
- wpm = words / duration_minutes if duration_minutes > 0 else 0
559
-
560
  if progress_callback:
561
- progress_callback(0.4, "Analyzing filler words...")
562
-
563
- filler_words = len(re.findall(r'\b(um|uh|like|you know)\b', transcript.lower()))
564
- fillers_per_minute = filler_words / duration_minutes if duration_minutes > 0 else 0
565
-
 
 
 
 
 
 
 
 
 
566
  if progress_callback:
567
- progress_callback(0.6, "Checking grammar...")
568
-
569
- error_patterns = r'\b(is|are|was|were)\s+\w+ing\b'
570
- grammatical_errors = len(re.findall(error_patterns, transcript))
571
- errors_per_minute = grammatical_errors / duration_minutes if duration_minutes > 0 else 0
572
-
573
  if progress_callback:
574
- progress_callback(0.8, "Compiling results...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
575
 
576
- metrics = {
577
- "speed": {
578
- "score": 1 if 120 <= wpm <= 160 else 0,
579
- "wpm": wpm
 
 
 
 
 
 
580
  },
581
- "fluency": {
582
- "score": 1 if fillers_per_minute <= 5 else 0,
583
- "fillersPerMin": fillers_per_minute,
584
- "errorsPerMin": errors_per_minute
585
  },
586
- "flow": {
587
- "score": 1 if 10 <= audio_features["pauses_per_minute"] <= 15 else 0,
588
- "pausesPerMin": audio_features["pauses_per_minute"]
 
 
589
  },
590
- "intonation": {
591
- "pitch": audio_features["pitch_mean"],
592
- "pitchScore": 1 if 77 <= audio_features["pitch_std"] <= 92 else 0,
593
- "pitchVariation": audio_features["pitch_std"],
594
- "patternScore": 1 if audio_features["variations_per_minute"] > 5 else 0,
595
- "risingPatterns": audio_features["rising_patterns"],
596
- "fallingPatterns": audio_features["falling_patterns"],
597
- "variationsPerMin": audio_features["variations_per_minute"]
 
 
 
 
 
 
 
 
 
598
  },
599
- "energy": {
600
- "score": 1 if audio_features["mean_amplitude"] > 100 else 0,
601
- "meanAmplitude": audio_features["mean_amplitude"],
602
- "amplitudeDeviation": audio_features["amplitude_deviation"]
603
  }
 
 
 
 
 
 
604
  }
 
605
 
606
- if progress_callback:
607
- progress_callback(1.0, "Speech metrics complete!")
608
-
609
- return metrics
610
-
611
- except Exception as e:
612
- logger.error(f"Error in speech metrics evaluation: {e}")
613
- raise
 
 
 
 
 
 
 
 
 
 
 
 
 
614
 
615
- def validate_video_file(file_path: str):
616
- """Validate video file before processing"""
617
- valid_extensions = {'.mp4', '.avi', '.mov'}
618
 
619
- if not os.path.exists(file_path):
620
- raise ValueError("Video file does not exist")
621
-
622
- if os.path.splitext(file_path)[1].lower() not in valid_extensions:
623
- raise ValueError("Unsupported video format")
624
-
625
- if os.path.getsize(file_path) > 2 * 1024 * 1024 * 1024: # 2GB
626
- raise ValueError("File size exceeds 2GB limit")
627
-
628
- try:
629
- probe = subprocess.run(
630
- ['ffprobe', '-v', 'quiet', file_path],
631
- capture_output=True,
632
- text=True
633
  )
634
- if probe.returncode != 0:
635
- raise ValueError("Invalid video file")
636
- except subprocess.SubprocessError:
637
- raise ValueError("Unable to validate video file")
638
-
639
- def display_evaluation(evaluation: Dict[str, Any]):
640
- """Display evaluation results with improved error handling"""
641
- try:
642
- tabs = st.tabs(["Communication", "Teaching", "Recommendations", "Transcript"])
643
-
644
- with tabs[0]:
645
- with st.status("Loading communication metrics...") as status:
646
- progress_bar = st.progress(0)
647
- progress_bar.progress(0.2)
648
- st.header("Communication")
649
-
650
- # Speed metrics
651
- st.subheader("Speed")
652
- col1, col2 = st.columns(2)
653
- with col1:
654
- st.metric("Score", "Pass" if evaluation["communication"]["speed"]["score"] == 1
655
- else "Need Improvement")
656
- with col2:
657
- st.metric("Words per Minute",
658
- f"{evaluation['communication']['speed']['wpm']:.1f}")
659
- st.caption("Acceptable Range: 120-160 WPM")
660
- progress_bar.progress(0.4)
661
-
662
- # Fluency metrics
663
- st.subheader("Fluency")
664
- col1, col2, col3 = st.columns(3)
665
- with col1:
666
- st.metric("Score", "Pass" if evaluation["communication"]["fluency"]["score"] == 1
667
- else "Need Improvement")
668
- with col2:
669
- st.metric("Fillers/Min",
670
- f"{evaluation['communication']['fluency']['fillersPerMin']:.1f}")
671
- with col3:
672
- st.metric("Errors/Min",
673
- f"{evaluation['communication']['fluency']['errorsPerMin']:.1f}")
674
- progress_bar.progress(0.6)
675
-
676
- # Flow metrics
677
- st.subheader("Flow")
678
- col1, col2 = st.columns(2)
679
- with col1:
680
- st.metric("Score", "Pass" if evaluation["communication"]["flow"]["score"] == 1
681
- else "Need Improvement")
682
- with col2:
683
- st.metric("Pauses/Min",
684
- f"{evaluation['communication']['flow']['pausesPerMin']:.1f}")
685
-
686
- # Intonation metrics
687
- st.subheader("Intonation")
688
- col1, col2 = st.columns(2)
689
- with col1:
690
- st.metric("Pitch Score", "Pass" if evaluation["communication"]["intonation"]["pitchScore"] == 1
691
- else "Need Improvement")
692
- with col2:
693
- st.metric("Pattern Score", "Pass" if evaluation["communication"]["intonation"]["patternScore"] == 1
694
- else "Need Improvement")
695
- progress_bar.progress(0.8)
696
-
697
- # Energy metrics
698
- st.subheader("Energy")
699
- st.metric("Score", "Pass" if evaluation["communication"]["energy"]["score"] == 1
700
- else "Need Improvement")
701
- progress_bar.progress(1.0)
702
- status.update(label="Communication metrics loaded!", state="complete")
703
-
704
- # Teaching tab
705
- with tabs[1]:
706
- st.header("Teaching Analysis")
707
- st.json(evaluation["teaching"])
708
-
709
- # Recommendations tab
710
- with tabs[2]:
711
- st.header("Recommendations")
712
- st.json(evaluation["recommendations"])
713
-
714
- # Transcript tab
715
- with tabs[3]:
716
- st.header("Transcript")
717
- st.text(evaluation["transcript"])
718
-
719
- except Exception as e:
720
- logger.error(f"Error displaying evaluation: {e}")
721
- st.error(f"Error displaying results: {str(e)}")
722
-
723
- def check_dependencies() -> List[str]:
724
- """Check if required dependencies are installed"""
725
- missing = []
726
 
727
- if not shutil.which('ffmpeg'):
728
- missing.append("FFmpeg")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
729
 
730
- return missing
 
 
 
 
 
 
 
731
 
732
  def main():
733
- try:
734
- st.set_page_config(page_title="🎓 Mentor Demo Review System", layout="wide")
735
-
736
- st.title("🎓 Mentor Demo Review System")
737
-
738
- # Check dependencies with progress
739
- with st.status("Checking system requirements...") as status:
 
740
  progress_bar = st.progress(0)
741
-
742
- status.update(label="Checking FFmpeg installation...")
743
- progress_bar.progress(0.3)
744
- missing_deps = check_dependencies()
745
-
746
- progress_bar.progress(0.6)
747
- if missing_deps:
748
- status.update(label="Missing dependencies detected!", state="error")
749
- st.error(f"Missing required dependencies: {', '.join(missing_deps)}")
750
- st.markdown("""
751
- Please install the missing dependencies:
752
- ```bash
753
- sudo apt-get update
754
- sudo apt-get install ffmpeg
755
- ```
756
- """)
757
- return
758
-
759
- progress_bar.progress(1.0)
760
- status.update(label="System requirements satisfied!", state="complete")
761
-
762
- with st.sidebar:
763
- st.header("Instructions")
764
- st.markdown("""
765
- 1. Upload your teaching video
766
- 2. Wait for the analysis
767
- 3. Review the detailed feedback
768
- 4. Download the report
769
-
770
- **Supported formats:** MP4, AVI, MOV
771
- **Maximum file size:** 500mb
772
- """)
773
-
774
- st.header("Processing Status")
775
- st.info("Upload a video to begin analysis")
776
-
777
- uploaded_file = st.file_uploader(
778
- "Upload Teaching Video",
779
- type=['mp4', 'avi', 'mov'],
780
- help="Upload your teaching video in MP4, AVI, or MOV format"
781
- )
782
-
783
- if uploaded_file:
784
- # Create temp directory for processing
785
- temp_dir = tempfile.mkdtemp()
786
- video_path = os.path.join(temp_dir, uploaded_file.name)
787
 
788
  try:
789
- # Save uploaded file with progress
790
- with st.status("Saving uploaded file...") as status:
791
- progress_bar = st.progress(0)
792
-
793
- # Save in chunks to show progress
794
- chunk_size = 1024 * 1024 # 1MB chunks
795
- file_size = len(uploaded_file.getbuffer())
796
- chunks = file_size // chunk_size + 1
797
-
798
- with open(video_path, 'wb') as f:
799
- for i in range(chunks):
800
- start = i * chunk_size
801
- end = min(start + chunk_size, file_size)
802
- f.write(uploaded_file.getbuffer()[start:end])
803
- progress = (i + 1) / chunks
804
- status.update(label=f"Saving file: {progress:.1%}")
805
- progress_bar.progress(progress)
806
-
807
- status.update(label="File saved successfully!", state="complete")
808
 
809
- # Validate file size
810
- file_size = os.path.getsize(video_path) / (1024 * 1024 * 1024) # Size in GB
811
- if file_size > 2:
812
- st.error("File size exceeds 2GB limit. Please upload a smaller file.")
813
- return
814
 
815
- # Process video
816
- with st.spinner("Processing video"):
817
- evaluator = MentorEvaluator()
818
- evaluation = evaluator.evaluate_video(video_path)
 
 
819
 
820
  # Display results
821
- st.success("Analysis complete!")
822
- display_evaluation(evaluation)
823
-
824
- # Add download button with progress
825
- with st.status("Preparing download...") as status:
826
- progress_bar = st.progress(0)
827
-
828
- status.update(label="Formatting JSON...")
829
- progress_bar.progress(0.3)
830
-
831
- json_str = json.dumps(evaluation, indent=2)
832
-
833
- progress_bar.progress(0.6)
834
- status.update(label="Creating download button...")
835
-
836
- st.download_button(
837
- "📥 Download Full Report",
838
- json_str,
839
- "evaluation_report.json",
840
- "application/json",
841
- help="Download the complete evaluation report in JSON format"
842
- )
843
-
844
- progress_bar.progress(1.0)
845
- status.update(label="Download ready!", state="complete")
846
 
847
  except Exception as e:
848
- st.error(f"Error during evaluation: {str(e)}")
849
-
 
850
  finally:
851
- # Clean up temp files with progress
852
- with st.status("Cleaning up...") as status:
853
- progress_bar = st.progress(0)
854
-
855
- if 'temp_dir' in locals():
856
- status.update(label="Removing temporary files...")
857
- progress_bar.progress(0.5)
858
- shutil.rmtree(temp_dir)
859
- progress_bar.progress(1.0)
860
- status.update(label="Cleanup completed!", state="complete")
861
-
862
- except Exception as e:
863
- st.error(f"Application error: {str(e)}")
864
 
865
  if __name__ == "__main__":
866
- main()
 
66
  self.status.update(label=f"{message} ({progress:.1%}) - ETA: {remaining:.0f}s")
67
 
68
  class AudioFeatureExtractor:
69
+ """Handles audio feature extraction"""
70
  def __init__(self):
71
  self.sr = 16000
72
  self.hop_length = 512
73
  self.n_fft = 2048
74
  self.chunk_duration = 300
75
 
76
+ def extract_features(self, audio_path: str, transcript: str, progress_callback=None) -> Dict[str, float]:
77
  """Extract audio features with chunked processing"""
78
  try:
79
  if progress_callback:
80
  progress_callback(0.1, "Loading audio file...")
81
 
82
+ # Get audio duration
83
+ with sf.SoundFile(audio_path) as f:
84
+ duration = len(f) / f.samplerate
85
+
86
+ # Calculate words per minute
87
+ words = len(transcript.split())
88
+ words_per_minute = (words / duration) * 60
89
+
90
  features = {
91
  "pitch_mean": 0.0,
92
  "pitch_std": 0.0,
 
95
  "pauses_per_minute": 0.0,
96
  "rising_patterns": 0,
97
  "falling_patterns": 0,
98
+ "variations_per_minute": 0.0,
99
+ "duration": duration,
100
+ "words_per_minute": words_per_minute
101
  }
102
 
103
  # Process audio in chunks
 
116
  accumulated_features.append(chunk_features)
117
 
118
  # Combine features from all chunks
119
+ features.update(self._combine_features(accumulated_features))
120
 
121
  if progress_callback:
122
  progress_callback(1.0, "Feature extraction complete!")
 
127
  logger.error(f"Error in feature extraction: {e}")
128
  raise AudioProcessingError(f"Feature extraction failed: {str(e)}")
129
 
130
+ def _process_chunk(self, chunk: np.ndarray) -> Dict[str, Any]:
131
  """Process a single chunk of audio"""
132
  D = librosa.stft(chunk, n_fft=self.n_fft, hop_length=self.hop_length)
133
  S = np.abs(D)
 
141
  frame_length=self.n_fft
142
  )
143
 
144
+ # Detect silences for pause analysis
145
+ non_silent = librosa.effects.split(chunk, top_db=20)
146
+
147
  return {
148
  "rms": rms,
149
  "f0": f0[voiced_flag == 1] if f0 is not None else np.array([]),
150
+ "duration": len(chunk) / self.sr,
151
+ "pauses": len(non_silent)
152
  }
153
 
154
  def _combine_features(self, features: List[Dict[str, Any]]) -> Dict[str, float]:
 
156
  all_f0 = np.concatenate([f["f0"] for f in features if len(f["f0"]) > 0])
157
  all_rms = np.concatenate([f["rms"] for f in features])
158
  total_duration = sum(f["duration"] for f in features)
159
+ total_pauses = sum(f["pauses"] for f in features)
160
 
161
  pitch_mean = np.mean(all_f0) if len(all_f0) > 0 else 0
162
  pitch_std = np.std(all_f0) if len(all_f0) > 0 else 0
 
166
  "pitch_std": float(pitch_std),
167
  "mean_amplitude": float(np.mean(all_rms)),
168
  "amplitude_deviation": float(np.std(all_rms) / np.mean(all_rms)) if np.mean(all_rms) > 0 else 0,
169
+ "pauses_per_minute": float(total_pauses / (total_duration / 60)),
170
  "rising_patterns": int(np.sum(np.diff(all_f0) > 0)) if len(all_f0) > 1 else 0,
171
  "falling_patterns": int(np.sum(np.diff(all_f0) < 0)) if len(all_f0) > 1 else 0,
172
  "variations_per_minute": float((np.sum(np.diff(all_f0) != 0) if len(all_f0) > 1 else 0) / (total_duration / 60))
 
180
  self.retry_delay = 1
181
 
182
def analyze_content(self, transcript: str, progress_callback=None) -> Dict[str, Any]:
    """Run the full content analysis with retries.

    Combines LLM-based content scoring with speech-pattern metrics; on the
    final failed attempt a default analysis structure is returned instead
    of raising.
    """
    def report(fraction: float, message: str) -> None:
        # Progress reporting is optional; callers may omit the callback.
        if progress_callback:
            progress_callback(fraction, message)

    for attempt in range(self.retry_count):
        try:
            report(0.2, "Analyzing speech patterns...")
            speech_patterns = self._analyze_speech_patterns(transcript)

            report(0.5, "Analyzing teaching content...")
            prompt = self._create_analysis_prompt(transcript)

            response = self.client.chat.completions.create(
                model="gpt-4-turbo-preview",
                messages=[
                    {"role": "system", "content": """
                    You are a teaching expert conducting a detailed analysis.
                    Provide specific examples and clear metrics for each category.
                    Focus on actionable insights and clear evidence.
                    """},
                    {"role": "user", "content": prompt}
                ]
            )

            report(0.8, "Formatting results...")
            # Merge the structured content scores with the speech metrics.
            analysis = self._parse_analysis(response.choices[0].message.content)
            analysis.update(speech_patterns)

            report(1.0, "Content analysis complete!")
            return analysis

        except Exception as e:
            logger.error(f"Content analysis attempt {attempt + 1} failed: {e}")
            if attempt == self.retry_count - 1:
                return self._get_default_analysis()
            # Exponential backoff before the next attempt.
            time.sleep(self.retry_delay * (2 ** attempt))
227
 
228
def _analyze_speech_patterns(self, transcript: str) -> Dict[str, Any]:
    """Analyze speech patterns including filler words and grammar.

    Returns per-minute filler/grammar rates plus example lists. Any failure
    — or an empty transcript — yields zeroed metrics instead of raising.
    """
    default = {
        "filler_words_per_minute": 0,
        "grammar_errors_per_minute": 0,
        "filler_examples": [],
        "grammar_examples": []
    }
    # An empty transcript would make the words/minute estimate divide by
    # zero (masked by the broad except below) and waste an API call, so
    # short-circuit it here.
    words = len(transcript.split())
    if words == 0:
        return default

    try:
        response = self.client.chat.completions.create(
            model="gpt-4-turbo-preview",
            messages=[
                {"role": "system", "content": "Analyze the speech patterns in this transcript."},
                {"role": "user", "content": f"""
                Please analyze this transcript and provide:
                1. Number of filler words (um, uh, like, you know, etc.)
                2. Number of grammatical errors
                3. List of specific examples of each

                Transcript:
                {transcript}
                """}
            ]
        )

        # The counts are pulled out of the model's free-text reply.
        analysis_text = response.choices[0].message.content

        filler_match = re.search(r'(\d+)\s+filler\s+words?', analysis_text, re.IGNORECASE)
        filler_count = int(filler_match.group(1)) if filler_match else 0

        grammar_match = re.search(r'(\d+)\s+grammatical\s+errors?', analysis_text, re.IGNORECASE)
        grammar_count = int(grammar_match.group(1)) if grammar_match else 0

        # Estimate spoken minutes from word count at ~150 words/minute.
        minutes = words / 150

        return {
            "filler_words_per_minute": round(filler_count / minutes, 2),
            "grammar_errors_per_minute": round(grammar_count / minutes, 2),
            "filler_examples": self._extract_examples(analysis_text, "filler"),
            "grammar_examples": self._extract_examples(analysis_text, "grammar")
        }

    except Exception as e:
        logger.error(f"Speech pattern analysis failed: {e}")
        return default
277
 
278
+ def _extract_examples(self, text: str, category: str) -> List[str]:
279
+ """Extract examples from analysis text"""
280
+ examples = []
281
+ lines = text.split('\n')
282
+ in_category = False
 
 
283
 
284
+ for line in lines:
285
+ if category.lower() in line.lower() and "example" in line.lower():
286
+ in_category = True
287
+ continue
288
+ if in_category and line.strip().startswith('-'):
289
+ examples.append(line.strip()[1:].strip())
290
+ elif in_category and line.strip() and not line.strip().startswith('-'):
291
+ in_category = False
292
+
293
+ return examples[:5] # Return top 5 examples
294
+
295
def _create_analysis_prompt(self, transcript: str) -> str:
    """Build the user prompt sent to the LLM for content analysis.

    The categories and 1-5 scales named here are what _parse_analysis
    later looks for in the reply, so keep them in sync.
    """
    return f"""
    Analyze this teaching content considering:

    1. Teaching Effectiveness (1-5 scale)
    - Clarity of explanation
    - Student engagement
    - Knowledge depth

    2. Content Organization (1-5 scale)
    - Logical flow
    - Structure
    - Time management

    3. Communication Skills (1-5 scale)
    - Voice modulation
    - Pace
    - Energy level

    Provide specific examples and metrics for each category.
    Format the response in a clear, structured way.

    Transcript:
    {transcript}
    """
320
 
321
+ def _parse_analysis(self, analysis_text: str) -> Dict[str, Any]:
322
+ """Parse and structure the analysis response"""
323
  try:
324
+ analysis = {
325
+ "effectiveness": {
326
+ "score": 0,
327
+ "strengths": [],
328
+ "improvements": []
329
+ },
330
+ "organization": {
331
+ "score": 0,
332
+ "strengths": [],
333
+ "improvements": []
334
+ },
335
+ "communication": {
336
+ "score": 0,
337
+ "strengths": [],
338
+ "improvements": []
339
+ }
340
+ }
341
+
342
+ current_category = None
343
+ for line in analysis_text.split('\n'):
344
+ line = line.strip()
345
+ if not line:
346
+ continue
347
+
348
+ # Identify category and score
349
+ if "effectiveness" in line.lower():
350
+ current_category = "effectiveness"
351
+ score_match = re.search(r'(\d+)/5', line)
352
+ if score_match:
353
+ analysis[current_category]["score"] = int(score_match.group(1))
354
+ elif "organization" in line.lower():
355
+ current_category = "organization"
356
+ score_match = re.search(r'(\d+)/5', line)
357
+ if score_match:
358
+ analysis[current_category]["score"] = int(score_match.group(1))
359
+ elif "communication" in line.lower():
360
+ current_category = "communication"
361
+ score_match = re.search(r'(\d+)/5', line)
362
+ if score_match:
363
+ analysis[current_category]["score"] = int(score_match.group(1))
364
+
365
+ # Add points to appropriate category
366
+ if current_category and line.startswith('+'):
367
+ analysis[current_category]["strengths"].append(line[1:].strip())
368
+ elif current_category and line.startswith('-'):
369
+ analysis[current_category]["improvements"].append(line[1:].strip())
370
+
371
+ return analysis
372
+
373
  except Exception as e:
374
+ logger.error(f"Error parsing analysis: {e}")
375
+ return self._get_default_analysis()
376
 
377
+ def _get_default_analysis(self) -> Dict[str, Any]:
378
+ """Return default analysis structure"""
379
+ return {
380
+ "effectiveness": {
381
+ "score": 0,
382
+ "strengths": ["Analysis failed"],
383
+ "improvements": ["Analysis failed"]
384
+ },
385
+ "organization": {
386
+ "score": 0,
387
+ "strengths": ["Analysis failed"],
388
+ "improvements": ["Analysis failed"]
389
+ },
390
+ "communication": {
391
+ "score": 0,
392
+ "strengths": ["Analysis failed"],
393
+ "improvements": ["Analysis failed"]
394
+ },
395
+ "filler_words_per_minute": 0,
396
+ "grammar_errors_per_minute": 0,
397
+ "filler_examples": [],
398
+ "grammar_examples": []
399
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
400
 
401
class RecommendationGenerator:
    """Generates teaching recommendations from audio metrics and content analysis."""

    def __init__(self, api_key: str):
        self.client = OpenAI(api_key=api_key)
        # Retry settings kept for parity with ContentAnalyzer.
        self.retry_count = 3
        self.retry_delay = 1

    def generate_recommendations(self, metrics: Dict[str, Any],
                               content_analysis: Dict[str, Any],
                               progress_callback=None) -> Dict[str, Any]:
        """Generate detailed recommendations.

        Args:
            metrics: Aggregated audio/speech metrics.
            content_analysis: Output of ContentAnalyzer.analyze_content.
            progress_callback: Optional (fraction, message) reporter.

        Returns:
            Combined report (metrics, assessment, recommendations); a
            default "error" structure if generation fails.
        """
        try:
            if progress_callback:
                progress_callback(0.2, "Analyzing metrics...")

            prompt = self._create_recommendation_prompt(metrics, content_analysis)

            if progress_callback:
                progress_callback(0.4, "Generating recommendations...")

            response = self.client.chat.completions.create(
                model="gpt-4-turbo-preview",
                messages=[
                    {"role": "system", "content": """
                    Generate specific, actionable teaching recommendations.
                    Focus on practical improvements with clear next steps.
                    Format the response in a clear, easy-to-read structure.
                    """},
                    {"role": "user", "content": prompt}
                ]
            )

            if progress_callback:
                progress_callback(0.8, "Formatting recommendations...")

            recommendations = self._parse_recommendations(response.choices[0].message.content)

            if progress_callback:
                progress_callback(1.0, "Analysis complete!")

            return self._format_output(metrics, content_analysis, recommendations)

        except Exception as e:
            logger.error(f"Error generating recommendations: {e}")
            return self._get_default_recommendations()

    def _create_recommendation_prompt(self, metrics: Dict[str, Any],
                                    content_analysis: Dict[str, Any]) -> str:
        """Build the LLM prompt summarizing metrics and assessment scores."""
        return f"""
        Based on the following teaching performance data, provide specific recommendations:

        Performance Metrics:
        - Speaking Rate: {metrics.get('words_per_minute', 0):.1f} words/minute
        - Pauses: {metrics.get('pauses_per_minute', 0):.1f} pauses/minute
        - Filler Words: {metrics.get('filler_words_per_minute', 0):.1f} per minute
        - Grammar Errors: {metrics.get('grammar_errors_per_minute', 0):.1f} per minute

        Content Assessment:
        Teaching Effectiveness: {content_analysis.get('effectiveness', {}).get('score', 0)}/5
        Organization: {content_analysis.get('organization', {}).get('score', 0)}/5
        Communication: {content_analysis.get('communication', {}).get('score', 0)}/5

        Provide specific recommendations for:
        1. Delivery Improvement
        2. Content Organization
        3. Student Engagement
        4. Professional Development

        For each area:
        - Key actions to take
        - Specific exercises or practices
        - Resources to use
        - Expected outcomes
        """

    def _parse_recommendations(self, response_text: str) -> Dict[str, List[str]]:
        """Parse recommendations into a per-section structured format.

        BUG FIX: the bullet check now runs BEFORE section-keyword
        detection. Previously a bullet that merely mentioned another
        section's keyword (e.g. "- Improve slide organization" under
        Delivery) switched sections and the bullet itself was dropped.
        """
        recommendations = {
            "delivery": [],
            "organization": [],
            "engagement": [],
            "development": []
        }

        current_section = None
        for line in response_text.split('\n'):
            line = line.strip()
            if not line:
                continue

            # Bullets belong to the active section and must not be
            # re-interpreted as section headers.
            if current_section and line.startswith('-'):
                recommendations[current_section].append(line[1:].strip())
            elif "delivery" in line.lower():
                current_section = "delivery"
            elif "organization" in line.lower():
                current_section = "organization"
            elif "engagement" in line.lower():
                current_section = "engagement"
            elif "development" in line.lower():
                current_section = "development"

        return recommendations

    def _format_output(self, metrics: Dict[str, Any],
                     content_analysis: Dict[str, Any],
                     recommendations: Dict[str, List[str]]) -> Dict[str, Any]:
        """Format the final output in a clean, readable structure.

        Status thresholds: speaking rate 120-160 wpm, 2-4 pauses/minute,
        <=3 filler words/minute, <=1 grammar error/minute count as "good".
        """
        return {
            "performance_metrics": {
                "speaking_rate": {
                    "value": round(metrics.get('words_per_minute', 0), 1),
                    "unit": "words/minute",
                    "status": "good" if 120 <= metrics.get('words_per_minute', 0) <= 160 else "needs_improvement"
                },
                "pauses": {
                    "value": round(metrics.get('pauses_per_minute', 0), 1),
                    "unit": "pauses/minute",
                    "status": "good" if 2 <= metrics.get('pauses_per_minute', 0) <= 4 else "needs_improvement"
                },
                "filler_words": {
                    "value": round(metrics.get('filler_words_per_minute', 0), 1),
                    "unit": "per minute",
                    "status": "good" if metrics.get('filler_words_per_minute', 0) <= 3 else "needs_improvement",
                    "examples": metrics.get('filler_examples', [])
                },
                "grammar": {
                    "value": round(metrics.get('grammar_errors_per_minute', 0), 1),
                    "unit": "errors/minute",
                    "status": "good" if metrics.get('grammar_errors_per_minute', 0) <= 1 else "needs_improvement",
                    "examples": metrics.get('grammar_examples', [])
                }
            },
            "content_assessment": {
                "effectiveness": {
                    "score": content_analysis.get('effectiveness', {}).get('score', 0),
                    "strengths": content_analysis.get('effectiveness', {}).get('strengths', []),
                    "improvements": content_analysis.get('effectiveness', {}).get('improvements', [])
                },
                "organization": {
                    "score": content_analysis.get('organization', {}).get('score', 0),
                    "strengths": content_analysis.get('organization', {}).get('strengths', []),
                    "improvements": content_analysis.get('organization', {}).get('improvements', [])
                },
                "communication": {
                    "score": content_analysis.get('communication', {}).get('score', 0),
                    "strengths": content_analysis.get('communication', {}).get('strengths', []),
                    "improvements": content_analysis.get('communication', {}).get('improvements', [])
                }
            },
            "recommendations": {
                "delivery_improvement": recommendations.get('delivery', []),
                "content_organization": recommendations.get('organization', []),
                "student_engagement": recommendations.get('engagement', []),
                "professional_development": recommendations.get('development', [])
            }
        }

    def _get_default_recommendations(self) -> Dict[str, Any]:
        """Return the default report structure used when generation fails."""
        return {
            "performance_metrics": {
                "speaking_rate": {"value": 0, "unit": "words/minute", "status": "error"},
                "pauses": {"value": 0, "unit": "pauses/minute", "status": "error"},
                "filler_words": {"value": 0, "unit": "per minute", "status": "error", "examples": []},
                "grammar": {"value": 0, "unit": "errors/minute", "status": "error", "examples": []}
            },
            "content_assessment": {
                "effectiveness": {"score": 0, "strengths": ["Analysis failed"], "improvements": ["Analysis failed"]},
                "organization": {"score": 0, "strengths": ["Analysis failed"], "improvements": ["Analysis failed"]},
                "communication": {"score": 0, "strengths": ["Analysis failed"], "improvements": ["Analysis failed"]}
            },
            "recommendations": {
                "delivery_improvement": ["Analysis failed"],
                "content_organization": ["Analysis failed"],
                "student_engagement": ["Analysis failed"],
                "professional_development": ["Analysis failed"]
            }
        }
581
 
582
def format_streamlit_output(analysis_results: Dict[str, Any]) -> None:
    """Format and display analysis results in the Streamlit UI."""
    st.header("Teaching Analysis Results")

    # --- Performance metrics: one st.metric per column -------------------
    st.subheader("Performance Metrics")
    metrics = analysis_results["performance_metrics"]

    metric_specs = [
        ("Speaking Rate", "speaking_rate"),
        ("Pauses", "pauses"),
        ("Filler Words", "filler_words"),
        ("Grammar Errors", "grammar"),
    ]
    for column, (label, key) in zip(st.columns(4), metric_specs):
        entry = metrics[key]
        with column:
            st.metric(
                label,
                f"{entry['value']} {entry['unit']}",
                delta="Good" if entry['status'] == "good" else "Needs Improvement"
            )

    # --- Content assessment: expander per scored category ----------------
    st.subheader("Content Assessment")
    assessment = analysis_results["content_assessment"]

    for category in ["effectiveness", "organization", "communication"]:
        with st.expander(f"{category.title()} (Score: {assessment[category]['score']}/5)"):
            left, right = st.columns(2)
            with left:
                st.markdown("**Strengths**")
                for strength in assessment[category]["strengths"]:
                    st.markdown(f"- {strength}")
            with right:
                st.markdown("**Areas for Improvement**")
                for improvement in assessment[category]["improvements"]:
                    st.markdown(f"- {improvement}")

    # --- Recommendations: expander per recommendation area ---------------
    st.subheader("Recommendations")
    for category, items in analysis_results["recommendations"].items():
        with st.expander(category.replace('_', ' ').title()):
            for item in items:
                st.markdown(f"- {item}")
 
645
def main():
    """Streamlit entry point: upload a recording, analyze it, render results."""
    st.title("Teaching Analysis Tool")

    uploaded_file = st.file_uploader("Upload your teaching recording", type=['wav', 'mp3'])

    if uploaded_file:
        with st.spinner("Analyzing your teaching..."):
            progress_bar = st.progress(0)
            status = st.empty()
            progress_tracker = ProgressTracker(status, progress_bar)

            try:
                api_key = st.secrets["OPENAI_API_KEY"]
                audio_processor = AudioFeatureExtractor()
                content_analyzer = ContentAnalyzer(api_key)
                recommendation_generator = RecommendationGenerator(api_key)

                with temporary_file(suffix='.wav') as temp_path:
                    # BUG FIX: Streamlit's UploadedFile has no .save();
                    # persist its bytes to the temp path explicitly.
                    with open(temp_path, 'wb') as out_file:
                        out_file.write(uploaded_file.getbuffer())

                    audio_features = audio_processor.extract_features(
                        temp_path, progress_callback=progress_tracker.update
                    )

                    # BUG FIX: `transcript` was used below but never
                    # produced. Transcribe the recording with Whisper so
                    # content analysis has text to work with.
                    with open(temp_path, 'rb') as audio_file:
                        transcript = OpenAI(api_key=api_key).audio.transcriptions.create(
                            model="whisper-1",
                            file=audio_file,
                            response_format="text"
                        )

                # Analyze content and generate the combined report.
                analysis = content_analyzer.analyze_content(
                    transcript, progress_callback=progress_tracker.update
                )

                results = recommendation_generator.generate_recommendations(
                    audio_features,
                    analysis,
                    progress_callback=progress_tracker.update
                )

                # Display results
                format_streamlit_output(results)

            except Exception as e:
                st.error(f"An error occurred during analysis: {str(e)}")
                logger.error(f"Analysis failed: {e}", exc_info=True)

            finally:
                # Always clear transient progress widgets.
                progress_bar.empty()
                status.empty()
 
 
 
 
 
 
 
 
 
 
 
689
 
690
# Standard script entry guard so importing this module has no side effects.
if __name__ == "__main__":
    main()