sparshmehta committed on
Commit
974feca
·
verified ·
1 Parent(s): 6898c78

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +316 -477
app.py CHANGED
@@ -6,568 +6,407 @@ from moviepy.editor import VideoFileClip
6
  import whisper
7
  from openai import OpenAI
8
  import tempfile
9
- from scipy.signal import find_peaks
10
- import gc
11
  import warnings
12
  import re
13
  from contextlib import contextmanager
 
 
 
14
 
15
- class SimplifiedAudioAnalyzer:
16
- def __init__(self, sr=4000, chunk_size=30):
17
- self.sr = sr
18
- self.chunk_size = chunk_size
19
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  def analyze_audio(self, audio_path):
21
- """
22
- Simplified audio analysis with key features extraction
 
 
 
 
23
 
24
- Args:
25
- audio_path (str): Path to the audio file
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
- Returns:
28
- dict: Audio analysis metrics
29
- """
30
- # Load audio with minimal processing
31
- y, _ = librosa.load(audio_path, sr=self.sr, mono=True)
32
 
33
- # Basic features
34
- rms = np.sqrt(np.mean(y**2))
35
- f0 = librosa.yin(y, fmin=librosa.note_to_hz("C2"), fmax=librosa.note_to_hz("C6"))
 
 
 
 
 
 
 
36
 
37
- # Pause detection
38
- amplitude_envelope = np.abs(y)
39
- silence_threshold = np.median(amplitude_envelope) * 0.5
40
- peaks, _ = find_peaks(-amplitude_envelope, height=-silence_threshold)
41
- pause_durations = np.diff(peaks) / self.sr
 
42
 
43
- # Pitch patterns
44
- pitch_valid = f0[np.isfinite(f0)]
 
 
 
 
 
 
 
 
 
45
 
46
  return {
47
- "pitch_analysis": {
48
- "statistics": {
49
- "mean": float(np.nanmean(pitch_valid)) if len(pitch_valid) > 0 else 0,
50
- "std": float(np.nanstd(pitch_valid)) if len(pitch_valid) > 0 else 0,
51
- "range": float(np.nanmax(pitch_valid) - np.nanmin(pitch_valid)) if len(pitch_valid) > 0 else 0
52
- },
53
- "patterns": {
54
- "rising_count": int(np.sum(np.diff(pitch_valid) > 0)),
55
- "falling_count": int(np.sum(np.diff(pitch_valid) < 0))
56
- }
57
- },
58
- "rhythm_analysis": {
59
- "pause_stats": {
60
- "total_pauses": len(peaks),
61
- "mean_pause_duration": float(np.mean(pause_durations)) if len(pause_durations) > 0 else 0
62
- }
63
- },
64
- "energy_dynamics": {
65
- "rms_energy_mean": float(rms),
66
- "rms_energy_std": float(np.std(y)),
67
- "energy_range": float(np.percentile(y, 95) - np.percentile(y, 5))
68
- },
69
- "spectral_centroid_mean": float(librosa.feature.spectral_centroid(y=y, sr=self.sr)[0].mean())
70
  }
71
 
72
- class CPUMentorEvaluator:
73
  def __init__(self):
74
- """Initialize the evaluator for CPU usage."""
75
- self.api_key = st.secrets.get("OPENAI_API_KEY")
76
- if not self.api_key:
77
- raise ValueError("OpenAI API key not found in secrets")
78
-
79
  self.client = OpenAI(api_key=self.api_key)
80
  self.whisper_model = None
81
-
82
- def _clear_memory(self):
83
- """Clear memory and run garbage collection."""
84
- if hasattr(self, 'whisper_model') and self.whisper_model is not None:
85
- del self.whisper_model
86
- self.whisper_model = None
87
- gc.collect()
88
-
89
- @contextmanager
90
- def load_whisper_model(self):
91
- """Load Whisper model with proper memory management."""
92
- try:
93
- self._clear_memory()
94
- self.whisper_model = whisper.load_model("tiny", device="cpu")
95
- yield self.whisper_model
96
- finally:
97
- if self.whisper_model is not None:
98
- del self.whisper_model
99
- self.whisper_model = None
100
- gc.collect()
101
-
102
  def extract_audio(self, video_path):
103
- """Extract audio from video file with optimized settings."""
104
- temp_audio = None
105
- video = None
106
  try:
107
- self._clear_memory()
108
- temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
109
- video = VideoFileClip(video_path, audio=True, target_resolution=(360,None), verbose=False)
110
- video.audio.write_audiofile(temp_audio.name, fps=8000, verbose=False, logger=None)
 
 
 
 
 
 
 
 
 
 
 
111
  return temp_audio.name
112
  except Exception as e:
113
- if temp_audio and os.path.exists(temp_audio.name):
114
  os.unlink(temp_audio.name)
115
- raise Exception(f"Audio extraction failed: {str(e)}")
116
- finally:
117
- if video:
118
- video.close()
119
- self._clear_memory()
120
 
121
- def analyze_audio_features(self, audio_path):
122
- """Simplified audio features analysis."""
123
- analyzer = SimplifiedAudioAnalyzer()
124
- return analyzer.analyze_audio(audio_path)
 
 
 
 
 
 
 
 
 
 
 
125
 
126
- def calculate_speech_metrics(self, transcript, audio_duration):
127
- """Calculate words per minute and other speech metrics."""
 
128
  words = len(transcript.split())
129
- minutes = audio_duration / 60
130
  return {
131
- 'words_per_minute': words / minutes if minutes > 0 else 0,
132
- 'total_words': words,
133
- 'duration_minutes': minutes
134
  }
135
 
136
- def _analyze_voice_quality(self, transcript, audio_features):
137
- """Analyze voice quality with simplified GPT-4o-mini interaction."""
138
  try:
139
- prompt = f"""Analyze the following voice metrics for teaching quality:
140
- Transcript excerpt: {transcript[:]}...
141
- Voice Metrics:
142
- - Pitch Mean: {audio_features['pitch_analysis']['statistics']['mean']:.1f}Hz
143
- - Pitch Variation: {audio_features['pitch_analysis']['statistics']['std']:.1f}Hz
144
- - Energy Dynamics: {audio_features['energy_dynamics']['rms_energy_mean']:.2f}
145
- Evaluate voice quality focusing on:
146
- 1. Clarity and projection
147
- 2. Emotional engagement
148
- 3. Professional tone
149
- """
150
- response = self.client.chat.completions.create(
151
- model="gpt-4o-mini",
152
- messages=[
153
- {"role": "system", "content": "You are an expert in voice analysis."},
154
- {"role": "user", "content": prompt}
155
- ],
156
- max_tokens=500
157
- )
158
- return response.choices[0].message.content
159
- except Exception as e:
160
- return f"Voice quality analysis failed: {str(e)}"
161
 
162
- def _analyze_teaching_content(self, transcript):
163
- """Analyze teaching content for accuracy, principles, and examples."""
164
- try:
165
- prompt = f"""Analyze this teaching transcript for:
166
- 1. Subject Matter Accuracy:
167
- - Identify any factual errors, wrong assumptions, or incorrect correlations
168
- - Rate accuracy on a scale of 0-1
169
- 2. First Principles Approach:
170
- - Evaluate if concepts are built from fundamentals before introducing technical terms
171
- - Rate approach on a scale of 0-1
172
- 3. Examples and Business Context:
173
- - Assess use of business examples and practical context
174
- - Rate contextual relevance on a scale of 0-1
175
- Transcript: {transcript}...
176
- Provide specific citations for any identified issues.
177
- """
178
- response = self.client.chat.completions.create(
179
- model="gpt-4o-mini",
180
- messages=[
181
- {"role": "system", "content": "You are an expert in pedagogical assessment."},
182
- {"role": "user", "content": prompt}
183
- ],
184
- max_tokens=500
185
- )
186
- return response.choices[0].message.content
187
- except Exception as e:
188
- return f"Teaching content analysis failed: {str(e)}"
189
 
190
- def _analyze_code_explanation(self, transcript):
191
- """Analyze code explanation quality."""
192
- try:
193
- prompt = f"""Analyze the code explanation in this transcript for:
194
- 1. Depth of Explanation:
195
- - Evaluate coverage of syntax, libraries, functions, and methods
196
- - Rate depth on a scale of 0-1
197
- 2. Output Interpretation:
198
- - Assess business context interpretation of results
199
- - Rate interpretation on a scale of 0-1
200
- 3. Complexity Breakdown:
201
- - Evaluate explanation of code modules and logical flow
202
- - Rate breakdown quality on a scale of 0-1
203
- Transcript: {transcript}...
204
- Provide specific citations for any identified issues.
205
- """
206
  response = self.client.chat.completions.create(
207
- model="gpt-4o-mini",
208
  messages=[
209
- {"role": "system", "content": "You are an expert in code review and teaching."},
210
  {"role": "user", "content": prompt}
211
  ],
212
- max_tokens=500
 
213
  )
214
  return response.choices[0].message.content
215
  except Exception as e:
216
- return f"Code explanation analysis failed: {str(e)}"
217
 
218
- def generate_enhanced_report(self, video_path):
219
- """Generate structured evaluation report."""
220
  audio_path = None
221
  try:
 
222
  audio_path = self.extract_audio(video_path)
223
-
224
- with self.load_whisper_model() as model:
225
- result = model.transcribe(audio_path)
226
- transcript = result["text"]
227
-
228
- audio_features = self.analyze_audio_features(audio_path)
229
- audio_duration = librosa.get_duration(path=audio_path)
230
- speech_metrics = self.calculate_speech_metrics(transcript, audio_duration)
231
-
232
- wpm = speech_metrics['words_per_minute']
233
- wpm_score = 1 if 120 <= wpm <= 160 else 0
234
-
235
- filler_words = len(re.findall(r'\b(um|uh|like|you know|basically)\b', transcript.lower()))
236
- fpm = (filler_words / speech_metrics['duration_minutes'])
237
-
238
- ppm = audio_features['rhythm_analysis']['pause_stats']['total_pauses'] / speech_metrics['duration_minutes']
239
- pause_score = 1 if 2 <= ppm <= 8 else 0
240
-
241
- energy_values = audio_features['energy_dynamics']
242
- energy_summary = {
243
- 'min': np.percentile([energy_values['rms_energy_mean']], 0),
244
- 'q1': np.percentile([energy_values['rms_energy_mean']], 25),
245
- 'median': np.percentile([energy_values['rms_energy_mean']], 50),
246
- 'q3': np.percentile([energy_values['rms_energy_mean']], 75),
247
- 'max': np.percentile([energy_values['rms_energy_mean']], 100)
 
 
248
  }
249
-
250
- teaching_analysis = self._analyze_teaching_content(transcript)
251
- code_analysis = self._analyze_code_explanation(transcript)
252
- voice_quality = self._analyze_voice_quality(transcript, audio_features)
253
-
254
- intonation_score = 1 if (audio_features['pitch_analysis']['patterns']['rising_count'] +
255
- audio_features['pitch_analysis']['patterns']['falling_count']) / speech_metrics['duration_minutes'] > 5 else 0
256
-
257
- energy_score = 1 if (energy_values['rms_energy_std'] / energy_values['rms_energy_mean']) > 0.2 else 0
258
-
259
- report = f"""REPORT
260
- 1. COMMUNICATION
261
- 1. Speech Speed:
262
- - Words per Minute: {wpm:.1f}
263
- - Score: {wpm_score} (Acceptable range: 120-160 WPM)
264
- 2. Voice Quality:
265
- {voice_quality}
266
- 3. Fluency:
267
- - Fillers per Minute: {fpm:.1f}
268
- - Score: {1 if fpm < 3 else 0}
269
- 4. Break/Flow:
270
- - Pauses per Minute: {ppm:.1f}
271
- - Score: {pause_score}
272
- 5. Intonation:
273
- - Rising patterns: {audio_features['pitch_analysis']['patterns']['rising_count']}
274
- - Falling patterns: {audio_features['pitch_analysis']['patterns']['falling_count']}
275
- - Score: {intonation_score}
276
- 6. Energy:
277
- Five-point summary:
278
- - Min: {energy_summary['min']:.2f}
279
- - Q1: {energy_summary['q1']:.2f}
280
- - Median: {energy_summary['median']:.2f}
281
- - Q3: {energy_summary['q3']:.2f}
282
- - Max: {energy_summary['max']:.2f}
283
- - Score: {energy_score}
284
- 2. TEACHING
285
- 1. Content Analysis:
286
- {teaching_analysis}
287
- 2. Code Explanation:
288
- {code_analysis}
289
- Full Transcript:
290
- {transcript}
291
- """
292
  return report
293
 
294
- except Exception as e:
295
- raise Exception(f"Report generation failed: {str(e)}")
296
  finally:
297
  if audio_path and os.path.exists(audio_path):
298
  os.unlink(audio_path)
299
- self._clear_memory()
300
 
301
- def create_temp_directory():
302
- """Create a temporary directory for file processing."""
303
- temp_dir = tempfile.mkdtemp()
304
- return temp_dir
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
305
 
306
  def main():
307
- st.set_page_config(
308
- page_title="Mentor Demo Review Tool",
309
- page_icon="🎓",
310
- layout="wide"
311
- )
312
-
313
- # Custom CSS for better styling
314
  st.markdown("""
315
  <style>
316
- .metric-value {
317
- font-size: 24px;
318
- font-weight: bold;
319
- color: #1f77b4;
320
- }
321
- .metric-label {
322
- font-size: 14px;
323
- color: #666;
324
- }
325
- .section-card {
326
- background-color: white;
327
- padding: 20px;
328
- border-radius: 10px;
329
- box-shadow: 0 2px 4px rgba(0,0,0,0.1);
330
- margin-bottom: 20px;
331
- }
332
- .score-good { color: #28a745; }
333
- .score-warning { color: #ffc107; }
334
- .score-poor { color: #dc3545; }
335
- .analysis-section {
336
- margin-top: 20px;
337
- padding: 15px;
338
- border-left: 3px solid #1f77b4;
339
- background-color: #f8f9fa;
340
- }
341
  </style>
342
  """, unsafe_allow_html=True)
343
-
344
  st.title("🎓 Mentor Demo Review Tool")
345
-
346
- # Sidebar with instructions
347
  with st.sidebar:
348
  st.header("Instructions")
349
  st.markdown("""
350
  1. Upload your teaching video
351
- 2. Wait for analysis to complete
352
- 3. Review the detailed feedback
353
- 4. Download the full report
354
 
355
  **Supported Formats:**
356
  - MP4
357
  - AVI
358
  - MOV
359
- - MKV
360
-
361
- **Analysis Includes:**
362
- - Speech metrics
363
- - Teaching quality
364
- - Voice analysis
365
- - Content evaluation
366
  """)
367
 
368
  st.markdown("---")
369
- st.markdown("### Privacy Note")
370
- st.info("Videos are processed securely and deleted immediately after analysis.")
371
-
372
- # Main content
373
- uploaded_file = st.file_uploader("Upload your teaching video", type=['mp4', 'avi', 'mov', 'mkv'])
374
-
375
  if uploaded_file:
376
  try:
377
- if not st.session_state.get('analysis_complete', False):
378
- with st.status("Analyzing video...", expanded=True) as status:
379
- st.write("Saving video file...")
380
- temp_dir = create_temp_directory()
381
- temp_video_path = os.path.join(temp_dir, uploaded_file.name)
382
-
383
- with open(temp_video_path, 'wb') as f:
384
- f.write(uploaded_file.getbuffer())
385
-
386
- st.write("Initializing analysis...")
387
- evaluator = CPUMentorEvaluator()
388
-
389
- st.write("Generating report...")
390
- report = evaluator.generate_enhanced_report(temp_video_path)
391
- st.session_state.report_data = report
392
- st.session_state.analysis_complete = True
393
-
394
- status.update(label="Analysis complete!", state="complete", expanded=False)
395
-
396
- if st.session_state.get('analysis_complete', False):
397
- report = st.session_state.report_data
398
 
399
- # Create tabs for organized display
400
- comm_tab, teach_tab, trans_tab = st.tabs([
401
- "📊 Communication Analysis",
402
- "๐Ÿ“ Teaching Evaluation",
403
- "📄 Transcript"
404
- ])
405
 
406
- with comm_tab:
407
- st.markdown("## ๐ŸŽฏ Communication Analysis")
408
-
409
- # Speech Metrics Section
410
- st.markdown("### Speech Metrics")
411
- col1, col2, col3 = st.columns(3)
412
-
413
- # Extract and display all communication metrics
414
- speech_section = re.search(r"1\. COMMUNICATION(.*?)2\. TEACHING", report, re.DOTALL)
415
- if speech_section:
416
- speech_text = speech_section.group(1)
417
-
418
- # Speech Speed
419
- wpm_match = re.search(r"Words per Minute: (\d+\.?\d*)", speech_text)
420
- if wpm_match:
421
- wpm = float(wpm_match.group(1))
422
- with col1:
423
- st.markdown("#### Speech Speed")
424
- color = "good" if 120 <= wpm <= 160 else "warning"
425
- st.markdown(f'<div class="metric-value score-{color}">{wpm:.1f} WPM</div>', unsafe_allow_html=True)
426
- st.markdown('<div class="metric-label">Target: 120-160 WPM</div>', unsafe_allow_html=True)
427
-
428
- # Fluency
429
- fpm_match = re.search(r"Fillers per Minute: (\d+\.?\d*)", speech_text)
430
- if fpm_match:
431
- fpm = float(fpm_match.group(1))
432
- with col2:
433
- st.markdown("#### Fluency")
434
- color = "good" if fpm < 3 else "poor"
435
- st.markdown(f'<div class="metric-value score-{color}">{fpm:.1f} FPM</div>', unsafe_allow_html=True)
436
- st.markdown('<div class="metric-label">Fillers per Minute</div>', unsafe_allow_html=True)
437
-
438
- # Pauses
439
- ppm_match = re.search(r"Pauses per Minute: (\d+\.?\d*)", speech_text)
440
- if ppm_match:
441
- ppm = float(ppm_match.group(1))
442
- with col3:
443
- st.markdown("#### Strategic Pauses")
444
- color = "good" if 2 <= ppm <= 8 else "warning"
445
- st.markdown(f'<div class="metric-value score-{color}">{ppm:.1f} PPM</div>', unsafe_allow_html=True)
446
- st.markdown('<div class="metric-label">Pauses per Minute</div>', unsafe_allow_html=True)
447
-
448
- # Voice Quality Analysis
449
- st.markdown("### 🎤 Voice Quality Analysis")
450
- voice_section = re.search(r"Voice Quality:(.*?)3\. Fluency:", report, re.DOTALL)
451
- if voice_section:
452
- with st.expander("Detailed Voice Analysis", expanded=True):
453
- st.markdown(voice_section.group(1).strip())
454
-
455
- # Intonation Analysis
456
- st.markdown("### 📈 Intonation Patterns")
457
- intonation_section = re.search(r"5\. Intonation:(.*?)6\. Energy:", report, re.DOTALL)
458
- if intonation_section:
459
- with st.expander("Intonation Analysis", expanded=True):
460
- st.markdown(intonation_section.group(1).strip())
461
-
462
- # Energy Analysis
463
- st.markdown("### ⚡ Energy Profile")
464
- energy_section = re.search(r"6\. Energy:(.*?)2\. TEACHING", report, re.DOTALL)
465
- if energy_section:
466
- with st.expander("Energy Analysis", expanded=True):
467
- st.markdown(energy_section.group(1).strip())
468
 
469
- with teach_tab:
470
- st.markdown("## 📚 Teaching Analysis")
 
 
 
 
 
471
 
472
- # Content Analysis
473
- st.markdown("### Content Analysis")
474
- content_section = re.search(r"Content Analysis:(.*?)Code Explanation:", report, re.DOTALL)
475
- if content_section:
476
- with st.expander("Detailed Content Analysis", expanded=True):
477
- content_analysis = content_section.group(1).strip()
478
-
479
- # Parse and display scores
480
- accuracy_score = re.search(r"Rate accuracy.*?(\d+\.?\d*)", content_analysis)
481
- principles_score = re.search(r"Rate approach.*?(\d+\.?\d*)", content_analysis)
482
- context_score = re.search(r"Rate contextual.*?(\d+\.?\d*)", content_analysis)
483
-
484
- col1, col2, col3 = st.columns(3)
485
- if accuracy_score:
486
- with col1:
487
- score = float(accuracy_score.group(1))
488
- color = "good" if score >= 0.8 else "warning" if score >= 0.6 else "poor"
489
- st.markdown("#### Content Accuracy")
490
- st.markdown(f'<div class="metric-value score-{color}">{score:.2f}</div>', unsafe_allow_html=True)
491
-
492
- if principles_score:
493
- with col2:
494
- score = float(principles_score.group(1))
495
- color = "good" if score >= 0.8 else "warning" if score >= 0.6 else "poor"
496
- st.markdown("#### First Principles")
497
- st.markdown(f'<div class="metric-value score-{color}">{score:.2f}</div>', unsafe_allow_html=True)
498
-
499
- if context_score:
500
- with col3:
501
- score = float(context_score.group(1))
502
- color = "good" if score >= 0.8 else "warning" if score >= 0.6 else "poor"
503
- st.markdown("#### Business Context")
504
- st.markdown(f'<div class="metric-value score-{color}">{score:.2f}</div>', unsafe_allow_html=True)
505
-
506
- st.markdown("#### Detailed Analysis")
507
- st.markdown(content_analysis)
508
 
509
- # Code Explanation Analysis
510
- st.markdown("### ๐Ÿ’ป Code Explanation Quality")
511
- code_section = re.search(r"Code Explanation:(.*?)Full Transcript:", report, re.DOTALL)
512
- if code_section:
513
- with st.expander("Code Teaching Analysis", expanded=True):
514
- code_analysis = code_section.group(1).strip()
515
-
516
- # Parse and display scores
517
- depth_score = re.search(r"Rate depth.*?(\d+\.?\d*)", code_analysis)
518
- interpretation_score = re.search(r"Rate interpretation.*?(\d+\.?\d*)", code_analysis)
519
- breakdown_score = re.search(r"Rate breakdown.*?(\d+\.?\d*)", code_analysis)
520
-
521
- col1, col2, col3 = st.columns(3)
522
- if depth_score:
523
- with col1:
524
- score = float(depth_score.group(1))
525
- color = "good" if score >= 0.8 else "warning" if score >= 0.6 else "poor"
526
- st.markdown("#### Explanation Depth")
527
- st.markdown(f'<div class="metric-value score-{color}">{score:.2f}</div>', unsafe_allow_html=True)
528
-
529
- if interpretation_score:
530
- with col2:
531
- score = float(interpretation_score.group(1))
532
- color = "good" if score >= 0.8 else "warning" if score >= 0.6 else "poor"
533
- st.markdown("#### Output Interpretation")
534
- st.markdown(f'<div class="metric-value score-{color}">{score:.2f}</div>', unsafe_allow_html=True)
535
-
536
- if breakdown_score:
537
- with col3:
538
- score = float(breakdown_score.group(1))
539
- color = "good" if score >= 0.8 else "warning" if score >= 0.6 else "poor"
540
- st.markdown("#### Complexity Breakdown")
541
- st.markdown(f'<div class="metric-value score-{color}">{score:.2f}</div>', unsafe_allow_html=True)
542
-
543
- st.markdown("#### Detailed Analysis")
544
- st.markdown(code_analysis)
545
 
546
- with trans_tab:
547
- st.markdown("## ๐Ÿ“ Full Transcript")
548
- transcript_section = re.search(r"Full Transcript:(.*?)(?=\Z)", report, re.DOTALL)
549
- if transcript_section:
550
- st.markdown(transcript_section.group(1).strip())
551
 
552
  # Download button
 
553
  st.download_button(
554
- label="📥 Download Full Report",
555
- data=report,
556
- file_name="mentor_analysis_report.txt",
557
- mime="text/plain",
558
- help="Download the complete analysis report including all metrics and recommendations"
559
  )
560
-
561
  except Exception as e:
562
- st.error(f"An error occurred during analysis: {str(e)}")
563
- st.error("Please try uploading the video again or contact support if the issue persists.")
564
-
565
  finally:
566
  # Cleanup
567
- if 'temp_dir' in locals() and os.path.exists(temp_dir):
568
  import shutil
569
  shutil.rmtree(temp_dir)
570
  gc.collect()
571
 
572
  if __name__ == "__main__":
573
- main()
 
6
  import whisper
7
  from openai import OpenAI
8
  import tempfile
 
 
9
  import warnings
10
  import re
11
  from contextlib import contextmanager
12
+ import gc
13
+ from concurrent.futures import ThreadPoolExecutor
14
+ import pandas as pd
15
 
16
+ class LightweightAudioAnalyzer:
17
+ def __init__(self):
18
+ self.sr = 4000 # Reduced sample rate
19
+ self.hop_length = 1024 # Increased hop length for faster processing
20
+ self.n_fft = 2048
21
+ self.chunk_duration = 120 # Increased chunk size for fewer iterations
22
+
23
+ def _process_chunk(self, audio_path, offset, chunk_duration):
24
+ """Process a single audio chunk"""
25
+ y, _ = librosa.load(
26
+ audio_path,
27
+ offset=offset,
28
+ duration=chunk_duration,
29
+ sr=self.sr,
30
+ mono=True
31
+ )
32
+
33
+ with warnings.catch_warnings():
34
+ warnings.simplefilter("ignore")
35
+ stft = librosa.stft(y, n_fft=self.n_fft, hop_length=self.hop_length)
36
+ rms = librosa.feature.rms(S=np.abs(stft), hop_length=self.hop_length)[0]
37
+
38
+ pitches, _ = librosa.piptrack(
39
+ S=np.abs(stft),
40
+ sr=self.sr,
41
+ hop_length=self.hop_length,
42
+ fmin=50,
43
+ fmax=400
44
+ )
45
+
46
+ chunk_data = {
47
+ 'rms': rms,
48
+ 'pitch': np.mean(pitches, axis=0)
49
+ }
50
+
51
+ del y, stft, pitches
52
+ return chunk_data
53
+
54
  def analyze_audio(self, audio_path):
55
+ """Parallel audio analysis with minimal memory usage"""
56
+ duration = librosa.get_duration(path=audio_path)
57
+ chunks = range(0, int(duration), self.chunk_duration)
58
+
59
+ pitch_data = []
60
+ rms_data = []
61
 
62
+ # Process chunks in parallel
63
+ with ThreadPoolExecutor(max_workers=3) as executor:
64
+ futures = []
65
+ for offset in chunks:
66
+ chunk_duration = min(self.chunk_duration, duration - offset)
67
+ futures.append(
68
+ executor.submit(self._process_chunk, audio_path, offset, chunk_duration)
69
+ )
70
+
71
+ # Collect results
72
+ for future in futures:
73
+ chunk_data = future.result()
74
+ rms_data.extend(chunk_data['rms'])
75
+ pitch_data.extend(chunk_data['pitch'])
76
 
77
+ # Calculate pause statistics
78
+ silence_threshold = np.mean(rms_data) * 0.1
79
+ silent_frames = np.array(rms_data) < silence_threshold
80
+ frame_time = self.hop_length / self.sr
 
81
 
82
+ pause_durations = []
83
+ current_pause = 0
84
+ for is_silent in silent_frames:
85
+ if is_silent:
86
+ current_pause += 1
87
+ elif current_pause > 0:
88
+ duration = current_pause * frame_time
89
+ if duration > 0.3: # Only count pauses > 300ms
90
+ pause_durations.append(duration)
91
+ current_pause = 0
92
 
93
+ # Compile statistics
94
+ pitch_stats = {
95
+ "mean": float(np.nanmean(pitch_data)),
96
+ "std": float(np.nanstd(pitch_data)),
97
+ "range": float(np.nanpercentile(pitch_data, 95) - np.nanpercentile(pitch_data, 5))
98
+ }
99
 
100
+ energy_stats = {
101
+ "mean": float(np.mean(rms_data)),
102
+ "std": float(np.std(rms_data)),
103
+ "range": float(np.percentile(rms_data, 95) - np.percentile(rms_data, 5))
104
+ }
105
+
106
+ pause_stats = {
107
+ "total_pauses": len(pause_durations),
108
+ "mean_duration": float(np.mean(pause_durations)) if pause_durations else 0.0,
109
+ "pauses_per_minute": len(pause_durations) / (duration / 60)
110
+ }
111
 
112
  return {
113
+ "pitch_analysis": {"statistics": pitch_stats},
114
+ "energy_dynamics": energy_stats,
115
+ "pause_analysis": pause_stats
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  }
117
 
118
+ class OptimizedMentorEvaluator:
119
  def __init__(self):
120
+ self.api_key = st.secrets["OPENAI_API_KEY"]
 
 
 
 
121
  self.client = OpenAI(api_key=self.api_key)
122
  self.whisper_model = None
123
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  def extract_audio(self, video_path):
125
+ """Optimized audio extraction"""
126
+ temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
 
127
  try:
128
+ video = VideoFileClip(
129
+ video_path,
130
+ audio=True,
131
+ target_resolution=(240, None),
132
+ fps_source="tbr"
133
+ )
134
+ video.audio.write_audiofile(
135
+ temp_audio.name,
136
+ fps=8000,
137
+ nbytes=2,
138
+ codec='pcm_s16le',
139
+ verbose=False,
140
+ logger=None
141
+ )
142
+ video.close()
143
  return temp_audio.name
144
  except Exception as e:
145
+ if os.path.exists(temp_audio.name):
146
  os.unlink(temp_audio.name)
147
+ raise e
 
 
 
 
148
 
149
+ def transcribe_audio(self, audio_path):
150
+ """Optimized transcription"""
151
+ try:
152
+ model = whisper.load_model("tiny", device="cpu")
153
+ result = model.transcribe(
154
+ audio_path,
155
+ fp16=False,
156
+ language="en",
157
+ task="transcribe",
158
+ beam_size=3
159
+ )
160
+ return result["text"]
161
+ finally:
162
+ del model
163
+ gc.collect()
164
 
165
+ def _analyze_speaking_patterns(self, transcript):
166
+ """Analyze speaking patterns and filler words"""
167
+ filler_words = len(re.findall(r'\b(um|uh|like|you know|basically)\b', transcript.lower()))
168
  words = len(transcript.split())
169
+
170
  return {
171
+ "filler_word_count": filler_words,
172
+ "total_words": words,
173
+ "filler_word_rate": filler_words / words if words > 0 else 0
174
  }
175
 
176
+ def analyze_content(self, transcript):
177
+ """Comprehensive content analysis"""
178
  try:
179
+ prompt = f"""Analyze this teaching transcript (3 sentences max for each category):
180
+ 1. Content Clarity (0-1):
181
+ 2. Examples Usage (0-1):
182
+ 3. Technical Accuracy (0-1):
183
+ 4. Areas for Improvement:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
 
185
+ Transcript: {transcript[:2000]}..."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  response = self.client.chat.completions.create(
188
+ model="gpt-3.5-turbo",
189
  messages=[
190
+ {"role": "system", "content": "You are a teaching expert. Be concise."},
191
  {"role": "user", "content": prompt}
192
  ],
193
+ max_tokens=300,
194
+ temperature=0.7
195
  )
196
  return response.choices[0].message.content
197
  except Exception as e:
198
+ return f"Analysis failed: {str(e)}"
199
 
200
+ def generate_report(self, video_path):
201
+ """Generate comprehensive report"""
202
  audio_path = None
203
  try:
204
+ # Extract and analyze audio
205
  audio_path = self.extract_audio(video_path)
206
+ analyzer = LightweightAudioAnalyzer()
207
+
208
+ with st.status("Analyzing audio..."):
209
+ audio_features = analyzer.analyze_audio(audio_path)
210
+
211
+ with st.status("Transcribing speech..."):
212
+ transcript = self.transcribe_audio(audio_path)
213
+
214
+ # Calculate metrics
215
+ duration = librosa.get_duration(path=audio_path)
216
+ speaking_patterns = self._analyze_speaking_patterns(transcript)
217
+ wpm = (speaking_patterns["total_words"] / duration) * 60
218
+
219
+ with st.status("Analyzing content..."):
220
+ content_analysis = self.analyze_content(transcript)
221
+
222
+ # Compile report
223
+ report = {
224
+ "speech_metrics": {
225
+ "wpm": wpm,
226
+ "duration_minutes": duration / 60,
227
+ "total_words": speaking_patterns["total_words"],
228
+ "filler_word_rate": speaking_patterns["filler_word_rate"]
229
+ },
230
+ "audio_analysis": audio_features,
231
+ "content_analysis": content_analysis,
232
+ "transcript": transcript
233
  }
234
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
  return report
236
 
 
 
237
  finally:
238
  if audio_path and os.path.exists(audio_path):
239
  os.unlink(audio_path)
240
+ gc.collect()
241
 
242
+ def create_metrics_display(metrics):
243
+ """Create formatted metrics display"""
244
+ col1, col2, col3 = st.columns(3)
245
+
246
+ with col1:
247
+ st.metric(
248
+ "Speaking Rate",
249
+ f"{metrics['speech_metrics']['wpm']:.1f} WPM",
250
+ delta="Good" if 120 <= metrics['speech_metrics']['wpm'] <= 160 else "Review",
251
+ delta_color="normal"
252
+ )
253
+
254
+ with col2:
255
+ filler_rate = metrics['speech_metrics']['filler_word_rate'] * 100
256
+ st.metric(
257
+ "Filler Words",
258
+ f"{filler_rate:.1f}%",
259
+ delta="Good" if filler_rate < 5 else "Review",
260
+ delta_color="normal"
261
+ )
262
+
263
+ with col3:
264
+ pauses = metrics['audio_analysis']['pause_analysis']['pauses_per_minute']
265
+ st.metric(
266
+ "Pauses/Minute",
267
+ f"{pauses:.1f}",
268
+ delta="Good" if 2 <= pauses <= 8 else "Review",
269
+ delta_color="normal"
270
+ )
271
+
272
+ def create_downloadable_report(metrics):
273
+ """Create formatted report for download"""
274
+ report = f"""MENTOR DEMO ANALYSIS REPORT
275
+ Generated on: {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')}
276
+
277
+ 1. SPEECH METRICS
278
+ ----------------
279
+ Speaking Rate: {metrics['speech_metrics']['wpm']:.1f} WPM
280
+ Duration: {metrics['speech_metrics']['duration_minutes']:.1f} minutes
281
+ Total Words: {metrics['speech_metrics']['total_words']}
282
+ Filler Word Rate: {metrics['speech_metrics']['filler_word_rate']*100:.1f}%
283
+
284
+ 2. AUDIO ANALYSIS
285
+ ----------------
286
+ Pitch Statistics:
287
+ - Mean: {metrics['audio_analysis']['pitch_analysis']['statistics']['mean']:.1f} Hz
288
+ - Variation: {metrics['audio_analysis']['pitch_analysis']['statistics']['std']:.1f} Hz
289
+
290
+ Energy Dynamics:
291
+ - Mean: {metrics['audio_analysis']['energy_dynamics']['mean']:.2f}
292
+ - Variation: {metrics['audio_analysis']['energy_dynamics']['std']:.2f}
293
+
294
+ Pause Analysis:
295
+ - Total Pauses: {metrics['audio_analysis']['pause_analysis']['total_pauses']}
296
+ - Average Duration: {metrics['audio_analysis']['pause_analysis']['mean_duration']:.2f}s
297
+ - Pauses per Minute: {metrics['audio_analysis']['pause_analysis']['pauses_per_minute']:.1f}
298
+
299
+ 3. CONTENT ANALYSIS
300
+ -----------------
301
+ {metrics['content_analysis']}
302
+
303
+ 4. TRANSCRIPT
304
+ -----------
305
+ {metrics['transcript']}
306
+ """
307
+ return report
308
 
309
  def main():
310
+ st.set_page_config(page_title="Mentor Review Tool", layout="wide")
311
+
312
+ # Custom CSS
 
 
 
 
313
  st.markdown("""
314
  <style>
315
+ .metric-value { font-size: 24px; font-weight: bold; color: #1f77b4; }
316
+ .metric-label { font-size: 14px; color: #666; }
317
+ .stTabs > div > div { padding-top: 20px; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
318
  </style>
319
  """, unsafe_allow_html=True)
320
+
321
  st.title("🎓 Mentor Demo Review Tool")
322
+
323
+ # Sidebar
324
  with st.sidebar:
325
  st.header("Instructions")
326
  st.markdown("""
327
  1. Upload your teaching video
328
+ 2. Wait for analysis (~5-10 minutes)
329
+ 3. Review the feedback
330
+ 4. Download the report
331
 
332
  **Supported Formats:**
333
  - MP4
334
  - AVI
335
  - MOV
 
 
 
 
 
 
 
336
  """)
337
 
338
  st.markdown("---")
339
+ st.markdown("### Processing Time")
340
+ st.info("Analysis typically takes 5-10 minutes for a 15-minute video.")
341
+
342
+ uploaded_file = st.file_uploader("Upload your teaching video", type=['mp4', 'avi', 'mov'])
343
+
 
344
  if uploaded_file:
345
  try:
346
+ with st.spinner("Processing video..."):
347
+ # Save uploaded file
348
+ temp_dir = tempfile.mkdtemp()
349
+ temp_video_path = os.path.join(temp_dir, uploaded_file.name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
350
 
351
+ with open(temp_video_path, 'wb') as f:
352
+ f.write(uploaded_file.getbuffer())
 
 
 
 
353
 
354
+ # Generate report
355
+ evaluator = OptimizedMentorEvaluator()
356
+ metrics = evaluator.generate_report(temp_video_path)
357
+
358
+ # Display results
359
+ st.success("Analysis complete!")
360
+
361
+ # Create tabs for organization
362
+ tabs = st.tabs(["📊 Metrics", "🎙️ Audio Analysis", "📝 Content", "📄 Transcript"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
363
 
364
+ with tabs[0]:
365
+ st.header("Key Metrics")
366
+ create_metrics_display(metrics)
367
+
368
+ with tabs[1]:
369
+ st.header("Audio Analysis")
370
+ col1, col2 = st.columns(2)
371
 
372
+ with col1:
373
+ st.subheader("Pitch Analysis")
374
+ pitch_stats = metrics['audio_analysis']['pitch_analysis']['statistics']
375
+ st.write(f"Mean Pitch: {pitch_stats['mean']:.1f} Hz")
376
+ st.write(f"Pitch Variation: {pitch_stats['std']:.1f} Hz")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
377
 
378
+ with col2:
379
+ st.subheader("Energy Analysis")
380
+ energy_stats = metrics['audio_analysis']['energy_dynamics']
381
+ st.write(f"Mean Energy: {energy_stats['mean']:.2f}")
382
+ st.write(f"Energy Variation: {energy_stats['std']:.2f}")
383
+
384
+ with tabs[2]:
385
+ st.header("Content Analysis")
386
+ st.write(metrics['content_analysis'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
387
 
388
+ with tabs[3]:
389
+ st.header("Transcript")
390
+ st.write(metrics['transcript'])
 
 
391
 
392
  # Download button
393
+ report = create_downloadable_report(metrics)
394
  st.download_button(
395
+ "📥 Download Full Report",
396
+ report,
397
+ "mentor_analysis_report.txt",
398
+ "text/plain"
 
399
  )
400
+
401
  except Exception as e:
402
+ st.error(f"Error: {str(e)}")
403
+
 
404
  finally:
405
  # Cleanup
406
+ if 'temp_dir' in locals():
407
  import shutil
408
  shutil.rmtree(temp_dir)
409
  gc.collect()
410
 
411
  if __name__ == "__main__":
412
+ main()