Spaces:

sparshmehta
/

main_app

Sleeping

App Files Files Community

sparshmehta committed on Jan 31, 2025

Commit

2984357

verified ·

1 Parent(s): 1f91bc5

Create app.py

Browse files

Files changed (1) hide show

app.py +560 -0

app.py ADDED Viewed

	@@ -0,0 +1,560 @@

+import streamlit as st
+import os
+import numpy as np
+import librosa
+from moviepy.editor import VideoFileClip
+import whisper
+from openai import OpenAI
+import tempfile
+from scipy.signal import find_peaks
+import gc
+import warnings
+import re
+from contextlib import contextmanager
+class CPUMentorEvaluator:
+    def __init__(self):
+        """Initialize the evaluator for CPU usage."""
+        self.api_key = st.secrets["OPENAI_API_KEY"]
+        if not self.api_key:
+            raise ValueError("OpenAI API key not found in secrets")
+        self.client = OpenAI(api_key=self.api_key)
+        self.whisper_model = None
+        self.accent_classifier = None
+    def _clear_memory(self):
+        """Clear memory and run garbage collection."""
+        if hasattr(self, 'whisper_model') and self.whisper_model is not None:
+            del self.whisper_model
+            self.whisper_model = None
+        if hasattr(self, 'accent_classifier') and self.accent_classifier is not None:
+            del self.accent_classifier
+            self.accent_classifier = None
+        gc.collect()
+    @contextmanager
+    def load_whisper_model(self):
+        """Load Whisper model with proper memory management."""
+        try:
+            self._clear_memory()
+            self.whisper_model = whisper.load_model("tiny", device="cpu")
+            yield self.whisper_model
+        finally:
+            if self.whisper_model is not None:
+                del self.whisper_model
+                self.whisper_model = None
+            gc.collect()
+    def extract_audio(self, video_path):
+        """Extract audio from video file with optimized settings."""
+        temp_audio = None
+        video = None
+        try:
+            self._clear_memory()
+            temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
+            video = VideoFileClip(video_path, audio=True, target_resolution=(480,None), verbose=False)
+            video.audio.write_audiofile(temp_audio.name, fps=16000, verbose=False, logger=None)
+            return temp_audio.name
+        except Exception as e:
+            if temp_audio and os.path.exists(temp_audio.name):
+                os.unlink(temp_audio.name)
+            raise Exception(f"Audio extraction failed: {str(e)}")
+        finally:
+            if video:
+                video.close()
+            self._clear_memory()
+    def analyze_audio_features(self, audio_path):
+        """Analyze audio features with optimized memory usage for CPU."""
+        try:
+            CHUNK_SIZE = 60
+            duration = librosa.get_duration(path=audio_path)
+            num_chunks = int(np.ceil(duration / CHUNK_SIZE))
+            pitch_values = []
+            rms_values = []
+            spectral_centroids = []
+            spectral_rolloffs = []
+            mfccs_buffer = []
+            HOP_LENGTH = 512
+            N_FFT = 2048
+            for chunk_idx in range(num_chunks):
+                start_time = chunk_idx * CHUNK_SIZE
+                dur = min(CHUNK_SIZE, duration - start_time)
+                y, sr = librosa.load(audio_path, offset=start_time, duration=dur, sr=16000)
+                with warnings.catch_warnings():
+                    warnings.simplefilter("ignore")
+                    stft = librosa.stft(y, n_fft=N_FFT, hop_length=HOP_LENGTH)
+                    S = np.abs(stft)
+                    rms = np.sqrt(np.mean(S**2, axis=0))
+                    rms_values.extend(rms)
+                    f0, voiced_flag, _ = librosa.pyin(
+                        y,
+                        fmin=librosa.note_to_hz('C2'),
+                        fmax=librosa.note_to_hz('C7'),
+                        sr=sr,
+                        frame_length=N_FFT,
+                        hop_length=HOP_LENGTH,
+                        fill_na=None
+                    )
+                    pitch_values.extend(f0[voiced_flag])
+                    spectral_centroids.extend(librosa.feature.spectral_centroid(
+                        S=S, sr=sr, hop_length=HOP_LENGTH)[0])
+                    spectral_rolloffs.extend(librosa.feature.spectral_rolloff(
+                        S=S, sr=sr, hop_length=HOP_LENGTH)[0])
+                    mfcc = librosa.feature.mfcc(
+                        y=y,
+                        sr=sr,
+                        n_mfcc=8,
+                        n_fft=N_FFT,
+                        hop_length=HOP_LENGTH
+                    )
+                    mfccs_buffer.append(mfcc)
+                del stft, S
+                gc.collect()
+            pitch_array = np.array(pitch_values)
+            rms_array = np.array(rms_values)
+            spectral_centroids = np.array(spectral_centroids)
+            spectral_rolloffs = np.array(spectral_rolloffs)
+            pitch_stats = {
+                'mean': float(np.nanmean(pitch_array)),
+                'std': float(np.nanstd(pitch_array)),
+                'range': float(np.nanpercentile(pitch_array, 95) -
+                             np.nanpercentile(pitch_array, 5))
+            }
+            silence_threshold = np.mean(rms_array) * 0.1
+            silent_frames = rms_array < silence_threshold
+            frame_time = HOP_LENGTH / sr
+            pause_stats = self._analyze_pauses(silent_frames, frame_time)
+            result = {
+                'pitch_analysis': {
+                    'statistics': pitch_stats,
+                    'patterns': {
+                        'rising_count': int(np.sum(np.diff(pitch_values) > 20)),
+                        'falling_count': int(np.sum(np.diff(pitch_values) < -20))
+                    }
+                },
+                'voice_quality': {
+                    'spectral_centroid_mean': float(np.mean(spectral_centroids)),
+                    'spectral_rolloff_mean': float(np.mean(spectral_rolloffs)),
+                    'mfcc_stats': {
+                        'mean': np.mean(np.concatenate(mfccs_buffer, axis=1), axis=1).tolist(),
+                        'std': np.std(np.concatenate(mfccs_buffer, axis=1), axis=1).tolist()
+                    }
+                },
+                'rhythm_analysis': {
+                    'pause_stats': pause_stats,
+                    'tempo': float(librosa.beat.tempo(onset_envelope=librosa.onset.onset_strength(
+                        y=librosa.load(audio_path, duration=30, sr=16000)[0],
+                        sr=16000
+                    ))[0])
+                },
+                'energy_dynamics': {
+                    'rms_energy_mean': float(np.mean(rms_values)),
+                    'rms_energy_std': float(np.std(rms_values)),
+                    'energy_range': float(np.percentile(rms_values, 95) -
+                                       np.percentile(rms_values, 5))
+                }
+            }
+            del pitch_array, rms_array, spectral_centroids, spectral_rolloffs
+            gc.collect()
+            return result
+        except Exception as e:
+            raise Exception(f"Audio analysis failed: {str(e)}")
+        finally:
+            self._clear_memory()
+    def _analyze_pauses(self, silent_frames, frame_time):
+        """Analyze pauses with minimal memory usage."""
+        pause_durations = []
+        current_pause = 0
+        for is_silent in silent_frames:
+            if is_silent:
+                current_pause += 1
+            elif current_pause > 0:
+                duration = current_pause * frame_time
+                if duration > 0.3:  # Only count pauses longer than 300ms
+                    pause_durations.append(duration)
+                current_pause = 0
+        if pause_durations:
+            return {
+                'total_pauses': len(pause_durations),
+                'mean_pause_duration': float(np.mean(pause_durations))
+            }
+        return {
+            'total_pauses': 0,
+            'mean_pause_duration': 0.0
+        }
+    def calculate_speech_metrics(self, transcript, audio_duration):
+        """Calculate words per minute and other speech metrics."""
+        words = len(transcript.split())
+        minutes = audio_duration / 60
+        return {
+            'words_per_minute': words / minutes if minutes > 0 else 0,
+            'total_words': words,
+            'duration_minutes': minutes
+        }
+    def _analyze_voice_quality(self, transcript, audio_features):
+        """Analyze voice quality aspects."""
+        try:
+            prompt = f"""Analyze the following voice metrics for teaching quality:
+Transcript excerpt: {transcript[:1000]}...
+Voice Metrics:
+- Pitch Mean: {audio_features['pitch_analysis']['statistics']['mean']:.1f}Hz
+- Pitch Variation: {audio_features['pitch_analysis']['statistics']['std']:.1f}Hz
+- Energy Dynamics: {audio_features['energy_dynamics']['rms_energy_mean']:.2f}
+Evaluate voice quality focusing on:
+1. Clarity and projection
+2. Emotional engagement
+3. Professional tone
+"""
+            response = self.client.chat.completions.create(
+                model="gpt-4",
+                messages=[
+                    {"role": "system", "content": "You are an expert in voice analysis."},
+                    {"role": "user", "content": prompt}
+                ],
+                max_tokens=500
+            )
+            return response.choices[0].message.content
+        except Exception as e:
+            return f"Voice quality analysis failed: {str(e)}"
+    def _analyze_teaching_content(self, transcript):
+        """Analyze teaching content for accuracy, principles, and examples."""
+        try:
+            prompt = f"""Analyze this teaching transcript for:
+1. Subject Matter Accuracy:
+  - Identify any factual errors, wrong assumptions, or incorrect correlations
+  - Rate accuracy on a scale of 0-1
+2. First Principles Approach:
+  - Evaluate if concepts are built from fundamentals before introducing technical terms
+  - Rate approach on a scale of 0-1
+3. Examples and Business Context:
+  - Assess use of business examples and practical context
+  - Rate contextual relevance on a scale of 0-1
+Transcript: {transcript}...
+Provide specific citations for any identified issues.
+"""
+            response = self.client.chat.completions.create(
+                model="gpt-4",
+                messages=[
+                    {"role": "system", "content": "You are an expert in pedagogical assessment."},
+                    {"role": "user", "content": prompt}
+                ],
+                max_tokens=500
+            )
+            return response.choices[0].message.content
+        except Exception as e:
+            return f"Teaching content analysis failed: {str(e)}"
+    def _analyze_code_explanation(self, transcript):
+        """Analyze code explanation quality."""
+        try:
+            prompt = f"""Analyze the code explanation in this transcript for:
+1. Depth of Explanation:
+  - Evaluate coverage of syntax, libraries, functions, and methods
+  - Rate depth on a scale of 0-1
+2. Output Interpretation:
+  - Assess business context interpretation of results
+  - Rate interpretation on a scale of 0-1
+3. Complexity Breakdown:
+  - Evaluate explanation of code modules and logical flow
+  - Rate breakdown quality on a scale of 0-1
+Transcript: {transcript}...
+Provide specific citations for any identified issues.
+"""
+            response = self.client.chat.completions.create(
+                model="gpt-4",
+                messages=[
+                    {"role": "system", "content": "You are an expert in code review and teaching."},
+                    {"role": "user", "content": prompt}
+                ],
+                max_tokens=500
+            )
+            return response.choices[0].message.content
+        except Exception as e:
+            return f"Code explanation analysis failed: {str(e)}"
+    def generate_enhanced_report(self, video_path):
+        """Generate structured evaluation report."""
+        audio_path = None
+        try:
+            audio_path = self.extract_audio(video_path)
+            with self.load_whisper_model() as model:
+                result = model.transcribe(audio_path)
+                transcript = result["text"]
+            audio_features = self.analyze_audio_features(audio_path)
+            audio_duration = librosa.get_duration(path=audio_path)
+            speech_metrics = self.calculate_speech_metrics(transcript, audio_duration)
+            wpm = speech_metrics['words_per_minute']
+            wpm_score = 1 if 120 <= wpm <= 160 else 0
+            filler_words = len(re.findall(r'\b(um|uh|like|you know|basically)\b', transcript.lower()))
+            fpm = (filler_words / speech_metrics['duration_minutes'])
+            ppm = audio_features['rhythm_analysis']['pause_stats']['total_pauses'] / speech_metrics['duration_minutes']
+            pause_score = 1 if 2 <= ppm <= 8 else 0
+            energy_values = audio_features['energy_dynamics']
+            energy_summary = {
+                'min': np.percentile([energy_values['rms_energy_mean']], 0),
+                'q1': np.percentile([energy_values['rms_energy_mean']], 25),
+                'median': np.percentile([energy_values['rms_energy_mean']], 50),
+                'q3': np.percentile([energy_values['rms_energy_mean']], 75),
+                'max': np.percentile([energy_values['rms_energy_mean']], 100)
+            }
+            teaching_analysis = self._analyze_teaching_content(transcript)
+            code_analysis = self._analyze_code_explanation(transcript)
+            voice_quality = self._analyze_voice_quality(transcript, audio_features)
+            intonation_score = 1 if (audio_features['pitch_analysis']['patterns']['rising_count'] +
+                                   audio_features['pitch_analysis']['patterns']['falling_count']) / speech_metrics['duration_minutes'] > 5 else 0
+            energy_score = 1 if (energy_values['rms_energy_std'] / energy_values['rms_energy_mean']) > 0.2 else 0
+            report = f"""REPORT
+1. COMMUNICATION
+    1. Speech Speed:
+        - Words per Minute: {wpm:.1f}
+        - Score: {wpm_score} (Acceptable range: 120-160 WPM)
+    2. Voice Quality:
+        {voice_quality}
+    3. Fluency:
+        - Fillers per Minute: {fpm:.1f}
+        - Score: {1 if fpm < 3 else 0}
+    4. Break/Flow:
+        - Pauses per Minute: {ppm:.1f}
+        - Score: {pause_score}
+    5. Intonation:
+        - Rising patterns: {audio_features['pitch_analysis']['patterns']['rising_count']}
+        - Falling patterns: {audio_features['pitch_analysis']['patterns']['falling_count']}
+        - Score: {intonation_score}
+    6. Energy:
+        Five-point summary:
+        - Min: {energy_summary['min']:.2f}
+        - Q1: {energy_summary['q1']:.2f}
+        - Median: {energy_summary['median']:.2f}
+        - Q3: {energy_summary['q3']:.2f}
+        - Max: {energy_summary['max']:.2f}
+        - Score: {energy_score}
+2. TEACHING
+    1. Content Analysis:
+        {teaching_analysis}
+    2. Code Explanation:
+        {code_analysis}
+Full Transcript:
+{transcript}
+"""
+            return report
+        except Exception as e:
+            raise Exception(f"Report generation failed: {str(e)}")
+        finally:
+            if audio_path and os.path.exists(audio_path):
+                os.unlink(audio_path)
+            self._clear_memory()
+def create_temp_directory():
+    """Create a temporary directory for file processing."""
+    temp_dir = tempfile.mkdtemp()
+    return temp_dir
+def main():
+    st.set_page_config(
+        page_title="Mentor Speech Evaluator",
+        page_icon="🎓",
+        layout="wide"
+    )
+    st.title("🎓 Mentor Speech Analysis Tool")
+    # Add custom CSS
+    st.markdown("""
+        <style>
+        .stProgress > div > div > div > div {
+            background-color: #1f77b4;
+        }
+        .metric-card {
+            background-color: #f8f9fa;
+            padding: 20px;
+            border-radius: 10px;
+            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+        }
+        </style>
+    """, unsafe_allow_html=True)
+    st.markdown("""
+    This tool analyzes teaching videos and provides detailed feedback on:
+    - Communication quality
+    - Speech patterns
+    - Teaching effectiveness
+    - Code explanation clarity
+    """)
+    # Initialize session state
+    if 'analysis_complete' not in st.session_state:
+        st.session_state.analysis_complete = False
+    if 'report_data' not in st.session_state:
+        st.session_state.report_data = None
+    # File uploader
+    uploaded_file = st.file_uploader("Upload a video file", type=['mp4', 'avi', 'mov', 'mkv'])
+    if uploaded_file:
+        try:
+            if not st.session_state.analysis_complete:
+                # Create progress bar and status
+                progress_bar = st.progress(0)
+                status_text = st.empty()
+                # Save uploaded file temporarily
+                temp_dir = create_temp_directory()
+                temp_video_path = os.path.join(temp_dir, uploaded_file.name)
+                with open(temp_video_path, 'wb') as f:
+                    f.write(uploaded_file.getbuffer())
+                status_text.text("Initializing analysis...")
+                progress_bar.progress(10)
+                # Initialize evaluator
+                evaluator = CPUMentorEvaluator()
+                status_text.text("Processing video...")
+                progress_bar.progress(30)
+                # Generate report
+                report = evaluator.generate_enhanced_report(temp_video_path)
+                st.session_state.report_data = report
+                st.session_state.analysis_complete = True
+                progress_bar.progress(100)
+                status_text.text("Analysis complete!")
+            # Display results
+            if st.session_state.analysis_complete and st.session_state.report_data:
+                report = st.session_state.report_data
+                # Split report into sections
+                sections = report.split('\n\n')
+                # Create tabs for different aspects of analysis
+                tab1, tab2, tab3 = st.tabs(["Communication", "Teaching", "Transcript"])
+                with tab1:
+                    st.subheader("💬 Communication Analysis")
+                    communication_metrics = sections[2] if len(sections) > 2 else "Analysis not available"
+                    # Create metrics display using columns
+                    cols = st.columns(3)
+                    # Extract and display key metrics
+                    if "Words per Minute:" in communication_metrics:
+                        wpm = float(re.search(r"Words per Minute: (\d+\.?\d*)", communication_metrics).group(1))
+                        cols[0].metric("Speech Speed (WPM)", f"{wpm:.1f}")
+                    if "Fillers per Minute:" in communication_metrics:
+                        fpm = float(re.search(r"Fillers per Minute: (\d+\.?\d*)", communication_metrics).group(1))
+                        cols[1].metric("Filler Words (per min)", f"{fpm:.1f}")
+                    if "Pauses per Minute:" in communication_metrics:
+                        ppm = float(re.search(r"Pauses per Minute: (\d+\.?\d*)", communication_metrics).group(1))
+                        cols[2].metric("Pauses (per min)", f"{ppm:.1f}")
+                    st.markdown(communication_metrics)
+                with tab2:
+                    st.subheader("📚 Teaching Analysis")
+                    teaching_metrics = sections[3] if len(sections) > 3 else "Analysis not available"
+                    st.markdown(teaching_metrics)
+                with tab3:
+                    st.subheader("📝 Full Transcript")
+                    transcript_section = sections[-1] if len(sections) > 4 else "Transcript not available"
+                    st.markdown(transcript_section)
+                # Download button for full report
+                st.download_button(
+                    label="📥 Download Full Report",
+                    data=report,
+                    file_name="mentor_analysis_report.txt",
+                    mime="text/plain",
+                    key="download_report"
+                )
+        except Exception as e:
+            st.error(f"An error occurred: {str(e)}")
+        finally:
+            # Cleanup
+            if 'temp_dir' in locals() and os.path.exists(temp_dir):
+                import shutil
+                shutil.rmtree(temp_dir)
+            gc.collect()
+    # Sidebar
+    with st.sidebar:
+        st.markdown("""
+        ### About
+        This tool uses advanced AI to analyze teaching videos and provide feedback on:
+        - Speech speed and clarity
+        - Voice quality and engagement
+        - Teaching effectiveness
+        - Code explanation quality
+        ### Usage Tips
+        1. Upload a video file (MP4, AVI, MOV, or MKV)
+        2. Wait for the analysis to complete
+        3. View results in organized sections
+        4. Download the full report for detailed feedback
+        ### Privacy Note
+        All uploaded videos are processed securely and deleted immediately after analysis.
+        No data is stored permanently.
+        """)
+# Build the page only when this file is run as the entry script.
+if __name__ == "__main__":
+    main()