sparshmehta commited on
Commit
0178c47
·
verified ·
1 Parent(s): db6cc7f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +461 -636
app.py CHANGED
@@ -66,19 +66,27 @@ class ProgressTracker:
66
  self.status.update(label=f"{message} ({progress:.1%}) - ETA: {remaining:.0f}s")
67
 
68
  class AudioFeatureExtractor:
69
- """Handles audio feature extraction with improved memory management"""
70
  def __init__(self):
71
  self.sr = 16000
72
  self.hop_length = 512
73
  self.n_fft = 2048
74
  self.chunk_duration = 300
75
 
76
- def extract_features(self, audio_path: str, progress_callback=None) -> Dict[str, float]:
77
  """Extract audio features with chunked processing"""
78
  try:
79
  if progress_callback:
80
  progress_callback(0.1, "Loading audio file...")
81
 
 
 
 
 
 
 
 
 
82
  features = {
83
  "pitch_mean": 0.0,
84
  "pitch_std": 0.0,
@@ -87,7 +95,9 @@ class AudioFeatureExtractor:
87
  "pauses_per_minute": 0.0,
88
  "rising_patterns": 0,
89
  "falling_patterns": 0,
90
- "variations_per_minute": 0.0
 
 
91
  }
92
 
93
  # Process audio in chunks
@@ -106,7 +116,7 @@ class AudioFeatureExtractor:
106
  accumulated_features.append(chunk_features)
107
 
108
  # Combine features from all chunks
109
- features = self._combine_features(accumulated_features)
110
 
111
  if progress_callback:
112
  progress_callback(1.0, "Feature extraction complete!")
@@ -117,7 +127,7 @@ class AudioFeatureExtractor:
117
  logger.error(f"Error in feature extraction: {e}")
118
  raise AudioProcessingError(f"Feature extraction failed: {str(e)}")
119
 
120
- def _process_chunk(self, chunk: np.ndarray) -> Dict[str, float]:
121
  """Process a single chunk of audio"""
122
  D = librosa.stft(chunk, n_fft=self.n_fft, hop_length=self.hop_length)
123
  S = np.abs(D)
@@ -131,10 +141,14 @@ class AudioFeatureExtractor:
131
  frame_length=self.n_fft
132
  )
133
 
 
 
 
134
  return {
135
  "rms": rms,
136
  "f0": f0[voiced_flag == 1] if f0 is not None else np.array([]),
137
- "duration": len(chunk) / self.sr
 
138
  }
139
 
140
  def _combine_features(self, features: List[Dict[str, Any]]) -> Dict[str, float]:
@@ -142,6 +156,7 @@ class AudioFeatureExtractor:
142
  all_f0 = np.concatenate([f["f0"] for f in features if len(f["f0"]) > 0])
143
  all_rms = np.concatenate([f["rms"] for f in features])
144
  total_duration = sum(f["duration"] for f in features)
 
145
 
146
  pitch_mean = np.mean(all_f0) if len(all_f0) > 0 else 0
147
  pitch_std = np.std(all_f0) if len(all_f0) > 0 else 0
@@ -151,7 +166,7 @@ class AudioFeatureExtractor:
151
  "pitch_std": float(pitch_std),
152
  "mean_amplitude": float(np.mean(all_rms)),
153
  "amplitude_deviation": float(np.std(all_rms) / np.mean(all_rms)) if np.mean(all_rms) > 0 else 0,
154
- "pauses_per_minute": float(len(librosa.effects.split(np.concatenate([f["rms"] for f in features]), top_db=20)) / (total_duration / 60)),
155
  "rising_patterns": int(np.sum(np.diff(all_f0) > 0)) if len(all_f0) > 1 else 0,
156
  "falling_patterns": int(np.sum(np.diff(all_f0) < 0)) if len(all_f0) > 1 else 0,
157
  "variations_per_minute": float((np.sum(np.diff(all_f0) != 0) if len(all_f0) > 1 else 0) / (total_duration / 60))
@@ -165,702 +180,512 @@ class ContentAnalyzer:
165
  self.retry_delay = 1
166
 
167
  def analyze_content(self, transcript: str, progress_callback=None) -> Dict[str, Any]:
168
- """Analyze teaching content with retry logic and robust JSON handling"""
169
  for attempt in range(self.retry_count):
170
  try:
171
  if progress_callback:
172
- progress_callback(0.2, "Preparing content analysis...")
173
 
174
- prompt = self._create_analysis_prompt(transcript)
 
175
 
176
  if progress_callback:
177
- progress_callback(0.5, "Processing with AI model...")
 
 
 
178
 
179
  response = self.client.chat.completions.create(
180
- model="gpt-4o-mini",
181
  messages=[
182
- {"role": "system", "content": "You are a teaching expert providing a structured JSON analysis. Always respond with a valid JSON object."},
 
 
 
 
183
  {"role": "user", "content": prompt}
184
- ],
185
- response_format={"type": "json_object"}
186
  )
187
 
188
  if progress_callback:
189
  progress_callback(0.8, "Formatting results...")
190
 
191
- # Ensure we have valid JSON
192
- result_text = response.choices[0].message.content.strip()
193
-
194
- try:
195
- result = json.loads(result_text)
196
- except json.JSONDecodeError:
197
- # Fallback to a default structure if JSON parsing fails
198
- result = {
199
- "subjectMatterAccuracy": {"score": 0, "citations": []},
200
- "firstPrinciplesApproach": {"score": 0, "citations": []},
201
- "examplesAndContext": {"score": 0, "citations": []},
202
- "cohesiveStorytelling": {"score": 0, "citations": []},
203
- "engagement": {"score": 0, "citations": []},
204
- "professionalTone": {"score": 0, "citations": []}
205
- }
206
 
207
  if progress_callback:
208
  progress_callback(1.0, "Content analysis complete!")
209
 
210
- return result
211
 
212
  except Exception as e:
213
  logger.error(f"Content analysis attempt {attempt + 1} failed: {e}")
214
  if attempt == self.retry_count - 1:
215
- # Return a default structure on final failure
216
- return {
217
- "subjectMatterAccuracy": {"score": 0, "citations": []},
218
- "firstPrinciplesApproach": {"score": 0, "citations": []},
219
- "examplesAndContext": {"score": 0, "citations": []},
220
- "cohesiveStorytelling": {"score": 0, "citations": []},
221
- "engagement": {"score": 0, "citations": []},
222
- "professionalTone": {"score": 0, "citations": []}
223
- }
224
- time.sleep(self.retry_delay * (2 ** attempt))
225
-
226
- def _create_analysis_prompt(self, transcript: str) -> str:
227
- """Create the analysis prompt"""
228
- return f"""Analyze this teaching content and provide scores and citations:
229
- Transcript: {transcript}
230
- For each category below, provide:
231
- 1. Score (0 or 1)
232
- 2. Supporting citations with timestamps (if score is 0, cite problematic areas)
233
- Concept Assessment:
234
- 1. Subject Matter Accuracy
235
- 2. First Principles Approach
236
- 3. Examples and Business Context
237
- 4. Cohesive Storytelling
238
- 5. Engagement and Interaction
239
- 6. Professional Tone
240
- Code Assessment:
241
- 1. Depth of Explanation
242
- 2. Output Interpretation
243
- 3. Breaking down Complexity
244
- Format as JSON."""
245
-
246
- class RecommendationGenerator:
247
- """Generates teaching recommendations using OpenAI API"""
248
- def __init__(self, api_key: str):
249
- self.client = OpenAI(api_key=api_key)
250
- self.retry_count = 3
251
- self.retry_delay = 1
252
-
253
- def generate_recommendations(self,
254
- metrics: Dict[str, Any],
255
- content_analysis: Dict[str, Any],
256
- progress_callback=None) -> Dict[str, Any]:
257
- """Generate recommendations with robust JSON handling"""
258
- for attempt in range(self.retry_count):
259
- try:
260
- if progress_callback:
261
- progress_callback(0.2, "Preparing recommendation analysis...")
262
-
263
- prompt = self._create_recommendation_prompt(metrics, content_analysis)
264
-
265
- if progress_callback:
266
- progress_callback(0.5, "Generating recommendations...")
267
-
268
- response = self.client.chat.completions.create(
269
- model="gpt-4o-mini",
270
- messages=[
271
- {"role": "system", "content": "You are a teaching expert providing actionable recommendations. Always respond with a valid JSON object."},
272
- {"role": "user", "content": prompt}
273
- ],
274
- response_format={"type": "json_object"}
275
- )
276
-
277
- if progress_callback:
278
- progress_callback(0.8, "Formatting recommendations...")
279
-
280
- # Ensure we have valid JSON
281
- result_text = response.choices[0].message.content.strip()
282
-
283
- try:
284
- result = json.loads(result_text)
285
- except json.JSONDecodeError:
286
- # Fallback to a default structure if JSON parsing fails
287
- result = {
288
- "geographyFit": "Unknown",
289
- "improvements": [
290
- "Unable to generate specific recommendations"
291
- ],
292
- "rigor": "Undetermined"
293
- }
294
-
295
- if progress_callback:
296
- progress_callback(1.0, "Recommendations complete!")
297
-
298
- return result
299
-
300
- except Exception as e:
301
- logger.error(f"Recommendation generation attempt {attempt + 1} failed: {e}")
302
- if attempt == self.retry_count - 1:
303
- # Return a default structure on final failure
304
- return {
305
- "geographyFit": "Unknown",
306
- "improvements": [
307
- "Unable to generate specific recommendations"
308
- ],
309
- "rigor": "Undetermined"
310
- }
311
  time.sleep(self.retry_delay * (2 ** attempt))
312
 
313
- def _create_recommendation_prompt(self, metrics: Dict[str, Any], content_analysis: Dict[str, Any]) -> str:
314
- """Create the recommendation prompt"""
315
- return f"""Based on the following metrics and analysis, provide recommendations:
316
- Metrics: {json.dumps(metrics)}
317
- Content Analysis: {json.dumps(content_analysis)}
318
- Provide:
319
- 1. Specific improvements needed
320
- 2. Rigor assessment considering technical and teaching abilities
321
- Format as JSON with keys: geographyFit, improvements (array), rigor"""
322
-
323
- class MentorEvaluator:
324
- """Main class for video evaluation"""
325
- def __init__(self, model_cache_dir: Optional[str] = None):
326
- """Initialize with proper model caching"""
327
- self.api_key = st.secrets["OPENAI_API_KEY"]
328
- if not self.api_key:
329
- raise ValueError("OPENAI_API_KEY environment variable must be set")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
330
 
331
- # Create a specific directory for the whisper model
332
- if model_cache_dir:
333
- self.model_cache_dir = model_cache_dir
334
- else:
335
- # Create a persistent directory in the user's home directory
336
- self.model_cache_dir = os.path.join(os.path.expanduser("~"), ".cache", "whisper")
337
- os.makedirs(self.model_cache_dir, exist_ok=True)
338
 
339
- self._whisper_model = None
340
- self._feature_extractor = None
341
- self._content_analyzer = None
342
- self._recommendation_generator = None
343
-
344
- @property
345
- def whisper_model(self):
346
- """Lazy loading of whisper model with proper cache directory handling"""
347
- if self._whisper_model is None:
348
- try:
349
- # First try to load from cache
350
- self._whisper_model = WhisperModel(
351
- "small",
352
- device="cpu",
353
- compute_type="int8",
354
- download_root=self.model_cache_dir,
355
- local_files_only=True
356
- )
357
- except Exception as e:
358
- logger.info(f"Could not load model from cache, downloading: {e}")
359
- # If loading from cache fails, download the model
360
- self._whisper_model = WhisperModel(
361
- "small",
362
- device="cpu",
363
- compute_type="int8",
364
- download_root=self.model_cache_dir,
365
- local_files_only=False
366
- )
367
- logger.info("Model downloaded successfully")
368
- return self._whisper_model
369
-
370
- @property
371
- def feature_extractor(self):
372
- """Lazy loading of feature extractor"""
373
- if self._feature_extractor is None:
374
- self._feature_extractor = AudioFeatureExtractor()
375
- return self._feature_extractor
376
-
377
- @property
378
- def content_analyzer(self):
379
- """Lazy loading of content analyzer"""
380
- if self._content_analyzer is None:
381
- self._content_analyzer = ContentAnalyzer(api_key=self.api_key)
382
- return self._content_analyzer
383
-
384
- @property
385
- def recommendation_generator(self):
386
- """Lazy loading of recommendation generator"""
387
- if self._recommendation_generator is None:
388
- self._recommendation_generator = RecommendationGenerator(api_key=self.api_key)
389
- return self._recommendation_generator
390
-
391
- def evaluate_video(self, video_path: str) -> Dict[str, Any]:
392
- """Evaluate video with proper resource management"""
393
- with temporary_file(suffix=".wav") as temp_audio:
394
- try:
395
- # Extract audio
396
- with st.status("Extracting audio...") as status:
397
- progress_bar = st.progress(0)
398
- tracker = ProgressTracker(status, progress_bar)
399
- self._extract_audio(video_path, temp_audio, tracker.update)
400
-
401
- # Extract features
402
- with st.status("Extracting audio features...") as status:
403
- progress_bar = st.progress(0)
404
- tracker = ProgressTracker(status, progress_bar)
405
- audio_features = self.feature_extractor.extract_features(
406
- temp_audio,
407
- tracker.update
408
- )
409
-
410
- # Transcribe
411
- with st.status("Transcribing audio...") as status:
412
- progress_bar = st.progress(0)
413
- tracker = ProgressTracker(status, progress_bar)
414
- transcript = self._transcribe_audio(temp_audio, tracker.update)
415
 
416
- # Analyze content
417
- with st.status("Analyzing content...") as status:
418
- progress_bar = st.progress(0)
419
- tracker = ProgressTracker(status, progress_bar)
420
- content_analysis = self.content_analyzer.analyze_content(
421
- transcript,
422
- tracker.update
423
- )
424
 
425
- # Evaluate speech
426
- with st.status("Evaluating speech metrics...") as status:
427
- progress_bar = st.progress(0)
428
- tracker = ProgressTracker(status, progress_bar)
429
- speech_metrics = self._evaluate_speech_metrics(
430
- transcript,
431
- audio_features,
432
- tracker.update
433
- )
434
 
435
- # Generate recommendations
436
- with st.status("Generating recommendations...") as status:
437
- progress_bar = st.progress(0)
438
- tracker = ProgressTracker(status, progress_bar)
439
- recommendations = self.recommendation_generator.generate_recommendations(
440
- speech_metrics,
441
- content_analysis,
442
- tracker.update
443
- )
444
 
445
- return {
446
- "communication": speech_metrics,
447
- "teaching": content_analysis,
448
- "recommendations": recommendations,
449
- "transcript": transcript
450
- }
451
 
452
- except Exception as e:
453
- logger.error(f"Error in video evaluation: {e}")
454
- raise
455
 
456
- def _extract_audio(self, video_path: str, output_path: str, progress_callback=None) -> str:
457
- """Extract audio from video"""
458
  try:
459
- if progress_callback:
460
- progress_callback(0.1, "Checking dependencies...")
461
-
462
- if not shutil.which('ffmpeg'):
463
- raise AudioProcessingError("FFmpeg is not installed")
464
-
465
- if not os.path.exists(video_path):
466
- raise FileNotFoundError(f"Video file not found: {video_path}")
467
-
468
- if not os.access(os.path.dirname(output_path), os.W_OK):
469
- raise AudioProcessingError(f"No write permission for output directory: {os.path.dirname(output_path)}")
470
-
471
- if progress_callback:
472
- progress_callback(0.3, "Configuring audio extraction...")
473
-
474
- ffmpeg_cmd = [
475
- 'ffmpeg',
476
- '-i', video_path,
477
- '-ar', '16000',
478
- '-ac', '1',
479
- '-f', 'wav',
480
- '-v', 'warning',
481
- '-y',
482
- output_path
483
- ]
484
-
485
- if progress_callback:
486
- progress_callback(0.5, "Extracting audio...")
487
-
488
- result = subprocess.run(
489
- ffmpeg_cmd,
490
- capture_output=True,
491
- text=True
492
- )
493
-
494
- if result.returncode != 0:
495
- raise AudioProcessingError(f"FFmpeg Error: {result.stderr}")
496
-
497
- if progress_callback:
498
- progress_callback(1.0, "Audio extraction complete!")
499
-
500
- return output_path
501
-
 
 
 
 
 
 
502
  except Exception as e:
503
- logger.error(f"Error in audio extraction: {e}")
504
- raise AudioProcessingError(f"Audio extraction failed: {str(e)}")
505
 
506
- def _transcribe_audio(self, audio_path: str, progress_callback=None) -> str:
507
- """Transcribe audio with improved memory management"""
508
- try:
509
- if progress_callback:
510
- progress_callback(0.1, "Loading transcription model...")
511
-
512
- audio_info = sf.info(audio_path)
513
- total_duration = audio_info.duration
514
- chunk_duration = 5 * 60 # 5-minute chunks
515
- overlap_duration = 10 # 10-second overlap
516
-
517
- transcripts = []
518
- total_chunks = int(np.ceil(total_duration / (chunk_duration - overlap_duration)))
519
-
520
- with sf.SoundFile(audio_path) as f:
521
- for i in range(total_chunks):
522
- if progress_callback:
523
- progress_callback(0.4 + (i / total_chunks) * 0.4,
524
- f"Transcribing chunk {i + 1}/{total_chunks}...")
525
-
526
- # Calculate positions in samples
527
- start_sample = int(i * (chunk_duration - overlap_duration) * f.samplerate)
528
- f.seek(start_sample)
529
- chunk = f.read(frames=int(chunk_duration * f.samplerate))
530
-
531
- with temporary_file(suffix=".wav") as chunk_path:
532
- sf.write(chunk_path, chunk, f.samplerate)
533
- # The fix: properly handle the segments from faster-whisper
534
- segments, _ = self.whisper_model.transcribe(chunk_path)
535
- # Combine all segment texts
536
- chunk_text = ' '.join(segment.text for segment in segments)
537
- transcripts.append(chunk_text)
538
-
539
- if progress_callback:
540
- progress_callback(1.0, "Transcription complete!")
541
-
542
- return " ".join(transcripts)
543
-
544
- except Exception as e:
545
- logger.error(f"Error in transcription: {e}")
546
- raise
547
 
548
- def _evaluate_speech_metrics(self, transcript: str, audio_features: Dict[str, float],
 
 
 
 
 
 
 
 
549
  progress_callback=None) -> Dict[str, Any]:
550
- """Evaluate speech metrics with improved error handling"""
551
  try:
552
  if progress_callback:
553
- progress_callback(0.2, "Calculating basic metrics...")
554
-
555
- # Calculate duration based on word count and average speaking rate
556
- words = len(transcript.split())
557
- duration_minutes = len(transcript) / 500 # Approximate duration
558
- wpm = words / duration_minutes if duration_minutes > 0 else 0
559
-
560
  if progress_callback:
561
- progress_callback(0.4, "Analyzing filler words...")
562
-
563
- filler_words = len(re.findall(r'\b(um|uh|like|you know)\b', transcript.lower()))
564
- fillers_per_minute = filler_words / duration_minutes if duration_minutes > 0 else 0
565
-
 
 
 
 
 
 
 
 
 
566
  if progress_callback:
567
- progress_callback(0.6, "Checking grammar...")
568
-
569
- error_patterns = r'\b(is|are|was|were)\s+\w+ing\b'
570
- grammatical_errors = len(re.findall(error_patterns, transcript))
571
- errors_per_minute = grammatical_errors / duration_minutes if duration_minutes > 0 else 0
572
-
573
  if progress_callback:
574
- progress_callback(0.8, "Compiling results...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
575
 
576
- metrics = {
577
- "speed": {
578
- "score": 1 if 120 <= wpm <= 160 else 0,
579
- "wpm": wpm
 
 
 
 
 
 
580
  },
581
- "fluency": {
582
- "score": 1 if fillers_per_minute <= 5 else 0,
583
- "fillersPerMin": fillers_per_minute,
584
- "errorsPerMin": errors_per_minute
585
  },
586
- "flow": {
587
- "score": 1 if 10 <= audio_features["pauses_per_minute"] <= 15 else 0,
588
- "pausesPerMin": audio_features["pauses_per_minute"]
 
 
589
  },
590
- "intonation": {
591
- "pitch": audio_features["pitch_mean"],
592
- "pitchScore": 1 if 77 <= audio_features["pitch_std"] <= 92 else 0,
593
- "pitchVariation": audio_features["pitch_std"],
594
- "patternScore": 1 if audio_features["variations_per_minute"] > 5 else 0,
595
- "risingPatterns": audio_features["rising_patterns"],
596
- "fallingPatterns": audio_features["falling_patterns"],
597
- "variationsPerMin": audio_features["variations_per_minute"]
 
 
 
 
 
 
 
 
 
598
  },
599
- "energy": {
600
- "score": 1 if audio_features["mean_amplitude"] > 100 else 0,
601
- "meanAmplitude": audio_features["mean_amplitude"],
602
- "amplitudeDeviation": audio_features["amplitude_deviation"]
603
  }
 
 
 
 
 
 
604
  }
 
605
 
606
- if progress_callback:
607
- progress_callback(1.0, "Speech metrics complete!")
608
-
609
- return metrics
610
-
611
- except Exception as e:
612
- logger.error(f"Error in speech metrics evaluation: {e}")
613
- raise
 
 
 
 
 
 
 
 
 
 
 
 
 
614
 
615
- def validate_video_file(file_path: str):
616
- """Validate video file before processing"""
617
- valid_extensions = {'.mp4', '.avi', '.mov'}
618
 
619
- if not os.path.exists(file_path):
620
- raise ValueError("Video file does not exist")
621
-
622
- if os.path.splitext(file_path)[1].lower() not in valid_extensions:
623
- raise ValueError("Unsupported video format")
624
-
625
- if os.path.getsize(file_path) > 2 * 1024 * 1024 * 1024: # 2GB
626
- raise ValueError("File size exceeds 2GB limit")
627
-
628
- try:
629
- probe = subprocess.run(
630
- ['ffprobe', '-v', 'quiet', file_path],
631
- capture_output=True,
632
- text=True
633
  )
634
- if probe.returncode != 0:
635
- raise ValueError("Invalid video file")
636
- except subprocess.SubprocessError:
637
- raise ValueError("Unable to validate video file")
638
-
639
- def display_evaluation(evaluation: Dict[str, Any]):
640
- """Display evaluation results with improved error handling"""
641
- try:
642
- tabs = st.tabs(["Communication", "Teaching", "Recommendations", "Transcript"])
643
-
644
- with tabs[0]:
645
- with st.status("Loading communication metrics...") as status:
646
- progress_bar = st.progress(0)
647
- progress_bar.progress(0.2)
648
- st.header("Communication")
649
-
650
- # Speed metrics
651
- st.subheader("Speed")
652
- col1, col2 = st.columns(2)
653
- with col1:
654
- st.metric("Score", "Pass" if evaluation["communication"]["speed"]["score"] == 1
655
- else "Need Improvement")
656
- with col2:
657
- st.metric("Words per Minute",
658
- f"{evaluation['communication']['speed']['wpm']:.1f}")
659
- st.caption("Acceptable Range: 120-160 WPM")
660
- progress_bar.progress(0.4)
661
-
662
- # Fluency metrics
663
- st.subheader("Fluency")
664
- col1, col2, col3 = st.columns(3)
665
- with col1:
666
- st.metric("Score", "Pass" if evaluation["communication"]["fluency"]["score"] == 1
667
- else "Need Improvement")
668
- with col2:
669
- st.metric("Fillers/Min",
670
- f"{evaluation['communication']['fluency']['fillersPerMin']:.1f}")
671
- with col3:
672
- st.metric("Errors/Min",
673
- f"{evaluation['communication']['fluency']['errorsPerMin']:.1f}")
674
- progress_bar.progress(0.6)
675
-
676
- # Flow metrics
677
- st.subheader("Flow")
678
- col1, col2 = st.columns(2)
679
- with col1:
680
- st.metric("Score", "Pass" if evaluation["communication"]["flow"]["score"] == 1
681
- else "Need Improvement")
682
- with col2:
683
- st.metric("Pauses/Min",
684
- f"{evaluation['communication']['flow']['pausesPerMin']:.1f}")
685
-
686
- # Intonation metrics
687
- st.subheader("Intonation")
688
- col1, col2 = st.columns(2)
689
- with col1:
690
- st.metric("Pitch Score", "Pass" if evaluation["communication"]["intonation"]["pitchScore"] == 1
691
- else "Need Improvement")
692
- with col2:
693
- st.metric("Pattern Score", "Pass" if evaluation["communication"]["intonation"]["patternScore"] == 1
694
- else "Need Improvement")
695
- progress_bar.progress(0.8)
696
-
697
- # Energy metrics
698
- st.subheader("Energy")
699
- st.metric("Score", "Pass" if evaluation["communication"]["energy"]["score"] == 1
700
- else "Need Improvement")
701
- progress_bar.progress(1.0)
702
- status.update(label="Communication metrics loaded!", state="complete")
703
-
704
- # Teaching tab
705
- with tabs[1]:
706
- st.header("Teaching Analysis")
707
- st.json(evaluation["teaching"])
708
-
709
- # Recommendations tab
710
- with tabs[2]:
711
- st.header("Recommendations")
712
- st.json(evaluation["recommendations"])
713
-
714
- # Transcript tab
715
- with tabs[3]:
716
- st.header("Transcript")
717
- st.text(evaluation["transcript"])
718
-
719
- except Exception as e:
720
- logger.error(f"Error displaying evaluation: {e}")
721
- st.error(f"Error displaying results: {str(e)}")
722
-
723
- def check_dependencies() -> List[str]:
724
- """Check if required dependencies are installed"""
725
- missing = []
726
 
727
- if not shutil.which('ffmpeg'):
728
- missing.append("FFmpeg")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
729
 
730
- return missing
 
 
 
 
 
 
 
731
 
732
  def main():
733
- try:
734
- st.set_page_config(page_title="🎓 Mentor Demo Review System", layout="wide")
735
-
736
- st.title("🎓 Mentor Demo Review System")
737
-
738
- # Check dependencies with progress
739
- with st.status("Checking system requirements...") as status:
 
740
  progress_bar = st.progress(0)
741
-
742
- status.update(label="Checking FFmpeg installation...")
743
- progress_bar.progress(0.3)
744
- missing_deps = check_dependencies()
745
-
746
- progress_bar.progress(0.6)
747
- if missing_deps:
748
- status.update(label="Missing dependencies detected!", state="error")
749
- st.error(f"Missing required dependencies: {', '.join(missing_deps)}")
750
- st.markdown("""
751
- Please install the missing dependencies:
752
- ```bash
753
- sudo apt-get update
754
- sudo apt-get install ffmpeg
755
- ```
756
- """)
757
- return
758
-
759
- progress_bar.progress(1.0)
760
- status.update(label="System requirements satisfied!", state="complete")
761
-
762
- with st.sidebar:
763
- st.header("Instructions")
764
- st.markdown("""
765
- 1. Upload your teaching video
766
- 2. Wait for the analysis
767
- 3. Review the detailed feedback
768
- 4. Download the report
769
-
770
- **Supported formats:** MP4, AVI, MOV
771
- **Maximum file size:** 500mb
772
- """)
773
-
774
- st.header("Processing Status")
775
- st.info("Upload a video to begin analysis")
776
-
777
- uploaded_file = st.file_uploader(
778
- "Upload Teaching Video",
779
- type=['mp4', 'avi', 'mov'],
780
- help="Upload your teaching video in MP4, AVI, or MOV format"
781
- )
782
-
783
- if uploaded_file:
784
- # Create temp directory for processing
785
- temp_dir = tempfile.mkdtemp()
786
- video_path = os.path.join(temp_dir, uploaded_file.name)
787
 
788
  try:
789
- # Save uploaded file with progress
790
- with st.status("Saving uploaded file...") as status:
791
- progress_bar = st.progress(0)
792
-
793
- # Save in chunks to show progress
794
- chunk_size = 1024 * 1024 # 1MB chunks
795
- file_size = len(uploaded_file.getbuffer())
796
- chunks = file_size // chunk_size + 1
797
-
798
- with open(video_path, 'wb') as f:
799
- for i in range(chunks):
800
- start = i * chunk_size
801
- end = min(start + chunk_size, file_size)
802
- f.write(uploaded_file.getbuffer()[start:end])
803
- progress = (i + 1) / chunks
804
- status.update(label=f"Saving file: {progress:.1%}")
805
- progress_bar.progress(progress)
806
-
807
- status.update(label="File saved successfully!", state="complete")
808
 
809
- # Validate file size
810
- file_size = os.path.getsize(video_path) / (1024 * 1024 * 1024) # Size in GB
811
- if file_size > 2:
812
- st.error("File size exceeds 2GB limit. Please upload a smaller file.")
813
- return
814
 
815
- # Process video
816
- with st.spinner("Processing video"):
817
- evaluator = MentorEvaluator()
818
- evaluation = evaluator.evaluate_video(video_path)
 
 
819
 
820
  # Display results
821
- st.success("Analysis complete!")
822
- display_evaluation(evaluation)
823
-
824
- # Add download button with progress
825
- with st.status("Preparing download...") as status:
826
- progress_bar = st.progress(0)
827
-
828
- status.update(label="Formatting JSON...")
829
- progress_bar.progress(0.3)
830
-
831
- json_str = json.dumps(evaluation, indent=2)
832
-
833
- progress_bar.progress(0.6)
834
- status.update(label="Creating download button...")
835
-
836
- st.download_button(
837
- "📥 Download Full Report",
838
- json_str,
839
- "evaluation_report.json",
840
- "application/json",
841
- help="Download the complete evaluation report in JSON format"
842
- )
843
-
844
- progress_bar.progress(1.0)
845
- status.update(label="Download ready!", state="complete")
846
 
847
  except Exception as e:
848
- st.error(f"Error during evaluation: {str(e)}")
849
-
 
850
  finally:
851
- # Clean up temp files with progress
852
- with st.status("Cleaning up...") as status:
853
- progress_bar = st.progress(0)
854
-
855
- if 'temp_dir' in locals():
856
- status.update(label="Removing temporary files...")
857
- progress_bar.progress(0.5)
858
- shutil.rmtree(temp_dir)
859
- progress_bar.progress(1.0)
860
- status.update(label="Cleanup completed!", state="complete")
861
-
862
- except Exception as e:
863
- st.error(f"Application error: {str(e)}")
864
 
865
  if __name__ == "__main__":
866
- main()
 
66
  self.status.update(label=f"{message} ({progress:.1%}) - ETA: {remaining:.0f}s")
67
 
68
  class AudioFeatureExtractor:
69
+ """Handles audio feature extraction"""
70
  def __init__(self):
71
  self.sr = 16000
72
  self.hop_length = 512
73
  self.n_fft = 2048
74
  self.chunk_duration = 300
75
 
76
+ def extract_features(self, audio_path: str, transcript: str, progress_callback=None) -> Dict[str, float]:
77
  """Extract audio features with chunked processing"""
78
  try:
79
  if progress_callback:
80
  progress_callback(0.1, "Loading audio file...")
81
 
82
+ # Get audio duration
83
+ with sf.SoundFile(audio_path) as f:
84
+ duration = len(f) / f.samplerate
85
+
86
+ # Calculate words per minute
87
+ words = len(transcript.split())
88
+ words_per_minute = (words / duration) * 60
89
+
90
  features = {
91
  "pitch_mean": 0.0,
92
  "pitch_std": 0.0,
 
95
  "pauses_per_minute": 0.0,
96
  "rising_patterns": 0,
97
  "falling_patterns": 0,
98
+ "variations_per_minute": 0.0,
99
+ "duration": duration,
100
+ "words_per_minute": words_per_minute
101
  }
102
 
103
  # Process audio in chunks
 
116
  accumulated_features.append(chunk_features)
117
 
118
  # Combine features from all chunks
119
+ features.update(self._combine_features(accumulated_features))
120
 
121
  if progress_callback:
122
  progress_callback(1.0, "Feature extraction complete!")
 
127
  logger.error(f"Error in feature extraction: {e}")
128
  raise AudioProcessingError(f"Feature extraction failed: {str(e)}")
129
 
130
+ def _process_chunk(self, chunk: np.ndarray) -> Dict[str, Any]:
131
  """Process a single chunk of audio"""
132
  D = librosa.stft(chunk, n_fft=self.n_fft, hop_length=self.hop_length)
133
  S = np.abs(D)
 
141
  frame_length=self.n_fft
142
  )
143
 
144
+ # Detect silences for pause analysis
145
+ non_silent = librosa.effects.split(chunk, top_db=20)
146
+
147
  return {
148
  "rms": rms,
149
  "f0": f0[voiced_flag == 1] if f0 is not None else np.array([]),
150
+ "duration": len(chunk) / self.sr,
151
+ "pauses": len(non_silent)
152
  }
153
 
154
  def _combine_features(self, features: List[Dict[str, Any]]) -> Dict[str, float]:
 
156
  all_f0 = np.concatenate([f["f0"] for f in features if len(f["f0"]) > 0])
157
  all_rms = np.concatenate([f["rms"] for f in features])
158
  total_duration = sum(f["duration"] for f in features)
159
+ total_pauses = sum(f["pauses"] for f in features)
160
 
161
  pitch_mean = np.mean(all_f0) if len(all_f0) > 0 else 0
162
  pitch_std = np.std(all_f0) if len(all_f0) > 0 else 0
 
166
  "pitch_std": float(pitch_std),
167
  "mean_amplitude": float(np.mean(all_rms)),
168
  "amplitude_deviation": float(np.std(all_rms) / np.mean(all_rms)) if np.mean(all_rms) > 0 else 0,
169
+ "pauses_per_minute": float(total_pauses / (total_duration / 60)),
170
  "rising_patterns": int(np.sum(np.diff(all_f0) > 0)) if len(all_f0) > 1 else 0,
171
  "falling_patterns": int(np.sum(np.diff(all_f0) < 0)) if len(all_f0) > 1 else 0,
172
  "variations_per_minute": float((np.sum(np.diff(all_f0) != 0) if len(all_f0) > 1 else 0) / (total_duration / 60))
 
180
  self.retry_delay = 1
181
 
182
def analyze_content(self, transcript: str, progress_callback=None) -> Dict[str, Any]:
    """Run the full content analysis with retries.

    Combines LLM-based content scoring with speech-pattern metrics; on the
    final failed attempt a default analysis structure is returned instead
    of raising.
    """
    def report(fraction: float, message: str) -> None:
        # Progress reporting is optional; callers may omit the callback.
        if progress_callback:
            progress_callback(fraction, message)

    for attempt in range(self.retry_count):
        try:
            report(0.2, "Analyzing speech patterns...")
            speech_patterns = self._analyze_speech_patterns(transcript)

            report(0.5, "Analyzing teaching content...")
            prompt = self._create_analysis_prompt(transcript)

            response = self.client.chat.completions.create(
                model="gpt-4-turbo-preview",
                messages=[
                    {"role": "system", "content": """
                    You are a teaching expert conducting a detailed analysis.
                    Provide specific examples and clear metrics for each category.
                    Focus on actionable insights and clear evidence.
                    """},
                    {"role": "user", "content": prompt}
                ]
            )

            report(0.8, "Formatting results...")
            # Merge the structured content scores with the speech metrics.
            analysis = self._parse_analysis(response.choices[0].message.content)
            analysis.update(speech_patterns)

            report(1.0, "Content analysis complete!")
            return analysis

        except Exception as e:
            logger.error(f"Content analysis attempt {attempt + 1} failed: {e}")
            if attempt == self.retry_count - 1:
                return self._get_default_analysis()
            # Exponential backoff before the next attempt.
            time.sleep(self.retry_delay * (2 ** attempt))
227
 
228
def _analyze_speech_patterns(self, transcript: str) -> Dict[str, Any]:
    """Analyze speech patterns including filler words and grammar.

    Returns per-minute filler/grammar rates plus example lists. Any failure
    — or an empty transcript — yields zeroed metrics instead of raising.
    """
    default = {
        "filler_words_per_minute": 0,
        "grammar_errors_per_minute": 0,
        "filler_examples": [],
        "grammar_examples": []
    }
    # An empty transcript would make the words/minute estimate divide by
    # zero (masked by the broad except below) and waste an API call, so
    # short-circuit it here.
    words = len(transcript.split())
    if words == 0:
        return default

    try:
        response = self.client.chat.completions.create(
            model="gpt-4-turbo-preview",
            messages=[
                {"role": "system", "content": "Analyze the speech patterns in this transcript."},
                {"role": "user", "content": f"""
                Please analyze this transcript and provide:
                1. Number of filler words (um, uh, like, you know, etc.)
                2. Number of grammatical errors
                3. List of specific examples of each

                Transcript:
                {transcript}
                """}
            ]
        )

        # The counts are pulled out of the model's free-text reply.
        analysis_text = response.choices[0].message.content

        filler_match = re.search(r'(\d+)\s+filler\s+words?', analysis_text, re.IGNORECASE)
        filler_count = int(filler_match.group(1)) if filler_match else 0

        grammar_match = re.search(r'(\d+)\s+grammatical\s+errors?', analysis_text, re.IGNORECASE)
        grammar_count = int(grammar_match.group(1)) if grammar_match else 0

        # Estimate spoken minutes from word count at ~150 words/minute.
        minutes = words / 150

        return {
            "filler_words_per_minute": round(filler_count / minutes, 2),
            "grammar_errors_per_minute": round(grammar_count / minutes, 2),
            "filler_examples": self._extract_examples(analysis_text, "filler"),
            "grammar_examples": self._extract_examples(analysis_text, "grammar")
        }

    except Exception as e:
        logger.error(f"Speech pattern analysis failed: {e}")
        return default
277
 
278
+ def _extract_examples(self, text: str, category: str) -> List[str]:
279
+ """Extract examples from analysis text"""
280
+ examples = []
281
+ lines = text.split('\n')
282
+ in_category = False
 
 
283
 
284
+ for line in lines:
285
+ if category.lower() in line.lower() and "example" in line.lower():
286
+ in_category = True
287
+ continue
288
+ if in_category and line.strip().startswith('-'):
289
+ examples.append(line.strip()[1:].strip())
290
+ elif in_category and line.strip() and not line.strip().startswith('-'):
291
+ in_category = False
292
+
293
+ return examples[:5] # Return top 5 examples
294
+
295
def _create_analysis_prompt(self, transcript: str) -> str:
    """Build the user prompt sent to the LLM for content analysis.

    The categories and 1-5 scales named here are what _parse_analysis
    later looks for in the reply, so keep them in sync.
    """
    return f"""
    Analyze this teaching content considering:

    1. Teaching Effectiveness (1-5 scale)
    - Clarity of explanation
    - Student engagement
    - Knowledge depth

    2. Content Organization (1-5 scale)
    - Logical flow
    - Structure
    - Time management

    3. Communication Skills (1-5 scale)
    - Voice modulation
    - Pace
    - Energy level

    Provide specific examples and metrics for each category.
    Format the response in a clear, structured way.

    Transcript:
    {transcript}
    """
320
 
321
+ def _parse_analysis(self, analysis_text: str) -> Dict[str, Any]:
322
+ """Parse and structure the analysis response"""
323
  try:
324
+ analysis = {
325
+ "effectiveness": {
326
+ "score": 0,
327
+ "strengths": [],
328
+ "improvements": []
329
+ },
330
+ "organization": {
331
+ "score": 0,
332
+ "strengths": [],
333
+ "improvements": []
334
+ },
335
+ "communication": {
336
+ "score": 0,
337
+ "strengths": [],
338
+ "improvements": []
339
+ }
340
+ }
341
+
342
+ current_category = None
343
+ for line in analysis_text.split('\n'):
344
+ line = line.strip()
345
+ if not line:
346
+ continue
347
+
348
+ # Identify category and score
349
+ if "effectiveness" in line.lower():
350
+ current_category = "effectiveness"
351
+ score_match = re.search(r'(\d+)/5', line)
352
+ if score_match:
353
+ analysis[current_category]["score"] = int(score_match.group(1))
354
+ elif "organization" in line.lower():
355
+ current_category = "organization"
356
+ score_match = re.search(r'(\d+)/5', line)
357
+ if score_match:
358
+ analysis[current_category]["score"] = int(score_match.group(1))
359
+ elif "communication" in line.lower():
360
+ current_category = "communication"
361
+ score_match = re.search(r'(\d+)/5', line)
362
+ if score_match:
363
+ analysis[current_category]["score"] = int(score_match.group(1))
364
+
365
+ # Add points to appropriate category
366
+ if current_category and line.startswith('+'):
367
+ analysis[current_category]["strengths"].append(line[1:].strip())
368
+ elif current_category and line.startswith('-'):
369
+ analysis[current_category]["improvements"].append(line[1:].strip())
370
+
371
+ return analysis
372
+
373
  except Exception as e:
374
+ logger.error(f"Error parsing analysis: {e}")
375
+ return self._get_default_analysis()
376
 
377
+ def _get_default_analysis(self) -> Dict[str, Any]:
378
+ """Return default analysis structure"""
379
+ return {
380
+ "effectiveness": {
381
+ "score": 0,
382
+ "strengths": ["Analysis failed"],
383
+ "improvements": ["Analysis failed"]
384
+ },
385
+ "organization": {
386
+ "score": 0,
387
+ "strengths": ["Analysis failed"],
388
+ "improvements": ["Analysis failed"]
389
+ },
390
+ "communication": {
391
+ "score": 0,
392
+ "strengths": ["Analysis failed"],
393
+ "improvements": ["Analysis failed"]
394
+ },
395
+ "filler_words_per_minute": 0,
396
+ "grammar_errors_per_minute": 0,
397
+ "filler_examples": [],
398
+ "grammar_examples": []
399
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
400
 
401
class RecommendationGenerator:
    """Generates teaching recommendations from audio metrics and content analysis."""

    def __init__(self, api_key: str):
        self.client = OpenAI(api_key=api_key)
        # Retry settings kept for parity with ContentAnalyzer.
        self.retry_count = 3
        self.retry_delay = 1

    def generate_recommendations(self, metrics: Dict[str, Any],
                               content_analysis: Dict[str, Any],
                               progress_callback=None) -> Dict[str, Any]:
        """Generate detailed recommendations.

        Args:
            metrics: Aggregated audio/speech metrics.
            content_analysis: Output of ContentAnalyzer.analyze_content.
            progress_callback: Optional (fraction, message) reporter.

        Returns:
            Combined report (metrics, assessment, recommendations); a
            default "error" structure if generation fails.
        """
        try:
            if progress_callback:
                progress_callback(0.2, "Analyzing metrics...")

            prompt = self._create_recommendation_prompt(metrics, content_analysis)

            if progress_callback:
                progress_callback(0.4, "Generating recommendations...")

            response = self.client.chat.completions.create(
                model="gpt-4-turbo-preview",
                messages=[
                    {"role": "system", "content": """
                    Generate specific, actionable teaching recommendations.
                    Focus on practical improvements with clear next steps.
                    Format the response in a clear, easy-to-read structure.
                    """},
                    {"role": "user", "content": prompt}
                ]
            )

            if progress_callback:
                progress_callback(0.8, "Formatting recommendations...")

            recommendations = self._parse_recommendations(response.choices[0].message.content)

            if progress_callback:
                progress_callback(1.0, "Analysis complete!")

            return self._format_output(metrics, content_analysis, recommendations)

        except Exception as e:
            logger.error(f"Error generating recommendations: {e}")
            return self._get_default_recommendations()

    def _create_recommendation_prompt(self, metrics: Dict[str, Any],
                                    content_analysis: Dict[str, Any]) -> str:
        """Build the LLM prompt summarizing metrics and assessment scores."""
        return f"""
        Based on the following teaching performance data, provide specific recommendations:

        Performance Metrics:
        - Speaking Rate: {metrics.get('words_per_minute', 0):.1f} words/minute
        - Pauses: {metrics.get('pauses_per_minute', 0):.1f} pauses/minute
        - Filler Words: {metrics.get('filler_words_per_minute', 0):.1f} per minute
        - Grammar Errors: {metrics.get('grammar_errors_per_minute', 0):.1f} per minute

        Content Assessment:
        Teaching Effectiveness: {content_analysis.get('effectiveness', {}).get('score', 0)}/5
        Organization: {content_analysis.get('organization', {}).get('score', 0)}/5
        Communication: {content_analysis.get('communication', {}).get('score', 0)}/5

        Provide specific recommendations for:
        1. Delivery Improvement
        2. Content Organization
        3. Student Engagement
        4. Professional Development

        For each area:
        - Key actions to take
        - Specific exercises or practices
        - Resources to use
        - Expected outcomes
        """

    def _parse_recommendations(self, response_text: str) -> Dict[str, List[str]]:
        """Parse recommendations into a per-section structured format.

        BUG FIX: the bullet check now runs BEFORE section-keyword
        detection. Previously a bullet that merely mentioned another
        section's keyword (e.g. "- Improve slide organization" under
        Delivery) switched sections and the bullet itself was dropped.
        """
        recommendations = {
            "delivery": [],
            "organization": [],
            "engagement": [],
            "development": []
        }

        current_section = None
        for line in response_text.split('\n'):
            line = line.strip()
            if not line:
                continue

            # Bullets belong to the active section and must not be
            # re-interpreted as section headers.
            if current_section and line.startswith('-'):
                recommendations[current_section].append(line[1:].strip())
            elif "delivery" in line.lower():
                current_section = "delivery"
            elif "organization" in line.lower():
                current_section = "organization"
            elif "engagement" in line.lower():
                current_section = "engagement"
            elif "development" in line.lower():
                current_section = "development"

        return recommendations

    def _format_output(self, metrics: Dict[str, Any],
                     content_analysis: Dict[str, Any],
                     recommendations: Dict[str, List[str]]) -> Dict[str, Any]:
        """Format the final output in a clean, readable structure.

        Status thresholds: speaking rate 120-160 wpm, 2-4 pauses/minute,
        <=3 filler words/minute, <=1 grammar error/minute count as "good".
        """
        return {
            "performance_metrics": {
                "speaking_rate": {
                    "value": round(metrics.get('words_per_minute', 0), 1),
                    "unit": "words/minute",
                    "status": "good" if 120 <= metrics.get('words_per_minute', 0) <= 160 else "needs_improvement"
                },
                "pauses": {
                    "value": round(metrics.get('pauses_per_minute', 0), 1),
                    "unit": "pauses/minute",
                    "status": "good" if 2 <= metrics.get('pauses_per_minute', 0) <= 4 else "needs_improvement"
                },
                "filler_words": {
                    "value": round(metrics.get('filler_words_per_minute', 0), 1),
                    "unit": "per minute",
                    "status": "good" if metrics.get('filler_words_per_minute', 0) <= 3 else "needs_improvement",
                    "examples": metrics.get('filler_examples', [])
                },
                "grammar": {
                    "value": round(metrics.get('grammar_errors_per_minute', 0), 1),
                    "unit": "errors/minute",
                    "status": "good" if metrics.get('grammar_errors_per_minute', 0) <= 1 else "needs_improvement",
                    "examples": metrics.get('grammar_examples', [])
                }
            },
            "content_assessment": {
                "effectiveness": {
                    "score": content_analysis.get('effectiveness', {}).get('score', 0),
                    "strengths": content_analysis.get('effectiveness', {}).get('strengths', []),
                    "improvements": content_analysis.get('effectiveness', {}).get('improvements', [])
                },
                "organization": {
                    "score": content_analysis.get('organization', {}).get('score', 0),
                    "strengths": content_analysis.get('organization', {}).get('strengths', []),
                    "improvements": content_analysis.get('organization', {}).get('improvements', [])
                },
                "communication": {
                    "score": content_analysis.get('communication', {}).get('score', 0),
                    "strengths": content_analysis.get('communication', {}).get('strengths', []),
                    "improvements": content_analysis.get('communication', {}).get('improvements', [])
                }
            },
            "recommendations": {
                "delivery_improvement": recommendations.get('delivery', []),
                "content_organization": recommendations.get('organization', []),
                "student_engagement": recommendations.get('engagement', []),
                "professional_development": recommendations.get('development', [])
            }
        }

    def _get_default_recommendations(self) -> Dict[str, Any]:
        """Return the default report structure used when generation fails."""
        return {
            "performance_metrics": {
                "speaking_rate": {"value": 0, "unit": "words/minute", "status": "error"},
                "pauses": {"value": 0, "unit": "pauses/minute", "status": "error"},
                "filler_words": {"value": 0, "unit": "per minute", "status": "error", "examples": []},
                "grammar": {"value": 0, "unit": "errors/minute", "status": "error", "examples": []}
            },
            "content_assessment": {
                "effectiveness": {"score": 0, "strengths": ["Analysis failed"], "improvements": ["Analysis failed"]},
                "organization": {"score": 0, "strengths": ["Analysis failed"], "improvements": ["Analysis failed"]},
                "communication": {"score": 0, "strengths": ["Analysis failed"], "improvements": ["Analysis failed"]}
            },
            "recommendations": {
                "delivery_improvement": ["Analysis failed"],
                "content_organization": ["Analysis failed"],
                "student_engagement": ["Analysis failed"],
                "professional_development": ["Analysis failed"]
            }
        }
581
 
582
def format_streamlit_output(analysis_results: Dict[str, Any]) -> None:
    """Format and display analysis results in the Streamlit UI."""
    st.header("Teaching Analysis Results")

    # --- Performance metrics: one st.metric per column -------------------
    st.subheader("Performance Metrics")
    metrics = analysis_results["performance_metrics"]

    metric_specs = [
        ("Speaking Rate", "speaking_rate"),
        ("Pauses", "pauses"),
        ("Filler Words", "filler_words"),
        ("Grammar Errors", "grammar"),
    ]
    for column, (label, key) in zip(st.columns(4), metric_specs):
        entry = metrics[key]
        with column:
            st.metric(
                label,
                f"{entry['value']} {entry['unit']}",
                delta="Good" if entry['status'] == "good" else "Needs Improvement"
            )

    # --- Content assessment: expander per scored category ----------------
    st.subheader("Content Assessment")
    assessment = analysis_results["content_assessment"]

    for category in ["effectiveness", "organization", "communication"]:
        with st.expander(f"{category.title()} (Score: {assessment[category]['score']}/5)"):
            left, right = st.columns(2)
            with left:
                st.markdown("**Strengths**")
                for strength in assessment[category]["strengths"]:
                    st.markdown(f"- {strength}")
            with right:
                st.markdown("**Areas for Improvement**")
                for improvement in assessment[category]["improvements"]:
                    st.markdown(f"- {improvement}")

    # --- Recommendations: expander per recommendation area ---------------
    st.subheader("Recommendations")
    for category, items in analysis_results["recommendations"].items():
        with st.expander(category.replace('_', ' ').title()):
            for item in items:
                st.markdown(f"- {item}")
 
645
def main():
    """Streamlit entry point: upload a recording, analyze it, render results."""
    st.title("Teaching Analysis Tool")

    uploaded_file = st.file_uploader("Upload your teaching recording", type=['wav', 'mp3'])

    if uploaded_file:
        with st.spinner("Analyzing your teaching..."):
            progress_bar = st.progress(0)
            status = st.empty()
            progress_tracker = ProgressTracker(status, progress_bar)

            try:
                api_key = st.secrets["OPENAI_API_KEY"]
                audio_processor = AudioFeatureExtractor()
                content_analyzer = ContentAnalyzer(api_key)
                recommendation_generator = RecommendationGenerator(api_key)

                with temporary_file(suffix='.wav') as temp_path:
                    # BUG FIX: Streamlit's UploadedFile has no .save();
                    # persist its bytes to the temp path explicitly.
                    with open(temp_path, 'wb') as out_file:
                        out_file.write(uploaded_file.getbuffer())

                    audio_features = audio_processor.extract_features(
                        temp_path, progress_callback=progress_tracker.update
                    )

                    # BUG FIX: `transcript` was used below but never
                    # produced. Transcribe the recording with Whisper so
                    # content analysis has text to work with.
                    with open(temp_path, 'rb') as audio_file:
                        transcript = OpenAI(api_key=api_key).audio.transcriptions.create(
                            model="whisper-1",
                            file=audio_file,
                            response_format="text"
                        )

                # Analyze content and generate the combined report.
                analysis = content_analyzer.analyze_content(
                    transcript, progress_callback=progress_tracker.update
                )

                results = recommendation_generator.generate_recommendations(
                    audio_features,
                    analysis,
                    progress_callback=progress_tracker.update
                )

                # Display results
                format_streamlit_output(results)

            except Exception as e:
                st.error(f"An error occurred during analysis: {str(e)}")
                logger.error(f"Analysis failed: {e}", exc_info=True)

            finally:
                # Always clear transient progress widgets.
                progress_bar.empty()
                status.empty()
 
 
 
 
 
 
 
 
 
 
 
689
 
690
# Standard script entry guard so importing this module has no side effects.
if __name__ == "__main__":
    main()