Spaces:

sparshmehta
/

main_app

Sleeping

App Files Files Community

sparshmehta committed on Feb 12, 2025

Commit

e1d9244

verified ·

1 Parent(s): e36dab5

Update app.py

Browse files

Files changed (1) hide show

app.py +573 -243

app.py CHANGED Viewed

@@ -48,22 +48,44 @@ def temporary_file(suffix: Optional[str] = None):
                 logger.warning(f"Failed to remove temporary file {temp_path}: {e}")
 class ProgressTracker:
-    """Handles progress tracking and ETA calculations"""
     def __init__(self, status_element, progress_bar):
         self.status = status_element
         self.progress = progress_bar
         self.start_time = time.time()
-    def update(self, progress: float, message: str):
-        """Update progress with ETA calculation"""
-        self.status.update(label=f"{message} ({progress:.1%})")
-        self.progress.progress(progress)
         if progress > 0:
             elapsed = time.time() - self.start_time
-            estimated_total = elapsed / progress
-            remaining = estimated_total - elapsed
-            self.status.update(label=f"{message} ({progress:.1%}) - ETA: {remaining:.0f}s")
 class AudioFeatureExtractor:
     """Handles audio feature extraction with improved pause detection"""
@@ -224,8 +246,6 @@ class ContentAnalyzer:
         self.client = OpenAI(api_key=api_key)
         self.retry_count = 3
         self.retry_delay = 1
-        self.GPT4_INPUT_COST = 0.15 / 1_000_000  # $0.15 per 1M tokens input
-        self.GPT4_OUTPUT_COST = 0.60 / 1_000_000  # $0.60 per 1M tokens output
     def analyze_content(self, transcript: str, progress_callback=None) -> Dict[str, Any]:
         """Analyze teaching content with more lenient validation and robust JSON handling"""
@@ -350,7 +370,7 @@ class ContentAnalyzer:
                     raise
             except Exception as e:
-                logger.error(f"Content analysis attempt {attempt + 1} failed: {e}")
                 if attempt == self.retry_count - 1:
                     logger.error("All attempts failed, returning default structure")
                     return {
@@ -657,6 +677,55 @@ Consider:
 - Use of examples and analogies
 - Engagement style"""
 class MentorEvaluator:
     """Main class for video evaluation"""
     def __init__(self, model_cache_dir: Optional[str] = None):
@@ -677,9 +746,7 @@ class MentorEvaluator:
         self._feature_extractor = None
         self._content_analyzer = None
         self._recommendation_generator = None
-        # Cost per minute for Whisper transcription
-        self.WHISPER_COST_PER_MINUTE = 0.006  # $0.006 per minute of audio
     @property
     def whisper_model(self):
@@ -689,7 +756,7 @@ class MentorEvaluator:
                 logger.info("Attempting to initialize Whisper model...")
                 # First try to initialize model with downloading allowed
                 self._whisper_model = WhisperModel(
-                    "medium",
                     device="cpu",
                     compute_type="int8",
                     download_root=self.model_cache_dir,
@@ -702,7 +769,7 @@ class MentorEvaluator:
                 try:
                     logger.info("Attempting to load model from local cache...")
                     self._whisper_model = WhisperModel(
-                        "medium",
                         device="cpu",
                         compute_type="int8",
                         download_root=self.model_cache_dir,
@@ -739,7 +806,7 @@ class MentorEvaluator:
         return self._recommendation_generator
     def evaluate_video(self, video_path: str) -> Dict[str, Any]:
-        """Evaluate video with proper resource management"""
         with temporary_file(suffix=".wav") as temp_audio:
             try:
                 # Extract audio
@@ -748,49 +815,52 @@ class MentorEvaluator:
                     tracker = ProgressTracker(status, progress_bar)
                     self._extract_audio(video_path, temp_audio, tracker.update)
-                # Extract features
-                with st.status("Extracting audio features...") as status:
-                    progress_bar = st.progress(0)
-                    tracker = ProgressTracker(status, progress_bar)
-                    audio_features = self.feature_extractor.extract_features(
-                        temp_audio,
-                        tracker.update
-                    )
-                # Transcribe
-                with st.status("Transcribing audio...") as status:
-                    progress_bar = st.progress(0)
-                    tracker = ProgressTracker(status, progress_bar)
-                    transcript = self._transcribe_audio(temp_audio, tracker.update)
-                # Analyze content
-                with st.status("Analyzing content...") as status:
-                    progress_bar = st.progress(0)
-                    tracker = ProgressTracker(status, progress_bar)
-                    content_analysis = self.content_analyzer.analyze_content(
-                        transcript,
-                        tracker.update
-                    )
-                # Evaluate speech
-                with st.status("Evaluating speech metrics...") as status:
-                    progress_bar = st.progress(0)
-                    tracker = ProgressTracker(status, progress_bar)
-                    speech_metrics = self._evaluate_speech_metrics(
-                        transcript,
-                        audio_features,
-                        tracker.update
-                    )
-                # Generate recommendations
-                with st.status("Generating recommendations...") as status:
-                    progress_bar = st.progress(0)
-                    tracker = ProgressTracker(status, progress_bar)
-                    recommendations = self.recommendation_generator.generate_recommendations(
-                        speech_metrics,
-                        content_analysis,
-                        tracker.update
-                    )
                 return {
                     "communication": speech_metrics,
@@ -854,47 +924,134 @@ class MentorEvaluator:
             raise AudioProcessingError(f"Audio extraction failed: {str(e)}")
     def _transcribe_audio(self, audio_path: str, progress_callback=None) -> str:
-        """Transcribe audio with improved memory management"""
         try:
             if progress_callback:
                 progress_callback(0.1, "Loading transcription model...")
             audio_info = sf.info(audio_path)
             total_duration = audio_info.duration
-            chunk_duration = 5 * 60  # 5-minute chunks
-            overlap_duration = 10  # 10-second overlap
-            transcripts = []
-            total_chunks = int(np.ceil(total_duration / (chunk_duration - overlap_duration)))
-            with sf.SoundFile(audio_path) as f:
-                for i in range(total_chunks):
-                    if progress_callback:
-                        progress_callback(0.4 + (i / total_chunks) * 0.4,
-                                       f"Transcribing chunk {i + 1}/{total_chunks}...")
-                    # Calculate positions in samples
-                    start_sample = int(i * (chunk_duration - overlap_duration) * f.samplerate)
-                    f.seek(start_sample)
-                    chunk = f.read(frames=int(chunk_duration * f.samplerate))
-                    with temporary_file(suffix=".wav") as chunk_path:
-                        sf.write(chunk_path, chunk, f.samplerate)
-                        # The fix: properly handle the segments from faster-whisper
-                        segments, _ = self.whisper_model.transcribe(chunk_path)
-                        # Combine all segment texts
-                        chunk_text = ' '.join(segment.text for segment in segments)
-                        transcripts.append(chunk_text)
             if progress_callback:
                 progress_callback(1.0, "Transcription complete!")
-            return " ".join(transcripts)
         except Exception as e:
             logger.error(f"Error in transcription: {e}")
             raise
     def calculate_speech_metrics(self, transcript: str, audio_duration: float) -> Dict[str, float]:
         """Calculate words per minute and other speech metrics."""
         words = len(transcript.split())
@@ -1215,12 +1372,68 @@ def display_evaluation(evaluation: Dict[str, Any]):
             recommendations = evaluation.get("recommendations", {})
-            # Geography Fit with improved formatting
-            # with st.expander("🌍 Geography Fit", expanded=True):
-            #     geography_fit = recommendations.get("geographyFit", "Not specified")
-            #     st.info(geography_fit)
-            # Improvements Needed with better formatting
             with st.expander("💡 Areas for Improvement", expanded=True):
                 improvements = recommendations.get("improvements", [])
                 if isinstance(improvements, list):
@@ -1501,6 +1714,108 @@ def check_dependencies() -> List[str]:
     return missing
 def main():
     try:
         # Set page config must be the first Streamlit command
@@ -1509,10 +1824,35 @@ def main():
         # Add custom CSS for animations and styling
         st.markdown("""
             <style>
-                /* Modern animations */
                 @keyframes fadeIn {
-                    from { opacity: 0; transform: translateY(20px); }
-                    to { opacity: 1; transform: translateY(0); }
                 }
                 @keyframes slideIn {
@@ -1526,153 +1866,92 @@ def main():
                     100% { transform: scale(1); }
                 }
-                @keyframes gradientBG {
-                    0% { background-position: 0% 50%; }
-                    50% { background-position: 100% 50%; }
-                    100% { background-position: 0% 50%; }
-                }
-                /* Modern styling */
-                .stApp {
-                    background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
                 }
-                .main-title {
-                    text-align: center;
-                    color: #2c3e50;
-                    font-size: 2.5rem;
-                    font-weight: 700;
-                    margin: 2rem 0;
-                    padding: 1rem;
-                    border-radius: 10px;
-                    background: linear-gradient(120deg, #84fab0 0%, #8fd3f4 100%);
-                    animation: fadeIn 1s ease-out;
-                }
-                .card {
-                    background: white;
-                    padding: 1.5rem;
-                    border-radius: 15px;
-                    box-shadow: 0 10px 20px rgba(0,0,0,0.1);
-                    margin: 1rem 0;
-                    animation: fadeIn 0.5s ease-out;
                 }
-                .card:hover {
-                    transform: translateY(-5px);
                 }
                 .metric-card {
-                    background: linear-gradient(120deg, #a1c4fd 0%, #c2e9fb 100%);
-                    color: #1a202c;
-                    padding: 1rem;
                     border-radius: 10px;
-                    text-align: center;
-                    animation: fadeIn 0.5s ease-out;
                 }
-                .sidebar-content {
-                    background: rgba(255, 255, 255, 0.9);
-                    padding: 1.5rem;
-                    border-radius: 10px;
-                    margin: 1rem 0;
                 }
                 .stButton>button {
-                    background: linear-gradient(120deg, #4facfe 0%, #00f2fe 100%);
-                    color: white;
-                    border: none;
-                    padding: 0.75rem 1.5rem;
-                    border-radius: 25px;
-                    font-weight: 600;
                     transition: all 0.3s ease;
                 }
                 .stButton>button:hover {
-                    transform: translateY(-2px);
-                    box-shadow: 0 5px 15px rgba(0,0,0,0.2);
                 }
-                .stProgress > div > div {
-                    background: linear-gradient(90deg, #4facfe 0%, #00f2fe 100%);
-                }
-                /* Status indicators */
-                .status-processing {
-                    background: linear-gradient(120deg, #a1c4fd 0%, #c2e9fb 100%);
-                    padding: 1rem;
-                    border-radius: 10px;
-                    text-align: center;
-                    animation: pulse 2s infinite;
-                }
-                .status-complete {
-                    background: linear-gradient(120deg, #84fab0 0%, #8fd3f4 100%);
-                    padding: 1rem;
-                    border-radius: 10px;
-                    text-align: center;
-                }
-                /* Expander styling */
-                .streamlit-expanderHeader {
-                    background: linear-gradient(90deg, #f6f9fc 0%, #f0f4f8 100%);
-                    border-radius: 8px;
-                    padding: 0.5rem 1rem;
-                    font-weight: 600;
                 }
-                /* File uploader styling */
-                .uploadedFile {
-                    background: white;
-                    padding: 1rem;
-                    border-radius: 10px;
-                    box-shadow: 0 5px 15px rgba(0,0,0,0.1);
                 }
-                /* Metrics styling */
-                div[data-testid="stMetricValue"] {
-                    font-size: 2rem;
-                    font-weight: 700;
-                    color: #2c3e50;
                 }
-                /* Success/Error message styling */
-                .stSuccess, .stError {
-                    padding: 1rem;
-                    border-radius: 10px;
-                    animation: fadeIn 0.5s ease-out;
                 }
             </style>
             <div class="fade-in">
-                <h1 class="main-title">
                     🎓 Mentor Demo Review System
                 </h1>
             </div>
         """, unsafe_allow_html=True)
-        # Sidebar with modern styling
         with st.sidebar:
             st.markdown("""
-                <div class="sidebar-content">
-                    <h2 style='text-align: center; color: #2c3e50;'>📋 Instructions</h2>
-                    <p style='color: #4a5568;'>Follow these steps to evaluate your teaching demo:</p>
-                    <ol style='color: #4a5568;'>
-                        <li>Upload your teaching demo video</li>
-                        <li>Wait for the analysis to complete</li>
-                        <li>Review your detailed evaluation</li>
                         <li>Download the report</li>
                     </ol>
                 </div>
             """, unsafe_allow_html=True)
-            st.markdown("""
-                <div class="sidebar-content">
-                    <h3 style='color: #2c3e50;'>📁 File Requirements</h3>
-                    <p style='color: #4a5568;'><strong>Supported formats:</strong> MP4, AVI, MOV<br>
-                    <strong>Maximum file size:</strong> 500MB</p>
-                </div>
-            """, unsafe_allow_html=True)
         # Check dependencies with progress
         with st.status("Checking system requirements...") as status:
@@ -1698,25 +1977,36 @@ def main():
             progress_bar.progress(1.0)
             status.update(label="System requirements satisfied!", state="complete")
-        # File uploader with modern styling
-        st.markdown("""
-            <div class="card">
-                <h3 style='color: #2c3e50; text-align: center;'>📤 Upload Your Teaching Video</h3>
-            </div>
-        """, unsafe_allow_html=True)
         uploaded_file = st.file_uploader(
-            "Choose a video file",
             type=['mp4', 'avi', 'mov'],
             help="Upload your teaching video in MP4, AVI, or MOV format"
         )
         if uploaded_file:
-            # Add a modern processing animation
             st.markdown("""
-                <div class="status-processing">
-                    <h3>🔄 Processing Your Video</h3>
-                    <p>Please wait while we analyze your teaching demo...</p>
                 </div>
             """, unsafe_allow_html=True)
@@ -1727,6 +2017,8 @@ def main():
             try:
                 # Save uploaded file with progress
                 with st.status("Saving uploaded file...") as status:
                     progress_bar = st.progress(0)
                     # Save in chunks to show progress
@@ -1746,49 +2038,87 @@ def main():
                     status.update(label="File saved successfully!", state="complete")
                 # Validate file size
-                file_size = os.path.getsize(video_path) / (1024 * 1024)  # Size in MB
-                if file_size > 500:  # 500MB limit
-                    st.error("File size exceeds 500MB limit. Please upload a smaller file.")
                     return
-                # Process video and store results
                 if 'evaluation_results' not in st.session_state:
                     with st.spinner("Processing video"):
                         evaluator = MentorEvaluator()
-                        st.session_state.evaluation_results = evaluator.evaluate_video(video_path)
-                # Display completion status
-                st.markdown("""
-                    <div class="status-complete">
-                        <h3>✅ Analysis Complete!</h3>
-                        <p>Review your detailed evaluation below</p>
-                    </div>
-                """, unsafe_allow_html=True)
-                # Display evaluation in a card
-                st.markdown('<div class="card">', unsafe_allow_html=True)
                 display_evaluation(st.session_state.evaluation_results)
-                st.markdown('</div>', unsafe_allow_html=True)
-                # Download section
-                st.markdown("""
-                    <div class="card" style="text-align: center;">
-                        <h3 style='color: #2c3e50;'>📥 Download Your Report</h3>
-                    </div>
-                """, unsafe_allow_html=True)
-                if st.download_button(
-                    "Download Full Report",
-                    json.dumps(st.session_state.evaluation_results, indent=2),
-                    "evaluation_report.json",
-                    "application/json",
-                    help="Download the complete evaluation report in JSON format"
-                ):
-                    st.success("Report downloaded successfully!")
             except Exception as e:
                 st.error(f"Error during evaluation: {str(e)}")
             finally:
                 # Clean up temp files
                 if 'temp_dir' in locals():

                 logger.warning(f"Failed to remove temporary file {temp_path}: {e}")
 class ProgressTracker:
+    """Handles progress tracking and ETA calculations with step tracking"""
     def __init__(self, status_element, progress_bar):
         self.status = status_element
         self.progress = progress_bar
         self.start_time = time.time()
+        self.current_step = ""
+        self.total_steps = [
+            "Loading Audio",
+            "Extracting Features",
+            "Transcribing Audio",
+            "Analyzing Content",
+            "Generating Recommendations"
+        ]
+        self.step_index = 0
+    def update(self, progress: float, message: str, batch_info: str = None):
+        """Update progress with ETA calculation and step tracking"""
+        # Update current step if it's changed
+        if message != self.current_step:
+            self.current_step = message
+            self.step_index = self.total_steps.index(message) if message in self.total_steps else self.step_index
+        # Calculate overall progress including step progress
+        overall_progress = (self.step_index + progress) / len(self.total_steps)
+        self.progress.progress(overall_progress)
+        # Format status message
+        status_msg = f"Step {self.step_index + 1}/{len(self.total_steps)}: {message}"
+        if batch_info:
+            status_msg += f" | {batch_info}"
         if progress > 0:
             elapsed = time.time() - self.start_time
+            estimated_total = elapsed / overall_progress if overall_progress > 0 else 0
+            remaining = max(0, estimated_total - elapsed)
+            status_msg += f" ({progress:.1%}) - ETA: {remaining:.0f}s"
+        self.status.update(label=status_msg)
 class AudioFeatureExtractor:
     """Handles audio feature extraction with improved pause detection"""
         self.client = OpenAI(api_key=api_key)
         self.retry_count = 3
         self.retry_delay = 1
     def analyze_content(self, transcript: str, progress_callback=None) -> Dict[str, Any]:
         """Analyze teaching content with more lenient validation and robust JSON handling"""
                     raise
             except Exception as e:
+                logger.error(f"Content analysis attempt {attempt + 1} failed: {str(e)}")
                 if attempt == self.retry_count - 1:
                     logger.error("All attempts failed, returning default structure")
                     return {
 - Use of examples and analogies
 - Engagement style"""
class CostCalculator:
    """Accumulate estimated API costs for one evaluation run.

    Costs are kept per operation in ``self.costs`` together with a running
    ``'total'``. Every ``add_*`` call adds to both the operation entry and
    the total, so the breakdown always sums to the total even when an
    operation is recorded more than once.
    """

    def __init__(self):
        self.GPT4_INPUT_COST = 0.15 / 1_000_000  # $0.15 per 1M tokens input
        self.GPT4_OUTPUT_COST = 0.60 / 1_000_000  # $0.60 per 1M tokens output
        self.WHISPER_COST_PER_MINUTE = 0.006  # $0.006 per minute of audio
        self.costs = {
            'transcription': 0.0,
            'content_analysis': 0.0,
            'recommendations': 0.0,
            'total': 0.0
        }

    def estimate_tokens(self, text: str) -> int:
        """Return a rough token count: whitespace-separated words * 1.3, rounded."""
        return round(len(text.split()) * 1.3)

    def _record(self, operation: str, cost: float) -> None:
        """Add ``cost`` to ``operation`` and to the running total."""
        self.costs[operation] = self.costs.get(operation, 0.0) + cost
        self.costs['total'] += cost

    def add_transcription_cost(self, duration_seconds: float):
        """Add the transcription cost for ``duration_seconds`` of audio.

        The cost uses a fixed per-minute rate; negative durations count as 0.
        """
        cost = (max(duration_seconds, 0.0) / 60) * self.WHISPER_COST_PER_MINUTE
        self._record('transcription', cost)
        print(f"\nTranscription Cost: ${cost:.4f}")

    def add_gpt4_cost(self, input_text: str, output_text: str, operation: str):
        """Add the estimated cost of one GPT-4 call under ``operation``.

        Args:
            input_text: prompt text sent to the model.
            output_text: text returned by the model.
            operation: key in ``self.costs`` to charge (created if missing).
        """
        input_tokens = self.estimate_tokens(input_text)
        output_tokens = self.estimate_tokens(output_text)
        input_cost = input_tokens * self.GPT4_INPUT_COST
        output_cost = output_tokens * self.GPT4_OUTPUT_COST
        total_cost = input_cost + output_cost
        self._record(operation, total_cost)
        print(f"\n{operation.replace('_', ' ').title()} Cost:")
        print(f"Input tokens: {input_tokens:.0f} (${input_cost:.4f})")
        print(f"Output tokens: {output_tokens:.0f} (${output_cost:.4f})")
        print(f"Operation total: ${total_cost:.4f}")

    def print_total_cost(self):
        """Print the per-operation breakdown followed by the total."""
        print("\n=== Cost Breakdown ===")
        for key, cost in self.costs.items():
            if key != 'total':
                print(f"{key.replace('_', ' ').title()}: ${cost:.4f}")
        print(f"\nTotal Cost: ${self.costs['total']:.4f}")
 class MentorEvaluator:
     """Main class for video evaluation"""
     def __init__(self, model_cache_dir: Optional[str] = None):
         self._feature_extractor = None
         self._content_analyzer = None
         self._recommendation_generator = None
+        self.cost_calculator = CostCalculator()
     @property
     def whisper_model(self):
                 logger.info("Attempting to initialize Whisper model...")
                 # First try to initialize model with downloading allowed
                 self._whisper_model = WhisperModel(
+                    "small",
                     device="cpu",
                     compute_type="int8",
                     download_root=self.model_cache_dir,
                 try:
                     logger.info("Attempting to load model from local cache...")
                     self._whisper_model = WhisperModel(
+                        "small",
                         device="cpu",
                         compute_type="int8",
                         download_root=self.model_cache_dir,
         return self._recommendation_generator
     def evaluate_video(self, video_path: str) -> Dict[str, Any]:
+        """Evaluate video with proper resource management and cost tracking"""
         with temporary_file(suffix=".wav") as temp_audio:
             try:
                 # Extract audio
                     tracker = ProgressTracker(status, progress_bar)
                     self._extract_audio(video_path, temp_audio, tracker.update)
+                # Get audio duration for cost calculation
+                audio_info = sf.info(temp_audio)
+                duration_seconds = audio_info.duration
+                self.cost_calculator.add_transcription_cost(duration_seconds)
+                # Extract features and transcribe
+                audio_features = self.feature_extractor.extract_features(
+                    temp_audio,
+                    tracker.update
+                )
+                transcript = self._transcribe_audio(temp_audio, tracker.update)
+                # Analyze content with cost tracking
+                content_prompt = self.content_analyzer._create_analysis_prompt(transcript)
+                content_analysis = self.content_analyzer.analyze_content(transcript, tracker.update)
+                self.cost_calculator.add_gpt4_cost(
+                    content_prompt,
+                    json.dumps(content_analysis),
+                    'content_analysis'
+                )
+                # Evaluate speech metrics
+                speech_metrics = self._evaluate_speech_metrics(
+                    transcript,
+                    audio_features,
+                    tracker.update
+                )
+                # Generate recommendations with cost tracking
+                rec_prompt = self.recommendation_generator._create_recommendation_prompt(
+                    speech_metrics,
+                    content_analysis
+                )
+                recommendations = self.recommendation_generator.generate_recommendations(
+                    speech_metrics,
+                    content_analysis,
+                    tracker.update
+                )
+                self.cost_calculator.add_gpt4_cost(
+                    rec_prompt,
+                    json.dumps(recommendations),
+                    'recommendations'
+                )
+                # Print final cost breakdown
+                self.cost_calculator.print_total_cost()
                 return {
                     "communication": speech_metrics,
             raise AudioProcessingError(f"Audio extraction failed: {str(e)}")
     def _transcribe_audio(self, audio_path: str, progress_callback=None) -> str:
+        """Transcribe audio with optimized performance using batching and parallel processing"""
         try:
             if progress_callback:
                 progress_callback(0.1, "Loading transcription model...")
+            # Check if GPU is available and set device accordingly
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+            compute_type = "float16" if device == "cuda" else "int8"
+            # Generate cache key based on file content
+            cache_key = f"transcript_{hash(open(audio_path, 'rb').read())}"
+            # Check cache first
+            if cache_key in st.session_state:
+                logger.info("Using cached transcription")
+                return st.session_state[cache_key]
+            # Initialize model with optimized settings
+            model = WhisperModel(
+                "medium",
+                device=device,
+                compute_type=compute_type,
+                download_root=self.model_cache_dir,
+                local_files_only=False,
+                cpu_threads=4,
+                num_workers=2
+            )
+            if progress_callback:
+                progress_callback(0.2, "Starting transcription...")
+            # Get audio duration for progress calculation
             audio_info = sf.info(audio_path)
             total_duration = audio_info.duration
+            # First pass to count total segments
+            segments_preview, _ = model.transcribe(
+                audio_path,
+                beam_size=5,
+                word_timestamps=True,
+                vad_filter=True,
+                vad_parameters=dict(
+                    min_silence_duration_ms=500,
+                    speech_pad_ms=100
+                )
+            )
+            total_segments = sum(1 for _ in segments_preview)
+            def progress_updater(current_segment, segment_start, segment_duration):
+                """Callback function to update progress based on segment position"""
+                progress = min((segment_start + segment_duration) / total_duration, 1.0)
+                progress = 0.2 + (progress * 0.7)  # Scale progress between 20% and 90%
+                if progress_callback:
+                    time_remaining = ((total_duration - (segment_start + segment_duration)) /
+                                   (segment_start + segment_duration) *
+                                   (time.time() - start_time) if segment_start > 0 else 0)
+                    status_message = (
+                        f"Transcribing batch {current_segment}/{total_segments} "
+                        f"({progress:.1%}) - "
+                        f"ETA: {int(time_remaining)}s"
+                    )
+                    progress_callback(progress, status_message)
+            # Start timing for ETA calculation
+            start_time = time.time()
+            # Transcribe with progress updates
+            segments, _ = model.transcribe(
+                audio_path,
+                beam_size=5,
+                word_timestamps=True,
+                vad_filter=True,
+                vad_parameters=dict(
+                    min_silence_duration_ms=500,
+                    speech_pad_ms=100
+                )
+            )
+            # Process segments and update progress
+            transcript_parts = []
+            for i, segment in enumerate(segments, 1):
+                transcript_parts.append(segment.text)
+                progress_updater(i, segment.start, segment.end - segment.start)
+            # Combine segments into final transcript
+            transcript = ' '.join(transcript_parts)
+            # Cache the result
+            st.session_state[cache_key] = transcript
             if progress_callback:
                 progress_callback(1.0, "Transcription complete!")
+            return transcript
         except Exception as e:
             logger.error(f"Error in transcription: {e}")
             raise
+    def _merge_transcripts(self, transcripts: List[str]) -> str:
+        """Merge transcripts with overlap deduplication"""
+        if not transcripts:
+            return ""
+        def clean_text(text):
+            # Remove extra spaces and normalize punctuation
+            return ' '.join(text.split())
+        def find_overlap(text1, text2):
+            # Find overlapping text between consecutive chunks
+            words1 = text1.split()
+            words2 = text2.split()
+            for i in range(min(len(words1), 20), 0, -1):  # Check up to 20 words
+                if ' '.join(words1[-i:]) == ' '.join(words2[:i]):
+                    return i
+            return 0
+        merged = clean_text(transcripts[0])
+        for i in range(1, len(transcripts)):
+            current = clean_text(transcripts[i])
+            overlap_size = find_overlap(merged, current)
+            merged += ' ' + current.split(' ', overlap_size)[-1]
+        return merged
     def calculate_speech_metrics(self, transcript: str, audio_duration: float) -> Dict[str, float]:
         """Calculate words per minute and other speech metrics."""
         words = len(transcript.split())
             recommendations = evaluation.get("recommendations", {})
+            # Calculate Overall Score
+            communication_metrics = evaluation.get("communication", {})
+            teaching_data = evaluation.get("teaching", {})
+            # Calculate Communication Score
+            comm_scores = []
+            for category in ["speed", "fluency", "flow", "intonation", "energy"]:
+                if category in communication_metrics:
+                    if "score" in communication_metrics[category]:
+                        comm_scores.append(communication_metrics[category]["score"])
+            communication_score = (sum(comm_scores) / len(comm_scores) * 100) if comm_scores else 0
+            # Calculate Teaching Score (combining concept and code assessment)
+            concept_assessment = teaching_data.get("Concept Assessment", {})
+            code_assessment = teaching_data.get("Code Assessment", {})
+            teaching_scores = []
+            # Add concept scores
+            for category in concept_assessment.values():
+                if isinstance(category, dict) and "Score" in category:
+                    teaching_scores.append(category["Score"])
+            # Add code scores
+            for category in code_assessment.values():
+                if isinstance(category, dict) and "Score" in category:
+                    teaching_scores.append(category["Score"])
+            teaching_score = (sum(teaching_scores) / len(teaching_scores) * 100) if teaching_scores else 0
+            # Calculate Overall Score (50-50 weight between communication and teaching)
+            overall_score = (communication_score + teaching_score) / 2
+            # Display Overall Scores at the top of recommendations
+            st.markdown("### 📊 Overall Performance")
+            col1, col2, col3 = st.columns(3)
+            with col1:
+                st.metric(
+                    "Communication Score",
+                    f"{communication_score:.1f}%",
+                    delta="Pass" if communication_score >= 70 else "Needs Improvement",
+                    delta_color="normal" if communication_score >= 70 else "inverse"
+                )
+            with col2:
+                st.metric(
+                    "Teaching Score",
+                    f"{teaching_score:.1f}%",
+                    delta="Pass" if teaching_score >= 70 else "Needs Improvement",
+                    delta_color="normal" if teaching_score >= 70 else "inverse"
+                )
+            with col3:
+                st.metric(
+                    "Overall Score",
+                    f"{overall_score:.1f}%",
+                    delta="Pass" if overall_score >= 70 else "Needs Improvement",
+                    delta_color="normal" if overall_score >= 70 else "inverse"
+                )
+            # Continue with existing recommendations display
             with st.expander("💡 Areas for Improvement", expanded=True):
                 improvements = recommendations.get("improvements", [])
                 if isinstance(improvements, list):
     return missing
+def generate_pdf_report(evaluation_data: Dict[str, Any]) -> bytes:
+    """Generate a formatted PDF report from evaluation data"""
+    try:
+        from reportlab.lib import colors
+        from reportlab.lib.pagesizes import letter
+        from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
+        from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
+        from io import BytesIO
+        # Create PDF buffer
+        buffer = BytesIO()
+        doc = SimpleDocTemplate(buffer, pagesize=letter)
+        styles = getSampleStyleSheet()
+        story = []
+        # Title
+        title_style = ParagraphStyle(
+            'CustomTitle',
+            parent=styles['Heading1'],
+            fontSize=24,
+            spaceAfter=30
+        )
+        story.append(Paragraph("Mentor Demo Evaluation Report", title_style))
+        story.append(Spacer(1, 20))
+        # Communication Metrics Section
+        story.append(Paragraph("Communication Metrics", styles['Heading2']))
+        comm_metrics = evaluation_data.get("communication", {})
+        # Create tables for each metric category
+        for category in ["speed", "fluency", "flow", "intonation", "energy"]:
+            if category in comm_metrics:
+                metrics = comm_metrics[category]
+                story.append(Paragraph(category.title(), styles['Heading3']))
+                data = [[k.replace('_', ' ').title(), str(v)] for k, v in metrics.items()]
+                t = Table(data, colWidths=[200, 200])
+                t.setStyle(TableStyle([
+                    ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
+                    ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
+                    ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
+                    ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
+                    ('FONTSIZE', (0, 0), (-1, 0), 14),
+                    ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
+                    ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
+                    ('TEXTCOLOR', (0, 1), (-1, -1), colors.black),
+                    ('FONTNAME', (0, 1), (-1, -1), 'Helvetica'),
+                    ('FONTSIZE', (0, 1), (-1, -1), 12),
+                    ('GRID', (0, 0), (-1, -1), 1, colors.black)
+                ]))
+                story.append(t)
+                story.append(Spacer(1, 20))
+        # Teaching Analysis Section
+        story.append(Paragraph("Teaching Analysis", styles['Heading2']))
+        teaching_data = evaluation_data.get("teaching", {})
+        for assessment_type in ["Concept Assessment", "Code Assessment"]:
+            if assessment_type in teaching_data:
+                story.append(Paragraph(assessment_type, styles['Heading3']))
+                categories = teaching_data[assessment_type]
+                for category, details in categories.items():
+                    score = details.get("Score", 0)
+                    citations = details.get("Citations", [])
+                    data = [
+                        [category, "Score: " + ("Pass" if score == 1 else "Needs Improvement")],
+                        ["Citations:", ""]
+                    ] + [["-", citation] for citation in citations]
+                    t = Table(data, colWidths=[200, 300])
+                    t.setStyle(TableStyle([
+                        ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
+                        ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
+                        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
+                        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
+                        ('GRID', (0, 0), (-1, -1), 1, colors.black)
+                    ]))
+                    story.append(t)
+                    story.append(Spacer(1, 20))
+        # Recommendations Section
+        story.append(Paragraph("Recommendations", styles['Heading2']))
+        recommendations = evaluation_data.get("recommendations", {})
+        if "improvements" in recommendations:
+            story.append(Paragraph("Areas for Improvement:", styles['Heading3']))
+            for improvement in recommendations["improvements"]:
+                story.append(Paragraph("• " + improvement, styles['Normal']))
+        # Build PDF
+        doc.build(story)
+        pdf_data = buffer.getvalue()
+        buffer.close()
+        return pdf_data
+    except Exception as e:
+        logger.error(f"Error generating PDF report: {e}")
+        raise RuntimeError(f"Failed to generate PDF report: {str(e)}")
 def main():
     try:
         # Set page config must be the first Streamlit command
         # Add custom CSS for animations and styling
         st.markdown("""
             <style>
+                /* Shimmer animation keyframes */
+                @keyframes shimmer {
+                    0% {
+                        background-position: -1000px 0;
+                    }
+                    100% {
+                        background-position: 1000px 0;
+                    }
+                }
+                .title-shimmer {
+                    text-align: center;
+                    color: #1f77b4;
+                    position: relative;
+                    overflow: hidden;
+                    background: linear-gradient(
+                        90deg,
+                        rgba(255, 255, 255, 0) 0%,
+                        rgba(255, 255, 255, 0.8) 50%,
+                        rgba(255, 255, 255, 0) 100%
+                    );
+                    background-size: 1000px 100%;
+                    animation: shimmer 3s infinite linear;
+                }
+                /* Existing animations */
                 @keyframes fadeIn {
+                    from { opacity: 0; }
+                    to { opacity: 1; }
                 }
                 @keyframes slideIn {
                     100% { transform: scale(1); }
                 }
+                .fade-in {
+                    animation: fadeIn 1s ease-in;
                 }
+                .slide-in {
+                    animation: slideIn 0.5s ease-out;
                 }
+                .pulse {
+                    animation: pulse 2s infinite;
                 }
                 .metric-card {
+                    background-color: #f0f2f6;
                     border-radius: 10px;
+                    padding: 20px;
+                    margin: 10px 0;
+                    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+                    transition: transform 0.3s ease;
                 }
+                .metric-card:hover {
+                    transform: translateY(-5px);
                 }
                 .stButton>button {
                     transition: all 0.3s ease;
                 }
                 .stButton>button:hover {
+                    transform: scale(1.05);
                 }
+                .category-header {
+                    background: linear-gradient(90deg, #1f77b4, #2c3e50);
+                    color: white;
+                    padding: 10px;
+                    border-radius: 5px;
+                    margin: 10px 0;
                 }
+                .score-badge {
+                    padding: 5px 10px;
+                    border-radius: 15px;
+                    font-weight: bold;
                 }
+                .score-pass {
+                    background-color: #28a745;
+                    color: white;
                 }
+                .score-fail {
+                    background-color: #dc3545;
+                    color: white;
                 }
             </style>
             <div class="fade-in">
+                <h1 class="title-shimmer">
                     🎓 Mentor Demo Review System
                 </h1>
             </div>
         """, unsafe_allow_html=True)
+        # Sidebar with instructions and status
         with st.sidebar:
             st.markdown("""
+                <div class="slide-in">
+                    <h2>Instructions</h2>
+                    <ol>
+                        <li>Upload your teaching video</li>
+                        <li>Wait for the analysis</li>
+                        <li>Review the detailed feedback</li>
                         <li>Download the report</li>
                     </ol>
                 </div>
             """, unsafe_allow_html=True)
+            # Add file format information separately
+            st.markdown("**Supported formats:** MP4, AVI, MOV")
+            st.markdown("**Maximum file size:** 500MB")
+            # Create a placeholder for status updates in the sidebar
+            status_placeholder = st.empty()
+            status_placeholder.info("Upload a video to begin analysis")
         # Check dependencies with progress
         with st.status("Checking system requirements...") as status:
             progress_bar.progress(1.0)
             status.update(label="System requirements satisfied!", state="complete")
+        # Temporary: Add radio button for input type selection
+        input_type = st.radio(
+            "Select Input Type (Temporary Feature)",
+            ["Video Only", "Video + Transcript"],
+            help="Temporary feature: Choose to upload video only or video with transcript"
+        )
         uploaded_file = st.file_uploader(
+            "Upload Teaching Video",
             type=['mp4', 'avi', 'mov'],
             help="Upload your teaching video in MP4, AVI, or MOV format"
         )
+        # Temporary: Add transcript uploader if Video + Transcript is selected
+        uploaded_transcript = None
+        if input_type == "Video + Transcript":
+            uploaded_transcript = st.file_uploader(
+                "Upload Transcript (Optional)",
+                type=['txt'],
+                help="Upload your transcript in TXT format"
+            )
         if uploaded_file:
+            # Update status in sidebar
+            status_placeholder.info("Video uploaded, beginning processing...")
+            # Add a pulsing animation while processing
             st.markdown("""
+                <div class="pulse" style="text-align: center;">
+                    <h3>Processing your video...</h3>
                 </div>
             """, unsafe_allow_html=True)
             try:
                 # Save uploaded file with progress
                 with st.status("Saving uploaded file...") as status:
+                    # Update sidebar status
+                    status_placeholder.info("Saving uploaded file...")
                     progress_bar = st.progress(0)
                     # Save in chunks to show progress
                     status.update(label="File saved successfully!", state="complete")
                 # Validate file size
+                file_size = os.path.getsize(video_path) / (1024 * 1024 * 1024)  # Size in GB
+                if file_size > 2:
+                    st.error("File size exceeds 2GB limit. Please upload a smaller file.")
                     return
+                # Store evaluation results in session state
                 if 'evaluation_results' not in st.session_state:
+                    # Update sidebar status
+                    status_placeholder.info("Processing video and generating analysis...")
+                    # Process video only if results aren't already in session state
                     with st.spinner("Processing video"):
                         evaluator = MentorEvaluator()
+                        # Temporary: Handle transcript if provided
+                        if uploaded_transcript:
+                            transcript_text = uploaded_transcript.getvalue().decode('utf-8')
+                            # Extract audio features but skip transcription
+                            audio_features = evaluator.feature_extractor.extract_features(video_path)
+                            # Evaluate speech metrics
+                            speech_metrics = evaluator._evaluate_speech_metrics(
+                                transcript_text,
+                                audio_features
+                            )
+                            # Analyze content
+                            content_analysis = evaluator.content_analyzer.analyze_content(transcript_text)
+                            # Generate recommendations
+                            recommendations = evaluator.recommendation_generator.generate_recommendations(
+                                speech_metrics,
+                                content_analysis
+                            )
+                            # Combine results
+                            st.session_state.evaluation_results = {
+                                "communication": speech_metrics,
+                                "teaching": content_analysis,
+                                "recommendations": recommendations,
+                                "transcript": transcript_text
+                            }
+                        else:
+                            # Original flow: full video evaluation
+                            st.session_state.evaluation_results = evaluator.evaluate_video(video_path)
+                # Update sidebar status for completion
+                status_placeholder.success("Analysis complete! Review results below.")
+                # Display results using stored evaluation
+                st.success("Analysis complete!")
                 display_evaluation(st.session_state.evaluation_results)
+                # Add download options
+                col1, col2 = st.columns(2)
+                with col1:
+                    if st.download_button(
+                        "📥 Download JSON Report",
+                        json.dumps(st.session_state.evaluation_results, indent=2),
+                        "evaluation_report.json",
+                        "application/json",
+                        help="Download the raw evaluation data in JSON format"
+                    ):
+                        st.success("JSON report downloaded successfully!")
+                with col2:
+                    if st.download_button(
+                        "📄 Download Full Report (PDF)",
+                        generate_pdf_report(st.session_state.evaluation_results),
+                        "evaluation_report.pdf",
+                        "application/pdf",
+                        help="Download a formatted PDF report with detailed analysis"
+                    ):
+                        st.success("PDF report downloaded successfully!")
             except Exception as e:
+                # Update sidebar status for error
+                status_placeholder.error(f"Error during processing: {str(e)}")
                 st.error(f"Error during evaluation: {str(e)}")
             finally:
                 # Clean up temp files
                 if 'temp_dir' in locals():