Spaces:

lochn
/

LectureSummarizer

Sleeping

App Files Community

lochn committed on May 28, 2025

Commit

5b5fd29

verified ·

1 Parent(s): 386b12b

Update app.py

Browse files

Files changed (1) hide show

app.py +324 -251

app.py CHANGED Viewed

@@ -4,21 +4,22 @@ import time
 import tempfile
 import shutil
 from pathlib import Path
-from typing import List, Dict, Optional
-import threading
 import json
 import gradio as gr
-import torch
 import numpy as np
 # Try to import optional dependencies
 try:
     import whisper
     WHISPER_AVAILABLE = True
 except ImportError:
     WHISPER_AVAILABLE = False
-    print("Whisper not available, will use fallback")
 try:
     import spacy
@@ -26,67 +27,28 @@ try:
     try:
         nlp = spacy.load("en_core_web_sm")
         SPACY_AVAILABLE = True
     except OSError:
         SPACY_AVAILABLE = False
-        print("spaCy model not available, using fallback")
 except ImportError:
     SPACY_AVAILABLE = False
-    print("spaCy not available, using fallback")
 try:
     from transformers import pipeline
     TRANSFORMERS_AVAILABLE = True
 except ImportError:
     TRANSFORMERS_AVAILABLE = False
-    print("Transformers not available, using fallback")
-# Global timeout handler
-class TimeoutError(Exception):
-    pass
-def run_with_timeout(func, timeout_seconds, *args, **kwargs):
-    """Run a function with a timeout"""
-    result = [None]
-    exception = [None]
-    def target():
-        try:
-            result[0] = func(*args, **kwargs)
-        except Exception as e:
-            exception[0] = e
-    thread = threading.Thread(target=target)
-    thread.daemon = True
-    thread.start()
-    thread.join(timeout_seconds)
-    if thread.is_alive():
-        raise TimeoutError(f"Function timed out after {timeout_seconds} seconds")
-    if exception[0]:
-        raise exception[0]
-    return result[0]
-def check_dependencies():
-    """Check which dependencies are available"""
-    deps = {
-        'ffmpeg': check_ffmpeg(),
-        'whisper': WHISPER_AVAILABLE,
-        'spacy': SPACY_AVAILABLE,
-        'transformers': TRANSFORMERS_AVAILABLE
-    }
-    print(f"Available dependencies: {deps}")
-    return deps
 def check_ffmpeg():
     """Check if ffmpeg is available"""
     try:
-        result = subprocess.run(["ffmpeg", "-version"], capture_output=True, timeout=10)
         return result.returncode == 0
     except:
         return False
@@ -99,7 +61,7 @@ def get_video_info(video_path: str) -> Dict:
             "ffprobe", "-v", "quiet", "-print_format", "json", "-show_format",
             "-show_streams", video_path
         ]
-        result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
         if result.returncode == 0:
             info = json.loads(result.stdout)
@@ -141,12 +103,7 @@ def extract_audio_simple(video_path: str, audio_path: str, start_time: float = 0
         print(f"Extracting audio: {' '.join(cmd)}")
-        result = subprocess.run(
-            cmd,
-            capture_output=True,
-            text=True,
-            timeout=60
-        )
         if result.returncode == 0:
             if os.path.exists(audio_path) and os.path.getsize(audio_path) > 1000:
@@ -159,128 +116,123 @@ def extract_audio_simple(video_path: str, audio_path: str, start_time: float = 0
             print(f"FFmpeg error: {result.stderr}")
             return False
-    except subprocess.TimeoutExpired:
-        print("Audio extraction timed out")
-        return False
     except Exception as e:
         print(f"Error extracting audio: {str(e)}")
         return False
-def transcribe_audio_whisper(audio_path: str) -> List[Dict]:
-    """Transcribe using OpenAI Whisper"""
     try:
-        print(f"Starting Whisper transcription of {audio_path}")
         if not WHISPER_AVAILABLE:
-            return [{"text": "Whisper not available", "timestamp": (0, 30)}]
-        def do_transcribe():
-            model = whisper.load_model("tiny")
-            result = model.transcribe(audio_path, language="en")
-            return result
-        result = run_with_timeout(do_transcribe, 120)
-        if result and "segments" in result:
-            segments = []
-            for seg in result["segments"]:
-                segments.append({
-                    "text": seg.get("text", "").strip(),
-                    "timestamp": (seg.get("start", 0), seg.get("end", 30))
-                })
-            return segments
-        elif result and "text" in result:
-            return [{"text": result["text"], "timestamp": (0, 30)}]
         else:
-            return [{"text": "Transcription failed", "timestamp": (0, 30)}]
     except Exception as e:
         print(f"Whisper transcription error: {str(e)}")
-        return [{"text": f"Transcription failed: {str(e)}", "timestamp": (0, 30)}]
-def transcribe_audio_transformers(audio_path: str) -> List[Dict]:
-    """Transcribe using Transformers pipeline as fallback"""
     try:
-        print(f"Starting Transformers transcription of {audio_path}")
         if not TRANSFORMERS_AVAILABLE:
-            return [{"text": "Transformers not available", "timestamp": (0, 30)}]
-        def do_transcribe():
-            asr = pipeline(
-                "automatic-speech-recognition",
-                model="openai/whisper-tiny",
-                device=0 if torch.cuda.is_available() else -1
-            )
-            result = asr(audio_path, return_timestamps=True)
-            return result
-        result = run_with_timeout(do_transcribe, 120)
-        if isinstance(result, dict):
-            if "chunks" in result:
-                return result["chunks"]
-            else:
-                return [{"text": result.get("text", ""), "timestamp": (0, 30)}]
         else:
-            return [{"text": str(result), "timestamp": (0, 30)}]
     except Exception as e:
         print(f"Transformers transcription error: {str(e)}")
-        return [{"text": f"Transcription failed: {str(e)}", "timestamp": (0, 30)}]
-def transcribe_audio_fallback(audio_path: str) -> List[Dict]:
-    """Fallback transcription method"""
-    return [{"text": "Transcription not available - no speech recognition models loaded", "timestamp": (0, 30)}]
-def transcribe_audio(audio_path: str) -> List[Dict]:
-    """Main transcription function with fallbacks"""
     # Try Whisper first
     if WHISPER_AVAILABLE:
         try:
-            return transcribe_audio_whisper(audio_path)
         except Exception as e:
             print(f"Whisper failed: {e}")
     # Try Transformers as fallback
     if TRANSFORMERS_AVAILABLE:
         try:
-            return transcribe_audio_transformers(audio_path)
         except Exception as e:
             print(f"Transformers failed: {e}")
     # Use fallback
-    return transcribe_audio_fallback(audio_path)
-def extract_key_phrases_spacy(text: str, top_n: int = 5) -> List[str]:
-    """Extract key phrases using spaCy"""
-    if not SPACY_AVAILABLE or nlp is None:
-        return extract_key_phrases_simple(text, top_n)
-    try:
-        doc = nlp(text)
-        phrases = [chunk.text.strip() for chunk in doc.noun_chunks if len(chunk.text.strip()) > 2]
-        seen = set()
-        unique_phrases = [p for p in phrases if not (p.lower() in seen or seen.add(p.lower()))]
-        return unique_phrases[:top_n]
-    except Exception as e:
-        print(f"spaCy key phrase extraction failed: {e}")
-        return extract_key_phrases_simple(text, top_n)
 def extract_key_phrases_simple(text: str, top_n: int = 5) -> List[str]:
-    """Simple key phrase extraction fallback"""
     if not text:
         return []
     words = text.split()
     key_words = [
-        w.strip('.,!?";') for w in words
         if len(w) > 4 and w.isalpha() and w.lower() not in {
             'this', 'that', 'with', 'have', 'will', 'from', 'they', 'been',
             'were', 'said', 'each', 'which', 'their', 'time', 'would', 'there'
@@ -293,48 +245,8 @@ def extract_key_phrases_simple(text: str, top_n: int = 5) -> List[str]:
     return unique_words[:top_n]
-def extract_key_phrases(text: str, top_n: int = 5) -> List[str]:
-    """Main key phrase extraction with fallback"""
-    if SPACY_AVAILABLE:
-        return extract_key_phrases_spacy(text, top_n)
-    else:
-        return extract_key_phrases_simple(text, top_n)
-def summarize_text_transformers(text: str) -> str:
-    """Summarize using transformers"""
-    if not TRANSFORMERS_AVAILABLE or len(text.split()) < 10:
-        return summarize_text_simple(text)
-    try:
-        summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
-        words = text.split()
-        if len(words) > 500:
-            text = " ".join(words[:500])
-        input_length = len(words)
-        max_new_tokens = min(100, max(20, input_length // 3))
-        min_length = min(15, max(5, input_length // 8))
-        result = summarizer(
-            text,
-            max_new_tokens=max_new_tokens,
-            min_length=min_length,
-            do_sample=False
-        )
-        if isinstance(result, list) and len(result) > 0:
-            return result[0]["summary_text"].strip()
-        return text
-    except Exception as e:
-        print(f"Transformers summarization failed: {e}")
-        return summarize_text_simple(text)
 def summarize_text_simple(text: str) -> str:
-    """Simple text summarization fallback"""
     if not text or len(text.split()) < 10:
         return text
@@ -346,19 +258,12 @@ def summarize_text_simple(text: str) -> str:
     elif len(sentences) <= 5:
         return '. '.join(sentences[:2]) + '.'
     else:
         middle_idx = len(sentences) // 2
         summary_sentences = [sentences[0], sentences[middle_idx], sentences[-1]]
         return '. '.join(summary_sentences) + '.'
-def summarize_text(text: str) -> str:
-    """Main summarization function with fallback"""
-    if TRANSFORMERS_AVAILABLE:
-        return summarize_text_transformers(text)
-    else:
-        return summarize_text_simple(text)
 def format_timestamp(seconds: float) -> str:
     """Format seconds into MM:SS format"""
     minutes = int(seconds // 60)
@@ -371,46 +276,64 @@ def process_video_segment(video_path: str, start_time: float, duration: float, s
     try:
         print(f"Processing segment {segment_id}: {start_time}s - {start_time + duration}s")
         audio_path = os.path.join(temp_dir, f"segment_{segment_id:03d}.wav")
         if not extract_audio_simple(video_path, audio_path, start_time, duration):
             return {
                 "segment": segment_id,
                 "start_time": format_timestamp(start_time),
                 "end_time": format_timestamp(start_time + duration),
                 "text": "Audio extraction failed",
                 "summary": "Failed to process this segment",
-                "key_phrases": []
             }
-        segments = transcribe_audio(audio_path)
-        full_text = " ".join([seg["text"] for seg in segments if seg["text"]])
         try:
             os.remove(audio_path)
         except:
             pass
-        if not full_text.strip():
             return {
                 "segment": segment_id,
                 "start_time": format_timestamp(start_time),
                 "end_time": format_timestamp(start_time + duration),
-                "text": "No speech detected",
                 "summary": "No content in this segment",
-                "key_phrases": []
             }
-        summary = summarize_text(full_text)
-        key_phrases = extract_key_phrases(full_text)
         return {
             "segment": segment_id,
             "start_time": format_timestamp(start_time),
             "end_time": format_timestamp(start_time + duration),
-            "text": full_text,
             "summary": summary,
-            "key_phrases": key_phrases
         }
     except Exception as e:
@@ -419,47 +342,49 @@ def process_video_segment(video_path: str, start_time: float, duration: float, s
             "segment": segment_id,
             "start_time": format_timestamp(start_time),
             "end_time": format_timestamp(start_time + duration),
             "text": f"Processing failed: {str(e)}",
             "summary": "Error occurred during processing",
-            "key_phrases": []
         }
 def run_pipeline(video_file: str, progress=gr.Progress()) -> List[Dict]:
     """Main pipeline function"""
     if not video_file:
-        return [{"error": "No video file provided"}]
-    # Check dependencies
-    deps = check_dependencies()
-    if not deps['ffmpeg']:
-        return [{"error": "FFmpeg is not available in this environment"}]
-    if not (deps['whisper'] or deps['transformers']):
-        return [{"error": "No speech recognition models available. Please install whisper or transformers."}]
     print(f"Processing video: {video_file}")
     progress(0.1, desc="Analyzing video...")
     video_info = get_video_info(video_file)
     print(f"Video info: {video_info}")
     if not video_info['has_audio']:
-        return [{"error": "Video has no audio track"}]
     duration = video_info['duration']
     if duration == 0:
-        return [{"error": "Could not determine video duration"}]
     max_duration = min(duration, 600)  # Max 10 minutes
     segment_length = 120  # 2 minutes per segment
     progress(0.2, desc=f"Video duration: {duration:.1f}s, processing {max_duration:.1f}s...")
     temp_dir = tempfile.mkdtemp(prefix="lecture_capture_")
     try:
         segments_to_process = []
         current_time = 0
         segment_id = 1
@@ -479,6 +404,7 @@ def run_pipeline(video_file: str, progress=gr.Progress()) -> List[Dict]:
         print(f"Will process {len(segments_to_process)} segments")
         timeline = []
         for i, seg_info in enumerate(segments_to_process):
             progress(
@@ -487,9 +413,7 @@ def run_pipeline(video_file: str, progress=gr.Progress()) -> List[Dict]:
             )
             try:
-                result = run_with_timeout(
-                    process_video_segment,
-                    300,
                     video_file,
                     seg_info['start_time'],
                     seg_info['duration'],
@@ -498,66 +422,205 @@ def run_pipeline(video_file: str, progress=gr.Progress()) -> List[Dict]:
                 )
                 timeline.append(result)
-            except TimeoutError:
-                print(f"Segment {i+1} timed out")
-                timeline.append({
-                    "segment": seg_info['segment_id'],
-                    "start_time": format_timestamp(seg_info['start_time']),
-                    "end_time": format_timestamp(seg_info['start_time'] + seg_info['duration']),
-                    "text": "Processing timed out",
-                    "summary": "Segment processing exceeded time limit",
-                    "key_phrases": []
-                })
             except Exception as e:
                 print(f"Error processing segment {i+1}: {str(e)}")
                 timeline.append({
                     "segment": seg_info['segment_id'],
                     "start_time": format_timestamp(seg_info['start_time']),
                     "end_time": format_timestamp(seg_info['start_time'] + seg_info['duration']),
                     "text": f"Error: {str(e)}",
                     "summary": "Processing failed",
-                    "key_phrases": []
                 })
-        progress(1.0, desc="Processing complete!")
         if not timeline:
-            return [{"error": "No segments were successfully processed"}]
-        # Add dependency info to the result
-        timeline.insert(0, {
-            "info": "Processing completed",
-            "dependencies_used": {k: v for k, v in deps.items() if v},
-            "total_segments": len(timeline) - 1
-        })
-        return timeline
     except Exception as e:
         import traceback
         print(f"Pipeline error: {str(e)}")
         print(traceback.format_exc())
-        return [{"error": f"Pipeline failed: {str(e)}"}]
     finally:
-        try:
-            shutil.rmtree(temp_dir)
-            print("Cleaned up temporary files")
-        except Exception as e:
-            print(f"Failed to clean up temp directory: {str(e)}")
 def create_interface():
     with gr.Blocks(title="Lecture Capture AI Pipeline", theme=gr.themes.Soft()) as demo:
         gr.Markdown("""
-        # 🎓 Lecture Capture AI Pipeline (Robust Version)
         Upload a lecture video to automatically generate:
         - 📝 Transcription with timestamps
         - 📋 Summaries for each segment
         - 🔑 Key phrases extraction
-        **Features**: Automatic fallbacks, dependency checking, enhanced error handling
         """)
         with gr.Row():
@@ -573,46 +636,56 @@ def create_interface():
                     size="lg"
                 )
                 gr.Markdown("""
                 ### 💡 Tips:
                 - Videos up to 10 minutes work best
                 - Clear audio improves results
-                - Processing takes 2-5 minutes
-                - Automatic fallbacks if models unavailable
                 """)
             with gr.Column(scale=2):
-                output_json = gr.JSON(
-                    label="📊 Generated Timeline",
-                    height=600
-                )
         process_btn.click(
             fn=run_pipeline,
             inputs=[video_input],
-            outputs=[output_json],
             show_progress=True
         )
         gr.Markdown("""
         ### 🔧 Technical Details:
-        - Primary: OpenAI Whisper for transcription
-        - Fallback: Transformers pipeline
-        - Text processing: spaCy + simple fallbacks
-        - Automatic dependency detection
         """)
     return demo
 if __name__ == "__main__":
-    # Check dependencies on startup
-    deps = check_dependencies()
-    print("=== Dependency Check ===")
-    for dep, available in deps.items():
-        status = "✅" if available else "❌"
-        print(f"{status} {dep}: {available}")
-    print("========================")
     demo = create_interface()
     demo.launch(debug=True)

 import tempfile
 import shutil
 from pathlib import Path
 import json
+import datetime
+import threading
+from typing import List, Dict, Optional
 import gradio as gr
 import numpy as np
 # Try to import optional dependencies
 try:
     import whisper
     WHISPER_AVAILABLE = True
+    print("✅ Whisper available")
 except ImportError:
     WHISPER_AVAILABLE = False
+    print("❌ Whisper not available")
 try:
     import spacy
     try:
         nlp = spacy.load("en_core_web_sm")
         SPACY_AVAILABLE = True
+        print("✅ spaCy model available")
     except OSError:
         SPACY_AVAILABLE = False
+        print("❌ spaCy model not available")
 except ImportError:
     SPACY_AVAILABLE = False
+    print("❌ spaCy not available")
 try:
     from transformers import pipeline
+    import torch
     TRANSFORMERS_AVAILABLE = True
+    print("✅ Transformers available")
 except ImportError:
     TRANSFORMERS_AVAILABLE = False
+    print("❌ Transformers not available")
 def check_ffmpeg():
     """Check if ffmpeg is available"""
     try:
+        result = subprocess.run(["ffmpeg", "-version"], capture_output=True)
         return result.returncode == 0
     except:
         return False
             "ffprobe", "-v", "quiet", "-print_format", "json", "-show_format",
             "-show_streams", video_path
         ]
+        result = subprocess.run(cmd, capture_output=True, text=True)
         if result.returncode == 0:
             info = json.loads(result.stdout)
         print(f"Extracting audio: {' '.join(cmd)}")
+        result = subprocess.run(cmd, capture_output=True, text=True)
         if result.returncode == 0:
             if os.path.exists(audio_path) and os.path.getsize(audio_path) > 1000:
             print(f"FFmpeg error: {result.stderr}")
             return False
     except Exception as e:
         print(f"Error extracting audio: {str(e)}")
         return False
+def extract_frame(video_path: str, timestamp: float, output_path: str) -> bool:
+    """Extract frame from video at specific timestamp"""
     try:
+        cmd = [
+            "ffmpeg", "-y",
+            "-ss", str(timestamp),
+            "-i", video_path,
+            "-vframes", "1",
+            "-q:v", "2",
+            output_path
+        ]
+        result = subprocess.run(cmd, capture_output=True, text=True)
+        if result.returncode == 0 and os.path.exists(output_path):
+            return True
+        return False
+    except Exception as e:
+        print(f"Error extracting frame: {e}")
+        return False
+def transcribe_audio_whisper_simple(audio_path: str) -> str:
+    """Simplified Whisper transcription that just returns text"""
+    try:
         if not WHISPER_AVAILABLE:
+            return "Whisper not available"
+        print(f"Starting Whisper transcription of {audio_path}")
+        # Load the smallest model
+        model = whisper.load_model("tiny")
+        # Use faster settings
+        options = {
+            "language": "en",
+            "task": "transcribe",
+            "fp16": False,
+            "beam_size": 1
+        }
+        # Transcribe
+        result = model.transcribe(audio_path, **options)
+        if result and "text" in result:
+            return result["text"].strip()
         else:
+            return "Transcription failed"
     except Exception as e:
         print(f"Whisper transcription error: {str(e)}")
+        return f"Transcription error: {str(e)}"
+def transcribe_audio_transformers_simple(audio_path: str) -> str:
+    """Simplified Transformers transcription that just returns text"""
     try:
         if not TRANSFORMERS_AVAILABLE:
+            return "Transformers not available"
+        print(f"Starting Transformers transcription of {audio_path}")
+        # Use the smallest model with minimal settings
+        asr = pipeline(
+            "automatic-speech-recognition",
+            model="openai/whisper-tiny",
+            device=-1  # Force CPU
+        )
+        # Simple transcription
+        result = asr(audio_path)
+        if isinstance(result, dict) and "text" in result:
+            return result["text"].strip()
+        elif isinstance(result, str):
+            return result.strip()
         else:
+            return str(result)
     except Exception as e:
         print(f"Transformers transcription error: {str(e)}")
+        return f"Transcription error: {str(e)}"
+def transcribe_audio_simple(audio_path: str) -> str:
+    """Main transcription function that returns simple text"""
     # Try Whisper first
     if WHISPER_AVAILABLE:
         try:
+            return transcribe_audio_whisper_simple(audio_path)
         except Exception as e:
             print(f"Whisper failed: {e}")
     # Try Transformers as fallback
     if TRANSFORMERS_AVAILABLE:
         try:
+            return transcribe_audio_transformers_simple(audio_path)
         except Exception as e:
             print(f"Transformers failed: {e}")
     # Use fallback
+    return "Transcription not available - no speech recognition models loaded"
 def extract_key_phrases_simple(text: str, top_n: int = 5) -> List[str]:
+    """Simple key phrase extraction"""
     if not text:
         return []
     words = text.split()
     key_words = [
+        w.strip('.,!?";:()') for w in words
         if len(w) > 4 and w.isalpha() and w.lower() not in {
             'this', 'that', 'with', 'have', 'will', 'from', 'they', 'been',
             'were', 'said', 'each', 'which', 'their', 'time', 'would', 'there'
     return unique_words[:top_n]
 def summarize_text_simple(text: str) -> str:
+    """Simple text summarization"""
     if not text or len(text.split()) < 10:
         return text
     elif len(sentences) <= 5:
         return '. '.join(sentences[:2]) + '.'
     else:
+        # Take first, middle, and last sentences
         middle_idx = len(sentences) // 2
         summary_sentences = [sentences[0], sentences[middle_idx], sentences[-1]]
         return '. '.join(summary_sentences) + '.'
 def format_timestamp(seconds: float) -> str:
     """Format seconds into MM:SS format"""
     minutes = int(seconds // 60)
     try:
         print(f"Processing segment {segment_id}: {start_time}s - {start_time + duration}s")
+        # Create paths
         audio_path = os.path.join(temp_dir, f"segment_{segment_id:03d}.wav")
+        frame_path = os.path.join(temp_dir, f"frame_{segment_id:03d}.jpg")
+        # Extract audio for this segment
         if not extract_audio_simple(video_path, audio_path, start_time, duration):
             return {
                 "segment": segment_id,
                 "start_time": format_timestamp(start_time),
                 "end_time": format_timestamp(start_time + duration),
+                "start_seconds": start_time,
+                "end_seconds": start_time + duration,
                 "text": "Audio extraction failed",
                 "summary": "Failed to process this segment",
+                "key_phrases": [],
+                "frame": None
             }
+        # Extract a frame from the middle of the segment
+        frame_time = start_time + (duration / 2)
+        frame_extracted = extract_frame(video_path, frame_time, frame_path)
+        # Transcribe audio
+        text = transcribe_audio_simple(audio_path)
+        # Clean up audio file
         try:
             os.remove(audio_path)
         except:
             pass
+        if not text or text.startswith("Transcription"):
             return {
                 "segment": segment_id,
                 "start_time": format_timestamp(start_time),
                 "end_time": format_timestamp(start_time + duration),
+                "start_seconds": start_time,
+                "end_seconds": start_time + duration,
+                "text": text or "No speech detected",
                 "summary": "No content in this segment",
+                "key_phrases": [],
+                "frame": frame_path if frame_extracted else None
             }
+        # Generate summary and key phrases
+        summary = summarize_text_simple(text)
+        key_phrases = extract_key_phrases_simple(text)
         return {
             "segment": segment_id,
             "start_time": format_timestamp(start_time),
             "end_time": format_timestamp(start_time + duration),
+            "start_seconds": start_time,
+            "end_seconds": start_time + duration,
+            "text": text,
             "summary": summary,
+            "key_phrases": key_phrases,
+            "frame": frame_path if frame_extracted else None
         }
     except Exception as e:
             "segment": segment_id,
             "start_time": format_timestamp(start_time),
             "end_time": format_timestamp(start_time + duration),
+            "start_seconds": start_time,
+            "end_seconds": start_time + duration,
             "text": f"Processing failed: {str(e)}",
             "summary": "Error occurred during processing",
+            "key_phrases": [],
+            "frame": None
         }
 def run_pipeline(video_file: str, progress=gr.Progress()) -> List[Dict]:
     """Main pipeline function"""
     if not video_file:
+        return [], "No video file provided", None
+    # Check if ffmpeg is available
+    if not check_ffmpeg():
+        return [], "FFmpeg is not available in this environment", None
     print(f"Processing video: {video_file}")
     progress(0.1, desc="Analyzing video...")
+    # Get video information
     video_info = get_video_info(video_file)
     print(f"Video info: {video_info}")
     if not video_info['has_audio']:
+        return [], "Video has no audio track", None
     duration = video_info['duration']
     if duration == 0:
+        return [], "Could not determine video duration", None
+    # Limit processing time
     max_duration = min(duration, 600)  # Max 10 minutes
     segment_length = 120  # 2 minutes per segment
     progress(0.2, desc=f"Video duration: {duration:.1f}s, processing {max_duration:.1f}s...")
+    # Create temporary directory
     temp_dir = tempfile.mkdtemp(prefix="lecture_capture_")
     try:
+        # Calculate segments
         segments_to_process = []
         current_time = 0
         segment_id = 1
         print(f"Will process {len(segments_to_process)} segments")
+        # Process each segment
         timeline = []
         for i, seg_info in enumerate(segments_to_process):
             progress(
             )
             try:
+                result = process_video_segment(
                     video_file,
                     seg_info['start_time'],
                     seg_info['duration'],
                 )
                 timeline.append(result)
             except Exception as e:
                 print(f"Error processing segment {i+1}: {str(e)}")
                 timeline.append({
                     "segment": seg_info['segment_id'],
                     "start_time": format_timestamp(seg_info['start_time']),
                     "end_time": format_timestamp(seg_info['start_time'] + seg_info['duration']),
+                    "start_seconds": seg_info['start_time'],
+                    "end_seconds": seg_info['start_time'] + seg_info['duration'],
                     "text": f"Error: {str(e)}",
                     "summary": "Processing failed",
+                    "key_phrases": [],
+                    "frame": None
                 })
+        progress(0.9, desc="Generating visual timeline...")
         if not timeline:
+            return [], "No segments were successfully processed", None
+        # Generate HTML for visual timeline
+        html_timeline = generate_visual_timeline(timeline, video_file)
+        # Generate summary of the entire video
+        all_text = " ".join([segment["text"] for segment in timeline if not segment["text"].startswith("Error") and not segment["text"].startswith("Processing")])
+        video_summary = summarize_text_simple(all_text) if all_text else "No valid transcription available"
+        progress(1.0, desc="Processing complete!")
+        return timeline, html_timeline, video_summary
     except Exception as e:
         import traceback
         print(f"Pipeline error: {str(e)}")
         print(traceback.format_exc())
+        return [], f"Pipeline failed: {str(e)}", None
     finally:
+        # Don't delete temp_dir as we need the frames for display
+        # We'll clean it up at the end of the session
+        pass
# Stylesheet emitted once at the top of the rendered timeline.
_TIMELINE_STYLE = """
<style>
    .timeline-container {
        font-family: Arial, sans-serif;
        max-width: 100%;
        margin: 0 auto;
    }
    .timeline-segment {
        display: flex;
        margin-bottom: 20px;
        padding: 15px;
        border-radius: 8px;
        background-color: #f9f9f9;
        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    }
    .timeline-segment:nth-child(odd) {
        background-color: #f0f7ff;
    }
    .timeline-thumbnail {
        flex: 0 0 160px;
        margin-right: 15px;
    }
    .timeline-thumbnail img {
        width: 160px;
        height: 90px;
        object-fit: cover;
        border-radius: 4px;
    }
    .timeline-content {
        flex: 1;
    }
    .timeline-header {
        display: flex;
        justify-content: space-between;
        margin-bottom: 8px;
    }
    .timeline-timestamp {
        font-weight: bold;
        color: #555;
    }
    .timeline-summary {
        font-weight: bold;
        margin-bottom: 8px;
    }
    .timeline-text {
        margin-bottom: 8px;
        color: #333;
    }
    .timeline-tags {
        display: flex;
        flex-wrap: wrap;
        gap: 5px;
    }
    .timeline-tag {
        background-color: #e1ecf4;
        color: #39739d;
        padding: 2px 8px;
        border-radius: 12px;
        font-size: 12px;
    }
    .timeline-placeholder {
        background-color: #ddd;
        display: flex;
        align-items: center;
        justify-content: center;
        color: #666;
        font-size: 12px;
    }
    .timeline-error {
        color: #d32f2f;
        font-style: italic;
    }
</style>
"""

# A segment whose text starts with one of these is rendered as an error row.
_TIMELINE_ERROR_PREFIXES = ("Error", "Processing failed", "Transcription error")

_TIMELINE_NO_THUMBNAIL = (
    '<div class="timeline-placeholder" style="width:160px;height:90px;">'
    "No thumbnail</div>"
)


def _render_timeline_thumbnail(frame_path, escaped_alt):
    """Return an inline base64 ``<img>`` for *frame_path*, or a placeholder.

    *escaped_alt* must already be HTML-escaped; it is placed in the ``alt``
    attribute verbatim.
    """
    import base64

    if not frame_path:
        return _TIMELINE_NO_THUMBNAIL
    try:
        with open(frame_path, "rb") as img_file:
            img_data = base64.b64encode(img_file.read()).decode("ascii")
    except (OSError, TypeError, ValueError):
        # Missing/unreadable file or an unusable path value: thumbnails are
        # best-effort, so fall back to the placeholder instead of failing.
        return _TIMELINE_NO_THUMBNAIL
    return f'<img src="data:image/jpeg;base64,{img_data}" alt="{escaped_alt}">'


def _render_timeline_segment(segment):
    """Return the HTML for one timeline segment dict."""
    from html import escape

    # Every value below may come from transcription output, so it is escaped
    # before being interpolated into markup.
    segment_id = escape(str(segment.get("segment", "")))
    start_time = escape(str(segment.get("start_time", "")))
    end_time = escape(str(segment.get("end_time", "")))
    text = str(segment.get("text") or "")
    summary = str(segment.get("summary") or "")
    key_phrases = segment.get("key_phrases") or []

    parts = [
        '<div class="timeline-segment">',
        '<div class="timeline-thumbnail">',
        _render_timeline_thumbnail(segment.get("frame"), f"Frame at {start_time}"),
        "</div>",
        '<div class="timeline-content">',
        '<div class="timeline-header">',
        f'<div class="timeline-timestamp">Segment {segment_id}: '
        f"{start_time} - {end_time}</div>",
        "</div>",
    ]

    if text.startswith(_TIMELINE_ERROR_PREFIXES):
        parts.append(f'<div class="timeline-error">{escape(text)}</div>')
    else:
        parts.append(f'<div class="timeline-summary">{escape(summary)}</div>')
        parts.append(f'<div class="timeline-text">{escape(text)}</div>')
        if key_phrases:
            tags = "".join(
                f'<span class="timeline-tag">{escape(str(phrase))}</span>'
                for phrase in key_phrases
            )
            parts.append(f'<div class="timeline-tags">{tags}</div>')

    parts.append("</div>")  # .timeline-content
    parts.append("</div>")  # .timeline-segment
    return "\n".join(parts)


def generate_visual_timeline(timeline: List[Dict], video_path: str) -> str:
    """Generate HTML for the visual timeline.

    Args:
        timeline: Segment dicts. The keys read are ``segment``,
            ``start_time``, ``end_time``, ``text``, ``summary``,
            ``key_phrases`` and ``frame`` (path to a thumbnail image); all
            are optional. Dicts containing an ``info`` key are skipped.
        video_path: Unused; kept so existing callers keep working.

    Returns:
        An HTML fragment (stylesheet plus one block per segment), or a short
        "no data" paragraph when *timeline* is empty or ``None``.

    All segment values are HTML-escaped, so transcript text containing
    ``<``, ``>``, ``&`` or quotes cannot break or inject markup.
    """
    if not timeline:
        return "<p>No timeline data available</p>"

    parts = [_TIMELINE_STYLE, '<div class="timeline-container">']
    parts.extend(
        _render_timeline_segment(segment)
        for segment in timeline
        if "info" not in segment  # metadata entry, not a real segment
    )
    parts.append("</div>")
    return "\n".join(parts)
 def create_interface():
     """Build and return the Gradio Blocks UI for the lecture pipeline.

     The layout shows an intro, a video-summary textbox and tips in one
     column, and a tabbed view (visual timeline HTML / raw timeline JSON) in
     the other. Clicking the process button calls ``run_pipeline`` with the
     video input and writes its results to the JSON, HTML and summary
     components.

     Returns:
         The ``gr.Blocks`` instance; the caller is responsible for launching it.
     """
     with gr.Blocks(title="Lecture Capture AI Pipeline", theme=gr.themes.Soft()) as demo:
         gr.Markdown("""
+        # 🎓 Lecture Capture AI Pipeline (Visual Timeline)
         Upload a lecture video to automatically generate:
         - 📝 Transcription with timestamps
         - 📋 Summaries for each segment
         - 🔑 Key phrases extraction
+        - 🖼️ Visual timeline with thumbnails
         """)
         with gr.Row():
                     size="lg"
                 )
+                video_summary = gr.Textbox(
+                    label="📋 Video Summary",
+                    placeholder="Video summary will appear here after processing",
+                    lines=4
+                )
                 gr.Markdown("""
                 ### 💡 Tips:
                 - Videos up to 10 minutes work best
                 - Clear audio improves results
+                - Processing may take several minutes
                 """)
             with gr.Column(scale=2):
+                with gr.Tabs():
+                    with gr.TabItem("Visual Timeline"):
+                        timeline_html = gr.HTML(
+                            label="Visual Timeline",
+                            value="<p>Timeline will appear here after processing</p>"
+                        )
+                    with gr.TabItem("Raw Data"):
+                        timeline_json = gr.JSON(
+                            label="Timeline Data"
+                        )
         # Output order must match run_pipeline's return order:
         # (timeline data, timeline HTML, video summary).
         process_btn.click(
             fn=run_pipeline,
             inputs=[video_input],
+            outputs=[timeline_json, timeline_html, video_summary],
             show_progress=True
         )
         gr.Markdown("""
         ### 🔧 Technical Details:
+        - Uses OpenAI Whisper for transcription
+        - Simplified processing for better compatibility
+        - Visual timeline with thumbnails
+        - No timeouts to ensure processing completes
         """)
     return demo
 if __name__ == "__main__":
+    # Startup diagnostic only: print whether check_ffmpeg() reports FFmpeg; the app is launched either way
+    if check_ffmpeg():
+        print("✅ FFmpeg available")
+    else:
+        print("❌ FFmpeg not available")
     demo = create_interface()
     demo.launch(debug=True)