lochn committed on
Commit
dc70ca4
·
verified ·
1 Parent(s): 21438ed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +314 -286
app.py CHANGED
@@ -5,12 +5,21 @@ import tempfile
5
  import shutil
6
  from pathlib import Path
7
  from typing import List, Dict, Optional
 
 
8
 
9
  import spacy
10
  import gradio as gr
11
  from transformers import pipeline
12
  import torch
13
 
 
 
 
 
 
 
 
14
  # β€”β€”β€” spaCy setup for HF Spaces β€”β€”β€”
15
  def setup_spacy():
16
  """Setup spaCy model with proper error handling for HF Spaces"""
@@ -18,94 +27,116 @@ def setup_spacy():
18
  nlp = spacy.load("en_core_web_sm")
19
  return nlp
20
  except OSError:
21
- print("Downloading spaCy model...")
22
- try:
23
- from spacy.cli import download as spacy_download
24
- spacy_download("en_core_web_sm")
25
- nlp = spacy.load("en_core_web_sm")
26
- return nlp
27
- except Exception as e:
28
- print(f"Failed to download spaCy model: {e}")
29
- return None
30
 
31
  nlp = setup_spacy()
32
 
33
 
34
- def retry_on_rate_limit(func, max_retries=2, initial_delay=3, backoff=1.5):
35
- def wrapper(*args, **kwargs):
36
- delay = initial_delay
37
- for attempt in range(max_retries):
38
- try:
39
- return func(*args, **kwargs)
40
- except Exception as e:
41
- if "rate limit" in str(e).lower() or "429" in str(e):
42
- if attempt < max_retries - 1:
43
- print(f"Rate limit detected, retrying in {delay}s...")
44
- time.sleep(delay)
45
- delay *= backoff
46
- else:
47
- print("Maximum retries reached for rate limit.")
48
- raise
49
- else:
50
- # For non-rate-limit errors, raise immediately
51
- raise
52
- return wrapper
 
 
 
 
 
53
 
54
 
55
  def check_ffmpeg():
56
- """Check if ffmpeg is available in HF Spaces"""
57
  try:
58
- subprocess.run(["ffmpeg", "-version"], capture_output=True, check=True)
59
- return True
60
- except (subprocess.CalledProcessError, FileNotFoundError):
61
  return False
62
 
63
 
64
- def chunk_video(input_path: str, chunk_length: int = 180, output_dir: str = None) -> List[Path]:
65
- """Chunk video with temporary directory handling for HF Spaces"""
66
- if output_dir is None:
67
- output_dir = tempfile.mkdtemp(prefix="chunks_")
68
-
69
- Path(output_dir).mkdir(exist_ok=True)
70
- output_pattern = os.path.join(output_dir, "chunk_%03d.mp4")
71
-
72
  try:
73
  cmd = [
74
- "ffmpeg", "-y", "-i", input_path,
75
- "-f", "segment", "-segment_time", str(chunk_length),
76
- "-reset_timestamps", "1", "-c", "copy",
77
- output_pattern
78
  ]
79
- result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
80
 
81
- if result.returncode != 0:
82
- print(f"FFmpeg error: {result.stderr}")
83
- return []
84
 
85
- return sorted(Path(output_dir).glob("chunk_*.mp4"))
86
- except subprocess.TimeoutExpired:
87
- print("Video chunking timed out")
88
- return []
 
 
 
 
 
 
 
 
 
89
  except Exception as e:
90
- print(f"Error chunking video: {str(e)}")
91
- return []
 
92
 
93
 
94
- def extract_audio(video_path: str, audio_path: str) -> bool:
95
- """Extract audio with better error handling for HF Spaces"""
96
  try:
97
  cmd = [
98
- "ffmpeg", "-y", "-i", video_path,
99
- "-vn", "-c:a", "pcm_s16le", "-ar", "16000", "-ac", "1",
100
- "-t", "180", # Limit to 3 minutes per chunk
 
 
 
 
 
 
101
  audio_path
102
  ]
103
- result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
104
 
105
- if result.returncode != 0:
106
- print(f"Audio extraction error: {result.stderr}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  return False
108
- return True
109
  except subprocess.TimeoutExpired:
110
  print("Audio extraction timed out")
111
  return False
@@ -114,119 +145,83 @@ def extract_audio(video_path: str, audio_path: str) -> bool:
114
  return False
115
 
116
 
117
- def extract_key_phrases(text: str, top_n: int = 5) -> List[str]:
118
- """Extract key phrases with fallback if spaCy is not available"""
119
- if nlp is None:
120
- # Fallback: simple word extraction
121
- words = text.split()
122
- key_words = [w for w in words if len(w) > 4 and w.isalpha()]
123
- return list(dict.fromkeys(key_words))[:top_n]
124
-
125
- try:
126
- doc = nlp(text)
127
- phrases = [chunk.text.strip() for chunk in doc.noun_chunks if len(chunk.text.strip()) > 2]
128
- seen = set()
129
- unique_phrases = [p for p in phrases if not (p.lower() in seen or seen.add(p.lower()))]
130
- return unique_phrases[:top_n]
131
- except Exception as e:
132
- print(f"Error extracting key phrases: {str(e)}")
133
- return []
134
-
135
-
136
- def extract_frame(video_path: str, timestamp: str, output_path: str) -> bool:
137
- """Extract frame with timeout for HF Spaces"""
138
  try:
139
- cmd = ["ffmpeg", "-y", "-i", video_path, "-ss", timestamp, "-frames:v", "1", "-q:v", "2", output_path]
140
- result = subprocess.run(cmd, capture_output=True, text=True, timeout=15)
141
 
142
- if result.returncode != 0:
143
- return False
144
- return True
145
- except (subprocess.TimeoutExpired, Exception):
146
- return False
147
-
148
-
149
- @retry_on_rate_limit
150
- def transcribe_audio(asr_pipeline, audio_path: str) -> List[Dict]:
151
- """Transcribe audio with improved error handling"""
152
- try:
153
- # Use the pipeline with proper parameters
154
- result = asr_pipeline(
155
- audio_path,
156
- return_timestamps=True,
157
- chunk_length_s=30,
158
- stride_length_s=5
159
- )
160
 
161
- if isinstance(result, dict):
162
- if "chunks" in result:
163
- return result["chunks"]
164
- else:
165
- # Handle single result
166
- text = result.get("text", "")
167
- timestamps = result.get("timestamps", [(0.0, 30.0)])
168
- if isinstance(timestamps, list) and len(timestamps) > 0:
169
- return [{"text": text, "timestamp": timestamps[0]}]
170
- else:
171
- return [{"text": text, "timestamp": (0.0, 30.0)}]
172
- elif isinstance(result, list):
173
- # Handle list of results
174
  segments = []
175
- for i, item in enumerate(result):
176
- if isinstance(item, dict):
177
- segments.append({
178
- "text": item.get("text", ""),
179
- "timestamp": item.get("timestamp", (i*30, (i+1)*30))
180
- })
181
  return segments
 
 
182
  else:
183
- return [{"text": str(result), "timestamp": (0.0, 30.0)}]
184
 
185
  except Exception as e:
186
  print(f"Transcription error: {str(e)}")
187
- return [{"text": "Transcription failed", "timestamp": (0.0, 30.0)}]
188
 
189
 
190
- @retry_on_rate_limit
191
- def summarize_text(summarizer_pipeline, text: str) -> str:
192
- """Summarize text with proper length handling"""
193
- if not text.strip():
194
- return "No content to summarize."
195
 
196
- # Clean and prepare text
197
- text = text.strip()
198
  words = text.split()
 
 
 
 
 
 
 
 
 
 
 
 
199
 
200
- # Skip very short texts
201
- if len(words) < 10:
202
- return text # Return original if too short
 
 
 
 
203
 
204
- # Truncate if too long
205
- if len(words) > 500:
206
- text = " ".join(words[:500])
207
 
208
- try:
209
- # Calculate appropriate lengths
210
- input_length = len(words)
211
- max_new_tokens = min(100, max(20, input_length // 3))
212
- min_length = min(15, max(5, input_length // 8))
213
-
214
- result = summarizer_pipeline(
215
- text,
216
- max_new_tokens=max_new_tokens,
217
- min_length=min_length,
218
- do_sample=False,
219
- early_stopping=True
220
- )
221
-
222
- if isinstance(result, list) and len(result) > 0:
223
- summary = result[0]["summary_text"].strip()
224
- return summary if summary else text
225
  return text
226
-
227
- except Exception as e:
228
- print(f"Summarization error: {str(e)}")
229
- return text # Return original text if summarization fails
 
 
 
230
 
231
 
232
  def format_timestamp(seconds: float) -> str:
@@ -236,168 +231,201 @@ def format_timestamp(seconds: float) -> str:
236
  return f"{minutes:02d}:{remaining_seconds:02d}"
237
 
238
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
  def run_pipeline(video_file: str, progress=gr.Progress()) -> List[Dict]:
240
- """Main pipeline function optimized for HF Spaces"""
241
  if not video_file:
242
  return [{"error": "No video file provided"}]
243
 
 
 
244
  # Check if ffmpeg is available
245
  if not check_ffmpeg():
246
  return [{"error": "FFmpeg is not available in this environment"}]
247
 
248
- progress(0.1, desc="Initializing models...")
249
 
250
- # Initialize models with proper configuration
251
- try:
252
- # Configure Whisper with proper settings
253
- asr = pipeline(
254
- "automatic-speech-recognition",
255
- model="openai/whisper-tiny", # Use tiny model for better compatibility
256
- device=0 if torch.cuda.is_available() else -1,
257
- model_kwargs={
258
- "attn_implementation": "eager" # Fix attention implementation warning
259
- }
260
- )
261
- progress(0.2, desc="ASR model loaded...")
262
-
263
- # Configure BART with proper settings
264
- summarizer = pipeline(
265
- "summarization",
266
- model="facebook/bart-large-cnn",
267
- device=0 if torch.cuda.is_available() else -1
268
- )
269
- progress(0.3, desc="Summarization model loaded...")
270
-
271
- except Exception as e:
272
- return [{"error": f"Failed to load models: {str(e)}"}]
273
-
274
- # Create temporary directories
275
  temp_dir = tempfile.mkdtemp(prefix="lecture_capture_")
276
- chunks_dir = os.path.join(temp_dir, "chunks")
277
- frames_dir = os.path.join(temp_dir, "frames")
278
 
279
  try:
280
- Path(chunks_dir).mkdir(exist_ok=True)
281
- Path(frames_dir).mkdir(exist_ok=True)
282
-
283
- progress(0.4, desc="Processing video chunks...")
284
-
285
- # Process video with shorter chunks
286
- chunks = chunk_video(video_file, chunk_length=180, output_dir=chunks_dir)
287
- if not chunks:
288
- return [{"error": "No video chunks were created. Video may be corrupted or unsupported format."}]
289
-
290
- # Limit number of chunks for HF Spaces
291
- chunks = chunks[:5] # Process max 5 chunks (15 minutes)
292
-
293
- progress(0.5, desc=f"Processing {len(chunks)} chunks...")
294
 
295
- # Process each chunk
296
- all_segments = []
297
- for i, chunk in enumerate(chunks):
298
- progress(0.5 + (0.3 * i / len(chunks)), desc=f"Processing chunk {i+1}/{len(chunks)}...")
299
 
300
- wav_path = str(chunk).replace(".mp4", ".wav")
301
-
302
- # Extract audio
303
- if not extract_audio(str(chunk), wav_path):
304
- print(f"Failed to extract audio from chunk {i}")
305
- continue
306
-
307
- # Transcribe with better error handling
308
- try:
309
- chunk_segments = transcribe_audio(asr, wav_path)
310
-
311
- # Calculate absolute timestamps
312
- chunk_start_time = i * 180 # 180 seconds per chunk
313
-
314
- for seg in chunk_segments:
315
- timestamp = seg.get("timestamp", (0.0, 30.0))
316
- if isinstance(timestamp, tuple) and len(timestamp) == 2:
317
- start_time = chunk_start_time + timestamp[0]
318
- end_time = chunk_start_time + timestamp[1]
319
- else:
320
- start_time = chunk_start_time
321
- end_time = chunk_start_time + 30
322
-
323
- text = seg.get("text", "").strip()
324
- if text: # Only add non-empty segments
325
- all_segments.append({
326
- "text": text,
327
- "start": format_timestamp(start_time),
328
- "end": format_timestamp(end_time),
329
- "start_seconds": start_time,
330
- "end_seconds": end_time
331
- })
332
-
333
- except Exception as e:
334
- print(f"Error processing chunk {i}: {str(e)}")
335
- continue
336
 
337
- # Clean up audio file immediately
338
- try:
339
- os.remove(wav_path)
340
- except:
341
- pass
342
-
343
- if not all_segments:
344
- return [{"error": "No segments were successfully processed"}]
345
-
346
- progress(0.8, desc="Generating summaries and extracting key phrases...")
347
 
348
- # Sort segments by start time
349
- all_segments.sort(key=lambda x: x["start_seconds"])
350
 
351
- # Generate timeline (limit to 15 segments for HF Spaces)
352
  timeline = []
353
- for i, segment in enumerate(all_segments[:15]):
354
- segment_text = segment["text"]
 
 
 
355
 
356
- # Generate summary
357
  try:
358
- summary = summarize_text(summarizer, segment_text) if len(segment_text.split()) > 5 else segment_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
359
  except Exception as e:
360
- summary = segment_text
361
-
362
- # Extract key phrases
363
- key_phrases = extract_key_phrases(segment_text) if segment_text else []
364
-
365
- timeline.append({
366
- "segment": i + 1,
367
- "start_time": segment["start"],
368
- "end_time": segment["end"],
369
- "text": segment_text,
370
- "summary": summary,
371
- "key_phrases": key_phrases
372
- })
373
 
374
  progress(1.0, desc="Processing complete!")
 
 
 
 
375
  return timeline
376
-
377
  except Exception as e:
378
  import traceback
379
- return [{"error": f"Pipeline failed: {str(e)}", "details": traceback.format_exc()}]
 
 
380
 
381
  finally:
382
  # Clean up temporary files
383
  try:
384
  shutil.rmtree(temp_dir)
 
385
  except Exception as e:
386
  print(f"Failed to clean up temp directory: {str(e)}")
387
 
388
 
389
- # β€”β€”β€” Gradio UI optimized for HF Spaces β€”β€”β€”
390
  def create_interface():
391
  with gr.Blocks(title="Lecture Capture AI Pipeline", theme=gr.themes.Soft()) as demo:
392
  gr.Markdown("""
393
- # πŸŽ“ Lecture Capture AI Pipeline
394
 
395
  Upload a lecture video to automatically generate:
396
  - πŸ“ Transcription with timestamps
397
  - πŸ“‹ Summaries for each segment
398
  - πŸ”‘ Key phrases extraction
399
 
400
- **Note**: Optimized for Hugging Face Spaces. Processing limited to 15 minutes of video.
401
  """)
402
 
403
  with gr.Row():
@@ -415,10 +443,10 @@ def create_interface():
415
 
416
  gr.Markdown("""
417
  ### πŸ’‘ Tips:
418
- - Videos up to 15 minutes work best
419
- - Clear audio improves transcription quality
420
  - Processing takes 2-5 minutes
421
- - Supported formats: MP4, AVI, MOV
422
  """)
423
 
424
  with gr.Column(scale=2):
@@ -435,11 +463,11 @@ def create_interface():
435
  )
436
 
437
  gr.Markdown("""
438
- ### πŸ”§ Technical Details:
439
- - Uses Whisper (tiny) for speech recognition
440
- - BART for text summarization
441
- - spaCy for key phrase extraction
442
- - Optimized for Hugging Face Spaces environment
443
  """)
444
 
445
  return demo
@@ -447,4 +475,4 @@ def create_interface():
447
 
448
  if __name__ == "__main__":
449
  demo = create_interface()
450
- demo.launch()
 
5
  import shutil
6
  from pathlib import Path
7
  from typing import List, Dict, Optional
8
+ import threading
9
+ import signal
10
 
11
  import spacy
12
  import gradio as gr
13
  from transformers import pipeline
14
  import torch
15
 
16
# Global timeout handler
class TimeoutError(Exception):
    """Raised when a guarded operation exceeds its time budget.

    NOTE(review): this shadows the builtin ``TimeoutError``; behavior is
    unchanged, but consider renaming (e.g. ``PipelineTimeoutError``).
    """
    pass


def timeout_handler(signum, frame):
    """signal-style handler signature; aborts the current operation."""
    raise TimeoutError("Operation timed out")
+
23
  # β€”β€”β€” spaCy setup for HF Spaces β€”β€”β€”
24
  def setup_spacy():
25
  """Setup spaCy model with proper error handling for HF Spaces"""
 
27
  nlp = spacy.load("en_core_web_sm")
28
  return nlp
29
  except OSError:
30
+ print("spaCy model not found, using fallback...")
31
+ return None
 
 
 
 
 
 
 
32
 
33
  nlp = setup_spacy()
34
 
35
 
36
def run_with_timeout(func, timeout_seconds, *args, **kwargs):
    """Run *func* on a worker thread, enforcing a wall-clock timeout.

    Returns func's result, re-raises any exception it raised, and raises
    TimeoutError when it does not finish within *timeout_seconds*.  The
    worker is a daemon thread, so on timeout it is abandoned (it keeps
    running in the background), not killed.
    """
    outcome = {"value": None, "error": None}

    def _invoke():
        try:
            outcome["value"] = func(*args, **kwargs)
        except Exception as exc:
            outcome["error"] = exc

    worker = threading.Thread(target=_invoke)
    worker.daemon = True
    worker.start()
    worker.join(timeout_seconds)

    if worker.is_alive():
        # Force cleanup if thread is still running
        raise TimeoutError(f"Function timed out after {timeout_seconds} seconds")

    if outcome["error"]:
        raise outcome["error"]

    return outcome["value"]
60
 
61
 
62
def check_ffmpeg():
    """Return True if an ffmpeg binary is available and runnable.

    Runs ``ffmpeg -version`` with a 10s timeout and checks the exit code.
    """
    try:
        result = subprocess.run(["ffmpeg", "-version"], capture_output=True, timeout=10)
        return result.returncode == 0
    except (subprocess.SubprocessError, FileNotFoundError, OSError):
        # ffmpeg missing, not executable, or the probe timed out.  The
        # original bare ``except:`` would also have swallowed
        # KeyboardInterrupt/SystemExit, which should propagate.
        return False
69
 
70
 
71
def get_video_info(video_path: str) -> Dict:
    """Get video information using ffprobe.

    Returns a dict with 'duration', 'has_video', 'has_audio' (plus codec
    names on success); falls back to a zeroed dict on any failure.
    """
    try:
        probe_cmd = [
            "ffprobe", "-v", "quiet", "-print_format", "json", "-show_format",
            "-show_streams", video_path,
        ]
        proc = subprocess.run(probe_cmd, capture_output=True, text=True, timeout=30)

        if proc.returncode == 0:
            import json
            info = json.loads(proc.stdout)

            # Split streams by type to detect video/audio presence.
            streams = info.get('streams', [])
            video_streams = [s for s in streams if s.get('codec_type') == 'video']
            audio_streams = [s for s in streams if s.get('codec_type') == 'audio']

            return {
                'duration': float(info.get('format', {}).get('duration', 0)),
                'has_video': len(video_streams) > 0,
                'has_audio': len(audio_streams) > 0,
                'video_codec': video_streams[0].get('codec_name') if video_streams else None,
                'audio_codec': audio_streams[0].get('codec_name') if audio_streams else None,
            }
    except Exception as e:
        print(f"Error getting video info: {e}")

    return {'duration': 0, 'has_video': False, 'has_audio': False}
101
 
102
 
103
+ def extract_audio_simple(video_path: str, audio_path: str, start_time: float = 0, duration: float = 180) -> bool:
104
+ """Extract audio with simpler approach and better error handling"""
105
  try:
106
  cmd = [
107
+ "ffmpeg", "-y",
108
+ "-ss", str(start_time), # Start time
109
+ "-i", video_path,
110
+ "-t", str(duration), # Duration
111
+ "-vn", # No video
112
+ "-acodec", "pcm_s16le", # Audio codec
113
+ "-ar", "16000", # Sample rate
114
+ "-ac", "1", # Mono
115
+ "-f", "wav", # Output format
116
  audio_path
117
  ]
 
118
 
119
+ print(f"Extracting audio: {' '.join(cmd)}")
120
+
121
+ result = subprocess.run(
122
+ cmd,
123
+ capture_output=True,
124
+ text=True,
125
+ timeout=60 # 1 minute timeout
126
+ )
127
+
128
+ if result.returncode == 0:
129
+ # Check if file was created and has content
130
+ if os.path.exists(audio_path) and os.path.getsize(audio_path) > 1000:
131
+ print(f"Audio extracted successfully: {os.path.getsize(audio_path)} bytes")
132
+ return True
133
+ else:
134
+ print("Audio file created but seems empty")
135
+ return False
136
+ else:
137
+ print(f"FFmpeg error: {result.stderr}")
138
  return False
139
+
140
  except subprocess.TimeoutExpired:
141
  print("Audio extraction timed out")
142
  return False
 
145
  return False
146
 
147
 
148
def transcribe_audio_simple(audio_path: str) -> List[Dict]:
    """Simple transcription without complex pipeline parameters.

    Loads the 'tiny' Whisper model directly and transcribes *audio_path*
    under a 2-minute timeout.  Always returns a list of
    {"text": ..., "timestamp": (start, end)} dicts, even on failure.
    """
    try:
        print(f"Starting transcription of {audio_path}")

        # Use a simpler approach: the openai-whisper package directly
        # instead of the transformers ASR pipeline.
        import whisper

        # Load smaller model
        model = whisper.load_model("tiny")

        # Transcribe with timeout
        def do_transcribe():
            return model.transcribe(audio_path, language="en")

        result = run_with_timeout(do_transcribe, 120)  # 2 minute timeout

        if result and "segments" in result:
            return [
                {
                    "text": seg.get("text", "").strip(),
                    "timestamp": (seg.get("start", 0), seg.get("end", 30)),
                }
                for seg in result["segments"]
            ]
        elif result and "text" in result:
            return [{"text": result["text"], "timestamp": (0, 30)}]
        else:
            return [{"text": "Transcription failed", "timestamp": (0, 30)}]

    except Exception as e:
        print(f"Transcription error: {str(e)}")
        return [{"text": f"Transcription failed: {str(e)}", "timestamp": (0, 30)}]
182
 
183
 
184
def extract_key_phrases_simple(text: str, top_n: int = 5) -> List[str]:
    """Simple key phrase extraction.

    Returns up to *top_n* distinct (case-insensitively) meaningful words:
    longer than 4 characters, alphabetic after punctuation stripping, and
    not in a small stopword set.  Order of first occurrence is preserved.
    """
    if not text:
        return []

    stopwords = {
        'this', 'that', 'with', 'have', 'will', 'from', 'they', 'been',
        'were', 'said', 'each', 'which', 'their', 'time', 'would', 'there'
    }

    # BUG FIX: strip punctuation BEFORE filtering.  The original tested
    # w.isalpha() on the raw token, so any word followed by punctuation
    # ("today.") was rejected even though the strip existed to clean it.
    key_words = []
    for raw in text.split():
        word = raw.strip('.,!?";')
        if len(word) > 4 and word.isalpha() and word.lower() not in stopwords:
            key_words.append(word)

    # Remove duplicates while preserving order
    seen = set()
    unique_words = [w for w in key_words if not (w.lower() in seen or seen.add(w.lower()))]

    return unique_words[:top_n]
205
+
206
+
207
def summarize_text_simple(text: str) -> str:
    """Simple text summarization.

    Heuristic, model-free: very short texts pass through unchanged; short
    paragraphs keep their first two sentences; longer ones keep the first,
    middle, and last sentence.
    """
    # Inputs under 10 words (or empty) are returned untouched.
    if not text or len(text.split()) < 10:
        return text

    # Simple approach: take first and last sentences, or middle part
    sentences = [part.strip() for part in text.split('.') if part.strip()]
    count = len(sentences)

    if count <= 2:
        return text
    if count <= 5:
        return '. '.join(sentences[:2]) + '.'

    # Take first, middle, and last sentences
    picks = [sentences[0], sentences[count // 2], sentences[-1]]
    return '. '.join(picks) + '.'
225
 
226
 
227
  def format_timestamp(seconds: float) -> str:
 
231
  return f"{minutes:02d}:{remaining_seconds:02d}"
232
 
233
 
234
def process_video_segment(video_path: str, start_time: float, duration: float, segment_id: int, temp_dir: str) -> Dict:
    """Process a single video segment.

    Extracts audio for [start_time, start_time+duration), transcribes it,
    then summarizes and pulls key phrases.  Always returns a timeline entry
    dict (failures are reported inside the entry, never raised).
    """

    def _entry(text, summary, key_phrases):
        # Every outcome shares the same shape; only the content differs.
        return {
            "segment": segment_id,
            "start_time": format_timestamp(start_time),
            "end_time": format_timestamp(start_time + duration),
            "text": text,
            "summary": summary,
            "key_phrases": key_phrases,
        }

    try:
        print(f"Processing segment {segment_id}: {start_time}s - {start_time + duration}s")

        audio_path = os.path.join(temp_dir, f"segment_{segment_id:03d}.wav")

        if not extract_audio_simple(video_path, audio_path, start_time, duration):
            return _entry("Audio extraction failed", "Failed to process this segment", [])

        # Transcribe, then merge all segment texts into one string.
        segments = transcribe_audio_simple(audio_path)
        full_text = " ".join([seg["text"] for seg in segments if seg["text"]])

        # Best-effort removal of the intermediate WAV file.
        try:
            os.remove(audio_path)
        except:  # NOTE(review): bare except kept to preserve behavior
            pass

        if not full_text.strip():
            return _entry("No speech detected", "No content in this segment", [])

        summary = summarize_text_simple(full_text)
        key_phrases = extract_key_phrases_simple(full_text)
        return _entry(full_text, summary, key_phrases)

    except Exception as e:
        print(f"Error processing segment {segment_id}: {str(e)}")
        return _entry(f"Processing failed: {str(e)}", "Error occurred during processing", [])
298
+
299
+
300
def run_pipeline(video_file: str, progress=gr.Progress()) -> List[Dict]:
    """Main pipeline function with better error handling and debugging.

    Probes the video, slices it into fixed-length segments, processes each
    one under a timeout, and returns a timeline of per-segment entries.
    Errors are returned as [{"error": ...}] rather than raised.
    """
    if not video_file:
        return [{"error": "No video file provided"}]

    print(f"Processing video: {video_file}")

    # Nothing below can work without ffmpeg — bail out early.
    if not check_ffmpeg():
        return [{"error": "FFmpeg is not available in this environment"}]

    progress(0.1, desc="Analyzing video...")

    video_info = get_video_info(video_file)
    print(f"Video info: {video_info}")

    if not video_info['has_audio']:
        return [{"error": "Video has no audio track"}]

    duration = video_info['duration']
    if duration == 0:
        return [{"error": "Could not determine video duration"}]

    # Limit processing time for HF Spaces
    max_duration = min(duration, 600)  # Max 10 minutes
    segment_length = 120  # 2 minutes per segment

    progress(0.2, desc=f"Video duration: {duration:.1f}s, processing {max_duration:.1f}s...")

    temp_dir = tempfile.mkdtemp(prefix="lecture_capture_")

    try:
        # Build the processing plan: contiguous segments covering max_duration.
        plan = []
        cursor = 0
        while cursor < max_duration:
            span = min(segment_length, max_duration - cursor)
            plan.append({
                'start_time': cursor,
                'duration': span,
                'segment_id': len(plan) + 1,
            })
            cursor += span

        print(f"Will process {len(plan)} segments")

        timeline = []
        for idx, spec in enumerate(plan):
            progress(
                0.3 + (0.6 * idx / len(plan)),
                desc=f"Processing segment {idx+1}/{len(plan)}..."
            )

            def _stub(text, summary):
                # Placeholder entry for a segment that could not be processed.
                return {
                    "segment": spec['segment_id'],
                    "start_time": format_timestamp(spec['start_time']),
                    "end_time": format_timestamp(spec['start_time'] + spec['duration']),
                    "text": text,
                    "summary": summary,
                    "key_phrases": []
                }

            try:
                timeline.append(run_with_timeout(
                    process_video_segment,
                    300,  # 5 minute timeout per segment
                    video_file,
                    spec['start_time'],
                    spec['duration'],
                    spec['segment_id'],
                    temp_dir
                ))
            except TimeoutError:
                print(f"Segment {idx+1} timed out")
                timeline.append(_stub("Processing timed out", "Segment processing exceeded time limit"))
            except Exception as e:
                print(f"Error processing segment {idx+1}: {str(e)}")
                timeline.append(_stub(f"Error: {str(e)}", "Processing failed"))

        progress(1.0, desc="Processing complete!")

        if not timeline:
            return [{"error": "No segments were successfully processed"}]

        return timeline

    except Exception as e:
        import traceback
        print(f"Pipeline error: {str(e)}")
        print(traceback.format_exc())
        return [{"error": f"Pipeline failed: {str(e)}"}]

    finally:
        # Clean up temporary files
        try:
            shutil.rmtree(temp_dir)
            print("Cleaned up temporary files")
        except Exception as e:
            print(f"Failed to clean up temp directory: {str(e)}")
415
 
416
 
417
+ # β€”β€”β€” Gradio UI β€”β€”β€”
418
  def create_interface():
419
  with gr.Blocks(title="Lecture Capture AI Pipeline", theme=gr.themes.Soft()) as demo:
420
  gr.Markdown("""
421
+ # πŸŽ“ Lecture Capture AI Pipeline (Debug Version)
422
 
423
  Upload a lecture video to automatically generate:
424
  - πŸ“ Transcription with timestamps
425
  - πŸ“‹ Summaries for each segment
426
  - πŸ”‘ Key phrases extraction
427
 
428
+ **Debug Features**: Enhanced error reporting and timeout handling
429
  """)
430
 
431
  with gr.Row():
 
443
 
444
  gr.Markdown("""
445
  ### πŸ’‘ Tips:
446
+ - Videos up to 10 minutes work best
447
+ - Clear audio improves results
448
  - Processing takes 2-5 minutes
449
+ - Check browser console for debug info
450
  """)
451
 
452
  with gr.Column(scale=2):
 
463
  )
464
 
465
  gr.Markdown("""
466
+ ### πŸ”§ Debug Info:
467
+ - Uses direct Whisper model loading
468
+ - Enhanced timeout handling
469
+ - Detailed error reporting
470
+ - Segment-by-segment processing
471
  """)
472
 
473
  return demo
 
475
 
476
if __name__ == "__main__":
    # debug=True surfaces tracebacks in the Spaces logs.
    create_interface().launch(debug=True)