lochn committed on
Commit
5aa1ddf
·
verified ·
1 Parent(s): 4fd1504

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +450 -0
app.py ADDED
@@ -0,0 +1,450 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Standard library
import functools
import os
import shutil
import subprocess
import tempfile
import time
from pathlib import Path
from typing import Dict, List, Optional

# Third-party
import gradio as gr
import spacy
import torch
from transformers import pipeline
13
+
14
# ——— spaCy setup for HF Spaces ———
def setup_spacy():
    """Load ``en_core_web_sm``, downloading it on first run.

    Returns the loaded pipeline, or None when the model can neither be
    loaded nor downloaded (the app then falls back to a non-spaCy path).
    """
    try:
        return spacy.load("en_core_web_sm")
    except OSError:
        # Model not installed yet — fall through to the download attempt.
        print("Downloading spaCy model...")
    try:
        from spacy.cli import download as spacy_download
        spacy_download("en_core_web_sm")
        return spacy.load("en_core_web_sm")
    except Exception as e:
        print(f"Failed to download spaCy model: {e}")
        return None
30
+
31
# Load the spaCy pipeline once at import time; None when loading/download failed.
nlp = setup_spacy()
32
+
33
+
34
def retry_on_rate_limit(func, max_retries=2, initial_delay=3, backoff=1.5):
    """Decorator that retries *func* when a rate-limit error is detected.

    An exception whose message contains "rate limit" or "429" triggers a
    retry with exponential backoff; any other exception is re-raised
    immediately, as is a rate-limit error on the final attempt.

    Args:
        func: Callable to wrap (used as a bare ``@retry_on_rate_limit``).
        max_retries: Total number of attempts before giving up.
        initial_delay: Seconds to sleep before the first retry.
        backoff: Multiplier applied to the delay after each retry.
    """
    @functools.wraps(func)  # fix: preserve the wrapped function's metadata
    def wrapper(*args, **kwargs):
        delay = initial_delay
        for attempt in range(max_retries):
            try:
                return func(*args, **kwargs)
            except Exception as e:
                message = str(e).lower()
                if "rate limit" in message or "429" in message:
                    if attempt < max_retries - 1:
                        print(f"Rate limit detected, retrying in {delay}s...")
                        time.sleep(delay)
                        delay *= backoff
                    else:
                        print("Maximum retries reached for rate limit.")
                        raise
                else:
                    # For non-rate-limit errors, raise immediately
                    raise
    return wrapper
53
+
54
+
55
def check_ffmpeg():
    """Return True when the ``ffmpeg`` binary runs successfully, else False."""
    try:
        subprocess.run(["ffmpeg", "-version"], capture_output=True, check=True)
    except (subprocess.CalledProcessError, FileNotFoundError):
        return False
    return True
62
+
63
+
64
def chunk_video(input_path: str, chunk_length: int = 180, output_dir: Optional[str] = None) -> List[Path]:
    """Split a video into fixed-length segments with ffmpeg.

    Args:
        input_path: Path of the source video.
        chunk_length: Target duration of each segment, in seconds.
        output_dir: Directory for the ``chunk_NNN.mp4`` files; a fresh
            temporary directory is created when omitted.
            (fix: annotated as Optional[str] — it was ``str = None``.)

    Returns:
        Sorted list of chunk paths, or an empty list on any failure
        (ffmpeg error, timeout, or unreadable input).
    """
    if output_dir is None:
        output_dir = tempfile.mkdtemp(prefix="chunks_")

    Path(output_dir).mkdir(exist_ok=True)
    output_pattern = os.path.join(output_dir, "chunk_%03d.mp4")

    try:
        cmd = [
            "ffmpeg", "-y", "-i", input_path,
            "-f", "segment", "-segment_time", str(chunk_length),
            # Stream copy: split without re-encoding for speed.
            "-reset_timestamps", "1", "-c", "copy",
            output_pattern
        ]
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)

        if result.returncode != 0:
            print(f"FFmpeg error: {result.stderr}")
            return []

        return sorted(Path(output_dir).glob("chunk_*.mp4"))
    except subprocess.TimeoutExpired:
        print("Video chunking timed out")
        return []
    except Exception as e:
        print(f"Error chunking video: {str(e)}")
        return []
92
+
93
+
94
def extract_audio(video_path: str, audio_path: str) -> bool:
    """Extract mono 16 kHz PCM audio from *video_path* into *audio_path*.

    Returns True on success, False on any ffmpeg failure or timeout.
    """
    cmd = [
        "ffmpeg", "-y", "-i", video_path,
        "-vn", "-c:a", "pcm_s16le", "-ar", "16000", "-ac", "1",
        "-t", "180",  # Limit to 3 minutes per chunk
        audio_path
    ]
    try:
        completed = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
    except subprocess.TimeoutExpired:
        print("Audio extraction timed out")
        return False
    except Exception as e:
        print(f"Error extracting audio: {str(e)}")
        return False

    if completed.returncode != 0:
        print(f"Audio extraction error: {completed.stderr}")
        return False
    return True
115
+
116
+
117
def extract_key_phrases(text: str, top_n: int = 5) -> List[str]:
    """Return up to *top_n* de-duplicated key phrases from *text*.

    Uses spaCy noun chunks when the module-level ``nlp`` model is loaded;
    otherwise falls back to picking long alphabetic words. Returns an
    empty list when spaCy processing fails.
    """
    if nlp is None:
        # Fallback: simple word extraction, first occurrence wins.
        candidates = [w for w in text.split() if len(w) > 4 and w.isalpha()]
        return list(dict.fromkeys(candidates))[:top_n]

    try:
        doc = nlp(text)
        seen = set()
        unique_phrases = []
        for chunk in doc.noun_chunks:
            phrase = chunk.text.strip()
            if len(phrase) <= 2:
                continue
            lowered = phrase.lower()
            if lowered not in seen:
                seen.add(lowered)
                unique_phrases.append(phrase)
        return unique_phrases[:top_n]
    except Exception as e:
        print(f"Error extracting key phrases: {str(e)}")
        return []
134
+
135
+
136
def extract_frame(video_path: str, timestamp: str, output_path: str) -> bool:
    """Grab a single frame from *video_path* at *timestamp* into *output_path*.

    Args:
        video_path: Source video file.
        timestamp: Seek position in any form ffmpeg's ``-ss`` accepts
            (e.g. ``"90"`` or ``"00:01:30"``).
        output_path: Destination image path.

    Returns:
        True when ffmpeg wrote the frame, False on any error or timeout.
    """
    cmd = ["ffmpeg", "-y", "-i", video_path, "-ss", timestamp, "-frames:v", "1", "-q:v", "2", output_path]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=15)
        return result.returncode == 0
    except Exception:
        # fix: Exception already covers TimeoutExpired; the original tuple
        # (subprocess.TimeoutExpired, Exception) was redundant.
        return False
147
+
148
+
149
@retry_on_rate_limit
def transcribe_audio(asr_pipeline, audio_path: str) -> List[Dict]:
    """Run the ASR pipeline on *audio_path* and normalize its output.

    The HF pipeline may return a dict (with or without "chunks") or a
    list of per-window dicts; every shape is normalized to a list of
    ``{"text": str, "timestamp": (start, end)}`` segments. A single
    placeholder segment is returned when transcription fails.
    """
    try:
        # Long-form transcription: 30-second windows with 5-second overlap.
        result = asr_pipeline(
            audio_path,
            return_timestamps=True,
            chunk_length_s=30,
            stride_length_s=5
        )

        if isinstance(result, dict):
            if "chunks" in result:
                return result["chunks"]
            # Single-result dict: synthesize one segment from its fields.
            text = result.get("text", "")
            timestamps = result.get("timestamps", [(0.0, 30.0)])
            if isinstance(timestamps, list) and len(timestamps) > 0:
                return [{"text": text, "timestamp": timestamps[0]}]
            return [{"text": text, "timestamp": (0.0, 30.0)}]

        if isinstance(result, list):
            # List of per-window dicts; default timestamps to 30 s strides.
            return [
                {
                    "text": item.get("text", ""),
                    "timestamp": item.get("timestamp", (i * 30, (i + 1) * 30)),
                }
                for i, item in enumerate(result)
                if isinstance(item, dict)
            ]

        return [{"text": str(result), "timestamp": (0.0, 30.0)}]

    except Exception as e:
        print(f"Transcription error: {str(e)}")
        return [{"text": "Transcription failed", "timestamp": (0.0, 30.0)}]
188
+
189
+
190
@retry_on_rate_limit
def summarize_text(summarizer_pipeline, text: str) -> str:
    """Summarize *text* with the given HF summarization pipeline.

    Empty input yields a fixed message; texts shorter than 10 words are
    returned unchanged; inputs longer than 500 words are truncated first.
    On any pipeline error the (possibly truncated) input is returned.
    """
    if not text.strip():
        return "No content to summarize."

    # Clean and prepare text
    text = text.strip()
    words = text.split()

    # Skip very short texts
    if len(words) < 10:
        return text  # Return original if too short

    # Truncate if too long
    if len(words) > 500:
        words = words[:500]
        text = " ".join(words)

    try:
        # Scale target lengths to the input actually sent to the model.
        # fix: previously computed from the pre-truncation word list, so
        # length targets could be based on >500 words after truncation.
        input_length = len(words)
        max_new_tokens = min(100, max(20, input_length // 3))
        min_length = min(15, max(5, input_length // 8))

        result = summarizer_pipeline(
            text,
            max_new_tokens=max_new_tokens,
            min_length=min_length,
            do_sample=False,
            early_stopping=True
        )

        if isinstance(result, list) and len(result) > 0:
            summary = result[0]["summary_text"].strip()
            return summary if summary else text
        return text

    except Exception as e:
        print(f"Summarization error: {str(e)}")
        return text  # Return original text if summarization fails
230
+
231
+
232
def format_timestamp(seconds: float) -> str:
    """Render a duration in seconds as a zero-padded ``MM:SS`` string."""
    mins, secs = int(seconds // 60), int(seconds % 60)
    return "{:02d}:{:02d}".format(mins, secs)
237
+
238
+
239
def run_pipeline(video_file: str, progress=gr.Progress()) -> List[Dict]:
    """Transcribe, summarize, and extract key phrases from a lecture video.

    Args:
        video_file: Path to the uploaded video file.
        progress: Gradio progress tracker (``gr.Progress()`` default is
            Gradio's documented injection pattern — intentionally a
            call-time default).

    Returns:
        A list of timeline-segment dicts (segment number, start/end time,
        text, summary, key phrases), or a single-element list containing
        an ``"error"`` key on failure.
    """
    if not video_file:
        return [{"error": "No video file provided"}]

    # Check if ffmpeg is available
    if not check_ffmpeg():
        return [{"error": "FFmpeg is not available in this environment"}]

    progress(0.1, desc="Initializing models...")

    # Initialize models with proper configuration
    try:
        # Configure Whisper with proper settings
        asr = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-tiny",  # Use tiny model for better compatibility
            device=0 if torch.cuda.is_available() else -1,
            model_kwargs={
                "attn_implementation": "eager"  # Fix attention implementation warning
            }
        )
        progress(0.2, desc="ASR model loaded...")

        # Configure BART with proper settings
        summarizer = pipeline(
            "summarization",
            model="facebook/bart-large-cnn",
            device=0 if torch.cuda.is_available() else -1
        )
        progress(0.3, desc="Summarization model loaded...")

    except Exception as e:
        return [{"error": f"Failed to load models: {str(e)}"}]

    # Create temporary directories
    temp_dir = tempfile.mkdtemp(prefix="lecture_capture_")
    chunks_dir = os.path.join(temp_dir, "chunks")
    frames_dir = os.path.join(temp_dir, "frames")

    try:
        Path(chunks_dir).mkdir(exist_ok=True)
        Path(frames_dir).mkdir(exist_ok=True)

        progress(0.4, desc="Processing video chunks...")

        # Process video with shorter chunks
        chunks = chunk_video(video_file, chunk_length=180, output_dir=chunks_dir)
        if not chunks:
            return [{"error": "No video chunks were created. Video may be corrupted or unsupported format."}]

        # Limit number of chunks for HF Spaces
        chunks = chunks[:5]  # Process max 5 chunks (15 minutes)

        progress(0.5, desc=f"Processing {len(chunks)} chunks...")

        # Process each chunk
        all_segments = []
        for i, chunk in enumerate(chunks):
            progress(0.5 + (0.3 * i / len(chunks)), desc=f"Processing chunk {i+1}/{len(chunks)}...")

            wav_path = str(chunk).replace(".mp4", ".wav")

            # Extract audio
            if not extract_audio(str(chunk), wav_path):
                print(f"Failed to extract audio from chunk {i}")
                continue

            # Transcribe with better error handling
            try:
                chunk_segments = transcribe_audio(asr, wav_path)

                # Calculate absolute timestamps
                chunk_start_time = i * 180  # 180 seconds per chunk

                for seg in chunk_segments:
                    timestamp = seg.get("timestamp", (0.0, 30.0))
                    if isinstance(timestamp, tuple) and len(timestamp) == 2:
                        start_time = chunk_start_time + timestamp[0]
                        end_time = chunk_start_time + timestamp[1]
                    else:
                        start_time = chunk_start_time
                        end_time = chunk_start_time + 30

                    text = seg.get("text", "").strip()
                    if text:  # Only add non-empty segments
                        all_segments.append({
                            "text": text,
                            "start": format_timestamp(start_time),
                            "end": format_timestamp(end_time),
                            "start_seconds": start_time,
                            "end_seconds": end_time
                        })

            except Exception as e:
                print(f"Error processing chunk {i}: {str(e)}")
                continue
            finally:
                # Clean up audio file immediately. fix: `finally` so cleanup
                # also runs when the except branch `continue`s, and a narrow
                # OSError instead of a bare `except:`.
                try:
                    os.remove(wav_path)
                except OSError:
                    pass

        if not all_segments:
            return [{"error": "No segments were successfully processed"}]

        progress(0.8, desc="Generating summaries and extracting key phrases...")

        # Sort segments by start time
        all_segments.sort(key=lambda x: x["start_seconds"])

        # Generate timeline (limit to 15 segments for HF Spaces)
        timeline = []
        for i, segment in enumerate(all_segments[:15]):
            segment_text = segment["text"]

            # Generate summary; fall back to the raw text on any error.
            try:
                summary = summarize_text(summarizer, segment_text) if len(segment_text.split()) > 5 else segment_text
            except Exception:
                summary = segment_text

            # Extract key phrases
            key_phrases = extract_key_phrases(segment_text) if segment_text else []

            timeline.append({
                "segment": i + 1,
                "start_time": segment["start"],
                "end_time": segment["end"],
                "text": segment_text,
                "summary": summary,
                "key_phrases": key_phrases
            })

        progress(1.0, desc="Processing complete!")
        return timeline

    except Exception as e:
        import traceback
        return [{"error": f"Pipeline failed: {str(e)}", "details": traceback.format_exc()}]

    finally:
        # Clean up temporary files
        try:
            shutil.rmtree(temp_dir)
        except Exception as e:
            print(f"Failed to clean up temp directory: {str(e)}")
387
+
388
+
389
# ——— Gradio UI optimized for HF Spaces ———
def create_interface():
    """Build and return the Gradio Blocks app.

    Layout: upload + tips column on the left, JSON timeline output on the
    right; the process button wires straight into run_pipeline.
    """
    with gr.Blocks(title="Lecture Capture AI Pipeline", theme=gr.themes.Soft()) as demo:
        # Header / usage notes shown above the two-column layout.
        gr.Markdown("""
        # 🎓 Lecture Capture AI Pipeline

        Upload a lecture video to automatically generate:
        - 📝 Transcription with timestamps
        - 📋 Summaries for each segment
        - 🔑 Key phrases extraction

        **Note**: Optimized for Hugging Face Spaces. Processing limited to 15 minutes of video.
        """)

        with gr.Row():
            # Left column: input widgets and user guidance.
            with gr.Column(scale=1):
                video_input = gr.Video(
                    label="📹 Upload Lecture Video",
                    height=300
                )

                process_btn = gr.Button(
                    "🚀 Process Video",
                    variant="primary",
                    size="lg"
                )

                gr.Markdown("""
                ### 💡 Tips:
                - Videos up to 15 minutes work best
                - Clear audio improves transcription quality
                - Processing takes 2-5 minutes
                - Supported formats: MP4, AVI, MOV
                """)

            # Right column: raw pipeline output as JSON.
            with gr.Column(scale=2):
                output_json = gr.JSON(
                    label="📊 Generated Timeline",
                    height=600
                )

        # Button click runs the whole pipeline; progress is reported via
        # the gr.Progress tracker inside run_pipeline.
        process_btn.click(
            fn=run_pipeline,
            inputs=[video_input],
            outputs=[output_json],
            show_progress=True
        )

        gr.Markdown("""
        ### 🔧 Technical Details:
        - Uses Whisper (tiny) for speech recognition
        - BART for text summarization
        - spaCy for key phrase extraction
        - Optimized for Hugging Face Spaces environment
        """)

    return demo
446
+
447
+
448
if __name__ == "__main__":
    # Script entry point (used by HF Spaces): build the UI and serve it.
    demo = create_interface()
    demo.launch()