lochn committed on
Commit
8ae984c
·
verified ·
1 Parent(s): e1a91ad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -131
app.py CHANGED
@@ -1,11 +1,13 @@
1
  import os
2
  import json
3
  import subprocess
 
4
  from pathlib import Path
5
 
6
  import openai
7
  import spacy
8
  import gradio as gr
 
9
 
10
  # Load spaCy model for key-phrase extraction, downloading if missing
11
  try:
@@ -16,79 +18,58 @@ except OSError:
16
  nlp = spacy.load("en_core_web_sm")
17
 
18
 
19
- def chunk_video(input_path: str, chunk_length: int = 300, output_dir: str = "chunks") -> list[Path]:
20
  """
21
- Split input video into fixed-length chunks using ffmpeg CLI.
22
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  Path(output_dir).mkdir(exist_ok=True)
24
  output_pattern = os.path.join(output_dir, "chunk_%03d.mp4")
25
  cmd = [
26
- "ffmpeg",
27
- "-y", # overwrite existing files
28
- "-i", input_path,
29
- "-f", "segment",
30
- "-segment_time", str(chunk_length),
31
- "-reset_timestamps", "1",
32
  output_pattern
33
  ]
34
- try:
35
- subprocess.run(cmd, check=True, capture_output=True, text=True)
36
- except subprocess.CalledProcessError as e:
37
- print(f"Error during chunking: {e.stderr}")
38
- raise
39
  return sorted(Path(output_dir).glob("chunk_*.mp4"))
40
 
41
 
42
  def extract_audio(video_path: str, audio_path: str) -> None:
43
- """
44
- Extract mono, 16kHz PCM audio from video using ffmpeg CLI.
45
- """
46
  cmd = [
47
- "ffmpeg",
48
- "-y", # overwrite if exists
49
- "-i", video_path,
50
- "-vn", # disable video output
51
- "-c:a", "pcm_s16le", # audio codec
52
- "-ar", "16000", # sample rate
53
- "-ac", "1", # mono audio
54
  audio_path
55
  ]
56
- try:
57
- subprocess.run(cmd, check=True, capture_output=True, text=True)
58
- except subprocess.CalledProcessError as e:
59
- print(f"Error extracting audio: {e.stderr}")
60
- raise
61
 
62
 
 
63
  def transcribe_audio(audio_path: str) -> list[dict]:
64
- """
65
- Transcribe audio using OpenAI Whisper API.
66
- Returns list of segments with start, end, and text.
67
- """
68
- with open(audio_path, "rb") as audio_file:
69
- transcript = openai.Audio.transcribe(
70
- model="whisper-1",
71
- file=audio_file,
72
- response_format="verbose_json"
73
- )
74
- return transcript.get("segments", [])
75
-
76
-
77
- def segment_text(segments: list[dict]) -> list[str]:
78
- """
79
- Join segment texts and split into semantic blocks based on paragraph breaks.
80
- """
81
- full_text = "\n\n".join(seg.get("text", "") for seg in segments)
82
- return [block.strip() for block in full_text.split("\n\n") if block.strip()]
83
 
84
 
 
85
  def summarize_text(text: str) -> str:
86
- """
87
- Summarize a chunk of transcript via GPT-4.
88
- """
89
- prompt = (
90
- "Summarize the following lecture segment in 2-3 sentences:\n\n" + text
91
- )
92
  response = openai.ChatCompletion.create(
93
  model="gpt-4o",
94
  messages=[{"role": "user", "content": prompt}],
@@ -97,100 +78,65 @@ def summarize_text(text: str) -> str:
97
  return response.choices[0].message.content.strip()
98
 
99
 
100
- def extract_key_phrases(text: str, top_n: int = 5) -> list[str]:
101
- """
102
- Extract noun chunks as key phrases from text.
103
- """
 
 
104
  doc = nlp(text)
105
  phrases = [chunk.text for chunk in doc.noun_chunks]
106
  return list(dict.fromkeys(phrases))[:top_n]
107
 
108
 
109
  def extract_frame(video_path: str, timestamp: str, output_path: str) -> None:
110
- """
111
- Extract a single frame at given timestamp using ffmpeg CLI.
112
- """
113
- cmd = [
114
- "ffmpeg",
115
- "-y",
116
- "-i", video_path,
117
- "-ss", timestamp,
118
- "-frames:v", "1",
119
- output_path
120
- ]
121
- try:
122
- subprocess.run(cmd, check=True, capture_output=True, text=True)
123
- except subprocess.CalledProcessError as e:
124
- print(f"Error extracting frame: {e.stderr}")
125
- raise
126
 
127
 
128
  def run_pipeline(api_key: str, video_file: str) -> list[dict]:
129
- """
130
- Complete processing pipeline: chunk, audio, transcribe, summarize, key phrases, frames.
131
- Returns structured timeline entries.
132
- """
133
  openai.api_key = api_key
134
- # 1. Chunk video
135
  chunks = chunk_video(video_file)
136
-
137
- # 2. Extract audio & transcribe
138
- all_segments = []
139
- for chunk in chunks:
140
- wav_path = str(chunk).replace(".mp4", ".wav")
141
- extract_audio(str(chunk), wav_path)
142
- segs = transcribe_audio(wav_path)
143
- all_segments.extend(segs)
144
-
145
- # 3. Segment transcript
146
- transcript_blocks = segment_text(all_segments)
147
-
148
- # 4. Summarization & key-phrase extraction
149
- summaries = [summarize_text(b) for b in transcript_blocks]
150
- phrases_list = [extract_key_phrases(b) for b in transcript_blocks]
151
-
152
- # 5. Extract frames for each segment
153
- frame_dir = Path("frames")
154
- frame_dir.mkdir(exist_ok=True)
155
- frame_paths = []
156
- for seg in all_segments:
157
- ts = seg.get("start", "00:00:00.000")
158
- fname = f"frame_{ts.replace(':', '-')}.jpg"
159
- out = frame_dir / fname
160
- extract_frame(video_file, ts, str(out))
161
- frame_paths.append(str(out))
162
-
163
- # 6. Assemble timeline
164
  timeline = []
165
- for seg, summary, keys, frame in zip(all_segments, summaries, phrases_list, frame_paths):
166
  timeline.append({
167
- "start_time": seg.get("start"),
168
- "end_time": seg.get("end"),
169
- "summary": summary,
170
- "key_phrases": keys,
171
- "frame": frame
172
  })
173
  return timeline
174
 
175
- # Gradio UI for Hugging Face Spaces
176
  demo = gr.Blocks()
177
  with demo:
178
- gr.Markdown(
179
- "# Lecture Capture AI Pipeline\n"
180
- "Enter your OpenAI API key and upload a lecture video to generate a summarized timeline."
181
- )
182
- api_key_input = gr.Textbox(
183
- label="OpenAI API Key", type="password",
184
- placeholder="sk-…", lines=1
185
- )
186
- video_input = gr.Video(label="Lecture Video File")
187
- run_button = gr.Button("Process Video")
188
- output = gr.JSON(label="Generated Timeline")
189
- run_button.click(
190
- fn=run_pipeline,
191
- inputs=[api_key_input, video_input],
192
- outputs=output
193
- )
194
-
195
- if __name__ == "__main__":
196
  demo.launch()
 
1
  import os
2
  import json
3
  import subprocess
4
+ import time
5
  from pathlib import Path
6
 
7
  import openai
8
  import spacy
9
  import gradio as gr
10
+ from openai.error import RateLimitError
11
 
12
  # Load spaCy model for key-phrase extraction, downloading if missing
13
  try:
 
18
  nlp = spacy.load("en_core_web_sm")
19
 
20
 
21
def retry_on_rate_limit(func, max_retries=3, initial_delay=5, backoff=2):
    """
    Wrap *func* so OpenAI rate-limit errors are retried with exponential backoff.

    Parameters
    ----------
    func : callable
        The API-calling function to wrap (usable as a plain decorator).
    max_retries : int
        Total number of attempts before the last error is re-raised.
    initial_delay : int
        Seconds to sleep before the first retry.
    backoff : int
        Multiplier applied to the delay after each failed attempt.

    Returns
    -------
    callable
        A wrapper with the same signature; re-raises RateLimitError once
        all attempts are exhausted.
    """
    import functools  # local import: keeps the decorator self-contained

    @functools.wraps(func)  # preserve the wrapped function's name/docstring
    def wrapper(*args, **kwargs):
        delay = initial_delay
        for attempt in range(max_retries):
            try:
                return func(*args, **kwargs)
            except RateLimitError:  # was `as e` but never used the binding
                if attempt < max_retries - 1:
                    print(f"Rate limit hit, retrying in {delay}s...")
                    time.sleep(delay)
                    delay *= backoff
                else:
                    print("Maximum retries reached. Aborting.")
                    raise
    return wrapper
39
+
40
+
41
def chunk_video(input_path: str, chunk_length: int = 300, output_dir: str = "chunks") -> list[Path]:
    """
    Split the input video into fixed-length chunks using the ffmpeg CLI.

    Parameters
    ----------
    input_path : str
        Path to the source video file.
    chunk_length : int
        Target length of each chunk in seconds (default 300 = 5 minutes).
    output_dir : str
        Directory that receives chunk_000.mp4, chunk_001.mp4, ...

    Returns
    -------
    list[Path]
        Sorted paths of the produced chunk files.

    Raises
    ------
    subprocess.CalledProcessError
        If ffmpeg exits non-zero; its stderr is printed first.
    """
    # parents=True so a nested output_dir (e.g. "out/chunks") also works
    Path(output_dir).mkdir(parents=True, exist_ok=True)
    output_pattern = os.path.join(output_dir, "chunk_%03d.mp4")
    cmd = [
        "ffmpeg", "-y", "-i", input_path,
        "-f", "segment", "-segment_time", str(chunk_length), "-reset_timestamps", "1",
        output_pattern,
    ]
    try:
        # capture_output so ffmpeg's diagnostics survive into the exception
        subprocess.run(cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"Error during chunking: {e.stderr}")
        raise
    return sorted(Path(output_dir).glob("chunk_*.mp4"))
51
 
52
 
53
def extract_audio(video_path: str, audio_path: str) -> None:
    """
    Extract mono, 16 kHz, 16-bit PCM audio from a video via the ffmpeg CLI.

    Parameters
    ----------
    video_path : str
        Path to the input video.
    audio_path : str
        Destination WAV path (overwritten if it exists).

    Raises
    ------
    subprocess.CalledProcessError
        If ffmpeg exits non-zero; its stderr is printed first.
    """
    cmd = [
        "ffmpeg", "-y", "-i", video_path,
        "-vn", "-c:a", "pcm_s16le", "-ar", "16000", "-ac", "1",
        audio_path,
    ]
    try:
        # capture_output so ffmpeg's diagnostics survive into the exception
        subprocess.run(cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"Error extracting audio: {e.stderr}")
        raise
 
 
 
 
60
 
61
 
62
@retry_on_rate_limit
def transcribe_audio(audio_path: str) -> list[dict]:
    """
    Transcribe an audio file with the OpenAI Whisper API.

    Returns the segment dicts (start, end, text) from the verbose_json
    response, or an empty list when none are present.
    """
    with open(audio_path, "rb") as audio_file:
        transcript = openai.Audio.transcribe(
            model="whisper-1",
            file=audio_file,
            response_format="verbose_json",
        )
    return transcript.get("segments", [])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
 
70
+ @retry_on_rate_limit
71
  def summarize_text(text: str) -> str:
72
+ prompt = f"Summarize the following lecture segment in 2-3 sentences:\n\n{text}"
 
 
 
 
 
73
  response = openai.ChatCompletion.create(
74
  model="gpt-4o",
75
  messages=[{"role": "user", "content": prompt}],
 
78
  return response.choices[0].message.content.strip()
79
 
80
 
81
def segment_text(segments: list[dict]) -> list[str]:
    """
    Join segment texts and split the result into semantic blocks.

    Segments are joined with blank lines, then re-split on paragraph
    breaks; empty/whitespace-only blocks are dropped.
    """
    joined = "\n\n".join(seg.get("text", "") for seg in segments)
    blocks = []
    for raw in joined.split("\n\n"):
        cleaned = raw.strip()
        if cleaned:
            blocks.append(cleaned)
    return blocks
84
+
85
+
86
def extract_key_phrases(text: str, top_n=5) -> list[str]:
    """
    Return up to *top_n* unique noun chunks from *text*, in order of
    first appearance, using the module-level spaCy pipeline.
    """
    unique = []
    for noun_chunk in nlp(text).noun_chunks:
        if noun_chunk.text not in unique:
            unique.append(noun_chunk.text)
    return unique[:top_n]
90
 
91
 
92
def extract_frame(video_path: str, timestamp: str, output_path: str) -> None:
    """
    Extract a single frame at the given timestamp using the ffmpeg CLI.

    Parameters
    ----------
    video_path : str
        Path to the source video.
    timestamp : str
        Seek position; str() coercion also tolerates numeric seconds
        (Whisper segment `start` values are floats).
    output_path : str
        Destination image path (overwritten if it exists).

    Raises
    ------
    subprocess.CalledProcessError
        If ffmpeg exits non-zero; its stderr is printed first.
    """
    cmd = [
        "ffmpeg", "-y", "-i", video_path,
        # every argv entry must be str -- a float here would make
        # subprocess.run raise TypeError before ffmpeg even starts
        "-ss", str(timestamp),
        "-frames:v", "1",
        output_path,
    ]
    try:
        subprocess.run(cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"Error extracting frame: {e.stderr}")
        raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
 
97
def run_pipeline(api_key: str, video_file: str) -> list[dict]:
    """
    Full processing pipeline: chunk video, extract & transcribe audio,
    segment the transcript, summarize, extract key phrases, grab frames.

    Parameters
    ----------
    api_key : str
        OpenAI API key used for the Whisper and chat-completion calls.
    video_file : str
        Path to the uploaded lecture video.

    Returns
    -------
    list[dict]
        Timeline entries with start/end time, summary, key phrases, and
        a frame image path.
    """
    openai.api_key = api_key

    # 1. Split the video into manageable chunks.
    chunks = chunk_video(video_file)

    # 2. Extract audio from each chunk and transcribe it.
    segments = []
    for chunk in chunks:
        wav = str(chunk.with_suffix('.wav'))  # safer than str.replace('.mp4', ...)
        extract_audio(str(chunk), wav)
        segments.extend(transcribe_audio(wav))

    # 3. Segment the transcript into semantic blocks.
    blocks = segment_text(segments)

    # 4. One summary and one key-phrase list per block.
    summaries = [summarize_text(b) for b in blocks]
    phrases = [extract_key_phrases(b) for b in blocks]

    # 5. One representative frame per transcript segment.
    Path('frames').mkdir(exist_ok=True)
    frames = []
    for seg in segments:
        # Whisper reports 'start' as seconds (a float); coerce to str
        # before .replace() -- calling .replace on a float would raise
        # AttributeError here.
        ts = str(seg.get('start', '00:00:00.000'))
        out = f"frames/frame_{ts.replace(':', '-')}.jpg"
        extract_frame(video_file, ts, out)
        frames.append(out)

    # 6. Assemble the timeline.
    # NOTE(review): zip truncates to the shortest input -- summaries and
    # phrases are per *block* while segments and frames are per *segment*,
    # so segments beyond len(blocks) are dropped; confirm this is intended.
    timeline = []
    for seg, summary, keys, frame in zip(segments, summaries, phrases, frames):
        timeline.append({
            'start_time': seg.get('start'),
            'end_time': seg.get('end'),
            'summary': summary,
            'key_phrases': keys,
            'frame': frame
        })
    return timeline
130
 
131
# Gradio UI
demo = gr.Blocks()
with demo:
    # Page header.
    gr.Markdown("# Lecture Capture AI Pipeline")
    # Inputs: masked API key, video upload, and the trigger button.
    api = gr.Textbox(label='OpenAI API Key', type='password')
    vid = gr.Video(label='Lecture Video')
    btn = gr.Button('Process')
    # Output: the assembled timeline rendered as JSON.
    out = gr.JSON(label='Timeline')
    # Wire the button to the processing pipeline.
    btn.click(
        fn=run_pipeline,
        inputs=[api, vid],
        outputs=out,
    )

if __name__ == '__main__':
    demo.launch()