lochn committed on
Commit
ebddfd5
·
verified ·
1 Parent(s): 9de22ed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -39
app.py CHANGED
@@ -3,11 +3,13 @@ import json
3
  from pathlib import Path
4
 
5
  import ffmpeg
 
 
6
  import spacy
7
- from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
8
 
9
- # Set your OpenAI API key
10
- processor = AutoProcessor.from_pretrained("openai/whisper-large-v3-turbo")
11
 
12
  # Load spaCy model for key-phrase extraction
13
  nlp = spacy.load("en_core_web_sm")
@@ -20,9 +22,8 @@ def chunk_video(input_path: str, chunk_length: int = 300, output_dir: str = "chu
20
  Path(output_dir).mkdir(exist_ok=True)
21
  (
22
  ffmpeg
23
- .input('AP World UNIT 1 REVIEW (Everything you NEED to KNOW!).mp4')
24
- .output(f"{output_dir}/chunk_%03d.mp4",
25
- f="segment", segment_time=chunk_length, reset_timestamps=1)
26
  .run(overwrite_output=True)
27
  )
28
  return sorted(Path(output_dir).glob("chunk_*.mp4"))
@@ -45,7 +46,7 @@ def transcribe_audio(audio_path: str) -> list[dict]:
45
  Transcribe audio using OpenAI Whisper.
46
  Returns list of segments with start, end, and text.
47
  """
48
- model = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-large-v3-turbo")
49
  result = model.transcribe(audio_path)
50
  return result.get("segments", [])
51
 
@@ -77,7 +78,6 @@ def extract_key_phrases(text: str, top_n: int = 5) -> list[str]:
77
  """
78
  doc = nlp(text)
79
  phrases = [chunk.text for chunk in doc.noun_chunks]
80
- # Keep unique, preserve order
81
  return list(dict.fromkeys(phrases))[:top_n]
82
 
83
 
@@ -93,41 +93,22 @@ def extract_frame(video_path: str, timestamp: str, output_path: str) -> None:
93
  )
94
 
95
 
96
- def build_timeline(segments: list[dict], summaries: list[str], keys: list[list[str]], frames: list[str]) -> list[dict]:
97
  """
98
- Assemble timeline entries into a list of dictionaries.
99
  """
100
- timeline = []
101
- for seg, summary, key_list, frame in zip(segments, summaries, keys, frames):
102
- timeline.append({
103
- "start_time": seg.get("start"),
104
- "end_time": seg.get("end"),
105
- "summary": summary,
106
- "key_phrases": key_list,
107
- "frame_path": frame
108
- })
109
- return timeline
110
-
111
-
112
- def main(video_file: str):
113
- # 1. Chunk video
114
  chunks = chunk_video(video_file)
115
-
116
- # 2. Transcribe all chunks
117
  all_segments = []
118
  for chunk in chunks:
119
  wav_path = str(chunk).replace(".mp4", ".wav")
120
  extract_audio(str(chunk), wav_path)
121
  all_segments.extend(transcribe_audio(wav_path))
122
 
123
- # 3. Segment transcript
124
  transcript_blocks = segment_text(all_segments)
125
-
126
- # 4. Summarize and extract key phrases
127
  summaries = [summarize_text(block) for block in transcript_blocks]
128
  key_phrases = [extract_key_phrases(block) for block in transcript_blocks]
129
 
130
- # 5. Extract frames
131
  frame_dir = Path("frames"); frame_dir.mkdir(exist_ok=True)
132
  frame_paths = []
133
  for seg in all_segments:
@@ -137,15 +118,34 @@ def main(video_file: str):
137
  extract_frame(video_file, ts, str(out_path))
138
  frame_paths.append(str(out_path))
139
 
140
- # 6. Build timeline and save
141
- timeline = build_timeline(all_segments, summaries, key_phrases, frame_paths)
142
- with open("timeline.json", "w") as f:
143
- json.dump(timeline, f, indent=2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
 
 
145
 
146
  if __name__ == "__main__":
147
- import argparse
148
- parser = argparse.ArgumentParser(description="Lecture capture AI pipeline")
149
- parser.add_argument("video_file", help="Path to the input lecture video")
150
- args = parser.parse_args()
151
- main(args.video_file)
 
3
  from pathlib import Path
4
 
5
  import ffmpeg
6
+ import openai
7
+ import whisper
8
  import spacy
9
+ import gradio as gr
10
 
11
+ # Set your OpenAI API key from environment (configure in HF Spaces secrets)
12
+ openai.api_key = os.getenv("OPENAI_API_KEY")
13
 
14
  # Load spaCy model for key-phrase extraction
15
  nlp = spacy.load("en_core_web_sm")
 
22
  Path(output_dir).mkdir(exist_ok=True)
23
  (
24
  ffmpeg
25
+ .input(input_path)
26
+ .output(f"{output_dir}/chunk_%03d.mp4", f="segment", segment_time=chunk_length, reset_timestamps=1)
 
27
  .run(overwrite_output=True)
28
  )
29
  return sorted(Path(output_dir).glob("chunk_*.mp4"))
 
46
  Transcribe audio using OpenAI Whisper.
47
  Returns list of segments with start, end, and text.
48
  """
49
+ model = whisper.load_model("base")
50
  result = model.transcribe(audio_path)
51
  return result.get("segments", [])
52
 
 
78
  """
79
  doc = nlp(text)
80
  phrases = [chunk.text for chunk in doc.noun_chunks]
 
81
  return list(dict.fromkeys(phrases))[:top_n]
82
 
83
 
 
93
  )
94
 
95
 
96
+ def run_pipeline(video_file: str) -> list[dict]:
97
  """
98
+ Execute the full pipeline and return timeline as a list of dicts.
99
  """
100
+ # Prepare directories
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  chunks = chunk_video(video_file)
 
 
102
  all_segments = []
103
  for chunk in chunks:
104
  wav_path = str(chunk).replace(".mp4", ".wav")
105
  extract_audio(str(chunk), wav_path)
106
  all_segments.extend(transcribe_audio(wav_path))
107
 
 
108
  transcript_blocks = segment_text(all_segments)
 
 
109
  summaries = [summarize_text(block) for block in transcript_blocks]
110
  key_phrases = [extract_key_phrases(block) for block in transcript_blocks]
111
 
 
112
  frame_dir = Path("frames"); frame_dir.mkdir(exist_ok=True)
113
  frame_paths = []
114
  for seg in all_segments:
 
118
  extract_frame(video_file, ts, str(out_path))
119
  frame_paths.append(str(out_path))
120
 
121
+ timeline = []
122
+ for seg, summary, keys, frame in zip(all_segments, summaries, key_phrases, frame_paths):
123
+ timeline.append({
124
+ "start_time": seg.get("start"),
125
+ "end_time": seg.get("end"),
126
+ "summary": summary,
127
+ "key_phrases": keys,
128
+ "frame": frame
129
+ })
130
+ return timeline
131
+
132
+
133
+ def process_video(video):
134
+ """
135
+ Wrapper for Gradio: saves uploaded file and runs pipeline.
136
+ """
137
+ video_path = video.name
138
+ timeline = run_pipeline(video_path)
139
+ return timeline
140
+
141
+ # Gradio interface for Hugging Face Spaces
142
+ with gr.Blocks() as demo:
143
+ gr.Markdown("# Lecture Capture AI Pipeline\nUpload a lecture video to generate a summarized timeline.")
144
+ video_input = gr.Video(label="Lecture Video File")
145
+ run_button = gr.Button("Process Video")
146
+ output = gr.JSON(label="Generated Timeline")
147
 
148
+ run_button.click(fn=process_video, inputs=video_input, outputs=output)
149
 
150
  if __name__ == "__main__":
151
+ demo.launch()