lochn committed on
Commit
18e0d67
·
verified ·
1 Parent(s): 55e8ba3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +151 -0
app.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ from pathlib import Path
4
+
5
+ import ffmpeg
6
+ import spacy
7
+ from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
8
+
9
# Whisper processor (tokenizer + feature extractor) for speech-to-text.
# NOTE(review): the original comment here said "Set your OpenAI API key",
# which does not describe this line; also, `processor` is never referenced
# elsewhere in this file — confirm whether it is needed or leftover.
processor = AutoProcessor.from_pretrained("openai/whisper-large-v3-turbo")

# Load spaCy model for key-phrase extraction (noun-chunk based).
nlp = spacy.load("en_core_web_sm")
14
+
15
+
16
def chunk_video(input_path: str, chunk_length: int = 300, output_dir: str = "chunks") -> list[Path]:
    """
    Split the input video into fixed-length chunks with ffmpeg's segment muxer.

    Args:
        input_path: Path to the source video file.
        chunk_length: Length of each chunk in seconds (default 300 = 5 min).
        output_dir: Directory for chunk files (created if missing).

    Returns:
        Sorted list of Paths to the generated ``chunk_*.mp4`` files.
    """
    Path(output_dir).mkdir(exist_ok=True)
    (
        ffmpeg
        # BUG FIX: the original ignored `input_path` and hard-coded the
        # filename 'AP World UNIT 1 REVIEW (Everything you NEED to KNOW!).mp4'.
        .input(input_path)
        .output(f"{output_dir}/chunk_%03d.mp4",
                f="segment", segment_time=chunk_length, reset_timestamps=1)
        .run(overwrite_output=True)
    )
    return sorted(Path(output_dir).glob("chunk_*.mp4"))
29
+
30
+
31
def extract_audio(video_path: str, audio_path: str) -> None:
    """
    Write the video's audio track to `audio_path` as mono 16 kHz 16-bit PCM.

    Args:
        video_path: Source video file.
        audio_path: Destination WAV path (overwritten if it exists).
    """
    stream = ffmpeg.input(video_path)
    stream = stream.output(audio_path, acodec="pcm_s16le", ac=1, ar="16k")
    stream.run(overwrite_output=True)
41
+
42
+
43
def transcribe_audio(audio_path: str) -> list[dict]:
    """
    Transcribe an audio file with Whisper and return timestamped segments.

    Args:
        audio_path: Path to a mono 16 kHz WAV file.

    Returns:
        List of dicts with "start", "end", and "text" keys — the shape the
        rest of the pipeline expects.
    """
    # BUG FIX: transformers' AutoModelForSpeechSeq2Seq has no .transcribe()
    # method (that is the openai-whisper package's API), so the original
    # raised AttributeError. Use a transformers ASR pipeline with timestamps.
    from transformers import pipeline  # local import: heavy dependency

    asr = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-large-v3-turbo",
        return_timestamps=True,
    )
    result = asr(audio_path)
    segments = []
    for chunk in result.get("chunks", []):
        start, end = chunk.get("timestamp", (None, None))
        segments.append({"start": start, "end": end, "text": chunk.get("text", "")})
    return segments
51
+
52
+
53
def segment_text(segments: list[dict]) -> list[str]:
    """
    Concatenate segment texts and split the result into non-empty blocks.

    Args:
        segments: Transcription segments, each with a "text" key.

    Returns:
        Stripped, non-empty text blocks (split on blank lines).
    """
    joined = "\n\n".join(segment["text"] for segment in segments)
    blocks = []
    for raw_block in joined.split("\n\n"):
        cleaned = raw_block.strip()
        if cleaned:
            blocks.append(cleaned)
    return blocks
59
+
60
+
61
def summarize_text(text: str) -> str:
    """
    Summarize a transcript block in 2-3 sentences via the OpenAI chat API.

    Args:
        text: Transcript block to summarize.

    Returns:
        The model's summary, stripped of surrounding whitespace.
    """
    # BUG FIX: the module never imported `openai`, so this function raised
    # NameError on first call. Imported locally to keep the optional network
    # dependency out of module import time.
    import openai

    # NOTE(review): openai.ChatCompletion is the pre-1.0 openai-python API;
    # confirm the installed openai version supports it.
    prompt = f"Summarize the following lecture segment in 2-3 sentences:\n\n{text}"
    response = openai.ChatCompletion.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=200
    )
    return response.choices[0].message.content.strip()
72
+
73
+
74
def extract_key_phrases(text: str, top_n: int = 5) -> list[str]:
    """
    Return up to `top_n` unique noun chunks from `text`, in order of appearance.

    Args:
        text: Text to analyze with the module-level spaCy pipeline.
        top_n: Maximum number of phrases to return.
    """
    parsed = nlp(text)
    # dict insertion order gives de-duplication while preserving first-seen order
    unique_phrases: dict[str, None] = {}
    for noun_chunk in parsed.noun_chunks:
        unique_phrases.setdefault(noun_chunk.text, None)
    return list(unique_phrases)[:top_n]
82
+
83
+
84
def extract_frame(video_path: str, timestamp: str, output_path: str) -> None:
    """
    Save the single video frame at `timestamp` to `output_path`.

    Args:
        video_path: Source video file.
        timestamp: Seek position passed to ffmpeg's `ss` option.
        output_path: Destination image path (overwritten if it exists).
    """
    stream = ffmpeg.input(video_path, ss=timestamp)
    stream = stream.output(output_path, vframes=1)
    stream.run(overwrite_output=True)
94
+
95
+
96
def build_timeline(segments: list[dict], summaries: list[str], keys: list[list[str]], frames: list[str]) -> list[dict]:
    """
    Assemble parallel lists into timeline entry dictionaries.

    Args:
        segments: Transcription segments with "start"/"end" keys.
        summaries: One summary string per entry.
        keys: One key-phrase list per entry.
        frames: One frame image path per entry.

    Returns:
        One dict per position, truncated to the shortest input (zip semantics).
    """
    entries = []
    for segment, summary, phrase_list, frame_path in zip(segments, summaries, keys, frames):
        entry = {
            "start_time": segment.get("start"),
            "end_time": segment.get("end"),
            "summary": summary,
            "key_phrases": phrase_list,
            "frame_path": frame_path
        }
        entries.append(entry)
    return entries
110
+
111
+
112
def main(video_file: str):
    """
    Run the full lecture pipeline: chunk video, transcribe, summarize,
    extract key phrases and frames, then write `timeline.json`.

    Args:
        video_file: Path to the input lecture video.
    """
    # 1. Chunk video
    chunks = chunk_video(video_file)

    # 2. Transcribe all chunks
    all_segments = []
    for chunk in chunks:
        wav_path = str(chunk).replace(".mp4", ".wav")
        extract_audio(str(chunk), wav_path)
        all_segments.extend(transcribe_audio(wav_path))

    # 3. Segment transcript
    transcript_blocks = segment_text(all_segments)

    # 4. Summarize and extract key phrases
    summaries = [summarize_text(block) for block in transcript_blocks]
    key_phrases = [extract_key_phrases(block) for block in transcript_blocks]

    # 5. Extract frames
    frame_dir = Path("frames")
    frame_dir.mkdir(exist_ok=True)
    frame_paths = []
    for seg in all_segments:
        ts = seg.get("start")
        # BUG FIX: Whisper segment "start" is a float (seconds); the original
        # called str.replace() on it and raised AttributeError. Stringify
        # first and make the name filesystem-safe; skip missing timestamps.
        if ts is None:
            continue
        fname = f"frame_{str(ts).replace(':', '-').replace('.', '-')}.jpg"
        out_path = frame_dir / fname
        extract_frame(video_file, ts, str(out_path))
        frame_paths.append(str(out_path))

    # 6. Build timeline and save
    # NOTE(review): summaries/key_phrases are per transcript block while
    # segments/frames are per Whisper segment; build_timeline's zip()
    # truncates to the shortest list — confirm this pairing is intended.
    timeline = build_timeline(all_segments, summaries, key_phrases, frame_paths)
    with open("timeline.json", "w") as f:
        json.dump(timeline, f, indent=2)
144
+
145
+
146
if __name__ == "__main__":
    # CLI entry point: a single positional argument naming the input video.
    import argparse

    cli = argparse.ArgumentParser(description="Lecture capture AI pipeline")
    cli.add_argument("video_file", help="Path to the input lecture video")
    parsed_args = cli.parse_args()
    main(parsed_args.video_file)