lochn committed on
Commit
ebddfd5
·
verified ·
1 Parent(s): 9de22ed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -39
app.py CHANGED
@@ -3,11 +3,13 @@ import json
3
  from pathlib import Path
4
 
5
  import ffmpeg
 
 
6
  import spacy
7
- from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
8
 
9
- # Set your OpenAI API key
10
- processor = AutoProcessor.from_pretrained("openai/whisper-large-v3-turbo")
11
 
12
  # Load spaCy model for key-phrase extraction
13
  nlp = spacy.load("en_core_web_sm")
@@ -20,9 +22,8 @@ def chunk_video(input_path: str, chunk_length: int = 300, output_dir: str = "chu
20
  Path(output_dir).mkdir(exist_ok=True)
21
  (
22
  ffmpeg
23
- .input('AP World UNIT 1 REVIEW (Everything you NEED to KNOW!).mp4')
24
- .output(f"{output_dir}/chunk_%03d.mp4",
25
- f="segment", segment_time=chunk_length, reset_timestamps=1)
26
  .run(overwrite_output=True)
27
  )
28
  return sorted(Path(output_dir).glob("chunk_*.mp4"))
@@ -45,7 +46,7 @@ def transcribe_audio(audio_path: str) -> list[dict]:
45
  Transcribe audio using OpenAI Whisper.
46
  Returns list of segments with start, end, and text.
47
  """
48
- model = AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-large-v3-turbo")
49
  result = model.transcribe(audio_path)
50
  return result.get("segments", [])
51
 
@@ -77,7 +78,6 @@ def extract_key_phrases(text: str, top_n: int = 5) -> list[str]:
77
  """
78
  doc = nlp(text)
79
  phrases = [chunk.text for chunk in doc.noun_chunks]
80
- # Keep unique, preserve order
81
  return list(dict.fromkeys(phrases))[:top_n]
82
 
83
 
@@ -93,41 +93,22 @@ def extract_frame(video_path: str, timestamp: str, output_path: str) -> None:
93
  )
94
 
95
 
96
- def build_timeline(segments: list[dict], summaries: list[str], keys: list[list[str]], frames: list[str]) -> list[dict]:
97
  """
98
- Assemble timeline entries into a list of dictionaries.
99
  """
100
- timeline = []
101
- for seg, summary, key_list, frame in zip(segments, summaries, keys, frames):
102
- timeline.append({
103
- "start_time": seg.get("start"),
104
- "end_time": seg.get("end"),
105
- "summary": summary,
106
- "key_phrases": key_list,
107
- "frame_path": frame
108
- })
109
- return timeline
110
-
111
-
112
- def main(video_file: str):
113
- # 1. Chunk video
114
  chunks = chunk_video(video_file)
115
-
116
- # 2. Transcribe all chunks
117
  all_segments = []
118
  for chunk in chunks:
119
  wav_path = str(chunk).replace(".mp4", ".wav")
120
  extract_audio(str(chunk), wav_path)
121
  all_segments.extend(transcribe_audio(wav_path))
122
 
123
- # 3. Segment transcript
124
  transcript_blocks = segment_text(all_segments)
125
-
126
- # 4. Summarize and extract key phrases
127
  summaries = [summarize_text(block) for block in transcript_blocks]
128
  key_phrases = [extract_key_phrases(block) for block in transcript_blocks]
129
 
130
- # 5. Extract frames
131
  frame_dir = Path("frames"); frame_dir.mkdir(exist_ok=True)
132
  frame_paths = []
133
  for seg in all_segments:
@@ -137,15 +118,34 @@ def main(video_file: str):
137
  extract_frame(video_file, ts, str(out_path))
138
  frame_paths.append(str(out_path))
139
 
140
- # 6. Build timeline and save
141
- timeline = build_timeline(all_segments, summaries, key_phrases, frame_paths)
142
- with open("timeline.json", "w") as f:
143
- json.dump(timeline, f, indent=2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
 
 
145
 
146
  if __name__ == "__main__":
147
- import argparse
148
- parser = argparse.ArgumentParser(description="Lecture capture AI pipeline")
149
- parser.add_argument("video_file", help="Path to the input lecture video")
150
- args = parser.parse_args()
151
- main(args.video_file)
 
3
  from pathlib import Path
4
 
5
  import ffmpeg
6
+ import openai
7
+ import whisper
8
  import spacy
9
+ import gradio as gr
10
 
11
+ # Set your OpenAI API key from environment (configure in HF Spaces secrets)
12
+ openai.api_key = os.getenv("OPENAI_API_KEY")
13
 
14
  # Load spaCy model for key-phrase extraction
15
  nlp = spacy.load("en_core_web_sm")
 
22
  Path(output_dir).mkdir(exist_ok=True)
23
  (
24
  ffmpeg
25
+ .input(input_path)
26
+ .output(f"{output_dir}/chunk_%03d.mp4", f="segment", segment_time=chunk_length, reset_timestamps=1)
 
27
  .run(overwrite_output=True)
28
  )
29
  return sorted(Path(output_dir).glob("chunk_*.mp4"))
 
46
  Transcribe audio using OpenAI Whisper.
47
  Returns list of segments with start, end, and text.
48
  """
49
+ model = whisper.load_model("base")
50
  result = model.transcribe(audio_path)
51
  return result.get("segments", [])
52
 
 
78
  """
79
  doc = nlp(text)
80
  phrases = [chunk.text for chunk in doc.noun_chunks]
 
81
  return list(dict.fromkeys(phrases))[:top_n]
82
 
83
 
 
93
  )
94
 
95
 
96
+ def run_pipeline(video_file: str) -> list[dict]:
97
  """
98
+ Execute the full pipeline and return timeline as a list of dicts.
99
  """
100
+ # Prepare directories
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  chunks = chunk_video(video_file)
 
 
102
  all_segments = []
103
  for chunk in chunks:
104
  wav_path = str(chunk).replace(".mp4", ".wav")
105
  extract_audio(str(chunk), wav_path)
106
  all_segments.extend(transcribe_audio(wav_path))
107
 
 
108
  transcript_blocks = segment_text(all_segments)
 
 
109
  summaries = [summarize_text(block) for block in transcript_blocks]
110
  key_phrases = [extract_key_phrases(block) for block in transcript_blocks]
111
 
 
112
  frame_dir = Path("frames"); frame_dir.mkdir(exist_ok=True)
113
  frame_paths = []
114
  for seg in all_segments:
 
118
  extract_frame(video_file, ts, str(out_path))
119
  frame_paths.append(str(out_path))
120
 
121
+ timeline = []
122
+ for seg, summary, keys, frame in zip(all_segments, summaries, key_phrases, frame_paths):
123
+ timeline.append({
124
+ "start_time": seg.get("start"),
125
+ "end_time": seg.get("end"),
126
+ "summary": summary,
127
+ "key_phrases": keys,
128
+ "frame": frame
129
+ })
130
+ return timeline
131
+
132
+
133
+ def process_video(video):
134
+ """
135
+ Wrapper for Gradio: saves uploaded file and runs pipeline.
136
+ """
137
+ video_path = video.name
138
+ timeline = run_pipeline(video_path)
139
+ return timeline
140
+
141
+ # Gradio interface for Hugging Face Spaces
142
+ with gr.Blocks() as demo:
143
+ gr.Markdown("# Lecture Capture AI Pipeline\nUpload a lecture video to generate a summarized timeline.")
144
+ video_input = gr.Video(label="Lecture Video File")
145
+ run_button = gr.Button("Process Video")
146
+ output = gr.JSON(label="Generated Timeline")
147
 
148
+ run_button.click(fn=process_video, inputs=video_input, outputs=output)
149
 
150
  if __name__ == "__main__":
151
+ demo.launch()