lochn committed on
Commit
7d6f78b
·
verified ·
1 Parent(s): 196d556

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -27
app.py CHANGED
@@ -1,8 +1,8 @@
1
  import os
2
  import json
 
3
  from pathlib import Path
4
 
5
- import ffmpeg
6
  import openai
7
  import whisper
8
  import spacy
@@ -16,40 +16,42 @@ try:
16
  nlp = spacy.load("en_core_web_sm")
17
  except OSError:
18
  from spacy.cli import download
19
-
20
  download("en_core_web_sm")
21
  nlp = spacy.load("en_core_web_sm")
22
 
23
 
24
  def chunk_video(input_path: str, chunk_length: int = 300, output_dir: str = "chunks") -> list[Path]:
25
  """
26
- Split input video into fixed-length chunks.
27
  """
28
  Path(output_dir).mkdir(exist_ok=True)
29
- (
30
- ffmpeg
31
- .input(input_path)
32
- .output(f"{output_dir}/chunk_%03d.mp4", f="segment", segment_time=chunk_length, reset_timestamps=1)
33
- .run(overwrite_output=True)
34
- )
 
 
 
35
  return sorted(Path(output_dir).glob("chunk_*.mp4"))
36
 
37
 
38
  def extract_audio(video_path: str, audio_path: str) -> None:
39
  """
40
- Extract mono 16kHz PCM audio from video.
41
  """
42
- (
43
- ffmpeg
44
- .input(video_path)
45
- .output(audio_path, acodec="pcm_s16le", ac=1, ar="16k")
46
- .run(overwrite_output=True)
47
- )
48
 
49
 
50
  def transcribe_audio(audio_path: str) -> list[dict]:
51
  """
52
- Transcribe audio using OpenAI Whisper.
53
  Returns list of segments with start, end, and text.
54
  """
55
  model = whisper.load_model("base")
@@ -89,21 +91,21 @@ def extract_key_phrases(text: str, top_n: int = 5) -> list[str]:
89
 
90
  def extract_frame(video_path: str, timestamp: str, output_path: str) -> None:
91
  """
92
- Extract a single frame at given timestamp.
93
  """
94
- (
95
- ffmpeg
96
- .input(video_path, ss=timestamp)
97
- .output(output_path, vframes=1)
98
- .run(overwrite_output=True)
99
- )
 
100
 
101
 
102
  def run_pipeline(video_file: str) -> list[dict]:
103
  """
104
  Execute the full pipeline and return timeline as a list of dicts.
105
  """
106
- # Prepare directories
107
  chunks = chunk_video(video_file)
108
  all_segments = []
109
  for chunk in chunks:
@@ -115,7 +117,8 @@ def run_pipeline(video_file: str) -> list[dict]:
115
  summaries = [summarize_text(block) for block in transcript_blocks]
116
  key_phrases = [extract_key_phrases(block) for block in transcript_blocks]
117
 
118
- frame_dir = Path("frames"); frame_dir.mkdir(exist_ok=True)
 
119
  frame_paths = []
120
  for seg in all_segments:
121
  ts = seg.get("start")
@@ -138,7 +141,7 @@ def run_pipeline(video_file: str) -> list[dict]:
138
 
139
  def process_video(video_path: str) -> list[dict]:
140
  """
141
- Wrapper for Gradio: receives file path to uploaded video and runs pipeline.
142
  """
143
  return run_pipeline(video_path)
144
 
 
1
  import os
2
  import json
3
+ import subprocess
4
  from pathlib import Path
5
 
 
6
  import openai
7
  import whisper
8
  import spacy
 
16
  nlp = spacy.load("en_core_web_sm")
17
  except OSError:
18
  from spacy.cli import download
 
19
  download("en_core_web_sm")
20
  nlp = spacy.load("en_core_web_sm")
21
 
22
 
23
def chunk_video(input_path: str, chunk_length: int = 300, output_dir: str = "chunks") -> list[Path]:
    """
    Split the input video into fixed-length chunks using the ffmpeg CLI.

    Args:
        input_path: Path to the source video file.
        chunk_length: Target length of each chunk in seconds (default 300).
        output_dir: Directory that receives the chunk files (created if missing).

    Returns:
        Sorted list of Paths to the produced ``chunk_*.mp4`` files.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    Path(output_dir).mkdir(exist_ok=True)
    output_pattern = os.path.join(output_dir, "chunk_%03d.mp4")
    cmd = [
        # -y restores the old ffmpeg-python overwrite_output=True behavior;
        # without it ffmpeg prompts (and hangs/fails) when chunks already exist.
        "ffmpeg", "-y",
        "-i", input_path,
        "-f", "segment",
        "-segment_time", str(chunk_length),
        "-reset_timestamps", "1",
        output_pattern,
    ]
    subprocess.run(cmd, check=True)
    return sorted(Path(output_dir).glob("chunk_*.mp4"))
38
 
39
 
40
def extract_audio(video_path: str, audio_path: str) -> None:
    """
    Extract mono 16 kHz signed 16-bit PCM audio from a video using the ffmpeg CLI.

    Args:
        video_path: Path to the source video file.
        audio_path: Destination path for the extracted audio (e.g. a .wav file).

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    cmd = [
        # -y restores the old ffmpeg-python overwrite_output=True behavior;
        # without it ffmpeg prompts instead of overwriting an existing file.
        "ffmpeg", "-y",
        "-i", video_path,
        "-acodec", "pcm_s16le", "-ac", "1", "-ar", "16k",
        audio_path,
    ]
    subprocess.run(cmd, check=True)
50
 
51
 
52
  def transcribe_audio(audio_path: str) -> list[dict]:
53
  """
54
+ Transcribe audio using Whisper model.
55
  Returns list of segments with start, end, and text.
56
  """
57
  model = whisper.load_model("base")
 
91
 
92
def extract_frame(video_path: str, timestamp: str, output_path: str) -> None:
    """
    Extract a single frame at the given timestamp using the ffmpeg CLI.

    Args:
        video_path: Path to the source video file.
        timestamp: Seek position (ffmpeg time syntax, e.g. "00:01:23" or "83.5").
        output_path: Destination path for the extracted frame image.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    cmd = [
        # -y restores the old ffmpeg-python overwrite_output=True behavior.
        "ffmpeg", "-y",
        # -ss BEFORE -i does input-side (keyframe) seeking, matching the
        # original ffmpeg-python .input(video_path, ss=timestamp); placing it
        # after -i decoded the whole stream up to the timestamp (much slower).
        "-ss", timestamp,
        "-i", video_path,
        "-frames:v", "1",
        output_path,
    ]
    subprocess.run(cmd, check=True)
103
 
104
 
105
  def run_pipeline(video_file: str) -> list[dict]:
106
  """
107
  Execute the full pipeline and return timeline as a list of dicts.
108
  """
 
109
  chunks = chunk_video(video_file)
110
  all_segments = []
111
  for chunk in chunks:
 
117
  summaries = [summarize_text(block) for block in transcript_blocks]
118
  key_phrases = [extract_key_phrases(block) for block in transcript_blocks]
119
 
120
+ frame_dir = Path("frames")
121
+ frame_dir.mkdir(exist_ok=True)
122
  frame_paths = []
123
  for seg in all_segments:
124
  ts = seg.get("start")
 
141
 
142
def process_video(video_path: str) -> list[dict]:
    """
    Gradio entry point: run the full pipeline on the uploaded video file.

    Args:
        video_path: Filesystem path to the uploaded video.

    Returns:
        The timeline produced by ``run_pipeline`` as a list of dicts.
    """
    timeline = run_pipeline(video_path)
    return timeline
147