YT_Video / runner.py
Shami96's picture
Update runner.py
5857c35 verified
import json
import re
from pathlib import Path
from typing import Optional, Tuple
from config import OUT_ROOT, FRAME_INTERVAL_SEC, MAX_FRAMES, FRAMES_SUBDIR
from video import prepare_dirs, extract_audio_ffmpeg, extract_frames
from transcribe import transcribe
from vision import caption_folder, dump_json
from packager import write_text, write_manifest, make_zip
# Regexes used to spot an explicit "In <Title> (<Year>)" mention in free text.
# Each pattern exposes two groups: group 1 is the title text, group 2 is a
# four-digit year starting with 19 or 20. Matching is case-insensitive.
TITLE_PATTERNS = [
    re.compile(
        r"\bIn\s+([A-Za-z0-9:'\-&\. ]+)\s*\((20\d{2}|19\d{2})\)",
        flags=re.IGNORECASE,
    ),
]
def infer_title_year(run_dir: Path) -> Tuple[str, str]:
    """
    Return (title, year) if explicitly detectable in the run's artifacts, else ('', '').

    Sources are consulted in this order:
      1. ``explanations.json`` -> ``metadata.title`` and ``metadata.year``
         (both must be present and non-blank).
      2. ``transcription.txt`` -> the first pattern in ``TITLE_PATTERNS`` that
         matches; group 1 is used as the title and group 2 as the year.

    Deterministic. No guessing. A missing, unreadable, badly encoded, or
    malformed file is treated as "no information" rather than as an error.
    """
    # 1) explanations.json -> metadata.title / metadata.year
    try:
        payload = json.loads(
            (run_dir / "explanations.json").read_text(encoding="utf-8")
        )
    except (OSError, ValueError):
        # OSError: file missing/unreadable. ValueError covers both invalid JSON
        # (JSONDecodeError) and bad encoding (UnicodeDecodeError).
        payload = None

    metadata = payload.get("metadata") if isinstance(payload, dict) else None
    if isinstance(metadata, dict):
        raw_title = metadata.get("title")
        raw_year = metadata.get("year")
        if raw_title and raw_year:
            title = str(raw_title).strip()
            year = str(raw_year).strip()
            # Re-check after stripping so whitespace-only values do not count
            # as a detected title/year; fall through to the transcript instead.
            if title and year:
                return title, year

    # 2) transcription.txt -> explicit "In <Title> (<Year>)" style mention
    try:
        transcript = (run_dir / "transcription.txt").read_text(encoding="utf-8")
    except (OSError, ValueError):
        return "", ""

    # Try every registered pattern, in list order, not just the first one.
    for pattern in TITLE_PATTERNS:
        match = pattern.search(transcript)
        if match:
            return match.group(1).strip(), match.group(2)
    return "", ""
def process_video(
    video_file: Path,
    interval_sec: Optional[float] = None,
    max_frames: Optional[int] = None,
    movie_title: str = "",
    movie_year: str = "",
) -> Path:
    """
    Run the whole pipeline for one video and return the value of ``make_zip``.

    Steps, in order: frame extraction, audio extraction and transcription,
    per-frame captioning, manifest writing, and zipping of the run directory.

    Args:
        video_file: Path of the input video.
        interval_sec: Frame sampling interval; FRAME_INTERVAL_SEC when None.
        max_frames: Frame cap; MAX_FRAMES when None.
        movie_title: Explicit title; when empty, infer_title_year() is asked.
        movie_year: Explicit year; when empty, infer_title_year() is asked.

    Returns:
        Whatever ``make_zip(run_dir)`` returns for the run directory.
    """
    # Resolve per-call overrides against the configured defaults.
    if interval_sec is None:
        effective_interval = FRAME_INTERVAL_SEC
    else:
        effective_interval = interval_sec
    if max_frames is None:
        frame_limit = MAX_FRAMES
    else:
        frame_limit = max_frames

    run_dir, frames_dir = prepare_dirs(OUT_ROOT, video_file)

    # 1) Frames
    frame_count = extract_frames(video_file, frames_dir, effective_interval, frame_limit)

    # 2) Audio + ASR
    audio_path = run_dir / "audio.wav"
    extract_audio_ffmpeg(video_file, audio_path)
    write_text(transcribe(audio_path), run_dir / "transcription.txt")

    # 3) Vision captions per frame
    dump_json(caption_folder(frames_dir), run_dir / "explanations.json")

    # 4) Manifest with explicit movie metadata if provided or inferred deterministically.
    #    Inference runs only when at least one of the two values is missing,
    #    and never overrides a value the caller supplied.
    if not (movie_title and movie_year):
        inferred_title, inferred_year = infer_title_year(run_dir)
        if not movie_title:
            movie_title = inferred_title
        if not movie_year:
            movie_year = inferred_year

    manifest = {
        "video": str(video_file.name),
        "frames_dir": FRAMES_SUBDIR,
        "num_frames": frame_count,
        "files": [
            "transcription.txt",
            "explanations.json",
            "audio.wav",
        ],
        "interval_sec": float(effective_interval),
    }
    # Optional keys are added only when non-empty, "movie" before "year".
    for key, value in (("movie", movie_title), ("year", movie_year)):
        if value:
            manifest[key] = value
    write_manifest(run_dir, manifest)

    # 5) Zip
    return make_zip(run_dir)
if __name__ == "__main__":
    import argparse

    # Command-line entry point: parse the options, run the pipeline once,
    # and print the value returned by process_video().
    parser = argparse.ArgumentParser()
    parser.add_argument("--video", required=True, type=Path)
    parser.add_argument("--interval", type=float, default=FRAME_INTERVAL_SEC)
    parser.add_argument("--max_frames", type=int, default=MAX_FRAMES)
    parser.add_argument("--movie_title", type=str, default="")
    parser.add_argument("--movie_year", type=str, default="")
    cli = parser.parse_args()

    print(
        process_video(
            cli.video,
            interval_sec=cli.interval,
            max_frames=cli.max_frames,
            movie_title=cli.movie_title,
            movie_year=cli.movie_year,
        )
    )