Spaces:
Sleeping
Sleeping
File size: 3,611 Bytes
"""Synthesize a tiny tutorial-style test clip with no external assets.
Produces ``work/sample/sample.mp4``: four textured test-pattern scenes (so scene
detection has clear cuts and perceptual-hash dedup keeps every scene) plus spoken
narration generated with the built-in Windows SAPI voice (so Whisper has real
speech to transcribe).
"""
from __future__ import annotations
import os
import shutil
import subprocess
import sys
from pathlib import Path
try:  # Windows consoles default to cp1252 and choke on non-ASCII output.
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
except Exception:
    # Best effort only: if stdout cannot be reconfigured, keep it as it is.
    pass

# Prepend the directory two levels above this file to sys.path before
# importing ``src``; the import therefore cannot sit with the other imports
# at the top of the file (hence the E402 suppression).
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from src import config  # noqa: E402
def _powershell() -> str:
"""Locate powershell.exe (it lives in a v1.0\\ subdir not always on PATH)."""
found = shutil.which("powershell") or shutil.which("pwsh")
if found:
return found
candidate = Path(os.environ.get("SystemRoot", r"C:\Windows")) / (
"System32/WindowsPowerShell/v1.0/powershell.exe"
)
if candidate.exists():
return str(candidate)
raise RuntimeError("Could not find powershell.exe to synthesize narration audio.")
# Narration script handed to the speech synthesizer in make_narration(),
# written as one sentence per tutorial step and joined with single spaces.
NARRATION = " ".join(
    [
        "Welcome to this quick tutorial.",
        "First, open the application from your desktop.",
        "Next, click the File menu in the top left corner.",
        "Then choose the Export option from the list.",
        "Finally, pick a folder and save your document.",
    ]
)
# Textured, visually distinct patterns so scene detection finds clear cuts and
# perceptual-hash dedup keeps them (solid colors collapse to one pHash).
# Each entry is a lavfi source spec rendered at 1280x720, one per slide.
PATTERNS = [
    f"{source}=size=1280x720"
    for source in ("smptebars", "testsrc2", "rgbtestsrc", "mandelbrot")
]
def _run(cmd: list[str]) -> None:
print("+", " ".join(cmd))
proc = subprocess.run(cmd, capture_output=True, text=True)
if proc.returncode != 0:
raise RuntimeError(f"Command failed: {' '.join(cmd)}\n{proc.stderr}")
def make_narration(out_wav: Path) -> None:
    """Speak ``NARRATION`` into *out_wav* using the Windows speech synthesizer.

    Builds a one-line PowerShell script that loads ``System.Speech``, points
    a ``SpeechSynthesizer`` at the wave file and speaks the narration text,
    then runs it through ``_run``.

    Args:
        out_wav: Destination of the generated wave file.

    Raises:
        RuntimeError: If PowerShell cannot be located or the script fails.
    """
    # Both values are embedded in PowerShell single-quoted literals, where the
    # quote character itself must be doubled; otherwise an apostrophe in the
    # path (or in the narration text) would terminate the literal early.
    wav_literal = out_wav.as_posix().replace("'", "''")
    text_literal = NARRATION.replace("'", "''")
    ps = (
        "Add-Type -AssemblyName System.Speech; "
        "$s = New-Object System.Speech.Synthesis.SpeechSynthesizer; "
        f"$s.SetOutputToWaveFile('{wav_literal}'); "
        f"$s.Speak('{text_literal}'); $s.Dispose();"
    )
    _run([_powershell(), "-NoProfile", "-Command", ps])
def make_slides(dirpath: Path) -> list[Path]:
    """Render every entry of ``PATTERNS`` to a still image inside *dirpath*.

    Slides are named ``slide_<index>.png`` in pattern order; each one is a
    single frame grabbed from the corresponding lavfi source.

    Returns:
        The slide paths, in the same order as ``PATTERNS``.
    """
    slide_paths = [dirpath / f"slide_{idx}.png" for idx in range(len(PATTERNS))]
    for source, target in zip(PATTERNS, slide_paths):
        render_cmd = [config.FFMPEG_BIN, "-y"]
        render_cmd += ["-f", "lavfi", "-i", source]
        render_cmd += ["-frames:v", "1", str(target)]
        _run(render_cmd)
    return slide_paths
def main() -> Path:
    """Build the sample clip and return its path.

    Steps, all inside ``config.WORK_DIR / "sample"``:

    1. synthesize ``narration.wav``;
    2. render one slide image per pattern;
    3. write ``slides.txt``, a concat script showing each slide for 3 seconds;
    4. mux slides and narration into ``sample.mp4`` (H.264 video, AAC audio,
       stopping at the shorter of the two streams).

    Returns:
        Path of the generated ``sample.mp4``.

    Raises:
        RuntimeError: If any external command fails.
    """
    out_dir = config.WORK_DIR / "sample"
    out_dir.mkdir(parents=True, exist_ok=True)

    narration = out_dir / "narration.wav"
    make_narration(narration)
    slides = make_slides(out_dir)

    def _file_entry(path: Path) -> str:
        # Paths are single-quoted in the concat script; a literal quote has to
        # be written as '\'' (close quote, escaped quote, reopen quote).
        quoted = path.as_posix().replace("'", "'\\''")
        return f"file '{quoted}'"

    listfile = out_dir / "slides.txt"
    lines: list[str] = []
    for slide in slides:
        lines.append(_file_entry(slide))
        lines.append("duration 3")
    lines.append(_file_entry(slides[-1]))  # concat needs the last file twice
    listfile.write_text("\n".join(lines), encoding="utf-8")

    out_mp4 = out_dir / "sample.mp4"
    _run(
        [
            config.FFMPEG_BIN, "-y",
            "-f", "concat", "-safe", "0", "-i", str(listfile),
            "-i", str(narration),
            "-c:v", "libx264", "-pix_fmt", "yuv420p",
            "-c:a", "aac", "-shortest", str(out_mp4),
        ]
    )
    print("Sample video:", out_mp4)
    return out_mp4
# Build the sample clip only when this file is executed as a script; the
# path returned by main() is not used here.
if __name__ == "__main__":
    main()
|