Prompt48
/

PropmtEditor

Model card Files Files and versions

xet

Community

Prompt48 committed on May 26

Commit

8e7f21b

verified ·

1 Parent(s): 7565da9

Upload edit/build_cut.py with huggingface_hub

Browse files

Files changed (1) hide show

edit/build_cut.py +114 -0

edit/build_cut.py ADDED Viewed

	@@ -0,0 +1,114 @@

+"""
+Build speech segments from transcript, removing silences >= 0.3s and uh/um fillers.
+Then cut the kept ranges with ffmpeg select/aselect in several passes and
+concat the pass outputs, scaling 4K → 1080p with dense keyframes.
+"""
+import json, subprocess, sys
+from pathlib import Path
+# --- Hard-coded inputs/outputs for this one-off edit ------------------------
+# Transcript JSON; the script reads its "words" list (entries with "type",
+# "text", "start" and "end" keys).
+TRANSCRIPT = Path(r"D:\PromptEngineer48\In-Progress\P11-Editor\edit\transcripts\Mem0_1.json")
+# Video file passed to ffmpeg as the input (-i) of every pass.
+SOURCE     = Path(r"D:\PromptEngineer48\In-Progress\P11-Editor\Mem0_1.mp4")
+# Final rendered file; its parent directory also hosts the tmp_passes/ folder.
+OUT_BASE   = Path(r"D:\PromptEngineer48\In-Progress\P11-Editor\edit\hf\base_cut.mp4")
+# Silence threshold in seconds, compared against the gap between two words.
+THRESHOLD = 0.30   # gaps >= this get cut
+# Seconds of context kept before the first and after the last word of a segment.
+PAD       = 0.08   # 80ms padding around each speech segment
+# NOTE(review): FADE_MS is not referenced anywhere else in the visible script,
+# so no audio fade is applied by this file as shown — confirm whether a later
+# stage consumes it.
+FADE_MS   = 30     # ms audio fade at each edge to prevent pops
+# Filler tokens to drop; compared against the lower-cased, punctuation-stripped
+# word text.
+FILLERS = {"uh", "um"}
+data  = json.load(open(TRANSCRIPT, encoding="utf-8"))
+words = [w for w in data["words"] if w.get("type") == "word"]
+# Remove filler words
+clean = [w for w in words if w["text"].strip().lower().rstrip(",.") not in FILLERS]
+# Build speech segments by merging words within THRESHOLD
+segs = []
+s = e = None
+for w in clean:
+    if s is None:
+        s, e = w["start"], w["end"]
+    elif w["start"] - e <= THRESHOLD:
+        e = w["end"]
+    else:
+        segs.append((max(0, s - PAD), e + PAD))
+        s, e = w["start"], w["end"]
+if s is not None:
+    segs.append((max(0, s - PAD), e + PAD))
+# Clamp and clip overlapping edges
+VIDEO_DUR = 805.5
+clamped = []
+for a, b in segs:
+    a = round(max(0.0, a), 4)
+    b = round(min(VIDEO_DUR, b), 4)
+    if clamped and a < clamped[-1][1]:
+        a = clamped[-1][1]   # no overlap
+    if b > a:
+        clamped.append((a, b))
+segs = clamped
+total_dur = sum(b - a for a, b in segs)
+print(f"Segments: {len(segs)}")
+print(f"Total kept: {total_dur:.1f}s = {total_dur/60:.1f}min  (cut {VIDEO_DUR - total_dur:.1f}s)")
+for i, (a, b) in enumerate(segs):
+    print(f"  [{i:03d}] {a:.3f} -> {b:.3f}  ({b-a:.2f}s)")
+# Build ffmpeg filter_complex with trim + concat
+# Use select/aselect in PASSES to avoid OOM with 216 between() calls.
+# Split segs into 4 time buckets, run one ffmpeg per bucket, concat results.
+PASSES = 4
+source_dur = VIDEO_DUR
+bucket_dur = source_dur / PASSES
+tmp_dir = OUT_BASE.parent / "tmp_passes"
+tmp_dir.mkdir(exist_ok=True)
+tmp_files = []
+for p in range(PASSES):
+    t_lo = p * bucket_dur
+    t_hi = (p + 1) * bucket_dur
+    bucket_segs = [(a, b) for a, b in segs if b > t_lo and a < t_hi]
+    if not bucket_segs:
+        continue
+    expr = "+".join(f"between(t,{a},{b})" for a, b in bucket_segs)
+    fc = (
+        f"[0:v]select='{expr}',setpts=N/30/TB,scale=1920:1080[outv];"
+        f"[0:a]aselect='{expr}',asetpts=N/SR/TB[outa]"
+    )
+    fc_f = tmp_dir / f"fc_pass{p}.txt"
+    fc_f.write_text(fc, encoding="utf-8")
+    tmp_out = tmp_dir / f"pass{p}.mp4"
+    tmp_files.append(tmp_out)
+    print(f"Pass {p}: {len(bucket_segs)} segs, source {t_lo:.0f}-{t_hi:.0f}s -> {tmp_out.name}")
+    r = subprocess.run([
+        "ffmpeg", "-y", "-i", str(SOURCE),
+        "-/filter_complex", str(fc_f),
+        "-map", "[outv]", "-map", "[outa]",
+        "-c:v", "libx264", "-crf", "18", "-preset", "fast",
+        "-g", "30", "-keyint_min", "30",
+        "-c:a", "aac", "-b:a", "192k",
+        str(tmp_out)
+    ], capture_output=True, text=True)
+    if r.returncode != 0:
+        print(f"Pass {p} STDERR:", r.stderr[-2000:])
+        sys.exit(1)
+    print(f"  Pass {p} done")
+# Concat pass outputs with concat demuxer
+concat_list = tmp_dir / "concat.txt"
+concat_list.write_text(
+    "\n".join(f"file '{f.as_posix()}'" for f in tmp_files),
+    encoding="utf-8"
+)
+print(f"\nConcatenating {len(tmp_files)} passes -> {OUT_BASE}")
+r = subprocess.run([
+    "ffmpeg", "-y", "-f", "concat", "-safe", "0",
+    "-i", str(concat_list),
+    "-c:v", "libx264", "-crf", "18", "-preset", "fast",
+    "-g", "30", "-keyint_min", "30",
+    "-c:a", "aac", "-b:a", "192k",
+    str(OUT_BASE)
+], capture_output=True, text=True)
+if r.returncode != 0:
+    print("Concat STDERR:", r.stderr[-2000:])
+    sys.exit(1)
+print(f"Done! -> {OUT_BASE}")