Upload edit/build_cut.py with huggingface_hub
Browse files- edit/build_cut.py +114 -0
edit/build_cut.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
Build speech segments from a word-level transcript, dropping uh/um fillers and
cutting silences >= THRESHOLD seconds.

Then cut the source with ffmpeg: each pass keeps its segments with
select/aselect and scales 4K → 1080p with dense keyframes; the pass outputs
are joined with the concat demuxer.
"""
import json
import subprocess
import sys
from pathlib import Path

TRANSCRIPT = Path(r"D:\PromptEngineer48\In-Progress\P11-Editor\edit\transcripts\Mem0_1.json")
SOURCE = Path(r"D:\PromptEngineer48\In-Progress\P11-Editor\Mem0_1.mp4")
OUT_BASE = Path(r"D:\PromptEngineer48\In-Progress\P11-Editor\edit\hf\base_cut.mp4")

THRESHOLD = 0.30   # gaps >= this get cut
PAD = 0.08         # 80ms padding around each speech segment
# NOTE(review): FADE_MS is declared but no filter in this script applies it;
# kept so external readers of the constant do not break.
FADE_MS = 30       # ms audio fade at each edge to prevent pops

FILLERS = {"uh", "um"}

# Hard-coded duration (seconds) used for clamping and bucketing.
VIDEO_DUR = 805.5

# Number of ffmpeg passes the segment list is split across.
PASSES = 4

# Encoder settings shared by the per-pass renders and the final concat.
_ENCODE_ARGS = [
    "-c:v", "libx264", "-crf", "18", "-preset", "fast",
    "-g", "30", "-keyint_min", "30",
    "-c:a", "aac", "-b:a", "192k",
]


def load_words(transcript_path):
    """Return the transcript entries whose ``type`` is ``"word"``.

    The file handle is closed as soon as the JSON has been parsed.
    """
    with open(transcript_path, encoding="utf-8") as fh:
        data = json.load(fh)
    return [w for w in data["words"] if w.get("type") == "word"]


def is_filler(word):
    """Return True if the word's text (case/punctuation-insensitive) is a filler."""
    return word["text"].strip().lower().rstrip(",.") in FILLERS


def build_segments(words, threshold=THRESHOLD, pad=PAD, video_dur=VIDEO_DUR):
    """Merge non-filler words into padded, non-overlapping (start, end) segments.

    Consecutive words are merged while the gap between them is strictly less
    than ``threshold``; a gap >= ``threshold`` starts a new segment. Each
    segment is widened by ``pad`` on both sides, clamped to
    ``[0, video_dur]``, rounded to 4 decimals, and its start is pushed forward
    if the padding made it overlap the previous segment.

    Returns a list of ``(start, end)`` tuples; empty input gives ``[]``.
    """
    raw = []
    start = end = None
    for w in words:
        if is_filler(w):
            continue
        if start is None:
            start, end = w["start"], w["end"]
        elif w["start"] - end < threshold:
            end = w["end"]
        else:
            raw.append((start - pad, end + pad))
            start, end = w["start"], w["end"]
    if start is not None:
        raw.append((start - pad, end + pad))

    segments = []
    for a, b in raw:
        a = round(max(0.0, a), 4)
        b = round(min(video_dur, b), 4)
        if segments and a < segments[-1][1]:
            a = segments[-1][1]  # no overlap
        if b > a:
            segments.append((a, b))
    return segments


def bucket_segments(segs, passes=PASSES, source_dur=VIDEO_DUR):
    """Distribute segments over ``passes`` equal time buckets by start time.

    Every segment lands in exactly one bucket, so a segment that straddles a
    bucket boundary is rendered once rather than once per neighbouring pass.
    Returns a list of ``passes`` lists (some possibly empty).
    """
    bucket_dur = source_dur / passes
    buckets = [[] for _ in range(passes)]
    for a, b in segs:
        idx = min(max(int(a // bucket_dur), 0), passes - 1)
        buckets[idx].append((a, b))
    return buckets


def build_filter(bucket_segs):
    """Return the filter_complex graph that keeps only ``bucket_segs``.

    Video and audio are filtered with the same sum of ``between(t,a,b)`` terms
    (non-zero means keep), then re-timestamped so the output is contiguous.
    """
    expr = "+".join(f"between(t,{a},{b})" for a, b in bucket_segs)
    # NOTE(review): setpts=N/30/TB assumes a 30 fps source — confirm.
    return (
        f"[0:v]select='{expr}',setpts=N/30/TB,scale=1920:1080[outv];"
        f"[0:a]aselect='{expr}',asetpts=N/SR/TB[outa]"
    )


def run_ffmpeg(args, label):
    """Run ``ffmpeg -y <args>``; on failure print the stderr tail and exit(1)."""
    r = subprocess.run(["ffmpeg", "-y", *args], capture_output=True, text=True)
    if r.returncode != 0:
        print(f"{label} STDERR:", r.stderr[-2000:])
        sys.exit(1)


def main():
    """Build the segment list from TRANSCRIPT and render OUT_BASE from SOURCE."""
    segs = build_segments(load_words(TRANSCRIPT))

    total_dur = sum(b - a for a, b in segs)
    print(f"Segments: {len(segs)}")
    print(f"Total kept: {total_dur:.1f}s = {total_dur/60:.1f}min (cut {VIDEO_DUR - total_dur:.1f}s)")
    for i, (a, b) in enumerate(segs):
        print(f"  [{i:03d}] {a:.3f} -> {b:.3f}  ({b-a:.2f}s)")

    if not segs:
        # Nothing to render; an empty concat list would only make ffmpeg fail.
        print("No speech segments found; nothing to cut.")
        sys.exit(1)

    # Use select/aselect in PASSES to avoid OOM with 216 between() calls.
    # Split segs into time buckets, run one ffmpeg per bucket, concat results.
    bucket_dur = VIDEO_DUR / PASSES
    tmp_dir = OUT_BASE.parent / "tmp_passes"
    tmp_dir.mkdir(parents=True, exist_ok=True)

    tmp_files = []
    for p, bucket_segs in enumerate(bucket_segments(segs)):
        if not bucket_segs:
            continue
        t_lo = p * bucket_dur
        t_hi = (p + 1) * bucket_dur
        fc_f = tmp_dir / f"fc_pass{p}.txt"
        fc_f.write_text(build_filter(bucket_segs), encoding="utf-8")
        tmp_out = tmp_dir / f"pass{p}.mp4"
        tmp_files.append(tmp_out)
        print(f"Pass {p}: {len(bucket_segs)} segs, source {t_lo:.0f}-{t_hi:.0f}s -> {tmp_out.name}")
        run_ffmpeg(
            [
                "-i", str(SOURCE),
                # "-/opt file" makes ffmpeg read the option value from a file.
                "-/filter_complex", str(fc_f),
                "-map", "[outv]", "-map", "[outa]",
                *_ENCODE_ARGS,
                str(tmp_out),
            ],
            f"Pass {p}",
        )
        print(f"  Pass {p} done")

    # Concat pass outputs with concat demuxer
    # NOTE(review): this re-encodes already-encoded passes; stream copy may be
    # sufficient since every pass uses the same encoder settings — confirm.
    concat_list = tmp_dir / "concat.txt"
    concat_list.write_text(
        "\n".join(f"file '{f.as_posix()}'" for f in tmp_files),
        encoding="utf-8",
    )
    print(f"\nConcatenating {len(tmp_files)} passes -> {OUT_BASE}")
    run_ffmpeg(
        [
            "-f", "concat", "-safe", "0",
            "-i", str(concat_list),
            *_ENCODE_ARGS,
            str(OUT_BASE),
        ],
        "Concat",
    )
    print(f"Done! -> {OUT_BASE}")


if __name__ == "__main__":
    main()