Prompt48 committed on
Commit
8e7f21b
·
verified ·
1 Parent(s): 7565da9

Upload edit/build_cut.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. edit/build_cut.py +114 -0
edit/build_cut.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
Build speech segments from a word-level transcript and cut the source video.

Words are read from the transcript JSON, "uh"/"um" fillers are dropped, and
the remaining words are merged into speech segments; any gap between words of
at least THRESHOLD seconds is cut. The kept ranges are then rendered with
ffmpeg select/aselect filters in PASSES separate runs (scaled to 1920x1080
with a keyframe every 30 frames) and the pass outputs are concatenated.
"""
import json
import subprocess
import sys
from pathlib import Path

TRANSCRIPT = Path(r"D:\PromptEngineer48\In-Progress\P11-Editor\edit\transcripts\Mem0_1.json")
SOURCE = Path(r"D:\PromptEngineer48\In-Progress\P11-Editor\Mem0_1.mp4")
OUT_BASE = Path(r"D:\PromptEngineer48\In-Progress\P11-Editor\edit\hf\base_cut.mp4")

THRESHOLD = 0.30   # gaps >= this get cut
PAD = 0.08         # 80ms padding around each speech segment
FADE_MS = 30       # ms audio fade at each edge to prevent pops
                   # NOTE(review): not referenced anywhere in this script.

FILLERS = {"uh", "um"}

VIDEO_DUR = 805.5  # source duration in seconds (hard-coded for this clip)
PASSES = 4         # number of ffmpeg runs the segment list is split across

# Encoder settings shared by the per-pass renders and the final concat.
ENCODE_ARGS = [
    "-c:v", "libx264", "-crf", "18", "-preset", "fast",
    "-g", "30", "-keyint_min", "30",
    "-c:a", "aac", "-b:a", "192k",
]


def load_words(transcript_path):
    """Return the transcript entries whose "type" is "word".

    The file is opened in a ``with`` block so the handle is closed even if
    JSON parsing fails.
    """
    with open(transcript_path, encoding="utf-8") as fh:
        data = json.load(fh)
    return [w for w in data["words"] if w.get("type") == "word"]


def build_segments(words, threshold=THRESHOLD, pad=PAD, fillers=FILLERS):
    """Merge non-filler words into padded (start, end) speech segments.

    Consecutive words are joined while the gap between them is strictly
    smaller than ``threshold``; a gap of ``threshold`` or more starts a new
    segment. Each finished segment is widened by ``pad`` seconds on both
    sides (never below 0).

    NOTE: a filler is only dropped from the word list. If the words on either
    side of it are closer together than ``threshold``, they merge into one
    segment and the filler's audio remains in the cut.
    """
    clean = [
        w for w in words
        if w["text"].strip().lower().rstrip(",.") not in fillers
    ]

    segs = []
    s = e = None
    for w in clean:
        if s is None:
            s, e = w["start"], w["end"]
        elif w["start"] - e < threshold:
            # max() keeps the segment end from moving backwards if a word's
            # end time precedes the end already recorded.
            e = max(e, w["end"])
        else:
            segs.append((max(0, s - pad), e + pad))
            s, e = w["start"], w["end"]
    if s is not None:
        segs.append((max(0, s - pad), e + pad))
    return segs


def clamp_segments(segs, video_dur=VIDEO_DUR):
    """Clamp segments to [0, video_dur] and remove overlap between neighbours.

    Bounds are rounded to 4 decimals. A segment that starts before the
    previous one ends is moved to start where the previous one ends, and
    segments that end up empty are discarded.
    """
    clamped = []
    for a, b in segs:
        a = round(max(0.0, a), 4)
        b = round(min(video_dur, b), 4)
        if clamped and a < clamped[-1][1]:
            a = clamped[-1][1]  # no overlap
        if b > a:
            clamped.append((a, b))
    return clamped


def bucket_segments(segs, t_lo, t_hi):
    """Return the parts of ``segs`` that fall inside [t_lo, t_hi).

    Segments that straddle a bucket edge are clipped to the bucket, so each
    moment of source time belongs to exactly one bucket and is not rendered
    by two passes.
    """
    clipped = []
    for a, b in segs:
        lo, hi = max(a, t_lo), min(b, t_hi)
        if hi > lo:
            clipped.append((lo, hi))
    return clipped


def build_filter(bucket_segs):
    """Build the filter_complex graph that keeps only ``bucket_segs``.

    Each range is selected as a half-open interval [a, b) so a frame that
    sits exactly on a shared edge is selected once.
    """
    expr = "+".join(f"gte(t,{a})*lt(t,{b})" for a, b in bucket_segs)
    return (
        # setpts=N/30/TB renumbers the kept frames at 30 fps.
        # NOTE(review): assumes a 30 fps source; confirm.
        f"[0:v]select='{expr}',setpts=N/30/TB,scale=1920:1080[outv];"
        f"[0:a]aselect='{expr}',asetpts=N/SR/TB[outa]"
    )


def _run_ffmpeg(args, label):
    """Run ``ffmpeg -y <args>``; on failure print the stderr tail and exit 1."""
    r = subprocess.run(["ffmpeg", "-y", *args], capture_output=True, text=True)
    if r.returncode != 0:
        print(f"{label} STDERR:", r.stderr[-2000:])
        sys.exit(1)


def main():
    """Compute the speech segments, render them in passes, and concatenate."""
    words = load_words(TRANSCRIPT)
    segs = clamp_segments(build_segments(words))

    total_dur = sum(b - a for a, b in segs)
    print(f"Segments: {len(segs)}")
    print(f"Total kept: {total_dur:.1f}s = {total_dur/60:.1f}min (cut {VIDEO_DUR - total_dur:.1f}s)")
    for i, (a, b) in enumerate(segs):
        print(f" [{i:03d}] {a:.3f} -> {b:.3f} ({b-a:.2f}s)")

    # Render with select/aselect in PASSES separate ffmpeg runs so that no
    # single filter expression has to hold every segment, then concat results.
    bucket_dur = VIDEO_DUR / PASSES
    tmp_dir = OUT_BASE.parent / "tmp_passes"
    tmp_dir.mkdir(parents=True, exist_ok=True)

    tmp_files = []
    for p in range(PASSES):
        t_lo = p * bucket_dur
        # Pin the last bucket to VIDEO_DUR so float rounding cannot shave the tail.
        t_hi = VIDEO_DUR if p == PASSES - 1 else (p + 1) * bucket_dur
        bucket_segs = bucket_segments(segs, t_lo, t_hi)
        if not bucket_segs:
            continue
        fc_f = tmp_dir / f"fc_pass{p}.txt"
        fc_f.write_text(build_filter(bucket_segs), encoding="utf-8")
        tmp_out = tmp_dir / f"pass{p}.mp4"
        tmp_files.append(tmp_out)
        print(f"Pass {p}: {len(bucket_segs)} segs, source {t_lo:.0f}-{t_hi:.0f}s -> {tmp_out.name}")
        _run_ffmpeg(
            [
                "-i", str(SOURCE),
                # "-/filter_complex" passes the graph via a file.
                # NOTE(review): needs an ffmpeg build that accepts the "-/opt" form.
                "-/filter_complex", str(fc_f),
                "-map", "[outv]", "-map", "[outa]",
                *ENCODE_ARGS,
                str(tmp_out),
            ],
            f"Pass {p}",
        )
        print(f" Pass {p} done")

    if not tmp_files:
        print("No speech segments to render; nothing to concatenate.")
        sys.exit(1)

    # Concat pass outputs with concat demuxer. Single quotes inside a path
    # are escaped so the quoted "file" directive stays well-formed.
    concat_list = tmp_dir / "concat.txt"
    concat_list.write_text(
        "\n".join(
            "file '{}'".format(f.as_posix().replace("'", "'\\''"))
            for f in tmp_files
        ),
        encoding="utf-8",
    )
    print(f"\nConcatenating {len(tmp_files)} passes -> {OUT_BASE}")
    # NOTE(review): this re-encodes the already-encoded passes a second time.
    _run_ffmpeg(
        [
            "-f", "concat", "-safe", "0",
            "-i", str(concat_list),
            *ENCODE_ARGS,
            str(OUT_BASE),
        ],
        "Concat",
    )
    print(f"Done! -> {OUT_BASE}")


if __name__ == "__main__":
    main()