# Upload edit/build_cut.py with huggingface_hub
#
# 8e7f21b verified about 1 month ago
#
# 3.95 kB

	"""
	Build speech segments from transcript, removing silences >= 0.3s and uh/um fillers.
	Then execute the ffmpeg trim+concat cut, scaling 4K → 1080p with dense keyframes.
	"""
	import json, subprocess, sys
	from pathlib import Path

	TRANSCRIPT = Path(r"D:\PromptEngineer48\In-Progress\P11-Editor\edit\transcripts\Mem0_1.json")
	SOURCE = Path(r"D:\PromptEngineer48\In-Progress\P11-Editor\Mem0_1.mp4")
	OUT_BASE = Path(r"D:\PromptEngineer48\In-Progress\P11-Editor\edit\hf\base_cut.mp4")

	THRESHOLD = 0.30 # gaps >= this get cut
	PAD = 0.08 # 80ms padding around each speech segment
	FADE_MS = 30 # ms audio fade at each edge to prevent pops

	FILLERS = {"uh", "um"}

	data = json.load(open(TRANSCRIPT, encoding="utf-8"))
	words = [w for w in data["words"] if w.get("type") == "word"]

	# Remove filler words
	clean = [w for w in words if w["text"].strip().lower().rstrip(",.") not in FILLERS]

	# Build speech segments by merging words within THRESHOLD
	segs = []
	s = e = None
	for w in clean:
	if s is None:
	s, e = w["start"], w["end"]
	elif w["start"] - e <= THRESHOLD:
	e = w["end"]
	else:
	segs.append((max(0, s - PAD), e + PAD))
	s, e = w["start"], w["end"]
	if s is not None:
	segs.append((max(0, s - PAD), e + PAD))

	# Clamp and clip overlapping edges
	VIDEO_DUR = 805.5
	clamped = []
	for a, b in segs:
	a = round(max(0.0, a), 4)
	b = round(min(VIDEO_DUR, b), 4)
	if clamped and a < clamped[-1][1]:
	a = clamped[-1][1] # no overlap
	if b > a:
	clamped.append((a, b))
	segs = clamped

	total_dur = sum(b - a for a, b in segs)
	print(f"Segments: {len(segs)}")
	print(f"Total kept: {total_dur:.1f}s = {total_dur/60:.1f}min (cut {VIDEO_DUR - total_dur:.1f}s)")
	for i, (a, b) in enumerate(segs):
	print(f" [{i:03d}] {a:.3f} -> {b:.3f} ({b-a:.2f}s)")

	# Build ffmpeg filter_complex with trim + concat
	# Use select/aselect in PASSES to avoid OOM with 216 between() calls.
	# Split segs into 4 time buckets, run one ffmpeg per bucket, concat results.
	PASSES = 4
	source_dur = VIDEO_DUR
	bucket_dur = source_dur / PASSES
	tmp_dir = OUT_BASE.parent / "tmp_passes"
	tmp_dir.mkdir(exist_ok=True)

	tmp_files = []
	for p in range(PASSES):
	t_lo = p * bucket_dur
	t_hi = (p + 1) * bucket_dur
	bucket_segs = [(a, b) for a, b in segs if b > t_lo and a < t_hi]
	if not bucket_segs:
	continue
	expr = "+".join(f"between(t,{a},{b})" for a, b in bucket_segs)
	fc = (
	f"[0:v]select='{expr}',setpts=N/30/TB,scale=1920:1080[outv];"
	f"[0:a]aselect='{expr}',asetpts=N/SR/TB[outa]"
	)
	fc_f = tmp_dir / f"fc_pass{p}.txt"
	fc_f.write_text(fc, encoding="utf-8")
	tmp_out = tmp_dir / f"pass{p}.mp4"
	tmp_files.append(tmp_out)
	print(f"Pass {p}: {len(bucket_segs)} segs, source {t_lo:.0f}-{t_hi:.0f}s -> {tmp_out.name}")
	r = subprocess.run([
	"ffmpeg", "-y", "-i", str(SOURCE),
	"-/filter_complex", str(fc_f),
	"-map", "[outv]", "-map", "[outa]",
	"-c:v", "libx264", "-crf", "18", "-preset", "fast",
	"-g", "30", "-keyint_min", "30",
	"-c:a", "aac", "-b:a", "192k",
	str(tmp_out)
	], capture_output=True, text=True)
	if r.returncode != 0:
	print(f"Pass {p} STDERR:", r.stderr[-2000:])
	sys.exit(1)
	print(f" Pass {p} done")

	# Concat pass outputs with concat demuxer
	concat_list = tmp_dir / "concat.txt"
	concat_list.write_text(
	"\n".join(f"file '{f.as_posix()}'" for f in tmp_files),
	encoding="utf-8"
	)
	print(f"\nConcatenating {len(tmp_files)} passes -> {OUT_BASE}")
	r = subprocess.run([
	"ffmpeg", "-y", "-f", "concat", "-safe", "0",
	"-i", str(concat_list),
	"-c:v", "libx264", "-crf", "18", "-preset", "fast",
	"-g", "30", "-keyint_min", "30",
	"-c:a", "aac", "-b:a", "192k",
	str(OUT_BASE)
	], capture_output=True, text=True)
	if r.returncode != 0:
	print("Concat STDERR:", r.stderr[-2000:])
	sys.exit(1)
	print(f"Done! -> {OUT_BASE}")