File size: 21,537 Bytes
72f552e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
"""FFmpeg video stitching, clip splitting/shuffling, lyrics overlay.

Takes generated video clips (one per 4-beat segment), splits each into
two halves, shuffles them with a distance constraint, builds a timeline
with dynamic pacing (4-beat cuts before the drop, 2-beat after), overlays
audio and lyrics text.
"""

import json
import random
import subprocess
import tempfile
from pathlib import Path


def _get_audio_path(run_dir: Path) -> Path:
    """Find the original audio file one level above the run directory."""
    song_dir = run_dir.parent
    for ext in [".wav", ".mp3", ".flac", ".m4a"]:
        candidates = list(song_dir.glob(f"*{ext}"))
        if candidates:
            return candidates[0]
    raise FileNotFoundError(f"No audio file found in {song_dir}")


def _get_clip_duration(clip_path: Path) -> float:
    """Return the duration of *clip_path* in seconds, probed via ffprobe."""
    cmd = [
        "ffprobe", "-v", "error",
        "-show_entries", "format=duration",
        "-of", "csv=p=0",
        str(clip_path),
    ]
    probe = subprocess.run(cmd, capture_output=True, text=True, check=True)
    return float(probe.stdout.strip())


def _get_clip_fps(clip_path: Path) -> float:
    """Return the frame rate of the first video stream via ffprobe.

    ffprobe reports r_frame_rate as a rational "num/den" string
    (e.g. "30000/1001"), which is evaluated to a float here.
    """
    probe = subprocess.run(
        ["ffprobe", "-v", "error",
         "-select_streams", "v:0",
         "-show_entries", "stream=r_frame_rate",
         "-of", "csv=p=0",
         str(clip_path)],
        capture_output=True, text=True, check=True,
    )
    numerator, denominator = probe.stdout.strip().split("/")
    return int(numerator) / int(denominator)


def _trim_clip(clip_path: Path, start: float, duration: float, output_path: Path):
    """Re-encode a window of *clip_path* into *output_path*.

    Seeks to *start* seconds, keeps *duration* seconds, re-encodes with
    libx264 (fast preset), and strips the audio track (-an).
    """
    subprocess.run(
        [
            "ffmpeg", "-y",
            "-ss", f"{start:.3f}",
            "-i", str(clip_path),
            "-t", f"{duration:.3f}",
            "-c:v", "libx264", "-preset", "fast",
            "-an",
            str(output_path),
        ],
        check=True,
        capture_output=True,
    )


# ---------------------------------------------------------------------------
# Ken Burns effects β€” subtle pan/zoom applied per slot for added motion
# ---------------------------------------------------------------------------

# Zoom factor: 8% total movement over the clip duration
_KB_ZOOM = 0.45

KEN_BURNS_EFFECTS = [
    "zoom_in",
    "zoom_out",
]


def _ken_burns_filter(
    effect: str, n_frames: int, width: int, height: int,
) -> str:
    """Build an FFmpeg filter for a smooth Ken Burns zoom effect on video.

    Upscales the video 4x before applying zoompan with d=1 (one output
    frame per input frame), then scales back to original size. The 4x
    upscale makes integer rounding in zoompan negligible, eliminating
    visible jitter.
    """
    z = _KB_ZOOM
    N = max(n_frames, 1)
    W, H = width, height
    # Upscale factor β€” higher = smoother but slower
    UP = 8
    UW, UH = W * UP, H * UP

    if effect == "zoom_in":
        zoom_expr = f"1+{z}*on/{N}"
    elif effect == "zoom_out":
        zoom_expr = f"1+{z}-{z}*on/{N}"
    else:
        return f"scale={W}:{H}"

    return (
        f"scale={UW}:{UH}:flags=lanczos,"
        f"zoompan=z='{zoom_expr}':"
        f"x='iw/2-(iw/zoom/2)':y='ih/2-(ih/zoom/2)':"
        f"d=1:s={UW}x{UH},"
        f"scale={W}:{H}:flags=lanczos"
    )


def _get_clip_dimensions(clip_path: Path) -> tuple[int, int]:
    """Return (width, height) of the first video stream via ffprobe."""
    cmd = [
        "ffprobe", "-v", "error", "-select_streams", "v:0",
        "-show_entries", "stream=width,height",
        "-of", "csv=s=x:p=0", str(clip_path),
    ]
    probe = subprocess.run(cmd, capture_output=True, text=True, check=True)
    width, height = probe.stdout.strip().split("x")
    return int(width), int(height)


def _split_clip(clip_path: Path, clip_id: int) -> dict:
    """Register both logical halves of a clip without cutting the file.

    No physical split happens: both halves point at the same file. At
    trim time the "first" half is taken from the start of the clip and
    the "second" half from the end (offset back by the slot duration),
    which makes the two halves maximally different visually.

    Returns:
        Dict with the clip id, the shared path for each half, and the
        full clip duration recorded for both halves.
    """
    full_duration = _get_clip_duration(clip_path)
    return {
        "clip_id": clip_id,
        "first": clip_path,
        "second": clip_path,
        "first_duration": full_duration,
        "second_duration": full_duration,
    }


def _build_sub_segments(segments: list[dict], drop_time: float | None) -> list[dict]:
    """Build the final timeline of sub-segments.

    Before the drop: one slot per 4-beat segment.
    After the drop: each 4-beat segment splits into two 2-beat slots
    using the beat timestamps stored in the segment.
    """
    sub_segments = []

    for seg in segments:
        beats = seg.get("beats", [seg["start"], seg["end"]])
        is_after_drop = drop_time is not None and seg["start"] >= drop_time

        if is_after_drop and len(beats) >= 3:
            # Split at midpoint beat (beat 2 of 4)
            mid_idx = len(beats) // 2
            mid_time = beats[mid_idx]

            sub_segments.append({
                "start": seg["start"],
                "end": mid_time,
                "duration": round(mid_time - seg["start"], 3),
                "lyrics": seg.get("lyrics", ""),
                "parent_segment": seg["segment"],
            })
            sub_segments.append({
                "start": mid_time,
                "end": seg["end"],
                "duration": round(seg["end"] - mid_time, 3),
                "lyrics": "",  # lyrics stay on the first half
                "parent_segment": seg["segment"],
            })
        else:
            # Before drop: one slot for the full 4-beat segment
            sub_segments.append({
                "start": seg["start"],
                "end": seg["end"],
                "duration": seg["duration"],
                "lyrics": seg.get("lyrics", ""),
                "parent_segment": seg["segment"],
            })

    return sub_segments


def _shuffle_with_distance(pool: list[tuple], n_slots: int) -> list[tuple]:
    """Select n_slots sub-clips maximising clip diversity and spacing.

    Shuffles clip IDs once, then repeats that order to fill all slots.
    First pass uses "first" halves, second pass uses "second" halves.
    Same clip is always exactly n_clips positions apart β€” maximum spacing.

    Each item is (clip_id, half_label, path, duration).
    """
    by_clip: dict[int, list[tuple]] = {}
    for item in pool:
        by_clip.setdefault(item[0], []).append(item)

    clip_ids = list(by_clip.keys())
    random.shuffle(clip_ids)

    # Repeat the shuffled order: [4,5,1,2,6,3, 4,5,1,2,6,3, ...]
    result = []
    cycle = 0
    while len(result) < n_slots:
        for cid in clip_ids:
            if len(result) >= n_slots:
                break
            halves = by_clip[cid]
            # First cycle uses "first" half, second cycle uses "second", etc.
            half_idx = cycle % len(halves)
            result.append(halves[half_idx])
        cycle += 1

    return result


# Font registry — maps display names to .ttf filenames in fonts/
FONTS = {
    "Bebas Neue": "BebasNeue-Regular.ttf",
    "Teko": "Teko-Bold.ttf",
    "Russo One": "RussoOne-Regular.ttf",
    "Staatliches": "Staatliches-Regular.ttf",
}

# Fallback font name used when a requested name is not in FONTS.
DEFAULT_FONT = "Bebas Neue"
# Default warm off-white color for all drawn text (drawtext fontcolor value).
DEFAULT_FONT_COLOR = "#FFF7D4"

# fonts/ directory two levels above this module (repository root / fonts).
_FONTS_DIR = Path(__file__).resolve().parent.parent / "fonts"


def font_names() -> list[str]:
    """Return the display names of all registered fonts."""
    return [name for name in FONTS]


def _get_font_path(font_name: str) -> Path:
    """Resolve a font display name to its .ttf path.

    Unknown names silently fall back to DEFAULT_FONT.
    """
    try:
        filename = FONTS[font_name]
    except KeyError:
        filename = FONTS[DEFAULT_FONT]
    return _FONTS_DIR / filename


# Static Spotify badge image, overlaid alongside the cover art after the drop.
_SPOTIFY_BADGE = Path(__file__).resolve().parent.parent / "assets" / "spotify_badge.png"


def _add_lyrics_overlay(
    video_path: Path,
    segments: list[dict],
    output_path: Path,
    audio_offset: float,
    font_name: str = DEFAULT_FONT,
    font_color: str = DEFAULT_FONT_COLOR,
    cover_art: Path | None = None,
    drop_time: float | None = None,
    song_name: str = "",
):
    """Add lyrics text and optional cover art overlay using FFmpeg filters.

    Words are drawn one at a time, centered on screen, timed to their
    transcript timestamps. When both ``cover_art`` and ``drop_time`` are
    given, the cover (plus Spotify badge and a song-title line) appears
    from the drop onwards and lyrics are suppressed past the drop.

    Args:
        video_path: Input video (audio already muxed in).
        segments: Segment dicts; word timing is read from each segment's
            "words" list (items with "word", "start", "end" keys).
        output_path: Destination file for the overlaid video.
        audio_offset: Song-time at which the video starts; word and drop
            times are shifted back by this amount to video-relative time.
        font_name: Display name resolved through the FONTS registry.
        font_color: Color value passed directly to drawtext's fontcolor.
        cover_art: Optional cover image path (scaled to a fixed height).
        drop_time: Song-time of the drop; required for the cover overlay.
        song_name: Title rendered under the cover art.
    """
    font_path = _get_font_path(font_name)

    # If cover art provided, lyrics stop at the drop
    lyrics_cutoff = None
    if cover_art is not None and drop_time is not None:
        lyrics_cutoff = drop_time

    # Collect all words with timestamps
    all_words = []
    for seg in segments:
        for word_info in seg.get("words", []):
            word = word_info["word"].strip().lower()
            if not word:
                continue
            w_start = word_info["start"]
            w_end = word_info["end"]
            # Skip words that start after the cutoff
            if lyrics_cutoff is not None and w_start >= lyrics_cutoff:
                continue
            # Clamp end to cutoff for words that span the drop
            if lyrics_cutoff is not None and w_end > lyrics_cutoff:
                w_end = lyrics_cutoff
            all_words.append({"word": word, "start": w_start, "end": w_end})

    # Close small gaps: both words meet in the middle of the gap
    # (keeps text on screen continuously through short inter-word pauses).
    gap_threshold = 0.5
    for i in range(len(all_words) - 1):
        gap = all_words[i + 1]["start"] - all_words[i]["end"]
        if 0 < gap < gap_threshold:
            mid = all_words[i]["end"] + gap / 2
            all_words[i]["end"] = mid
            all_words[i + 1]["start"] = mid

    # Build drawtext filter chain — one filter per word, timed to speech
    drawtext_filters = []
    for w in all_words:
        # Escape characters that are special inside a drawtext text value.
        # Apostrophes are swapped for typographic quotes (U+2019) to avoid
        # FFmpeg's awkward single-quote escaping entirely.
        escaped = (w["word"]
                   .replace("\\", "\\\\")
                   .replace("'", "\u2019")
                   .replace('"', '\\"')
                   .replace(":", "\\:")
                   .replace("%", "%%")
                   .replace("[", "\\[")
                   .replace("]", "\\]"))

        # Convert song-time to video-time.
        start = w["start"] - audio_offset
        end = w["end"] - audio_offset

        drawtext_filters.append(
            f"drawtext=text='{escaped}'"
            f":fontfile='{font_path}'"
            f":fontsize=36"
            f":fontcolor={font_color}"
            f":x=(w-text_w)/2:y=(h-text_h)/2"
            f":enable='between(t,{start:.3f},{end:.3f})'"
        )

    has_cover = cover_art is not None and drop_time is not None
    has_lyrics = len(drawtext_filters) > 0

    # Nothing to draw: copy streams through without re-encoding.
    if not has_cover and not has_lyrics:
        subprocess.run([
            "ffmpeg", "-y", "-i", str(video_path),
            "-c", "copy", str(output_path),
        ], check=True, capture_output=True)
        return

    if has_cover:
        drop_start = drop_time - audio_offset
        # All cover-related overlays switch on at the drop (video-time).
        enable = f"enable='gte(t,{drop_start:.3f})'"

        # --- Cover art layout (change these to adjust) ---
        art_h = 270            # cover art height in px
        art_y_offset = 10      # px below center (positive = down)
        badge_h = 56           # spotify badge height in px

        # Probe video height for position calculations
        vid_h = int(subprocess.run([
            "ffprobe", "-v", "error", "-select_streams", "v:0",
            "-show_entries", "stream=height", "-of", "csv=p=0",
            str(video_path),
        ], capture_output=True, text=True, check=True).stdout.strip())
        art_center = vid_h / 2 + art_y_offset
        art_top = art_center - art_h / 2
        art_bottom = art_center + art_h / 2

        # Square = 9:16 crop region (side = vid_h * 9/16)
        # Positions are computed inside this band so overlays survive the
        # later 9:16 crop step.
        sq_side = vid_h * 9 / 16
        sq_top = (vid_h - sq_side) / 2
        sq_bottom = (vid_h + sq_side) / 2

        # Badge centered between square top and art top
        badge_center_y = (sq_top + art_top) / 2
        badge_y = int(badge_center_y - badge_h / 2)

        # Title centered between art bottom and square bottom
        title_center_y = int((art_bottom + sq_bottom) / 2)

        art_overlay_y = int(art_center - art_h / 2)

        # Filtergraph: input 1 = cover art, input 2 = spotify badge;
        # both scaled to fixed heights, then layered onto the video.
        parts = [
            f"[1:v]scale=-2:{art_h}:flags=lanczos[art]",
            f"[2:v]scale=-2:{badge_h}:flags=lanczos[badge]",
            f"[0:v][art]overlay=(W-w)/2:{art_overlay_y}:{enable}[v1]",
            f"[v1][badge]overlay=(W-w)/2:{badge_y}:{enable}",
        ]

        # Add song title drawtext below cover art
        title_escaped = (song_name
                         .replace("\\", "\\\\")
                         .replace("'", "\u2019")
                         .replace('"', '\\"')
                         .replace(":", "\\:")
                         .replace("%", "%%"))
        title_text = f'\\"{title_escaped}\\" out now!'.lower()
        parts[-1] += (
            f",drawtext=text='{title_text}'"
            f":fontfile='{font_path}'"
            f":fontsize=40"
            f":fontcolor={font_color}"
            f":x=(w-text_w)/2:y={title_center_y}-text_h/2"
            f":{enable}"
        )

        # Chain drawtext lyrics filters
        if has_lyrics:
            parts[-1] += "," + ",".join(drawtext_filters)
        filter_chain = ";".join(parts)

        cmd = [
            "ffmpeg", "-y",
            "-i", str(video_path),
            "-i", str(cover_art),
            "-i", str(_SPOTIFY_BADGE),
            "-filter_complex", filter_chain,
            "-c:v", "libx264", "-preset", "fast",
            "-c:a", "copy",
            str(output_path),
        ]
        subprocess.run(cmd, check=True, capture_output=True)
    else:
        # Lyrics only, no cover art
        filter_chain = ",".join(drawtext_filters)
        subprocess.run([
            "ffmpeg", "-y",
            "-i", str(video_path),
            "-vf", filter_chain,
            "-c:v", "libx264", "-preset", "fast",
            "-c:a", "copy",
            str(output_path),
        ], check=True, capture_output=True)


def assemble(
    run_dir: str | Path,
    audio_path: str | Path | None = None,
    font_name: str = DEFAULT_FONT,
    font_color: str = DEFAULT_FONT_COLOR,
    cover_art: str | Path | None = None,
) -> Path:
    """Assemble final video with dynamic pacing, clip shuffling, and lyrics.

    Args:
        run_dir: Run directory containing clips/, segments.json, drop.json.
        audio_path: Path to the original audio. Auto-detected if None.
        font_name: Display name of the font for lyrics overlay.
        font_color: Hex color for lyrics text (e.g. '#FFF7D4').
        cover_art: Path to cover art image. Overlayed from the drop onwards.

    Returns:
        Path to the final video file.
    """
    run_dir = Path(run_dir)
    clips_dir = run_dir / "clips"
    output_dir = run_dir / "output"
    output_dir.mkdir(parents=True, exist_ok=True)

    with open(run_dir / "segments.json") as f:
        segments = json.load(f)

    # Load drop time
    drop_time = None
    drop_path = run_dir / "drop.json"
    if drop_path.exists():
        with open(drop_path) as f:
            drop_time = json.load(f).get("drop_time")
        print(f"  Drop at {drop_time:.3f}s")
    else:
        print("  No drop detected β€” using uniform pacing")

    if audio_path is None:
        audio_path = _get_audio_path(run_dir)
    audio_path = Path(audio_path)

    # --- Step 1: Register clip halves (no pre-splitting needed) ---
    sub_clips = []  # list of (clip_id, half, path, full_duration)
    for seg in segments:
        idx = seg["segment"]
        clip_path = clips_dir / f"clip_{idx:03d}.mp4"
        if not clip_path.exists():
            print(f"  Warning: {clip_path.name} not found, skipping")
            continue

        halves = _split_clip(clip_path, idx)
        sub_clips.append((idx, "first", halves["first"], halves["first_duration"]))
        sub_clips.append((idx, "second", halves["second"], halves["second_duration"]))
        print(f"  Registered {clip_path.name} ({halves['first_duration']:.1f}s)")

    if not sub_clips:
        raise FileNotFoundError(f"No clips found in {clips_dir}")

    # --- Step 2: Build sub-segment timeline ---
    sub_segments = _build_sub_segments(segments, drop_time)
    print(f"  Timeline: {len(sub_segments)} slots "
          f"({len([s for s in sub_segments if s['duration'] < 1.5])} fast cuts)")

    # --- Step 3: Shuffle sub-clips into slots ---
    assigned = _shuffle_with_distance(sub_clips.copy(), n_slots=len(sub_segments))

    # --- Step 4: Frame-accurate trim of each sub-clip to slot duration ---
    # Detect FPS from first available sub-clip
    fps = _get_clip_fps(assigned[0][2])
    print(f"  Source FPS: {fps}")

    trimmed_dir = run_dir / "clips_trimmed"
    trimmed_dir.mkdir(exist_ok=True)
    trimmed_paths = []

    # Get clip dimensions from the first available clip (all clips share resolution)
    clip_width, clip_height = _get_clip_dimensions(assigned[0][2])
    print(f"  Clip resolution: {clip_width}x{clip_height}")

    # Track cumulative frames to prevent drift between cuts and beats
    cumulative_frames = 0
    cumulative_target = 0.0

    for i, (sub_seg, (clip_id, half, clip_path, clip_dur)) in enumerate(
        zip(sub_segments, assigned)
    ):
        slot_dur = sub_seg["duration"]
        cumulative_target += min(slot_dur, clip_dur)
        target_frame = round(cumulative_target * fps)
        n_frames = max(1, target_frame - cumulative_frames)
        cumulative_frames = target_frame

        # "first" half starts from 0, "second" half starts from end minus slot duration
        # This makes the two halves show maximally different frames
        if half == "second":
            ss = max(0, clip_dur - slot_dur)
        else:
            ss = 0

        # Apply Ken Burns effect β€” cycle through effects per slot
        effect = KEN_BURNS_EFFECTS[i % len(KEN_BURNS_EFFECTS)]
        vf = _ken_burns_filter(effect, n_frames, clip_width, clip_height)

        trimmed_path = trimmed_dir / f"slot_{i:03d}.mp4"
        cmd = [
            "ffmpeg", "-y",
            "-ss", f"{ss:.3f}",
            "-i", str(clip_path),
            "-frames:v", str(n_frames),
            "-vf", vf,
            "-c:v", "libx264", "-preset", "fast",
            "-r", str(int(fps)),
            "-an",
            str(trimmed_path),
        ]
        subprocess.run(cmd, check=True, capture_output=True)
        trimmed_paths.append(trimmed_path)
        actual_dur = n_frames / fps
        print(f"  Slot {i}: clip {clip_id} ({half}, ss={ss:.1f}s, {effect}) β†’ "
              f"{n_frames}f/{actual_dur:.3f}s (target {slot_dur:.3f}s)")

    # --- Step 5: Concatenate (copy, no re-encode to preserve timing) ---
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".txt", delete=False, dir=str(run_dir)
    ) as f:
        for p in trimmed_paths:
            f.write(f"file '{p.resolve()}'\n")
        concat_list = f.name

    concat_path = output_dir / "video_only.mp4"
    subprocess.run([
        "ffmpeg", "-y",
        "-f", "concat", "-safe", "0",
        "-i", concat_list,
        "-c", "copy",
        str(concat_path),
    ], check=True, capture_output=True)

    # --- Step 6: Overlay audio ---
    audio_start = segments[0]["start"]
    video_duration = cumulative_frames / fps  # actual frame-accurate duration

    with_audio_path = output_dir / "with_audio.mp4"
    subprocess.run([
        "ffmpeg", "-y",
        "-i", str(concat_path),
        "-ss", f"{audio_start:.3f}",
        "-i", str(audio_path),
        "-t", f"{video_duration:.3f}",
        "-c:v", "copy",
        "-c:a", "aac", "-b:a", "192k",
        "-map", "0:v:0", "-map", "1:a:0",
        "-shortest",
        str(with_audio_path),
    ], check=True, capture_output=True)

    # --- Step 7: Lyrics + cover art overlay ---
    overlay_path = output_dir / "with_overlay.mp4"
    cover_path = Path(cover_art) if cover_art else None
    song_name = run_dir.parent.name
    _add_lyrics_overlay(with_audio_path, segments, overlay_path, audio_start,
                        font_name=font_name, font_color=font_color,
                        cover_art=cover_path, drop_time=drop_time,
                        song_name=song_name)

    # --- Step 8: Crop to exact 9:16 ---
    final_path = output_dir / "final.mp4"
    subprocess.run([
        "ffmpeg", "-y",
        "-i", str(overlay_path),
        "-vf", "crop=2*floor(ih*9/16/2):ih:(iw-2*floor(ih*9/16/2))/2:0",
        "-c:v", "libx264", "-preset", "fast",
        "-c:a", "copy",
        str(final_path),
    ], check=True, capture_output=True)

    # Clean up
    Path(concat_list).unlink(missing_ok=True)

    print(f"\nFinal video: {final_path}")
    print(f"  Duration: {video_duration:.2f}s")
    print(f"  Slots: {len(sub_segments)} ({len(segments)} original segments)")
    return final_path


def run(
    run_dir: str | Path,
    font_name: str = DEFAULT_FONT,
    font_color: str = DEFAULT_FONT_COLOR,
    cover_art: str | Path | None = None,
) -> Path:
    """Assemble final video from clips + audio.

    Thin entry point around :func:`assemble` with audio auto-detection.

    Args:
        run_dir: Run directory (e.g. data/Gone/run_001/).
        font_name: Display name of the font for lyrics overlay.
        font_color: Hex color for lyrics text.
        cover_art: Path to cover art image (optional).

    Returns:
        Path to final video.
    """
    print("Assembling final video...")
    return assemble(
        run_dir,
        font_name=font_name,
        font_color=font_color,
        cover_art=cover_art,
    )


if __name__ == "__main__":
    import sys

    if len(sys.argv) < 2:
        print("Usage: python -m src.assembler <run_dir>")
        print("  e.g. python -m src.assembler data/Gone/run_001")
        sys.exit(1)

    run(sys.argv[1])