"""Interactive cue-timeline payloads for manual transition editing. The first cue editor rendered a static waveform image plus dropdowns. This module adds a DAW-like review artifact: a self-contained HTML timeline with waveform envelopes, candidate markers, draggable selected anchors, keyboard nudge controls, and an explicit JSON payload that can be pasted/applied back to Gradio. It intentionally stays dependency-light and file-local. The browser-side editor is not a full DAW, but it is a real interactive timeline rather than a static plot: users can drag A-out, B-in and B-drop markers against the actual audio peak envelope and then apply the resulting payload to the TransitionPlan. """ from __future__ import annotations from dataclasses import dataclass, asdict from html import escape from pathlib import Path from typing import Any, Mapping import base64 import hashlib import json import math import tempfile import librosa import numpy as np from cue_editor import parse_cue_choice @dataclass(frozen=True) class TimelineMarker: id: str label: str track: str role: str time: float confidence: float = 1.0 locked: bool = False @dataclass(frozen=True) class TimelinePayload: version: int transition_index: int transition_type: str duration_beats: int bpm_reference: float markers: list[TimelineMarker] tracks: dict[str, dict[str, Any]] def to_dict(self) -> dict[str, Any]: d = asdict(self) d["markers"] = [asdict(m) for m in self.markers] return d def _load_mono(path: str, *, max_duration: float, sr: int = 8000) -> tuple[np.ndarray, int]: try: y, got_sr = librosa.load(path, sr=sr, mono=True, duration=max_duration) except Exception: got_sr = sr y = np.zeros(sr, dtype=np.float32) if y.size == 0: y = np.zeros(sr, dtype=np.float32) peak = float(np.max(np.abs(y))) if y.size else 0.0 if peak > 0: y = y / peak return np.asarray(y, dtype=np.float32), got_sr def _peak_envelope(path: str, *, duration: float, bins: int = 900) -> list[float]: y, _sr = _load_mono(path, max_duration=max(1.0, duration)) if y.size == 0: return [0.0] * bins bins = max(64, min(bins, len(y))) edges = np.linspace(0, len(y), bins + 1, dtype=int) env: list[float] = [] for i in range(bins): seg = y[edges[i]:edges[i + 1]] env.append(round(float(np.max(np.abs(seg))) if len(seg) else 0.0, 4)) return env def _candidate_markers(track: Any, *, track_id: str, limit: int = 80) -> list[dict[str, Any]]: cues = list(getattr(track, "cue_points", []) or []) cues.sort(key=lambda c: (-float(c.get("confidence", 0.0) or 0.0), float(c.get("time", 0.0) or 0.0))) out: list[dict[str, Any]] = [] for cue in cues[:limit]: try: t = float(cue.get("time", 0.0) or 0.0) except Exception: continue if t < 0: continue out.append({ "track": track_id, "time": round(t, 3), "kind": str(cue.get("kind", cue.get("type", "cue"))), "label": str(cue.get("label", cue.get("kind", "cue"))), "confidence": round(float(cue.get("confidence", 0.0) or 0.0), 3), }) return out def build_timeline_payload( *, transition_index: int, track_a: Any, track_b: Any, plan: Any, transition_type: str | None = None, ) -> TimelinePayload: selected = getattr(plan, "selected_cues", {}) or {} a_out = float(selected.get("a_out", {}).get("time", getattr(plan, "mix_out_point", 0.0)) or 0.0) b_in = float(selected.get("b_in", {}).get("time", getattr(plan, "mix_in_point", 0.0)) or 0.0) b_drop = float(selected.get("b_drop", {}).get("time", b_in + getattr(plan, "duration_seconds", 0.0)) or 0.0) typ = transition_type or getattr(plan, "transition_type", "eq_crossfade") bpm = float(getattr(track_b, "bpm", getattr(plan, "bpm", 128.0)) or 128.0) markers = [ TimelineMarker("a_out", "A OUT", "A", "mix_out", round(a_out, 3), float(selected.get("a_out", {}).get("confidence", 1.0) or 1.0)), TimelineMarker("b_in", "B IN", "B", "mix_in", round(b_in, 3), float(selected.get("b_in", {}).get("confidence", 1.0) or 1.0)), TimelineMarker("b_drop", "B DROP", "B", "drop", round(b_drop, 3), float(selected.get("b_drop", {}).get("confidence", 1.0) or 1.0)), ] return TimelinePayload( version=1, transition_index=int(transition_index), transition_type=typ, duration_beats=int(getattr(plan, "duration_beats", 64) or 64), bpm_reference=round(bpm, 3), markers=markers, tracks={ "A": { "filename": getattr(track_a, "filename", "track A"), "duration": round(float(getattr(track_a, "duration", 0.0) or 0.0), 3), "bpm": round(float(getattr(track_a, "bpm", 0.0) or 0.0), 3), "envelope": _peak_envelope(getattr(track_a, "path", ""), duration=float(getattr(track_a, "duration", 300.0) or 300.0)), "candidates": _candidate_markers(track_a, track_id="A"), }, "B": { "filename": getattr(track_b, "filename", "track B"), "duration": round(float(getattr(track_b, "duration", 0.0) or 0.0), 3), "bpm": round(float(getattr(track_b, "bpm", 0.0) or 0.0), 3), "envelope": _peak_envelope(getattr(track_b, "path", ""), duration=float(getattr(track_b, "duration", 300.0) or 300.0)), "candidates": _candidate_markers(track_b, track_id="B"), }, }, ) def _payload_json(payload: TimelinePayload | Mapping[str, Any]) -> str: data = payload.to_dict() if isinstance(payload, TimelinePayload) else dict(payload) return json.dumps(data, ensure_ascii=False, separators=(",", ":")) def render_interactive_timeline_html(payload: TimelinePayload | Mapping[str, Any]) -> str: """Return self-contained HTML for a draggable two-track cue timeline.""" data = payload.to_dict() if isinstance(payload, TimelinePayload) else dict(payload) raw = _payload_json(data) encoded = base64.b64encode(raw.encode("utf-8")).decode("ascii") title = escape(f"Transition {data.get('transition_index', '?')} · {data.get('transition_type', 'transition')}") # The script intentionally avoids external dependencies so the artifact can # be saved or reviewed outside Gradio. return f"""