"""Interactive cue-timeline payloads for manual transition editing.

The first cue editor rendered a static waveform image plus dropdowns. This
module adds a DAW-like review artifact: a self-contained HTML timeline with
waveform envelopes, candidate markers, draggable selected anchors, keyboard
nudge controls, and an explicit JSON payload that can be pasted/applied back to
Gradio.

It intentionally stays dependency-light and file-local. The browser-side editor
is not a full DAW, but it is a real interactive timeline rather than a static
plot: users can drag A-out, B-in and B-drop markers against the actual audio
peak envelope and then apply the resulting payload to the TransitionPlan.
"""

from __future__ import annotations

from dataclasses import dataclass, asdict
from html import escape
from pathlib import Path
from typing import Any, Mapping
import base64
import hashlib
import json
import math
import tempfile

import librosa
import numpy as np

from cue_editor import parse_cue_choice

# Transition types for which an applied timeline sets ``plan.needs_stems``.
_STEM_TRANSITION_TYPES = frozenset(
    {"bass_swap", "acapella_over_instrumental", "drums_first", "double_drop"}
)

# Marker ids every applied timeline payload must contain.
_REQUIRED_MARKER_IDS = ("a_out", "b_in", "b_drop")


@dataclass(frozen=True)
class TimelineMarker:
    """One selected anchor on the timeline (A OUT, B IN or B DROP)."""

    id: str
    label: str
    track: str
    role: str
    time: float
    confidence: float = 1.0
    locked: bool = False


@dataclass(frozen=True)
class TimelinePayload:
    """Everything the timeline needs to describe one transition."""

    version: int
    transition_index: int
    transition_type: str
    duration_beats: int
    bpm_reference: float
    markers: list[TimelineMarker]
    tracks: dict[str, dict[str, Any]]

    def to_dict(self) -> dict[str, Any]:
        """Return the payload as plain, JSON-serialisable containers."""
        # ``asdict`` already recurses into the marker list, so no second pass
        # over ``self.markers`` is needed.
        return asdict(self)


def _load_mono(path: str, *, max_duration: float, sr: int = 8000) -> tuple[np.ndarray, int]:
    """Load ``path`` as peak-normalised mono float32 audio.

    Loading is best-effort: if ``librosa.load`` fails for any reason (missing
    or unreadable file), or yields no samples, ``sr`` samples of silence are
    returned instead so the timeline can still be drawn.

    Returns:
        ``(samples, sample_rate)``.
    """
    try:
        y, got_sr = librosa.load(path, sr=sr, mono=True, duration=max_duration)
    except Exception:
        # Deliberate catch-all: a track that cannot be decoded should produce a
        # flat envelope, not abort payload construction.
        got_sr = sr
        y = np.zeros(sr, dtype=np.float32)
    if y.size == 0:
        y = np.zeros(sr, dtype=np.float32)
    peak = float(np.max(np.abs(y))) if y.size else 0.0
    if peak > 0:
        y = y / peak
    return np.asarray(y, dtype=np.float32), got_sr


def _peak_envelope(path: str, *, duration: float, bins: int = 900) -> list[float]:
    """Return per-bin absolute peaks of the audio at ``path``.

    The bin count is clamped to at least 64 and at most the number of loaded
    samples (the lower bound wins for very short audio, leaving some bins
    empty; those report ``0.0``). Values are rounded to 4 decimals.
    """
    y, _sr = _load_mono(path, max_duration=max(1.0, duration))
    if y.size == 0:
        return [0.0] * bins
    bins = max(64, min(bins, len(y)))
    edges = np.linspace(0, len(y), bins + 1, dtype=int)
    magnitude = np.abs(y)  # hoisted: computed once rather than per bin
    return [
        round(float(np.max(magnitude[start:stop])) if stop > start else 0.0, 4)
        for start, stop in zip(edges[:-1], edges[1:])
    ]


def _candidate_markers(track: Any, *, track_id: str, limit: int = 80) -> list[dict[str, Any]]:
    """Return up to ``limit`` cue candidates of ``track``, best first.

    Cues are read from ``track.cue_points`` and ordered by descending
    confidence, then ascending time. Malformed cues (not dict-like, or with a
    non-numeric, non-finite or negative time) are skipped before sorting, so
    one bad entry can neither crash the sort nor use up a ``limit`` slot.
    """
    keyed: list[tuple[tuple[float, float], dict[str, Any]]] = []
    for cue in getattr(track, "cue_points", []) or []:
        try:
            when = float(cue.get("time", 0.0) or 0.0)
            confidence = float(cue.get("confidence", 0.0) or 0.0)
            kind = str(cue.get("kind", cue.get("type", "cue")))
            label = str(cue.get("label", cue.get("kind", "cue")))
        except (AttributeError, TypeError, ValueError):
            continue
        if not math.isfinite(when) or when < 0:
            continue
        if not math.isfinite(confidence):
            confidence = 0.0
        entry = {
            "track": track_id,
            "time": round(when, 3),
            "kind": kind,
            "label": label,
            "confidence": round(confidence, 3),
        }
        # Sort on the unrounded values so ordering matches the raw cue data.
        keyed.append(((-confidence, when), entry))
    keyed.sort(key=lambda item: item[0])
    return [entry for _key, entry in keyed[:limit]]


def _selected_cue(selected: Any, name: str) -> Mapping[str, Any]:
    """Return the selected cue ``name`` as a mapping, or ``{}`` if unusable."""
    cue = selected.get(name) if isinstance(selected, Mapping) else None
    return cue if isinstance(cue, Mapping) else {}


def _track_section(track: Any, *, track_id: str) -> dict[str, Any]:
    """Build the per-track part of the payload (metadata, envelope, candidates)."""
    known_duration = float(getattr(track, "duration", 0.0) or 0.0)
    return {
        "filename": getattr(track, "filename", f"track {track_id}"),
        "duration": round(known_duration, 3),
        "bpm": round(float(getattr(track, "bpm", 0.0) or 0.0), 3),
        # A missing/zero duration falls back to 300 s for envelope loading.
        "envelope": _peak_envelope(getattr(track, "path", ""), duration=known_duration or 300.0),
        "candidates": _candidate_markers(track, track_id=track_id),
    }


def build_timeline_payload(
    *,
    transition_index: int,
    track_a: Any,
    track_b: Any,
    plan: Any,
    transition_type: str | None = None,
) -> TimelinePayload:
    """Assemble the timeline payload for one transition.

    Marker times come from ``plan.selected_cues`` when present, otherwise from
    ``plan.mix_out_point`` / ``plan.mix_in_point``; B DROP defaults to B IN plus
    ``plan.duration_seconds``. ``transition_type`` overrides the plan's type
    when given. Missing or ``None`` attributes fall back to neutral defaults.
    """
    selected = getattr(plan, "selected_cues", {}) or {}
    a_cue = _selected_cue(selected, "a_out")
    b_cue = _selected_cue(selected, "b_in")
    drop_cue = _selected_cue(selected, "b_drop")

    a_out = float(a_cue.get("time", getattr(plan, "mix_out_point", 0.0)) or 0.0)
    b_in = float(b_cue.get("time", getattr(plan, "mix_in_point", 0.0)) or 0.0)
    plan_seconds = float(getattr(plan, "duration_seconds", 0.0) or 0.0)
    b_drop = float(drop_cue.get("time", b_in + plan_seconds) or 0.0)

    typ = transition_type or getattr(plan, "transition_type", "eq_crossfade")
    bpm = float(getattr(track_b, "bpm", getattr(plan, "bpm", 128.0)) or 128.0)

    anchors = (
        ("a_out", "A OUT", "A", "mix_out", a_out, a_cue),
        ("b_in", "B IN", "B", "mix_in", b_in, b_cue),
        ("b_drop", "B DROP", "B", "drop", b_drop, drop_cue),
    )
    markers = [
        TimelineMarker(
            marker_id,
            label,
            track,
            role,
            round(when, 3),
            float(cue.get("confidence", 1.0) or 1.0),
        )
        for marker_id, label, track, role, when, cue in anchors
    ]

    return TimelinePayload(
        version=1,
        transition_index=int(transition_index),
        transition_type=typ,
        duration_beats=int(getattr(plan, "duration_beats", 64) or 64),
        bpm_reference=round(bpm, 3),
        markers=markers,
        tracks={
            "A": _track_section(track_a, track_id="A"),
            "B": _track_section(track_b, track_id="B"),
        },
    )


def _payload_json(payload: TimelinePayload | Mapping[str, Any]) -> str:
    """Serialise ``payload`` to compact JSON (non-ASCII characters kept as-is)."""
    data = payload.to_dict() if isinstance(payload, TimelinePayload) else dict(payload)
    return json.dumps(data, ensure_ascii=False, separators=(",", ":"))


def render_interactive_timeline_html(payload: TimelinePayload | Mapping[str, Any]) -> str:
    """Return self-contained HTML for a draggable two-track cue timeline."""
    data = payload.to_dict() if isinstance(payload, TimelinePayload) else dict(payload)
    raw = _payload_json(data)
    # NOTE(review): `encoded` is not referenced by the template text below; the
    # markup/script that consumes it appears to be missing here - confirm
    # against the full template before removing it.
    encoded = base64.b64encode(raw.encode("utf-8")).decode("ascii")
    title = escape(f"Transition {data.get('transition_index', '?')} · {data.get('transition_type', 'transition')}")
    # The script intentionally avoids external dependencies so the artifact can
    # be saved or reviewed outside Gradio.
    return f"""
{title}
Drag markers. Hold Shift for 0.01s nudges, arrow keys nudge selected marker, double-click a candidate to snap nearest marker.
""".strip()


def render_timeline_editor_file(
    *,
    transition_index: int,
    track_a: Any,
    track_b: Any,
    plan: Any,
    output_dir: str | Path | None = None,
) -> tuple[str, str, str]:
    """Create an HTML timeline file and return `(path, html, json)`.

    The Gradio UI can embed `html`; external review can open `path`; tests and
    backend application can use the JSON directly. The file is written to
    ``output_dir`` (created if needed; the system temp directory by default)
    and its name embeds a short hash of the JSON payload.
    """
    payload = build_timeline_payload(
        transition_index=transition_index,
        track_a=track_a,
        track_b=track_b,
        plan=plan,
    )
    html = render_interactive_timeline_html(payload)
    raw = json.dumps(payload.to_dict(), ensure_ascii=False, indent=2)

    target_dir = Path(output_dir or tempfile.gettempdir())
    target_dir.mkdir(parents=True, exist_ok=True)
    # SHA-1 is used only to derive a content-based file name, not for security.
    digest = hashlib.sha1(raw.encode("utf-8")).hexdigest()[:12]
    path = target_dir / f"ai-dj-transition-{int(transition_index)}-timeline-{digest}.html"
    path.write_text(html, encoding="utf-8")
    return str(path), html, raw


def _marker_map(data: Mapping[str, Any]) -> dict[str, Mapping[str, Any]]:
    """Index the payload's marker mappings by their ``id`` (``{}`` if malformed)."""
    markers = data.get("markers", [])
    if not isinstance(markers, list):
        return {}
    return {str(m.get("id", "")): m for m in markers if isinstance(m, Mapping)}


def _clamped_marker_time(
    markers: Mapping[str, Mapping[str, Any]], marker_id: str, duration: float
) -> float:
    """Return marker ``marker_id``'s time clamped to ``[0, duration]``.

    Raises:
        ValueError: if the time is not a finite number.
    """
    raw_time = markers[marker_id].get("time", 0.0) or 0.0
    try:
        when = float(raw_time)
    except (TypeError, ValueError) as exc:
        raise ValueError(f"marker {marker_id} has non-numeric time") from exc
    if not math.isfinite(when):
        raise ValueError(f"marker {marker_id} has non-finite time")
    return round(max(0.0, min(when, float(duration or 0.0))), 3)


def _override_cue(kind: str, when: float) -> dict[str, Any]:
    """Build the ``selected_cues`` entry recorded for a timeline override."""
    return {
        "kind": kind,
        "label": "interactive timeline override",
        "time": when,
        "confidence": 1.0,
        "evidence": {"source": "interactive_timeline"},
    }


def apply_timeline_json_to_plan(plan: Any, track_a: Any, track_b: Any, payload_json: str) -> dict[str, Any]:
    """Apply a timeline payload to a TransitionPlan-like object.

    Returns a summary dict useful for tests and UI messages. Raises ValueError
    on malformed payloads so bad edits fail loudly.

    All validation happens before ``plan`` is touched, so a rejected payload
    leaves the plan unchanged. Marker times are clamped to the duration of the
    track they belong to (A OUT to ``track_a``, B IN / B DROP to ``track_b``).

    Raises:
        ValueError: if the JSON is invalid or not an object, the version is
            unsupported, a required marker is missing, a marker time or the BPM
            reference is not a finite number, B DROP is not after B IN, or the
            resulting duration rounds to zero beats.
    """
    try:
        data = json.loads(payload_json)
    except (TypeError, ValueError) as exc:
        # TypeError: payload_json was not str/bytes; ValueError covers
        # json.JSONDecodeError.
        raise ValueError(f"invalid timeline JSON: {exc}") from exc
    if not isinstance(data, Mapping):
        raise ValueError("invalid timeline JSON: top-level value must be an object")

    try:
        version = int(data.get("version", 0) or 0)
    except (TypeError, ValueError, OverflowError):
        version = 0
    if version < 1:
        raise ValueError("timeline JSON missing supported version")

    markers = _marker_map(data)
    missing = [marker_id for marker_id in _REQUIRED_MARKER_IDS if marker_id not in markers]
    if missing:
        raise ValueError(f"timeline JSON missing marker(s): {', '.join(missing)}")

    duration_a = float(getattr(track_a, "duration", 0.0) or 0.0)
    duration_b = float(getattr(track_b, "duration", 0.0) or 0.0)
    a_out = _clamped_marker_time(markers, "a_out", duration_a)
    b_in = _clamped_marker_time(markers, "b_in", duration_b)
    b_drop = _clamped_marker_time(markers, "b_drop", duration_b)
    if b_drop <= b_in:
        raise ValueError("B DROP must be after B IN")

    # A null/empty transition_type falls back to the plan's current type
    # instead of being stringified to "None".
    typ = str(data.get("transition_type") or getattr(plan, "transition_type", "eq_crossfade"))

    raw_bpm = data.get("bpm_reference", getattr(track_b, "bpm", 128.0)) or 128.0
    try:
        bpm = float(raw_bpm)
    except (TypeError, ValueError) as exc:
        raise ValueError("bpm reference must be a number") from exc
    if not math.isfinite(bpm):
        raise ValueError("bpm reference must be a finite number")
    bpm = max(60.0, bpm)  # floor keeps the beat length at one second or less

    seconds = round(b_drop - b_in, 3)
    beats = int(round(seconds / (60.0 / bpm)))
    if beats <= 0:
        raise ValueError("duration must be positive")

    plan.transition_type = typ
    plan.mix_out_point = a_out
    plan.mix_in_point = b_in
    plan.duration_seconds = seconds
    plan.duration_beats = beats
    plan.needs_stems = typ in _STEM_TRANSITION_TYPES
    plan.selected_cues = {
        "a_out": _override_cue("mix_out", a_out),
        "b_in": _override_cue("mix_in", b_in),
        "b_drop": _override_cue("drop", b_drop),
    }
    plan.cue_confidence = 1.0
    if hasattr(plan, "score_breakdown"):
        plan.score_breakdown = {
            **dict(getattr(plan, "score_breakdown", {}) or {}),
            "interactive_timeline_override": 1.0,
            "cue_confidence": 1.0,
        }

    # Drop any earlier timeline note so repeated applies do not accumulate.
    assumptions = [
        note
        for note in (getattr(plan, "assumptions", []) or [])
        if "timeline" not in str(note).lower()
    ]
    assumptions.append("interactive timeline override applied; preview before full-set render")
    plan.assumptions = assumptions

    return {
        "transition_type": typ,
        "a_out": a_out,
        "b_in": b_in,
        "b_drop": b_drop,
        "duration_seconds": seconds,
        "duration_beats": beats,
    }