Spaces:

rikhoffbauer2
/

ai-techno-dj

Running

App Files Files Community

Rik Hoffbauer committed on May 2

Commit

6362e08

1 Parent(s): 94f6c04

Implement waveform cue editor and feedback-to-learning path

Browse files

Files changed (3) hide show

app.py +135 -3
cue_editor.py +246 -0
cue_learning.py +44 -0

app.py CHANGED Viewed

@@ -1326,6 +1326,7 @@ class AppState:
         self.rendered_set = None
         self.benchmarks = []
         self.set_order_metadata = {}
 app_state = AppState()
@@ -1492,6 +1493,8 @@ def render_full_set(max_iter, progress=gr.Progress()):
     progress(0.05, desc="Compiling full-set AutomationIR...")
     from automation_set_renderer import render_set_with_automation_ir
     def progress_cb(_p, _msg):
         # The AutomationIR renderer is deterministic and currently not chunk-progressive.
@@ -1503,8 +1506,10 @@ def render_full_set(max_iter, progress=gr.Progress()):
         app_state.transitions,
         load_audio_segment=load_audio_segment,
         time_stretch_audio=time_stretch_audio,
         sr=44100,
     )
     app_state.rendered_set = set_audio
     progress(0.82, desc="Running diagnostics...")
@@ -1523,7 +1528,8 @@ def render_full_set(max_iter, progress=gr.Progress()):
     summary += f"- **Total duration:** {set_info['total_duration']:.1f}s ({set_info['total_duration']/60:.1f} min)\n"
     summary += f"- **Tracks:** {len(set_info['tracks'])}\n"
     summary += f"- **Transitions:** {len(set_info.get('transitions', []))}\n"
-    summary += f"- **AutomationIR:** {set_info['automation_ir']['clips']} clips, {set_info['automation_ir']['lanes']} lanes\n\n"
     summary += "## Tracklist\n"
     for i, t in enumerate(set_info["tracks"]):
@@ -1558,6 +1564,8 @@ def render_single_transition(transition_idx, candidate_rank=0, progress=gr.Progr
     progress(0.15, desc="Compiling automation IR...")
     from automation_ir import render_transition_candidate
     audio, ir, candidate = render_transition_candidate(
         trans,
         track_a,
@@ -1565,8 +1573,10 @@ def render_single_transition(transition_idx, candidate_rank=0, progress=gr.Progr
         candidate_rank=rank,
         load_audio_segment=load_audio_segment,
         time_stretch_audio=time_stretch_audio,
         sr=44100,
     )
     progress(0.85, desc="Saving preview...")
     tmp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
@@ -1579,6 +1589,10 @@ def render_single_transition(transition_idx, candidate_rank=0, progress=gr.Progr
     else:
         edge_score = trans.score_breakdown.get('overall')
     info = (
         f"**Transition {idx+1}:** {track_a.filename} → {track_b.filename}\n"
         f"**Candidate:** {cue_source}\n"
@@ -1588,6 +1602,8 @@ def render_single_transition(transition_idx, candidate_rank=0, progress=gr.Progr
         f"B in {ir.metadata['mix_in_point']:.2f}s, B drop {ir.metadata['b_drop']:.2f}s\n"
         f"**Duration:** {ir.metadata['duration_seconds']:.2f}s; score={edge_score if edge_score is not None else 'n/a'}\n"
         f"**Preview file duration:** {audio.shape[-1] / 44100:.1f}s\n\n"
         f"```json\n{json.dumps(ir.to_dict(), indent=2)[:6000]}\n```"
     )
     return tmp.name, info
@@ -1624,6 +1640,13 @@ def apply_manual_transition_edit(transition_idx, mix_out_point, mix_in_point, du
     assumptions.append("manual cue/timing override applied; preview this candidate before full render")
     trans.assumptions = assumptions
     return (
         f"✅ Updated transition {idx+1}\n\n"
         f"- Type: `{trans.transition_type}`\n"
@@ -1634,6 +1657,100 @@ def apply_manual_transition_edit(transition_idx, mix_out_point, mix_in_point, du
     )
 def save_transition_rating(transition_idx, candidate_rank, rating, accepted, notes):
     """Persist a human listening rating for a transition candidate."""
     if not app_state.transitions:
@@ -1736,15 +1853,28 @@ def build_ui():
                 preview_info = gr.Markdown()
                 preview_btn.click(render_single_transition, [trans_idx_input, candidate_rank_input], [preview_audio, preview_info])
-                gr.Markdown("### Manual cue editor")
                 with gr.Row():
                     manual_idx = gr.Number(value=1, label="Transition", minimum=1, precision=0)
                     manual_type = gr.Dropdown(choices=list(TRANSITION_TYPES.keys()), value="eq_crossfade", label="Transition type")
                 with gr.Row():
                     manual_mix_out = gr.Number(value=0, label="A mix-out seconds", precision=2)
                     manual_mix_in = gr.Number(value=0, label="B mix-in seconds", precision=2)
                     manual_beats = gr.Number(value=64, label="Duration beats", minimum=1, precision=0)
-                    manual_apply = gr.Button("Apply manual override")
                 manual_output = gr.Markdown()
                 manual_apply.click(apply_manual_transition_edit, [manual_idx, manual_mix_out, manual_mix_in, manual_beats, manual_type], [manual_output])
@@ -1756,9 +1886,11 @@ def build_ui():
                 with gr.Row():
                     save_rating_btn = gr.Button("Save rating")
                     show_ratings_btn = gr.Button("Show rating summary")
                 rating_output = gr.Markdown()
                 save_rating_btn.click(save_transition_rating, [trans_idx_input, candidate_rank_input, rating, accepted, notes], [rating_output])
                 show_ratings_btn.click(show_listening_benchmarks, [], [rating_output])
             # ──── TAB 4: RENDER FULL SET ────
             with gr.Tab("4️⃣ Render Full Set"):

         self.rendered_set = None
         self.benchmarks = []
         self.set_order_metadata = {}
+        self.last_stem_diagnostics = {}
 app_state = AppState()
     progress(0.05, desc="Compiling full-set AutomationIR...")
     from automation_set_renderer import render_set_with_automation_ir
+    from stem_provider import StemProvider
+    stem_provider = StemProvider()
     def progress_cb(_p, _msg):
         # The AutomationIR renderer is deterministic and currently not chunk-progressive.
         app_state.transitions,
         load_audio_segment=load_audio_segment,
         time_stretch_audio=time_stretch_audio,
+        stem_resolver=stem_provider.resolver(),
         sr=44100,
     )
+    app_state.last_stem_diagnostics = dict(stem_provider.diagnostics)
     app_state.rendered_set = set_audio
     progress(0.82, desc="Running diagnostics...")
     summary += f"- **Total duration:** {set_info['total_duration']:.1f}s ({set_info['total_duration']/60:.1f} min)\n"
     summary += f"- **Tracks:** {len(set_info['tracks'])}\n"
     summary += f"- **Transitions:** {len(set_info.get('transitions', []))}\n"
+    summary += f"- **AutomationIR:** {set_info['automation_ir']['clips']} clips, {set_info['automation_ir']['lanes']} lanes\n"
+    summary += f"- **Stem lane method:** `{set_info['automation_ir'].get('component_lane_method', 'n/a')}`\n\n"
     summary += "## Tracklist\n"
     for i, t in enumerate(set_info["tracks"]):
     progress(0.15, desc="Compiling automation IR...")
     from automation_ir import render_transition_candidate
+    from stem_provider import StemProvider
+    stem_provider = StemProvider()
     audio, ir, candidate = render_transition_candidate(
         trans,
         track_a,
         candidate_rank=rank,
         load_audio_segment=load_audio_segment,
         time_stretch_audio=time_stretch_audio,
+        stem_resolver=stem_provider.resolver(),
         sr=44100,
     )
+    app_state.last_stem_diagnostics = dict(stem_provider.diagnostics)
     progress(0.85, desc="Saving preview...")
     tmp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
     else:
         edge_score = trans.score_breakdown.get('overall')
+    from transition_diagnostics import diagnose_transition_audio, format_transition_diagnostics
+    diag = diagnose_transition_audio(audio, sr=44100, anchor_seconds=ir.anchor_seconds)
+    stem_diag = json.dumps(app_state.last_stem_diagnostics, indent=2)[:2500] if app_state.last_stem_diagnostics else "{}"
     info = (
         f"**Transition {idx+1}:** {track_a.filename} → {track_b.filename}\n"
         f"**Candidate:** {cue_source}\n"
         f"B in {ir.metadata['mix_in_point']:.2f}s, B drop {ir.metadata['b_drop']:.2f}s\n"
         f"**Duration:** {ir.metadata['duration_seconds']:.2f}s; score={edge_score if edge_score is not None else 'n/a'}\n"
         f"**Preview file duration:** {audio.shape[-1] / 44100:.1f}s\n\n"
+        f"{format_transition_diagnostics(diag)}\n\n"
+        f"### Stem provider diagnostics\n```json\n{stem_diag}\n```\n\n"
         f"```json\n{json.dumps(ir.to_dict(), indent=2)[:6000]}\n```"
     )
     return tmp.name, info
     assumptions.append("manual cue/timing override applied; preview this candidate before full render")
     trans.assumptions = assumptions
+    try:
+        from cue_learning import append_training_example
+        for cue in trans.selected_cues.values():
+            append_training_example("data/manual-cue-edits.jsonl", cue, duration=track_b.duration, label=1, source="numeric_manual_editor")
+    except Exception as exc:
+        logger.warning(f"Could not append manual cue training examples: {exc}")
     return (
         f"✅ Updated transition {idx+1}\n\n"
         f"- Type: `{trans.transition_type}`\n"
     )
def load_waveform_cue_editor(transition_idx):
    """Load the waveform image and cue choices for the selected transition.

    Args:
        transition_idx: 1-based transition number coming from the UI.

    Returns:
        A 5-tuple ``(image_path, markdown, a_update, b_in_update,
        b_drop_update)``. On failure the image is ``None``, the markdown is a
        warning and the three dropdown updates carry empty choice lists.
    """

    def _failure(message):
        # One fresh update object per dropdown output slot.
        return (None, message, *(gr.update(choices=[]) for _ in range(3)))

    if not app_state.transitions:
        return _failure("⚠️ Generate a set plan first")
    try:
        idx = int(transition_idx) - 1
    except (TypeError, ValueError, OverflowError):
        # NOTE(review): a cleared numeric input presumably arrives as None;
        # treat anything non-integral as an invalid index instead of crashing.
        idx = -1
    if idx < 0 or idx >= len(app_state.transitions):
        return _failure(f"⚠️ Invalid transition index. Choose 1-{len(app_state.transitions)}")
    trans = app_state.transitions[idx]
    track_a = app_state.analyses[trans.track_a_idx]
    track_b = app_state.analyses[trans.track_b_idx]
    from cue_editor import render_transition_cue_editor, choices_for_transition
    image_path, summary = render_transition_cue_editor(track_a, track_b, trans)
    choices = choices_for_transition(track_a, track_b, trans)
    return (
        image_path,
        summary,
        gr.update(choices=choices["a_choices"], value=choices["a_default"]),
        gr.update(choices=choices["b_in_choices"], value=choices["b_in_default"]),
        gr.update(choices=choices["b_drop_choices"], value=choices["b_drop_default"]),
    )
def apply_waveform_cue_choices(transition_idx, a_choice, b_in_choice, b_drop_choice, transition_type):
    """Apply cue choices from the waveform editor to the selected transition.

    Args:
        transition_idx: 1-based transition number coming from the UI.
        a_choice: Serialized cue-choice string for the A mix-out, or None.
        b_in_choice: Serialized cue-choice string for the B mix-in, or None.
        b_drop_choice: Serialized cue-choice string for the B drop, or None.
        transition_type: Requested transition type; ignored unless it is a
            key of ``TRANSITION_TYPES``.

    Returns:
        A markdown status string (a warning when no plan exists or the index
        is invalid, otherwise a summary of the updated transition).
    """
    if not app_state.transitions:
        return "⚠️ Generate a set plan first"
    try:
        idx = int(transition_idx) - 1
    except (TypeError, ValueError, OverflowError):
        # NOTE(review): a cleared numeric input presumably arrives as None;
        # report it as an invalid index instead of raising.
        idx = -1
    if idx < 0 or idx >= len(app_state.transitions):
        return f"⚠️ Invalid transition index. Choose 1-{len(app_state.transitions)}"
    trans = app_state.transitions[idx]
    track_a = app_state.analyses[trans.track_a_idx]
    track_b = app_state.analyses[trans.track_b_idx]
    from cue_editor import apply_choices_to_plan
    mix_out, mix_in, duration, selected = apply_choices_to_plan(
        trans,
        a_choice=a_choice,
        b_in_choice=b_in_choice,
        b_drop_choice=b_drop_choice,
        transition_type=transition_type if transition_type in TRANSITION_TYPES else None,
    )
    # Clamp cue positions into their own track and keep a minimum duration.
    trans.mix_out_point = round(max(0.0, min(mix_out, track_a.duration)), 3)
    trans.mix_in_point = round(max(0.0, min(mix_in, track_b.duration)), 3)
    trans.duration_seconds = round(max(0.25, duration), 3)
    # Beat count is derived from track B's tempo, floored at 60 BPM.
    trans.duration_beats = max(1, round(trans.duration_seconds * max(track_b.bpm, 60.0) / 60.0))
    trans.needs_stems = trans.transition_type in ("bass_swap", "acapella_over_instrumental", "drums_first", "double_drop")
    trans.selected_cues = selected
    confs = [float(c.get("confidence", 0.0) or 0.0) for c in selected.values()]
    trans.cue_confidence = round(sum(confs) / len(confs), 3) if confs else 1.0
    trans.score_breakdown = {**dict(trans.score_breakdown), "waveform_editor_override": 1.0, "cue_confidence": trans.cue_confidence}
    # Replace any earlier waveform-override note rather than stacking them.
    assumptions = [a for a in trans.assumptions if "waveform" not in a.lower()]
    assumptions.append("waveform cue editor override applied; audition before full render")
    trans.assumptions = assumptions
    # Persist positive cue examples as manual supervision. The user can later
    # train a cue model from this file or merge it with listening ratings.
    try:
        from cue_learning import append_training_example
        # The A mix-out cue is a position in track A, so it is logged with
        # track A's duration; the B-side cues keep track B's duration.
        # NOTE(review): assumes `duration` means "length of the track the cue
        # lives in" — confirm against the cue model's feature extraction.
        role_durations = {"a_out": track_a.duration}
        for role, cue in selected.items():
            append_training_example(
                "data/manual-cue-edits.jsonl",
                cue,
                duration=role_durations.get(role, track_b.duration),
                label=1,
                source="waveform_editor",
            )
    except Exception as exc:
        # Best effort: a logging failure must not block the edit itself.
        logger.warning(f"Could not append manual cue training examples: {exc}")
    return (
        f"✅ Applied waveform cue edit to transition {idx+1}\n\n"
        f"- Type: `{trans.transition_type}`\n"
        f"- A mix-out: {trans.mix_out_point:.2f}s\n"
        f"- B mix-in: {trans.mix_in_point:.2f}s\n"
        f"- B drop: {trans.selected_cues['b_drop']['time']:.2f}s\n"
        f"- Duration: {trans.duration_beats} beats / {trans.duration_seconds:.2f}s\n"
        f"- Cue confidence: {trans.cue_confidence:.0%}"
    )
def train_cue_model_from_feedback():
    """Fit and save the cue scorer from manual cue edits plus listening ratings.

    Returns a markdown status string: a warning when neither source yields an
    example, otherwise a summary with example counts and the saved model path.
    """
    from cue_learning import load_training_examples, examples_from_rating_rows, fit_logistic_model, save_model
    from listening_benchmarks import load_ratings

    manual_rows = load_training_examples("data/manual-cue-edits.jsonl")
    rating_rows = examples_from_rating_rows(load_ratings())
    combined = manual_rows + rating_rows
    if len(combined) == 0:
        return "⚠️ No manual cue edits or decisive listening ratings available for training yet."

    saved_to = save_model(fit_logistic_model(combined))
    report_lines = [
        "✅ Cue model trained",
        "",
        f"- Examples: {len(combined)}",
        f"- Manual cue edits: {len(manual_rows)}",
        f"- Rating-derived examples: {len(rating_rows)}",
        f"- Output: `{saved_to}`",
        "",
        "New analyses will blend this learned probability into cue confidence.",
    ]
    return "\n".join(report_lines)
 def save_transition_rating(transition_idx, candidate_rank, rating, accepted, notes):
     """Persist a human listening rating for a transition candidate."""
     if not app_state.transitions:
                 preview_info = gr.Markdown()
                 preview_btn.click(render_single_transition, [trans_idx_input, candidate_rank_input], [preview_audio, preview_info])
+                gr.Markdown("### Waveform-backed cue editor")
                 with gr.Row():
                     manual_idx = gr.Number(value=1, label="Transition", minimum=1, precision=0)
                     manual_type = gr.Dropdown(choices=list(TRANSITION_TYPES.keys()), value="eq_crossfade", label="Transition type")
+                    load_editor_btn = gr.Button("Load waveform editor")
+                cue_waveform = gr.Image(label="A/B waveform with cue markers", type="filepath", interactive=False)
+                cue_editor_summary = gr.Markdown()
+                with gr.Row():
+                    a_cue_choice = gr.Dropdown(choices=[], label="A mix-out cue")
+                    b_in_choice = gr.Dropdown(choices=[], label="B mix-in cue")
+                    b_drop_choice = gr.Dropdown(choices=[], label="B drop cue")
+                apply_waveform_btn = gr.Button("Apply waveform cue choices")
+                waveform_output = gr.Markdown()
+                load_editor_btn.click(load_waveform_cue_editor, [manual_idx], [cue_waveform, cue_editor_summary, a_cue_choice, b_in_choice, b_drop_choice])
+                apply_waveform_btn.click(apply_waveform_cue_choices, [manual_idx, a_cue_choice, b_in_choice, b_drop_choice, manual_type], [waveform_output])
+                gr.Markdown("### Numeric fallback editor")
                 with gr.Row():
                     manual_mix_out = gr.Number(value=0, label="A mix-out seconds", precision=2)
                     manual_mix_in = gr.Number(value=0, label="B mix-in seconds", precision=2)
                     manual_beats = gr.Number(value=64, label="Duration beats", minimum=1, precision=0)
+                    manual_apply = gr.Button("Apply numeric override")
                 manual_output = gr.Markdown()
                 manual_apply.click(apply_manual_transition_edit, [manual_idx, manual_mix_out, manual_mix_in, manual_beats, manual_type], [manual_output])
                 with gr.Row():
                     save_rating_btn = gr.Button("Save rating")
                     show_ratings_btn = gr.Button("Show rating summary")
+                    train_cue_btn = gr.Button("Train cue model from feedback")
                 rating_output = gr.Markdown()
                 save_rating_btn.click(save_transition_rating, [trans_idx_input, candidate_rank_input, rating, accepted, notes], [rating_output])
                 show_ratings_btn.click(show_listening_benchmarks, [], [rating_output])
+                train_cue_btn.click(train_cue_model_from_feedback, [], [rating_output])
             # ──── TAB 4: RENDER FULL SET ────
             with gr.Tab("4️⃣ Render Full Set"):

cue_editor.py ADDED Viewed

	@@ -0,0 +1,246 @@

+"""Waveform-backed manual cue editing helpers.
+The previous manual editor only exposed numeric inputs. This module adds a
+visual, audio-derived cue editor: it renders waveform overviews for the two
+tracks in a transition, overlays selected cue positions and alternative cue
+candidates, and returns stable cue-choice strings that can be applied back to
+TransitionPlan objects.
+The UI remains deliberately simple because Gradio event/click APIs vary across
+versions. The backend is still real: the waveform image is computed from the
+actual audio files, candidate lists are built from analysis cue objects, and
+manual edits become explicit cue overrides that can later be exported as
+training examples.
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Iterable, Mapping
+import hashlib
+import math
+import tempfile
+import librosa
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+import numpy as np
@dataclass(frozen=True)
class CueChoice:
    """One selectable cue, serializable to a pipe-delimited dropdown value."""

    role: str
    time: float
    label: str
    confidence: float
    source: str = ""

    @property
    def value(self) -> str:
        """Return ``role|time|confidence|label|source`` for use as a choice value.

        Pipes inside the label and source are replaced with ``/`` so the
        string can be split back into fields by ``parse_cue_choice``.
        """
        safe_label = self.label.replace("|", "/")
        safe_source = self.source.replace("|", "/")
        return f"{self.role}|{self.time:.3f}|{self.confidence:.3f}|{safe_label}|{safe_source}"

    @property
    def display(self) -> str:
        """Return the human-readable label shown for this choice."""
        source = f" · {self.source}" if self.source else ""
        return f"{self.role} @ {self.time:.2f}s · {self.confidence:.0%} · {self.label}{source}"


def parse_cue_choice(value: str | None) -> CueChoice | None:
    """Parse a string produced by ``CueChoice.value`` back into a ``CueChoice``.

    Returns ``None`` for empty input, for strings with fewer than four
    pipe-separated fields, and for a time or confidence that is not a finite
    number. ``nan``/``inf`` are rejected because callers feed the parsed time
    straight into transition timing arithmetic.
    """
    if not value:
        return None
    parts = str(value).split("|", 4)
    if len(parts) < 4:
        return None
    role, time_s, confidence, label = parts[:4]
    source = parts[4] if len(parts) > 4 else ""
    try:
        time_val = float(time_s)
        conf_val = float(confidence)
    except ValueError:
        return None
    if not (math.isfinite(time_val) and math.isfinite(conf_val)):
        return None
    return CueChoice(role=role, time=time_val, confidence=conf_val, label=label, source=source)
def _cue_source(cue: Mapping[str, Any]) -> str:
    """Return the cue's ``evidence["source"]`` as a string, or ``""``.

    An empty string is returned when ``evidence`` is absent, is not a
    mapping, or carries no source (including an explicit ``None``, which
    would otherwise surface as the literal text "None").
    """
    evidence = cue.get("evidence", {})
    if not isinstance(evidence, Mapping):
        return ""
    source = evidence.get("source")
    return "" if source is None else str(source)
def cue_choices(track: Any, role: str, *, limit: int = 12) -> list[tuple[str, str]]:
    """Return Gradio-compatible `(label, value)` cue choices for a role.

    Cues are read from ``track.cue_points``, filtered to the kinds accepted
    for ``role``, ordered by descending confidence then ascending time, and
    truncated to ``limit`` entries. A missing or ``None`` cue list and
    entries that are not mappings are ignored.
    """
    aliases = {
        "a_out": {"mix_out", "loopable", "drop"},
        "b_in": {"mix_in", "loopable"},
        "b_drop": {"first_drop", "drop"},
    }.get(role, {role})

    def _kind(cue: Mapping[str, Any]) -> str:
        # Cue dicts may name their category under "kind" or "type".
        return str(cue.get("kind", cue.get("type", "")))

    def _number(cue: Mapping[str, Any], key: str) -> float:
        return float(cue.get(key, 0.0) or 0.0)

    cues = [
        c
        for c in (getattr(track, "cue_points", None) or [])
        if isinstance(c, Mapping) and _kind(c) in aliases
    ]
    cues.sort(key=lambda c: (-_number(c, "confidence"), _number(c, "time")))
    out: list[tuple[str, str]] = []
    for cue in cues[:limit]:
        choice = CueChoice(
            role=role,
            time=_number(cue, "time"),
            # Fall back to the same kind/type lookup the filter used.
            label=str(cue.get("label", _kind(cue) or "cue")),
            confidence=_number(cue, "confidence"),
            source=_cue_source(cue),
        )
        out.append((choice.display, choice.value))
    return out
def default_choice(track: Any, role: str, time_s: float) -> str | None:
    """Return the choice value whose cue time lies closest to ``time_s``.

    Candidates come from ``cue_choices(track, role)``; values that fail to
    parse are skipped. Returns ``None`` when no usable candidate exists. On a
    tie the earliest candidate in the list wins.
    """
    nearest_value: str | None = None
    nearest_gap: float | None = None
    for _display, raw_value in cue_choices(track, role):
        candidate = parse_cue_choice(raw_value)
        if candidate is None:
            continue
        gap = abs(candidate.time - float(time_s))
        if nearest_gap is None or gap < nearest_gap:
            nearest_value, nearest_gap = raw_value, gap
    return nearest_value
def _load_preview(path: str, *, max_duration: float = 300.0, sr: int = 12000) -> tuple[np.ndarray, int]:
    """Load a mono, peak-normalized float32 preview and return ``(samples, rate)``.

    Any loading failure, or an empty result, yields ``sr`` zero samples so the
    editor can still draw a flat line instead of failing.
    """
    try:
        samples, rate = librosa.load(path, sr=sr, mono=True, duration=max_duration)
    except Exception:
        # A missing/corrupt file should not kill the editor.
        samples, rate = np.zeros(sr, dtype=np.float32), sr
    if samples.size == 0:
        samples = np.zeros(sr, dtype=np.float32)
    samples = np.asarray(samples, dtype=np.float32)
    loudest = float(np.max(np.abs(samples))) if samples.size else 0.0
    # Scale to unit peak; silence is left untouched to avoid dividing by zero.
    if loudest > 0:
        samples = samples / loudest
    return samples, rate
def _amplitude_envelope(y: np.ndarray, sr: int, *, bins: int = 1800) -> tuple[np.ndarray, np.ndarray]:
    """Reduce a signal to per-bin peak magnitudes and return ``(times, env)``.

    The bin count is ``bins`` capped at the sample count but never below 64;
    bins that end up with no samples get an envelope value of 0. ``times``
    spans 0 to ``len(y) / sr`` with one point per bin.
    """
    total = len(y)
    if total == 0:
        return np.array([0.0]), np.array([0.0])
    bins = max(64, min(bins, total))
    cuts = np.linspace(0, total, bins + 1, dtype=int)
    peaks = [
        float(np.max(np.abs(y[lo:hi]))) if hi > lo else 0.0
        for lo, hi in zip(cuts[:-1], cuts[1:])
    ]
    env = np.asarray(peaks, dtype=np.float32)
    times = np.linspace(0.0, total / sr, bins)
    return times, env
def _draw_track(ax: Any, track: Any, *, selected: dict[str, float], title: str) -> None:
    """Draw one track's waveform envelope, segments and cue markers on ``ax``.

    Args:
        ax: Matplotlib axes to draw into.
        track: Object read via ``path``, ``duration``, ``segments`` and
            ``cue_points`` attributes (all optional).
        selected: Marker name -> time in seconds, drawn as bold black lines.
        title: Left-aligned axes title.
    """
    y, sr = _load_preview(getattr(track, "path", ""), max_duration=float(getattr(track, "duration", 300.0) or 300.0))
    times, env = _amplitude_envelope(y, sr)
    ax.fill_between(times, -env, env, alpha=0.35, linewidth=0)
    ax.plot(times, env, linewidth=0.35)
    ax.plot(times, -env, linewidth=0.35)
    waveform_end = float(times[-1]) if len(times) else 0.0
    # A missing/zero duration falls back to the loaded waveform length;
    # otherwise the x-axis would shrink to 1s and every cue would be filtered.
    duration = float(getattr(track, "duration", None) or waveform_end)
    ax.set_xlim(0, max(1.0, min(duration, waveform_end)))
    ax.set_ylim(-1.05, 1.05)
    ax.set_yticks([])
    ax.set_title(title, loc="left", fontsize=10)
    ax.set_xlabel("seconds")
    # Segment spans give the user context beyond the raw waveform.
    for seg in (getattr(track, "segments", None) or [])[:40]:
        if not isinstance(seg, Mapping):
            continue
        start = float(seg.get("start", 0.0) or 0.0)
        end = float(seg.get("end", start) or start)
        label = str(seg.get("label", "section"))
        if end <= start:
            continue
        ax.axvspan(start, end, alpha=0.04)
        # Only label spans wide enough (> 5s) to hold the text.
        if end - start > 5:
            ax.text(start + 0.15, 0.82, label, fontsize=7, alpha=0.65)
    cue_palette = {
        "mix_in": (0.2, 0.7, 0.2),
        "mix_out": (0.8, 0.25, 0.2),
        "first_drop": (0.55, 0.2, 0.8),
        "drop": (0.55, 0.2, 0.8),
        "loopable": (0.2, 0.45, 0.85),
    }
    for cue in (getattr(track, "cue_points", None) or [])[:60]:
        # Same defensive check as for segments: skip non-mapping entries.
        if not isinstance(cue, Mapping):
            continue
        kind = str(cue.get("kind", cue.get("type", "cue")))
        t = float(cue.get("time", 0.0) or 0.0)
        if t < 0 or t > duration:
            continue
        conf = float(cue.get("confidence", 0.0) or 0.0)
        color = cue_palette.get(kind, (0.3, 0.3, 0.3))
        # Line opacity scales with confidence, clamped to [0.12, 0.55].
        ax.axvline(t, color=color, alpha=max(0.12, min(0.55, conf * 0.55)), linewidth=0.8)
    for name, t in selected.items():
        ax.axvline(float(t), color="black", linewidth=2.0, alpha=0.95)
        ax.text(float(t), -0.92, name, rotation=90, va="bottom", ha="right", fontsize=8, fontweight="bold")
def render_transition_cue_editor(track_a: Any, track_b: Any, plan: Any, *, output_dir: str | Path | None = None) -> tuple[str, str]:
    """Render a two-track waveform/cue overview and return `(png_path, markdown)`.

    Args:
        track_a: Outgoing track (drawn on the top axes).
        track_b: Incoming track (drawn on the bottom axes).
        plan: TransitionPlan-like object; read via ``getattr`` with defaults.
        output_dir: Directory for the PNG; defaults to the system temp dir.

    The file name embeds a short hash of both track paths and the plan's
    mix points, duration and type.
    """
    output_dir = Path(output_dir or tempfile.gettempdir())
    output_dir.mkdir(parents=True, exist_ok=True)
    fingerprint = hashlib.sha1(
        f"{getattr(track_a, 'path', '')}|{getattr(track_b, 'path', '')}|{getattr(plan, 'mix_out_point', 0)}|{getattr(plan, 'mix_in_point', 0)}|{getattr(plan, 'duration_seconds', 0)}|{getattr(plan, 'transition_type', '')}".encode()
    ).hexdigest()[:12]
    out = output_dir / f"ai-dj-cue-editor-{fingerprint}.png"
    selected = getattr(plan, "selected_cues", {}) or {}

    def _anchor(role: str, fallback: float) -> float:
        # Prefer the explicitly selected cue time; a missing cue, a non-mapping
        # entry or a None time falls back to the plan-derived value.
        cue = selected.get(role)
        cue_time = cue.get("time") if isinstance(cue, Mapping) else None
        return float(fallback if cue_time is None else cue_time)

    duration_seconds = float(getattr(plan, "duration_seconds", 0.0) or 0.0)
    a_out = _anchor("a_out", getattr(plan, "mix_out_point", 0.0) or 0.0)
    b_in = _anchor("b_in", getattr(plan, "mix_in_point", 0.0) or 0.0)
    b_drop = _anchor("b_drop", b_in + duration_seconds)
    fig, axes = plt.subplots(2, 1, figsize=(15, 5.2), constrained_layout=True)
    try:
        _draw_track(axes[0], track_a, selected={"A OUT": a_out}, title=f"A: {getattr(track_a, 'filename', 'track A')}")
        _draw_track(axes[1], track_b, selected={"B IN": b_in, "B DROP": b_drop}, title=f"B: {getattr(track_b, 'filename', 'track B')}")
        fig.suptitle(f"Transition cue editor · {getattr(plan, 'transition_type', 'transition')} · {getattr(plan, 'duration_beats', '?')} beats", fontsize=12)
        fig.savefig(out, dpi=150)
    finally:
        # pyplot keeps figures registered until closed; always release the
        # figure so a failed render does not leak it.
        plt.close(fig)
    summary = [
        "### Waveform cue editor",
        "The black markers are the currently selected transition anchors. Thin colored lines are ranked cue candidates from analysis.",
        f"- A mix-out: **{a_out:.2f}s**",
        f"- B mix-in: **{b_in:.2f}s**",
        f"- B drop: **{b_drop:.2f}s**",
        f"- Transition type: `{getattr(plan, 'transition_type', 'unknown')}`",
        f"- Duration: **{duration_seconds:.2f}s** / **{int(getattr(plan, 'duration_beats', 0) or 0)} beats**",
    ]
    return str(out), "\n".join(summary)
def choices_for_transition(track_a: Any, track_b: Any, plan: Any) -> dict[str, Any]:
    """Return choice lists and defaults for the UI/backend tests.

    The result has ``a_choices``, ``b_in_choices`` and ``b_drop_choices``
    (lists of ``(label, value)`` pairs) plus ``a_default``, ``b_in_default``
    and ``b_drop_default`` (the choice value nearest the plan's current
    timing, or ``None``). Missing or ``None`` plan fields count as 0.0.
    """
    mix_out = float(getattr(plan, "mix_out_point", 0.0) or 0.0)
    mix_in = float(getattr(plan, "mix_in_point", 0.0) or 0.0)
    duration = float(getattr(plan, "duration_seconds", 0.0) or 0.0)
    return {
        "a_choices": cue_choices(track_a, "a_out"),
        "b_in_choices": cue_choices(track_b, "b_in"),
        "b_drop_choices": cue_choices(track_b, "b_drop"),
        "a_default": default_choice(track_a, "a_out", mix_out),
        "b_in_default": default_choice(track_b, "b_in", mix_in),
        # The drop is expected where the transition ends on track B.
        "b_drop_default": default_choice(track_b, "b_drop", mix_in + duration),
    }
def apply_choices_to_plan(plan: Any, *, a_choice: str | None, b_in_choice: str | None, b_drop_choice: str | None, transition_type: str | None = None) -> tuple[float, float, float, dict[str, Any]]:
    """Apply cue-choice strings to a TransitionPlan-like object.

    Returns `(mix_out, mix_in, duration_seconds, selected_cues)` so callers can
    update additional derived fields such as beat count.

    Each choice that fails to parse falls back to the plan's current value
    (missing or ``None`` plan fields count as 0.0). The only attribute written
    on ``plan`` is ``transition_type``, and only when a truthy type is given.
    The duration is ``drop - mix_in`` with a floor of 0.25 seconds.
    """
    a = parse_cue_choice(a_choice)
    b = parse_cue_choice(b_in_choice)
    d = parse_cue_choice(b_drop_choice)

    def _plan_value(name: str) -> float:
        return float(getattr(plan, name, 0.0) or 0.0)

    def _cue_entry(kind: str, choice: Any, time_s: float) -> dict[str, Any]:
        # Unparsed choices are recorded as fully confident manual values.
        return {
            "kind": kind,
            "label": choice.label if choice else "manual waveform value",
            "time": round(time_s, 3),
            "confidence": choice.confidence if choice else 1.0,
            "evidence": {"source": choice.source if choice else "waveform_editor"},
        }

    mix_out = float(a.time) if a else _plan_value("mix_out_point")
    mix_in = float(b.time) if b else _plan_value("mix_in_point")
    drop = float(d.time) if d else mix_in + _plan_value("duration_seconds")
    # NOTE(review): a drop cue earlier than the mix-in is kept as chosen; only
    # the derived duration is floored — confirm this is the intended behavior.
    duration = max(0.25, drop - mix_in)
    if transition_type:
        setattr(plan, "transition_type", transition_type)
    selected = {
        "a_out": _cue_entry("mix_out", a, mix_out),
        "b_in": _cue_entry("mix_in", b, mix_in),
        "b_drop": _cue_entry("drop", d, drop),
    }
    return mix_out, mix_in, duration, selected

cue_learning.py CHANGED Viewed

@@ -162,3 +162,47 @@ def train_from_jsonl(path: str | Path, *, output_path: str | Path = DEFAULT_MODE
     model = fit_logistic_model(load_training_examples(path))
     save_model(model, output_path)
     return model

     model = fit_logistic_model(load_training_examples(path))
     save_model(model, output_path)
     return model
def examples_from_rating_rows(rows: list[Mapping[str, Any]]) -> list[dict[str, Any]]:
    """Convert listening ratings into cue-training examples.

    Positive labels come from accepted candidates or ratings >= 4. Negative
    labels come from non-accepted candidates with a rating <= 2. Neutral
    ratings are ignored to avoid teaching ambiguous preferences, and so are
    rows whose rating is missing, ``None``, empty or not numeric (unless the
    row is accepted): an absent rating is not evidence of a bad cue.

    One example is emitted per mapping found under the row's
    ``transition["selected_cues"]``; a cue without a ``kind`` gets one derived
    from its role. Input cue dicts are copied, not mutated.
    """
    role_kinds = {"a_out": "mix_out", "b_in": "mix_in"}
    examples: list[dict[str, Any]] = []
    for row in rows:
        raw_rating = row.get("rating")
        try:
            rating = None if raw_rating in (None, "") else float(raw_rating)
        except (TypeError, ValueError):
            rating = None
        accepted = bool(row.get("accepted", False))
        if accepted or (rating is not None and rating >= 4.0):
            label = 1
        elif rating is not None and rating <= 2.0:
            label = 0
        else:
            continue
        transition = row.get("transition")
        if not isinstance(transition, Mapping):
            transition = {}
        cues = transition.get("selected_cues")
        if not isinstance(cues, Mapping):
            cues = {}
        # NOTE(review): this is the transition's length, whereas manual-edit
        # examples elsewhere pass a track duration — confirm which the model expects.
        duration = float(transition.get("duration_seconds", 1.0) or 1.0)
        for role, cue in cues.items():
            if not isinstance(cue, Mapping):
                continue
            enriched = dict(cue)
            enriched.setdefault("kind", role_kinds.get(role, "drop"))
            examples.append({
                "cue": enriched,
                "duration": duration,
                "label": label,
                "source": "listening_rating",
                # Accepted rows without a usable rating keep the historical 0.0.
                "rating": 0.0 if rating is None else rating,
                "accepted": accepted,
            })
    return examples
def train_from_listening_ratings(ratings_path: str | Path = "data/listening-ratings.jsonl", *, output_path: str | Path = DEFAULT_MODEL_PATH) -> CuePointModel:
    """Fit a cue model from the ratings at ``ratings_path`` and save it.

    Ratings are loaded with ``listening_benchmarks.load_ratings``, converted
    by ``examples_from_rating_rows``, fitted, written to ``output_path`` and
    the fitted model is returned.
    """
    from listening_benchmarks import load_ratings

    rating_rows = load_ratings(ratings_path)
    trained = fit_logistic_model(examples_from_rating_rows(rating_rows))
    save_model(trained, output_path)
    return trained
def append_training_example(path: str | Path, cue: Mapping[str, Any], *, duration: float, label: int, source: str) -> None:
    """Append one training example to a JSONL file as a single line.

    Parent directories are created when missing. The written object has the
    keys ``cue`` (a copy of the mapping), ``duration`` (as float), ``label``
    (as int) and ``source``; non-ASCII text is written unescaped as UTF-8.
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    record = {
        "cue": dict(cue),
        "duration": float(duration),
        "label": int(label),
        "source": source,
    }
    with target.open("a", encoding="utf-8") as handle:
        serialized = json.dumps(record, ensure_ascii=False)
        handle.write(f"{serialized}\n")