Spaces:

MazCodes
/

fragmenta

Running

App Files Files

MazCodes committed 3 days ago

Commit

9ea28c1

verified ·

1 Parent(s): 14269fb

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +2 -0
Dockerfile +4 -2
README.md +4 -3
app/backend/app.py +0 -0
app/backend/data/auto_annotator.py +157 -34
app/backend/data/pre_encoder.py +354 -0
app/backend/data/projects.py +1023 -0
app/backend/data/slicing.py +183 -0
app/core/audio/midi_input.py +172 -0
app/core/config.py +16 -86
app/core/generation/audio_generator.py +490 -473
app/core/generation/audio_post_process.py +713 -44
app/core/model_manager.py +628 -437
app/core/training/hyperparam_suggester.py +299 -141
app/core/training/sa3_lora_runner.py +331 -0
app/core/training/sa3_trainer.py +839 -0
app/frontend/index.html +29 -6
app/frontend/logs/fragmenta_20260525.log +8 -0
app/frontend/package.json +2 -2
app/frontend/public/BricolageGrotesque-VariableFont_opsz,wdth,wght.ttf +3 -0
app/frontend/public/InterTight-VariableFont_wght.ttf +3 -0
app/frontend/public/fragmenta_background.png +2 -2
app/frontend/public/interface.png +2 -2
app/frontend/src/App.js +0 -0
app/frontend/src/api.js +1 -0
app/frontend/src/components/AboutDialog.js +130 -0
app/frontend/src/components/AudioWaveform.js +258 -0
app/frontend/src/components/ChannelFragmentHistory.js +217 -0
app/frontend/src/components/CheckpointManagerWindow.js +243 -0
app/frontend/src/components/CheckpointRow.js +270 -0
app/frontend/src/components/DatasetPrep.js +1823 -0
app/frontend/src/components/EditPanel.js +597 -0
app/frontend/src/components/GeneratedFragmentsWindow.js +420 -70
app/frontend/src/components/GenerationWaveform.js +217 -0
app/frontend/src/components/InfoView.js +91 -0
app/frontend/src/components/LoraStack.js +252 -0
app/frontend/src/components/LossChart.js +27 -11
app/frontend/src/components/MidiConfigMenu.js +118 -46
app/frontend/src/components/MidiContext.js +38 -48
app/frontend/src/components/PerformanceChannel.js +618 -239
app/frontend/src/components/PerformancePanel.js +0 -0
app/frontend/src/components/StorageDrilldown.js +84 -0
app/frontend/src/components/Tooltip.js +35 -0
app/frontend/src/components/TrainingMonitor.js +76 -35
app/frontend/src/components/WelcomePage.js +22 -33
app/frontend/src/components/usePerformanceSession.js +37 -7
app/frontend/src/theme.js +0 -0
app/frontend/src/tooltips.js +134 -0
app/frontend/src/utils/cueAudio.js +29 -6
app/frontend/src/utils/fragmentDrag.js +25 -0

.gitattributes CHANGED Viewed

@@ -47,3 +47,5 @@ utils/vendor/wheels/antlr4_python3_runtime-4.9.3-py3-none-any.whl filter=lfs dif
 vendor/stable-audio-tools/demo_cfg_3_00000001.wav filter=lfs diff=lfs merge=lfs -text
 vendor/stable-audio-tools/demo_cfg_6_00000001.wav filter=lfs diff=lfs merge=lfs -text
 vendor/stable-audio-tools/demo_cfg_9_00000001.wav filter=lfs diff=lfs merge=lfs -text

 vendor/stable-audio-tools/demo_cfg_3_00000001.wav filter=lfs diff=lfs merge=lfs -text
 vendor/stable-audio-tools/demo_cfg_6_00000001.wav filter=lfs diff=lfs merge=lfs -text
 vendor/stable-audio-tools/demo_cfg_9_00000001.wav filter=lfs diff=lfs merge=lfs -text
+app/frontend/public/BricolageGrotesque-VariableFont_opsz,wdth,wght.ttf filter=lfs diff=lfs merge=lfs -text
+app/frontend/public/InterTight-VariableFont_wght.ttf filter=lfs diff=lfs merge=lfs -text

Dockerfile CHANGED Viewed

@@ -60,8 +60,9 @@ RUN grep -ivE 'flash-attn|extra-index-url|pycairo|pygobject|pywebview' requireme
 COPY . .
 COPY --from=frontend-builder /build/frontend/build ./app/frontend/build
-# Install stable-audio-tools in-tree
-RUN pip install --no-cache-dir --root-user-action=ignore -e ./vendor/stable-audio-tools/
 # Create writable directories
 RUN mkdir -p /app/models/pretrained \
@@ -105,6 +106,7 @@ ENV FLASK_HOST=0.0.0.0
 ENV FLASK_PORT=7860
 ENV FRAGMENTA_LOG_LEVEL=INFO
 ENV FRAGMENTA_DOCKER=1
 ENV FRAGMENTA_USE_CUSTOM_MODELS=true
 ENV HOME=/home/user
 ENV PATH="/home/user/.local/bin:${PATH}"

 COPY . .
 COPY --from=frontend-builder /build/frontend/build ./app/frontend/build
+# Install vendored Stable Audio 3 in-tree (--no-deps: runtime deps come from
+# requirements.txt). Makes `import stable_audio_3` resolve.
+RUN pip install --no-cache-dir --root-user-action=ignore --no-deps -e ./vendor/stable-audio-3/
 # Create writable directories
 RUN mkdir -p /app/models/pretrained \
 ENV FLASK_PORT=7860
 ENV FRAGMENTA_LOG_LEVEL=INFO
 ENV FRAGMENTA_DOCKER=1
+ENV PYTHONPATH=/app/vendor/stable-audio-3
 ENV FRAGMENTA_USE_CUSTOM_MODELS=true
 ENV HOME=/home/user
 ENV PATH="/home/user/.local/bin:${PATH}"

README.md CHANGED Viewed

@@ -17,9 +17,10 @@ Generate and fine-tune audio from text prompts using Stable Audio Open.
 ## Getting Started
-1. Upload your model weights (`.safetensors`) to `models/pretrained/` in the Space Files tab.
-   - `stable-audio-open-small-model.safetensors` (recommended for CPU)
-   - `stable-audio-open-model.safetensors` (full model, recommended for GPU)
 2. The Space will auto-rebuild after the upload.
 3. Use the **Data Processing** tab to upload audio + prompts.
 4. Use the **Training** tab to fine-tune.

 ## Getting Started
+1. Download an SA3 checkpoint via the in-app Checkpoint Manager, or place one
+   under `models/pretrained/sa3/hub/` in the Space Files tab.
+   - `sa3-small-music` (recommended for CPU Spaces)
+   - `sa3-medium` (recommended for GPU Spaces with Flash Attention 2)
 2. The Space will auto-rebuild after the upload.
 3. Use the **Data Processing** tab to upload audio + prompts.
 4. Use the **Training** tab to fine-tune.

app/backend/app.py CHANGED Viewed

The diff for this file is too large to render. See raw diff

app/backend/data/auto_annotator.py CHANGED Viewed

@@ -22,6 +22,11 @@ AUDIO_EXTENSIONS = (".wav", ".mp3", ".flac", ".m4a", ".ogg", ".aac")
 CLAP_CKPT_FILENAME = "music_audioset_epoch_15_esc_90.14.pt"
 CLAP_REPO = "lukewys/laion_clap"
 KEY_NAMES_SHARP = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]
 KEY_NAMES_FLAT = ["C", "Db", "D", "Eb", "E", "F", "Gb", "G", "Ab", "A", "Bb", "B"]
@@ -44,9 +49,18 @@ def _iter_audio_files(folder: Path) -> List[Path]:
 def _estimate_tempo(y, sr) -> Optional[int]:
     import librosa
     try:
         tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
-        bpm = float(tempo if hasattr(tempo, "__float__") else tempo[0])
         if bpm <= 0:
             return None
         return int(round(bpm))
@@ -178,12 +192,50 @@ class _ClapTagger:
                     f"CLAP checkpoint not found at {self.ckpt_path}. "
                     "Download it first via /api/bulk-annotate/download-clap."
                 )
-            import laion_clap
-            import torch
             logging.getLogger("transformers").setLevel(logging.ERROR)
-            device = "cuda" if torch.cuda.is_available() else "cpu"
-            model = laion_clap.CLAP_Module(enable_fusion=False, amodel="HTSAT-base", device=device)
             # torch >= 2.6 flipped torch.load(weights_only=True) and newer
             # transformers dropped the roberta position_ids buffer, so
@@ -268,44 +320,97 @@ def clap_checkpoint_path(models_pretrained_dir: Path) -> Path:
     return models_pretrained_dir / "clap" / CLAP_CKPT_FILENAME
 def clap_checkpoint_available(models_pretrained_dir: Path) -> bool:
     return clap_checkpoint_path(models_pretrained_dir).exists()
 def download_clap_checkpoint(
     models_pretrained_dir: Path,
     progress_cb: Optional[Callable[[str], None]] = None,
 ) -> Path:
     target = clap_checkpoint_path(models_pretrained_dir)
     target.parent.mkdir(parents=True, exist_ok=True)
-    if target.exists():
-        return target
-    from huggingface_hub import hf_hub_download
     import os
-    if progress_cb:
-        progress_cb("Downloading CLAP checkpoint (~630 MB)…")
-    # Use custom CLAP from fragmenta-models on HF Spaces
-    use_custom_repo = os.getenv('FRAGMENTA_USE_CUSTOM_MODELS', '').lower() == 'true'
-    if use_custom_repo:
-        repo_id = "MazCodes/fragmenta-models"
-    else:
-        repo_id = CLAP_REPO
-    downloaded = hf_hub_download(
-        repo_id=repo_id,
-        filename=CLAP_CKPT_FILENAME,
-        local_dir=str(target.parent),
-    )
-    downloaded_path = Path(downloaded)
-    if downloaded_path != target:
-        try:
-            downloaded_path.replace(target)
-        except OSError:
-            import shutil
-            shutil.copy2(downloaded_path, target)
     return target
@@ -328,8 +433,10 @@ def annotate_file(
     label_sets: Dict[str, List[str]],
     sr: int = 22050,
     max_seconds: float = 60.0,
 ) -> Dict[str, Any]:
     import librosa
     parts: Dict[str, Any] = {}
     try:
@@ -343,10 +450,19 @@ def annotate_file(
             "error": f"load failed: {exc}",
         }
-    parts["bpm"] = _estimate_tempo(y, loaded_sr)
-    parts["key"] = _estimate_key(y, loaded_sr)
-    parts["brightness"] = _estimate_brightness(y, loaded_sr)
-    parts["character"] = _estimate_character(y, loaded_sr)
     if tier == "rich" and clap_tagger is not None:
         try:
@@ -355,7 +471,14 @@ def annotate_file(
         except Exception as exc:
             logger.warning("CLAP tagging failed for %s: %s", audio_path.name, exc)
-    prompt = _compose_prompt(parts)
     return {
         "file_name": audio_path.name,
         "prompt": prompt,

 CLAP_CKPT_FILENAME = "music_audioset_epoch_15_esc_90.14.pt"
 CLAP_REPO = "lukewys/laion_clap"
+# Text-side dependencies laion_clap pulls from HF on construction.
+# We stage these into models/pretrained/clap/hub/ so the rich tier is
+# fully offline after a single download and nothing leaks to ~/.cache.
+CLAP_TEXT_DEPS = ("roberta-base", "bert-base-uncased", "facebook/bart-base")
 KEY_NAMES_SHARP = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]
 KEY_NAMES_FLAT = ["C", "Db", "D", "Eb", "E", "F", "Gb", "G", "Ab", "A", "Bb", "B"]
 def _estimate_tempo(y, sr) -> Optional[int]:
     import librosa
+    import numpy as np
     try:
         tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
+        # librosa 0.10+ returns tempo as np.ndarray (shape (1,) typically).
+        # numpy 2.x removed implicit float() conversion of N-d arrays —
+        # `float(np.array([120.]))` now raises TypeError instead of returning
+        # 120.0 like numpy 1.x did. Normalize via .flat[0] which handles
+        # scalar, 0-d, 1-d, and N-d uniformly.
+        arr = np.atleast_1d(np.asarray(tempo))
+        if arr.size == 0:
+            return None
+        bpm = float(arr.flat[0])
         if bpm <= 0:
             return None
         return int(round(bpm))
                     f"CLAP checkpoint not found at {self.ckpt_path}. "
                     "Download it first via /api/bulk-annotate/download-clap."
                 )
             logging.getLogger("transformers").setLevel(logging.ERROR)
+            # Point HF resolution at our project-local cache and disable the
+            # HEAD-revalidation traffic. After download_clap_checkpoint() has
+            # staged the text deps under <pretrained>/clap/hub/, CLAP_Module
+            # loads them offline with zero HF hub requests.
+            #
+            # Two reasons env vars alone aren't enough:
+            # 1. huggingface_hub.constants.HF_HUB_OFFLINE is captured at
+            #    module-import time (constants.py:185). model_manager.py
+            #    imports huggingface_hub at app startup, so the constant is
+            #    already False by the time we set the env var here.
+            #    transformers.utils.hub.is_offline_mode reads that same
+            #    constant — patching the attribute makes both libraries see
+            #    offline mode.
+            # 2. laion_clap/training/data.py:44-46 runs three from_pretrained
+            #    calls at MODULE LEVEL — those fire the first time we do
+            #    `import laion_clap` and predate any patch we do after the
+            #    import. So we patch BEFORE the import, not after.
+            hub_dir = self.ckpt_path.parent / "hub"
+            env_keys = ("HF_HUB_CACHE", "HUGGINGFACE_HUB_CACHE", "TRANSFORMERS_CACHE",
+                        "HF_HUB_OFFLINE", "TRANSFORMERS_OFFLINE")
+            prev_env = {k: os.environ.get(k) for k in env_keys}
+            os.environ["HF_HUB_CACHE"] = str(hub_dir)
+            os.environ["HUGGINGFACE_HUB_CACHE"] = str(hub_dir)
+            os.environ["TRANSFORMERS_CACHE"] = str(hub_dir)
+            os.environ["HF_HUB_OFFLINE"] = "1"
+            os.environ["TRANSFORMERS_OFFLINE"] = "1"
+            import huggingface_hub.constants as _hhc
+            prev_offline_attr = _hhc.HF_HUB_OFFLINE
+            _hhc.HF_HUB_OFFLINE = True
+            try:
+                import laion_clap  # noqa: E402 — must follow the offline patch
+                import torch
+                device = "cuda" if torch.cuda.is_available() else "cpu"
+                model = laion_clap.CLAP_Module(enable_fusion=False, amodel="HTSAT-base", device=device)
+            finally:
+                _hhc.HF_HUB_OFFLINE = prev_offline_attr
+                for k, v in prev_env.items():
+                    if v is None:
+                        os.environ.pop(k, None)
+                    else:
+                        os.environ[k] = v
             # torch >= 2.6 flipped torch.load(weights_only=True) and newer
             # transformers dropped the roberta position_ids buffer, so
     return models_pretrained_dir / "clap" / CLAP_CKPT_FILENAME
+def clap_hub_dir(models_pretrained_dir: Path) -> Path:
+    """HF cache for laion_clap's text-side deps. Sibling of the .pt."""
+    return models_pretrained_dir / "clap" / "hub"
 def clap_checkpoint_available(models_pretrained_dir: Path) -> bool:
     return clap_checkpoint_path(models_pretrained_dir).exists()
+def _text_dep_snapshot_present(hub_dir: Path, repo_id: str) -> bool:
+    safe = "models--" + repo_id.replace("/", "--")
+    snap_root = hub_dir / safe / "snapshots"
+    if not snap_root.exists():
+        return False
+    return any(snap_root.iterdir())
 def download_clap_checkpoint(
     models_pretrained_dir: Path,
     progress_cb: Optional[Callable[[str], None]] = None,
+    phase_cb: Optional[Callable[[int, int, str], None]] = None,
 ) -> Path:
+    """Download the CLAP audio .pt plus laion_clap's text-side HF snapshots.
+    Four sequential phases — emit a phase update (current, total, label) at the
+    start of each so a multi-phase progress UI can show real context. Skips
+    phases whose artifacts are already on disk.
+    `progress_cb` (str-only) is kept for the bulk-annotate API.
+    `phase_cb` (current, total, label) is the structured channel.
+    """
     target = clap_checkpoint_path(models_pretrained_dir)
     target.parent.mkdir(parents=True, exist_ok=True)
+    hub_dir = clap_hub_dir(models_pretrained_dir)
+    hub_dir.mkdir(parents=True, exist_ok=True)
+    from huggingface_hub import hf_hub_download, snapshot_download
     import os
+    total_phases = 1 + len(CLAP_TEXT_DEPS)
+    def _emit(phase_index: int, label: str) -> None:
+        if phase_cb:
+            phase_cb(phase_index, total_phases, label)
+        if progress_cb:
+            progress_cb(f"[{phase_index}/{total_phases}] {label}")
+    if not target.exists():
+        _emit(1, "CLAP audio model (~2.35 GB)")
+        # Use custom CLAP from fragmenta-models on HF Spaces
+        use_custom_repo = os.getenv('FRAGMENTA_USE_CUSTOM_MODELS', '').lower() == 'true'
+        if use_custom_repo:
+            repo_id = "MazCodes/fragmenta-models"
+        else:
+            repo_id = CLAP_REPO
+        downloaded = hf_hub_download(
+            repo_id=repo_id,
+            filename=CLAP_CKPT_FILENAME,
+            local_dir=str(target.parent),
+        )
+        downloaded_path = Path(downloaded)
+        if downloaded_path != target:
+            try:
+                downloaded_path.replace(target)
+            except OSError:
+                import shutil
+                shutil.copy2(downloaded_path, target)
+    # laion_clap pulls a Roberta text branch plus bert/bart tokenizers from
+    # the HF hub while it is imported and CLAP_Module(...) is constructed.
+    # Pre-stage them into our own cache so the rich tier is fully offline
+    # after this step.
+    # safetensors only — pytorch_model.bin is a redundant copy.
+    for i, repo_id in enumerate(CLAP_TEXT_DEPS, start=2):
+        if _text_dep_snapshot_present(hub_dir, repo_id):
+            continue
+        _emit(i, f"Text encoder: {repo_id}")
+        snapshot_download(
+            repo_id=repo_id,
+            cache_dir=str(hub_dir),
+            allow_patterns=[
+                "config.json",
+                "tokenizer*",
+                "vocab*",
+                "merges.txt",
+                "special_tokens_map.json",
+                "model.safetensors",
+            ],
+        )
     return target
     label_sets: Dict[str, List[str]],
     sr: int = 22050,
     max_seconds: float = 60.0,
+    prompt_template: Optional[str] = None,
 ) -> Dict[str, Any]:
     import librosa
+    import warnings
     parts: Dict[str, Any] = {}
     try:
             "error": f"load failed: {exc}",
         }
+    # Silent / harmonically flat clips trip librosa's "Trying to estimate
+    # tuning from empty frequency set" warning during chroma extraction.
+    # The warning is benign — the analysis returns sensible defaults — but
+    # it spams stderr on every silent file, so we mute it here.
+    with warnings.catch_warnings():
+        warnings.filterwarnings(
+            "ignore",
+            message="Trying to estimate tuning from empty frequency set",
+        )
+        parts["bpm"] = _estimate_tempo(y, loaded_sr)
+        parts["key"] = _estimate_key(y, loaded_sr)
+        parts["brightness"] = _estimate_brightness(y, loaded_sr)
+        parts["character"] = _estimate_character(y, loaded_sr)
     if tier == "rich" and clap_tagger is not None:
         try:
         except Exception as exc:
             logger.warning("CLAP tagging failed for %s: %s", audio_path.name, exc)
+    # Template-driven prompt assembly. Falls back to the legacy descriptive
+    # prose if no template is supplied (call sites that haven't been
+    # threaded with project metadata yet).
+    if prompt_template is not None and prompt_template.strip():
+        from app.backend.data.projects import apply_template
+        prompt = apply_template(prompt_template, parts)
+    else:
+        prompt = _compose_prompt(parts)
     return {
         "file_name": audio_path.name,
         "prompt": prompt,

app/backend/data/pre_encoder.py ADDED Viewed

	@@ -0,0 +1,354 @@

+"""SA3 pre-encoding job runner — Phase 6.
+Encodes every audio clip in a Dataset Workbench project into SA3 latents
+once, ahead of training, so the training subprocess can skip the SAME
+autoencoder pass per step. Mirrors the shape of `_project_annotate_jobs`
+in app.py (background thread, per-project state, cooperative cancel).
+Latents land in `<project>/.latents/` — a hidden subdirectory inside the
+project folder. Disk layout matches SA3's `pre_encode_dataset.py`:
+  <project>/.latents/
+    000000000000.npy     # latent tensor (shape (256, T_lat))
+    000000000000.json    # {"prompt": "...", "padding_mask": [...], ...}
+    000001000000.npy
+    000001000000.json
+    ...
+    silence.npy          # padding latent (auto-generated)
+    _meta.json           # Fragmenta-specific: AE used, source clip count
+SA3's `train_lora.py --encoded_dir <project>/.latents` consumes this layout
+directly. `SA3Trainer._stage_dataset` auto-detects the directory and feeds
+`--encoded_dir` to the subprocess when latents are present.
+Cache invalidation lives in projects.py — any project mutation that could
+desync the latents (commit, delete_clip, slice_clip) wipes the directory.
+"""
+from __future__ import annotations
+import json
+import os
+import re
+import signal
+import subprocess
+import sys
+import threading
+import time
+from pathlib import Path
+from typing import Any, Dict, Optional
+from app.backend.data.projects import project_path
+from app.core.config import get_config
+from utils.logger import get_logger
+logger = get_logger("PreEncoder")
+# --- Per-project job registry ----------------------------------------------
+_pre_encode_jobs: Dict[str, Dict[str, Any]] = {}
+_pre_encode_jobs_lock = threading.Lock()
+_pre_encode_processes: Dict[str, subprocess.Popen] = {}
+def get_pre_encode_job(project_name: str) -> Dict[str, Any]:
+    """Snapshot of the current job state for a project. Always returns a
+    well-formed dict so the frontend can render against it without guards."""
+    with _pre_encode_jobs_lock:
+        job = _pre_encode_jobs.get(project_name)
+        if job is None:
+            return _idle_job()
+        return dict(job)
+def _idle_job() -> Dict[str, Any]:
+    """Build the placeholder job state reported when a project has no
+    pre-encode job registered."""
+    return {
+        "state": "idle",          # idle | queued | running | complete | failed | cancelled
+        "current": 0,             # batch index (0-based)
+        "total": 0,               # total batches (derived from clip count)
+        "current_file": "",
+        "error": None,
+        "started_at": None,
+        "finished_at": None,
+        "autoencoder": None,
+    }
+# --- Autoencoder selection -------------------------------------------------
+# Bind latents to a specific SA3 autoencoder. Latents from same-s only work
+# with small-music / small-sfx DiTs; same-l latents only work with medium.
+# For v1 we default to same-s (covers the most common base) and leave a
+# manifest in .latents/_meta.json that training reads to verify
+# compatibility. If a user trains against medium with same-s latents,
+# SA3Trainer falls back to non-encoded training and logs a warning.
+DEFAULT_AUTOENCODER = "same-s"
+# Audio length (samples per channel) the dataset pads/crops to before
+# encoding. SA3's pre_encode_dataset.py defaults to ~285s at 44.1 kHz, which
+# covers any training-time --duration up to that limit (and SA3 small caps
+# at 120s anyway). Longer clips in the project will be cropped to this
+# length during encoding — a documented limitation for v1.
+DEFAULT_SAMPLE_SIZE = 12_582_912
+# --- Job lifecycle ---------------------------------------------------------
+def latents_dir(project_name: str) -> Path:
+    """Path of the hidden `.latents` subdirectory inside the project folder."""
+    return project_path(project_name) / ".latents"
+def latents_count(project_name: str) -> int:
+    d = latents_dir(project_name)
+    if not d.exists():
+        return 0
+    return sum(
+        1 for p in d.glob("*.npy")
+        if p.name != "silence.npy"
+    )
+def latents_meta(project_name: str) -> Optional[Dict[str, Any]]:
+    """Read the manifest we drop alongside the .npy files."""
+    p = latents_dir(project_name) / "_meta.json"
+    if not p.exists():
+        return None
+    try:
+        return json.loads(p.read_text(encoding="utf-8"))
+    except Exception:
+        return None
+def latents_match_base(project_name: str, base_model: str) -> bool:
+    """Whether the cached latents are compatible with the chosen base.
+    same-s ↔ small-music / small-sfx (and their *-base variants).
+    same-l ↔ medium (and medium-base).
+    """
+    meta = latents_meta(project_name)
+    if not meta:
+        return False
+    ae = meta.get("autoencoder")
+    if ae == "same-s":
+        return base_model in ("sa3-small-music", "sa3-small-music-base",
+                              "sa3-small-sfx", "sa3-small-sfx-base")
+    if ae == "same-l":
+        return base_model in ("sa3-medium", "sa3-medium-base")
+    return False
+def cancel_pre_encode(project_name: str) -> bool:
+    """Send a cancel signal to an in-flight job. Returns True if cancelled."""
+    with _pre_encode_jobs_lock:
+        job = _pre_encode_jobs.get(project_name)
+        if not job or job.get("state") not in ("queued", "running"):
+            return False
+        job["state"] = "cancelled"
+        job["cancelled"] = True
+    proc = _pre_encode_processes.get(project_name)
+    if proc is not None and proc.poll() is None:
+        try:
+            proc.send_signal(signal.SIGINT)
+            try:
+                proc.wait(timeout=5)
+            except subprocess.TimeoutExpired:
+                proc.terminate()
+                try:
+                    proc.wait(timeout=3)
+                except subprocess.TimeoutExpired:
+                    proc.kill()
+        except Exception as exc:
+            logger.warning("Failed to signal pre-encode subprocess: %s", exc)
+    return True
+def start_pre_encode(
+    project_name: str,
+    autoencoder: Optional[str] = None,
+    sample_size: Optional[int] = None,
+) -> Dict[str, Any]:
+    """Spawn the pre-encode subprocess in a background thread. Returns the
+    job state — frontend polls /pre-encode/status thereafter.
+    `autoencoder` defaults to DEFAULT_AUTOENCODER and `sample_size` to
+    DEFAULT_SAMPLE_SIZE. If the project already has a queued or running
+    job, that job's state is returned and no new job is started.
+    Raises FileNotFoundError when the project folder does not exist and
+    ValueError when `autoencoder` is neither 'same-s' nor 'same-l'.
+    """
+    proj_dir = project_path(project_name)
+    if not proj_dir.exists():
+        raise FileNotFoundError(f"project not found: {project_name}")
+    ae = autoencoder or DEFAULT_AUTOENCODER
+    if ae not in ("same-s", "same-l"):
+        raise ValueError(f"autoencoder must be 'same-s' or 'same-l'; got {ae!r}")
+    # The duplicate-job check and the registration share one lock scope so
+    # two concurrent calls cannot both register a job for the same project.
+    with _pre_encode_jobs_lock:
+        existing = _pre_encode_jobs.get(project_name)
+        if existing and existing.get("state") in ("queued", "running"):
+            return dict(existing)
+        # Count source clips (sidecars committed) so we know the denominator.
+        # Only non-empty .txt sidecars with a sibling .wav are counted.
+        sidecars = list(proj_dir.glob("*.txt"))
+        clip_count = sum(
+            1 for p in sidecars
+            if p.read_text(encoding="utf-8").strip()
+            and p.with_suffix(".wav").exists()  # cheap & accurate enough
+        )
+        job: Dict[str, Any] = {
+            "state": "queued",
+            "current": 0,
+            "total": clip_count,
+            "current_file": "",
+            "error": None,
+            "started_at": time.time(),
+            "finished_at": None,
+            "autoencoder": ae,
+            "cancelled": False,
+        }
+        _pre_encode_jobs[project_name] = job
+    # `sample_size or ...` means a sample_size of 0 also falls back to the
+    # default. The worker runs as a daemon thread.
+    thread = threading.Thread(
+        target=_run_pre_encode,
+        args=(project_name, ae, sample_size or DEFAULT_SAMPLE_SIZE),
+        daemon=True,
+        name=f"sa3-pre-encode:{project_name}",
+    )
+    thread.start()
+    return get_pre_encode_job(project_name)
+# --- Worker ----------------------------------------------------------------
+def _update_job(project_name: str, **fields: Any) -> None:
+    with _pre_encode_jobs_lock:
+        job = _pre_encode_jobs.get(project_name)
+        if job is None:
+            return
+        job.update(fields)
+def _run_pre_encode(project_name: str, ae: str, sample_size: int) -> None:
+    """Background-thread target. Spawns the SA3 pre_encode_dataset.py script,
+    streams stdout for progress, writes a _meta.json manifest on success."""
+    cfg = get_config()
+    proj_dir = project_path(project_name)
+    out_dir = latents_dir(project_name)
+    out_dir.mkdir(parents=True, exist_ok=True)
+    sa3_vendor = cfg.get_path("stable_audio_3")
+    venv_python = sys.executable
+    cmd = [
+        venv_python,
+        str(sa3_vendor / "scripts" / "pre_encode_dataset.py"),
+        "--model", ae,
+        "--data_dir", str(proj_dir),
+        "--output_path", str(out_dir),
+        "--batch_size", "1",
+        "--sample_size", str(int(sample_size)),
+    ]
+    env = os.environ.copy()
+    pp = env.get("PYTHONPATH", "")
+    env["PYTHONPATH"] = (
+        f"{sa3_vendor}{os.pathsep}{pp}" if pp else str(sa3_vendor)
+    )
+    hub_dir = cfg.get_path("models_pretrained") / "sa3" / "hub"
+    env["HF_HUB_CACHE"] = str(hub_dir)
+    env["HUGGINGFACE_HUB_CACHE"] = str(hub_dir)
+    env["TRANSFORMERS_CACHE"] = str(hub_dir)
+    env["HF_HUB_DISABLE_PROGRESS_BARS"] = "1"
+    env["HF_HUB_OFFLINE"] = "1"
+    env["TRANSFORMERS_OFFLINE"] = "1"
+    _update_job(project_name, state="running")
+    logger.info(
+        "Pre-encoding started · project=%s · autoencoder=%s · clips=%d · sample_size=%d",
+        project_name, ae, get_pre_encode_job(project_name)["total"], sample_size,
+    )
+    batch_pat = re.compile(r"Processing batch (\d+)")
+    process: Optional[subprocess.Popen] = None
+    try:
+        process = subprocess.Popen(
+            cmd,
+            cwd=str(cfg.project_root),
+            env=env,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            text=True,
+            bufsize=1,
+        )
+        _pre_encode_processes[project_name] = process
+        if process.stdout is not None:
+            for line in process.stdout:
+                line = line.rstrip()
+                m = batch_pat.search(line)
+                if m:
+                    # Subprocess prints "Processing batch N" once per batch
+                    # (and batch_size=1 → one batch per clip). N starts at 0.
+                    _update_job(project_name, current=int(m.group(1)) + 1)
+        rc = process.wait() if process else 1
+        # Check whether we got cancelled mid-flight.
+        snapshot = get_pre_encode_job(project_name)
+        if snapshot.get("cancelled"):
+            _update_job(
+                project_name,
+                state="cancelled",
+                finished_at=time.time(),
+            )
+            logger.info("Pre-encoding cancelled · project=%s", project_name)
+            return
+        if rc != 0:
+            _update_job(
+                project_name,
+                state="failed",
+                error=f"pre_encode_dataset.py exited with code {rc}",
+                finished_at=time.time(),
+            )
+            logger.error(
+                "Pre-encoding failed (exit %s) · project=%s",
+                rc, project_name,
+            )
+            return
+        # Success — write manifest so SA3Trainer can verify AE compatibility.
+        manifest = {
+            "autoencoder": ae,
+            "sample_size": sample_size,
+            "created_at": time.time(),
+            "source_clip_count": snapshot.get("total", 0),
+            "encoded_count": latents_count(project_name),
+        }
+        try:
+            (out_dir / "_meta.json").write_text(
+                json.dumps(manifest, indent=2), encoding="utf-8",
+            )
+        except Exception as exc:
+            logger.warning("Failed to write latents manifest: %s", exc)
+        _update_job(
+            project_name,
+            state="complete",
+            current=manifest["encoded_count"],
+            total=manifest["encoded_count"] or snapshot.get("total", 0),
+            finished_at=time.time(),
+        )
+        logger.info(
+            "Pre-encoding complete · project=%s · %d latent(s) · ae=%s",
+            project_name, manifest["encoded_count"], ae,
+        )
+    except Exception as exc:
+        _update_job(
+            project_name,
+            state="failed",
+            error=str(exc),
+            finished_at=time.time(),
+        )
+        logger.exception("Pre-encoding crashed for project=%s", project_name)
+    finally:
+        _pre_encode_processes.pop(project_name, None)

app/backend/data/projects.py ADDED Viewed

	@@ -0,0 +1,1023 @@

+"""On-disk projects + buffered in-memory editing for SA3 sidecar datasets.
+A *project* is a folder under `<user_data_dir>/projects/<name>/` (or wherever
+`FRAGMENTA_PROJECTS_DIR` points) holding audio + `.txt` sidecar pairs plus a
+hidden `.project.json` with Fragmenta metadata. The on-disk folder is the
+**committed** dataset — what training reads, what survives across app
+restarts.
+The UI works against an **in-memory session** per loaded project. Prompt
+edits, auto-annotate output, and just-ingested audio all live in memory
+until the user explicitly persists them via:
+  Save    → write `.draft.json` (transient, hidden). Survives app restart
+            but is not the SA3 deliverable.
+  Commit  → flush prompts to `.txt` sidecars, mark current audio as
+            committed in `.project.json`, delete `.draft.json`.
+  Discard → drop the in-memory session, delete `.draft.json`, remove any
+            audio files added since the last commit.
+See DATASET_PREP_REDESIGN.md for the full design and rationale.
+"""
+from __future__ import annotations
+import json
+import logging
+import os
+import re
+import shutil
+import threading
+import time
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+from app.backend.data.auto_annotator import AUDIO_EXTENSIONS, _iter_audio_files
# Module-wide logger, named after this module.
logger = logging.getLogger(__name__)

# Hidden bookkeeping files kept inside each project folder.
PROJECT_METADATA_FILENAME = ".project.json"
PROJECT_DRAFT_FILENAME = ".draft.json"

# How ingested audio is placed into the project folder.
INGEST_MODES = ("copy", "symlink")
DEFAULT_INGEST_MODE = "copy"  # copy | symlink

# SA3's prompting guide (vendor/stable-audio-3/docs/guides/prompting.md)
# describes three generation modes (music, stems / solo instruments, and
# audio samples / SFX), each with its own AudioSparx-tag convention. There is
# one preset per mode and the user selects a single preset id; everything
# else is an opinionated default. Templates are rendered by apply_template,
# which drops any segment whose variable is missing, so absent CLAP
# attributes never leave dangling punctuation.
PROMPT_TEMPLATE_PRESETS: Dict[str, Dict[str, str]] = {
    "music": {
        "label": "Music",
        "description": "Full instrumental tracks (SA3's `TrackType: Music` convention).",
        "template": ", ".join((
            "TrackType: Music",
            "VocalType: Instrumental",
            "Genre: {genre}",
            "Mood: {mood}",
            "Instruments: {instruments}",
            "BPM: {bpm}",
            "Key: {key}",
        )),
    },
    "instrument": {
        "label": "Instrument / Stem",
        "description": "Isolated parts or single-instrument pieces (`TrackType: Instrument`).",
        "template": ", ".join((
            "TrackType: Instrument",
            "Instruments: {instruments}",
            "Genre: {genre}",
            "BPM: {bpm}",
            "Key: {key}",
            "Mood: {mood}",
        )),
    },
    "sfx": {
        "label": "Sample / SFX",
        "description": "Sound effects, one-shots, samples (`TrackType: SFX`).",
        "template": "TrackType: SFX, {brightness}, {character}",
    },
}
DEFAULT_PROMPT_TEMPLATE_PRESET = "music"

# Project names must look like reasonable filesystem folders: a leading
# letter or digit followed by at most 99 letters, digits, spaces,
# underscores, dashes, or dots.
_VALID_NAME_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9 _\-.]{0,99}$")
+# ---------- Locations -------------------------------------------------------
def get_projects_dir() -> Path:
    """Return the directory that holds all project folders, creating it if needed.

    The `FRAGMENTA_PROJECTS_DIR` environment variable, when set to a
    non-empty value, overrides the location (with `~` expanded). Otherwise
    the root is the `projects` folder under the configured `user_data_dir`.
    """
    env_root = os.environ.get("FRAGMENTA_PROJECTS_DIR")
    if not env_root:
        # Only needed (and only imported) when no override is configured.
        from app.core.config import get_config
        projects_root = get_config().user_data_dir / "projects"
    else:
        projects_root = Path(env_root).expanduser()
    projects_root.mkdir(parents=True, exist_ok=True)
    return projects_root
def project_path(name: str) -> Path:
    """Return the folder for project `name` inside the projects root."""
    projects_root = get_projects_dir()
    return projects_root / name
def project_metadata_path(name: str) -> Path:
    """Return the path of the hidden metadata file inside project `name`."""
    project_dir = project_path(name)
    return project_dir / PROJECT_METADATA_FILENAME
def project_draft_path(name: str) -> Path:
    """Return the path of the hidden draft file inside project `name`."""
    project_dir = project_path(name)
    return project_dir / PROJECT_DRAFT_FILENAME
+# ---------- Validation ------------------------------------------------------
def sanitize_project_name(raw: Any) -> str:
    """Validate a user-supplied project name and return it stripped.

    Raises:
        ValueError: if `raw` is not a string, is blank after stripping, is
            `.` or `..`, or does not match `_VALID_NAME_RE`.
    """
    if not isinstance(raw, str):
        raise ValueError("Project name must be a string.")
    candidate = raw.strip()
    if candidate == "":
        raise ValueError("Project name cannot be empty.")
    if candidate == "." or candidate == "..":
        raise ValueError("Invalid project name.")
    if _VALID_NAME_RE.match(candidate) is None:
        raise ValueError(
            "Project name must start with a letter or digit and may only "
            "contain letters, digits, spaces, dashes, underscores, and dots."
        )
    return candidate
+# ---------- Disk persistence: committed state -------------------------------
def _default_metadata(name: str) -> Dict[str, Any]:
    """Build the metadata dict for a brand-new (never committed) project.

    `created_at` and `modified_at` share one timestamp taken at call time.
    """
    timestamp = time.time()
    metadata: Dict[str, Any] = dict(
        name=name,
        created_at=timestamp,
        modified_at=timestamp,
        committed_at=None,
        ingest_mode=DEFAULT_INGEST_MODE,
        prompt_template_preset=DEFAULT_PROMPT_TEMPLATE_PRESET,
        source_folders=[],
        # Files written to disk that have already been committed.
        committed_files=[],
    )
    return metadata
def _read_metadata(name: str) -> Dict[str, Any]:
    """Load the metadata file of project `name`, filling in any missing keys.

    Falls back to `_default_metadata(name)` (logging a warning) when the
    file is missing, unreadable, not valid UTF-8 / JSON, or holds valid JSON
    that is not an object. Keys absent from a readable file are filled from
    the defaults, so callers can rely on every default key being present.
    """
    path = project_metadata_path(name)
    if not path.exists():
        return _default_metadata(name)
    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as exc:
        # ValueError is the common base of json.JSONDecodeError and
        # UnicodeDecodeError, so a mis-encoded file is handled as well.
        logger.warning("Could not read project metadata %s: %s; using defaults.", path, exc)
        return _default_metadata(name)
    if not isinstance(data, dict):
        # Valid JSON of the wrong shape (list, string, ...) would otherwise
        # crash on `.setdefault` below.
        logger.warning("Project metadata %s is not a JSON object; using defaults.", path)
        return _default_metadata(name)
    for key, value in _default_metadata(name).items():
        data.setdefault(key, value)
    return data
def _write_metadata(name: str, metadata: Dict[str, Any]) -> None:
    """Persist `metadata` as the metadata file of project `name`.

    Stamps `metadata["modified_at"]` in place, then writes to a sibling
    `.tmp` file and swaps it over the target with `os.replace`.
    """
    metadata["modified_at"] = time.time()
    target = project_metadata_path(name)
    target.parent.mkdir(parents=True, exist_ok=True)
    staging = target.with_suffix(target.suffix + ".tmp")
    with staging.open("w", encoding="utf-8") as handle:
        json.dump(metadata, handle, indent=2)
    os.replace(staging, target)
def _sidecar_for(audio_path: Path) -> Path:
    """Return the `.txt` sidecar path that sits next to `audio_path`."""
    return audio_path.with_suffix(".txt")


def _read_sidecar(audio_path: Path) -> str:
    """Return the stripped prompt stored in the sidecar of `audio_path`.

    Returns an empty string when the sidecar is missing or cannot be read.
    Bytes that are not valid UTF-8 are replaced with U+FFFD instead of
    raising, so one mis-encoded sidecar cannot abort loading a project.
    """
    try:
        text = _sidecar_for(audio_path).read_text(encoding="utf-8", errors="replace")
    except OSError:
        # Covers a missing sidecar (FileNotFoundError) as well as I/O errors.
        return ""
    return text.strip()


def _write_sidecar(audio_path: Path, prompt: str) -> None:
    """Write `prompt` (an empty string if falsy) to the sidecar of `audio_path`."""
    _sidecar_for(audio_path).write_text(prompt or "", encoding="utf-8")
+# ---------- Disk persistence: draft state -----------------------------------
def _read_draft(name: str) -> Optional[Dict[str, Any]]:
    """Load the draft file of project `name`, or return None.

    None is returned (with a warning, except when the file is simply
    absent) if the draft is unreadable, not valid UTF-8 / JSON, is not a
    JSON object, or carries a `prompts` entry that is not an object.
    Callers may therefore treat a non-None result as a dict whose `prompts`
    value, if present and truthy, is a dict.
    """
    path = project_draft_path(name)
    if not path.exists():
        return None
    try:
        with open(path, "r", encoding="utf-8") as f:
            draft = json.load(f)
    except (OSError, ValueError) as exc:
        # ValueError is the common base of json.JSONDecodeError and
        # UnicodeDecodeError.
        logger.warning("Could not read draft %s: %s; treating as no draft.", path, exc)
        return None
    if not isinstance(draft, dict) or not isinstance(draft.get("prompts") or {}, dict):
        # Structurally wrong but syntactically valid JSON must not crash
        # the session loader, which calls `.get(...)` and `.items()` on it.
        logger.warning("Draft %s has an unexpected structure; treating as no draft.", path)
        return None
    return draft
def _write_draft(name: str, draft: Dict[str, Any]) -> None:
    """Persist `draft` as the draft file of project `name`.

    Stamps `draft["saved_at"]` in place, then writes to a sibling `.tmp`
    file and swaps it over the target with `os.replace`.
    """
    draft["saved_at"] = time.time()
    target = project_draft_path(name)
    target.parent.mkdir(parents=True, exist_ok=True)
    staging = target.with_suffix(target.suffix + ".tmp")
    with staging.open("w", encoding="utf-8") as handle:
        json.dump(draft, handle, indent=2)
    os.replace(staging, target)
def _delete_draft(name: str) -> None:
    """Remove the draft file of project `name`; do nothing if it is absent.

    Unlinks first and tolerates a missing file, rather than checking
    existence beforehand, so a concurrent removal cannot raise and a
    dangling symlink at the draft path is removed too.
    """
    try:
        project_draft_path(name).unlink()
    except FileNotFoundError:
        pass
+# ---------- In-memory session ----------------------------------------------
@dataclass
class ClipState:
    """State of a single clip inside an active project session.

    A clip counts as dirty when its live `prompt` differs from
    `committed_prompt`; `to_dict` exposes that as a derived `dirty` flag.
    """

    # File name of the clip.
    file_name: str
    # Location of the audio file, held as a string.
    path: str
    # Live in-memory prompt (what the UI shows).
    prompt: str = ""
    # Prompt as stored in the on-disk sidecar; baseline for dirtiness.
    committed_prompt: str = ""
    # False if the audio was added since the last commit.
    committed: bool = True
    # file_name of the original clip when this one came from a slice
    # operation in the current session. In-memory only; not persisted across
    # restart (yet). Future merge-back will need disk-level lineage.
    parent: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict view of the clip, including the `dirty` flag."""
        is_dirty = self.prompt != self.committed_prompt
        payload: Dict[str, Any] = {}
        payload["file_name"] = self.file_name
        payload["path"] = self.path
        payload["prompt"] = self.prompt
        payload["committed_prompt"] = self.committed_prompt
        payload["committed"] = self.committed
        payload["dirty"] = is_dirty
        payload["parent"] = self.parent
        return payload
@dataclass
class ProjectSession:
    """In-memory working copy of one project, keyed by project name.

    Sessions are loaded lazily on first GET and stay alive until the user
    discards or commits, or the process exits.
    """

    name: str
    # Clip states keyed by file_name.
    clips: Dict[str, ClipState] = field(default_factory=dict)
    # Last time .draft.json was written (None if never).
    saved_at: Optional[float] = None
    # file_name -> prompt mapping as of the last saved draft.
    last_save_snapshot: Dict[str, str] = field(default_factory=dict)
    # Contents of the project's metadata file.
    metadata: Dict[str, Any] = field(default_factory=dict)
    # Set to ask an in-flight annotate run to stop.
    cancel_event: threading.Event = field(default_factory=threading.Event)
    # Guards mutation of this session.
    lock: threading.Lock = field(default_factory=threading.Lock)
    # "file_name:N" -> (peaks, duration). Filled lazily by
    # get_or_compute_peaks; safe to recompute on a miss.
    peaks_cache: Dict[str, Tuple[List[float], float]] = field(default_factory=dict)
    # file_name -> duration in seconds; filled cheaply from a header probe
    # instead of waiting for a peaks fetch.
    duration_cache: Dict[str, float] = field(default_factory=dict)

    def _draft_snapshot(self) -> Dict[str, str]:
        """Return file_name -> prompt for every clip that differs from its sidecar.

        Serves both to decide whether a Save is needed and as the prompt
        payload of the on-disk draft.
        """
        pending: Dict[str, str] = {}
        for clip in self.clips.values():
            if clip.prompt != clip.committed_prompt:
                pending[clip.file_name] = clip.prompt
        return pending

    def has_dirty_prompts(self) -> bool:
        """True if at least one clip's prompt differs from its committed prompt."""
        for clip in self.clips.values():
            if clip.prompt != clip.committed_prompt:
                return True
        return False

    def has_uncommitted_files(self) -> bool:
        """True if at least one clip's audio was added since the last commit."""
        for clip in self.clips.values():
            if not clip.committed:
                return True
        return False

    def has_unsaved_changes(self) -> bool:
        """True if the in-memory prompt diffs differ from the last saved draft."""
        current = self._draft_snapshot()
        return current != self.last_save_snapshot

    def to_dict(self) -> Dict[str, Any]:
        """Return the full project view: metadata, clips, and latents status."""
        clips_sorted = sorted(self.clips.values(), key=lambda clip: clip.file_name)

        # Phase 6: pre-encoded latents live inside the project at .latents/.
        # Report presence + count for the UI (silence.npy is not counted),
        # plus the per-project "don't ask again" flag for the post-commit
        # dialog.
        latents_dir = project_path(self.name) / ".latents"
        latent_files = []
        if latents_dir.exists():
            latent_files = [
                entry for entry in latents_dir.glob("*.npy") if entry.name != "silence.npy"
            ]

        meta = self.metadata
        preset_options = [
            {
                "id": preset_id,
                "label": preset["label"],
                "description": preset["description"],
                "template": preset["template"],
            }
            for preset_id, preset in PROMPT_TEMPLATE_PRESETS.items()
        ]
        is_dirty = self.has_dirty_prompts() or self.has_uncommitted_files()
        return {
            "name": self.name,
            "created_at": meta.get("created_at"),
            "modified_at": meta.get("modified_at"),
            "committed_at": meta.get("committed_at"),
            "ingest_mode": meta.get("ingest_mode", DEFAULT_INGEST_MODE),
            "prompt_template_preset": (
                meta.get("prompt_template_preset") or DEFAULT_PROMPT_TEMPLATE_PRESET
            ),
            "prompt_template_presets": preset_options,
            "source_folders": list(meta.get("source_folders", [])),
            "saved_at": self.saved_at,
            "dirty": is_dirty,
            "has_unsaved_changes": self.has_unsaved_changes(),
            "uncommitted_files": [clip.file_name for clip in clips_sorted if not clip.committed],
            "clips": [clip.to_dict() for clip in clips_sorted],
            "clip_count": len(self.clips),
            "latents_present": bool(latent_files),
            "latents_count": len(latent_files),
            "suppress_pre_encode_prompt": bool(meta.get("suppress_pre_encode_prompt")),
        }
# Registry of active sessions keyed by project name.
_sessions: Dict[str, ProjectSession] = {}
_sessions_lock = threading.Lock()


def _get_or_load_session(name: str) -> ProjectSession:
    """Return the active session for `name`, loading it from disk if needed.

    A fresh session is built from the audio files directly inside the
    project folder, each paired with its `.txt` sidecar, and then overlaid
    with any saved draft prompts. The disk scan happens outside the registry
    lock; if two threads load the same project concurrently, the first one
    registered wins and the other result is discarded.

    Raises:
        FileNotFoundError: if the project folder does not exist.
    """
    with _sessions_lock:
        existing = _sessions.get(name)
        if existing is not None:
            return existing

    path = project_path(name)
    if not path.is_dir():
        raise FileNotFoundError(f"Project not found: {name}")

    metadata = _read_metadata(name)
    committed_files = set(metadata.get("committed_files") or [])

    # Build clip states from the disk layout. `committed_prompt` is whatever
    # is in the .txt sidecar today.
    clips: Dict[str, ClipState] = {}
    for audio_path in sorted(path.iterdir()):
        if not audio_path.is_file():
            continue
        if audio_path.suffix.lower() not in AUDIO_EXTENSIONS:
            continue
        committed_prompt = _read_sidecar(audio_path)
        clips[audio_path.name] = ClipState(
            file_name=audio_path.name,
            path=str(audio_path),
            prompt=committed_prompt,
            committed_prompt=committed_prompt,
            committed=audio_path.name in committed_files,
        )
    session = ProjectSession(name=name, clips=clips, metadata=metadata)

    # Overlay any draft prompts on top of the committed values. The draft
    # may be hand-edited or stale, so tolerate unexpected shapes instead of
    # crashing the load.
    draft = _read_draft(name)
    if isinstance(draft, dict) and draft:
        prompts = draft.get("prompts")
        if not isinstance(prompts, dict):
            prompts = {}
        for file_name, prompt in prompts.items():
            clip = session.clips.get(file_name)
            if clip is not None and isinstance(prompt, str):
                clip.prompt = prompt
        session.saved_at = draft.get("saved_at")
        # Record what the draft actually contributed. Copying the raw draft
        # would keep entries for clips that no longer exist (or that now
        # equal their sidecar), which makes has_unsaved_changes() report
        # True right after a load with no user edits.
        session.last_save_snapshot = session._draft_snapshot()

    with _sessions_lock:
        # Race: another thread may have loaded concurrently. Use whichever
        # got in first.
        existing = _sessions.get(name)
        if existing is not None:
            return existing
        _sessions[name] = session
        return session
def _drop_session(name: str) -> None:
    """Forget the in-memory session for `name`; a no-op if none is loaded."""
    with _sessions_lock:
        if name in _sessions:
            del _sessions[name]
+# ---------- CRUD ------------------------------------------------------------
def list_projects() -> List[Dict[str, Any]]:
    """Summarize every project folder under the projects root, sorted by path.

    Hidden entries (leading dot) and non-directories are ignored, and a
    project whose metadata cannot be read is skipped with a warning.
    `clip_count` counts audio files directly inside the folder.
    """
    summaries: List[Dict[str, Any]] = []
    for entry in sorted(get_projects_dir().iterdir()):
        if entry.name.startswith(".") or not entry.is_dir():
            continue
        try:
            meta = _read_metadata(entry.name)
        except Exception as exc:
            logger.warning("Skipping project %s: %s", entry.name, exc)
            continue
        audio_files = [
            child for child in entry.iterdir()
            if child.is_file() and child.suffix.lower() in AUDIO_EXTENSIONS
        ]
        summary = {
            "name": entry.name,
            "created_at": meta.get("created_at"),
            "modified_at": meta.get("modified_at"),
            "committed_at": meta.get("committed_at"),
            "clip_count": len(audio_files),
            "has_draft": project_draft_path(entry.name).exists(),
        }
        summaries.append(summary)
    return summaries
def create_project(name: str) -> Dict[str, Any]:
    """Create an empty project folder with default metadata and return its view.

    Raises:
        ValueError: if `name` fails `sanitize_project_name`.
        FileExistsError: if a project with that name already exists.
    """
    clean_name = sanitize_project_name(name)
    project_dir = project_path(clean_name)
    if project_dir.exists():
        raise FileExistsError(f"Project '{clean_name}' already exists.")
    project_dir.mkdir(parents=True)
    _write_metadata(clean_name, _default_metadata(clean_name))
    return get_project(clean_name)
def get_project(name: str) -> Dict[str, Any]:
    """Return the dict view of project `name`, loading its session if needed."""
    session = _get_or_load_session(name)
    with session.lock:
        view = session.to_dict()
    return view
def _stage_file(src: Path, dst: Path, mode: str) -> str:
    """Place `src` at `dst` and report what happened.

    Returns "skipped" if `dst` already exists (including as a dangling
    symlink), "symlinked" if a symlink was created, and "copied" otherwise.
    A failed symlink attempt falls back to a copy.
    """
    already_present = dst.exists() or dst.is_symlink()
    if already_present:
        return "skipped"
    if mode != "symlink":
        shutil.copy2(src, dst)
        return "copied"
    try:
        dst.symlink_to(src.resolve())
    except OSError as exc:
        logger.warning("Symlink failed for %s -> %s: %s; falling back to copy.", src, dst, exc)
        shutil.copy2(src, dst)
        return "copied"
    return "symlinked"
def ingest_folder(name: str, source_folder: Path, mode: str) -> Dict[str, Any]:
    """Stage the audio files found in `source_folder` into project `name`.

    Audio is written to disk immediately (gigabytes are not buffered in
    memory). Newly staged files are marked uncommitted in the session so a
    later Discard can remove them. Files whose name already exists in the
    project are skipped.

    Returns:
        Counts under "copied", "symlinked", "skipped", and "added"
        (copied + symlinked).

    Raises:
        ValueError: for an unknown `mode` or a folder with no audio files.
        FileNotFoundError: if `source_folder` is not an existing directory.
    """
    if mode not in INGEST_MODES:
        raise ValueError(f"Invalid ingest mode: {mode}")
    if not (source_folder.exists() and source_folder.is_dir()):
        raise FileNotFoundError(f"Source folder not found: {source_folder}")
    session = _get_or_load_session(name)
    project_dir = project_path(name)
    audio_sources = _iter_audio_files(source_folder)
    if not audio_sources:
        raise ValueError(f"No audio files found in {source_folder}")

    tally = {"copied": 0, "symlinked": 0, "skipped": 0}
    with session.lock:
        for src in audio_sources:
            dst = project_dir / src.name
            outcome = _stage_file(src, dst, mode)
            # Anything that is not an explicit copy/symlink counts as skipped.
            tally[outcome if outcome in ("copied", "symlinked") else "skipped"] += 1
            if outcome == "skipped" or src.name in session.clips:
                continue
            # Newly added file: uncommitted.
            session.clips[src.name] = ClipState(
                file_name=src.name,
                path=str(dst),
                prompt="",
                committed_prompt="",
                committed=False,
            )
        session.metadata["ingest_mode"] = mode
        resolved_source = str(source_folder.resolve())
        known_sources = session.metadata.setdefault("source_folders", [])
        if resolved_source not in known_sources:
            session.metadata["source_folders"].append(resolved_source)

    return {
        "copied": tally["copied"],
        "symlinked": tally["symlinked"],
        "skipped": tally["skipped"],
        "added": tally["copied"] + tally["symlinked"],
    }
def update_clip_prompt(name: str, file_name: str, prompt: str) -> Dict[str, Any]:
    """Set a clip's live prompt and return the clip's dict view.

    In-memory only: disk is not touched until Save or Commit. A falsy
    `prompt` is stored as an empty string.

    Raises:
        FileNotFoundError: if `file_name` is not a clip of the project.
    """
    session = _get_or_load_session(name)
    with session.lock:
        target = session.clips.get(file_name)
        if target is not None:
            target.prompt = prompt or ""
            return target.to_dict()
        raise FileNotFoundError(f"Clip not found in project '{name}': {file_name}")
def delete_clip(name: str, file_name: str) -> None:
    """Remove a clip immediately (audio + sidecar + session entry).

    Treated like ingest: the disk change happens now, since carrying a
    pending deletion in memory complicates everything for no real win.
    Discard cannot recover deleted files. Any pre-encoded latents of the
    project are wiped afterwards.

    Raises:
        ValueError: if `file_name` is not a plain file name. It is joined
            onto the project folder and unlinked, so path separators or
            `..` would allow deleting files outside the project.
        FileNotFoundError: if the project does not exist.
    """
    if (
        not isinstance(file_name, str)
        or file_name in ("", ".", "..")
        or Path(file_name).name != file_name
    ):
        raise ValueError(f"Invalid clip file name: {file_name!r}")
    session = _get_or_load_session(name)
    proj_path = project_path(name)
    with session.lock:
        audio_path = proj_path / file_name
        # Unlink first and tolerate absence: `exists()` follows symlinks, so
        # a dangling symlink (symlink ingest whose source has gone) would
        # otherwise be left behind and later reported as present by
        # _stage_file.
        for target in (audio_path, _sidecar_for(audio_path)):
            try:
                target.unlink()
            except FileNotFoundError:
                pass
        session.clips.pop(file_name, None)
        # Evict any cached peaks for this file (regardless of N).
        for key in list(session.peaks_cache):
            if key.startswith(f"{file_name}:"):
                del session.peaks_cache[key]
        session.duration_cache.pop(file_name, None)
        committed = session.metadata.get("committed_files") or []
        if file_name in committed:
            session.metadata["committed_files"] = [f for f in committed if f != file_name]
    # Invalidate latents outside the lock so FS I/O does not hold it.
    _invalidate_latents(name)
+# ---------- Save / Commit / Discard -----------------------------------------
def save_project(name: str) -> Dict[str, Any]:
    """Write the current in-memory prompt diffs to the hidden draft file.

    The draft holds the prompts that differ from their sidecars plus the
    names of uncommitted files. Returns the refreshed project view.
    """
    session = _get_or_load_session(name)
    with session.lock:
        pending_prompts = session._draft_snapshot()
        uncommitted_names = [
            clip.file_name for clip in session.clips.values() if not clip.committed
        ]
        _write_draft(
            name,
            {"prompts": pending_prompts, "uncommitted_files": uncommitted_names},
        )
        session.saved_at = time.time()
        session.last_save_snapshot = dict(pending_prompts)
        return session.to_dict()
def _invalidate_latents(name: str) -> None:
    """Phase 6: delete the project's `.latents` folder, if there is one.

    Latents are tied to specific source-clip content, so any mutation of the
    source set (commit, delete_clip, slice_clip) leaves them misaligned. The
    v1 strategy is wipe-and-recompute; per-clip invalidation is a follow-up
    (not worth the complexity for the speed-up it would give). Removal
    errors are ignored.
    """
    latents_dir = project_path(name) / ".latents"
    if not latents_dir.exists():
        return
    shutil.rmtree(latents_dir, ignore_errors=True)
def update_pre_encode_suppression(name: str, suppress: bool) -> Dict[str, Any]:
    """Store the "Don't ask again" choice from the post-commit dialog.

    The flag is written to the project's metadata file so it survives a
    restart. The Training-tab fallback button stays available regardless of
    this flag. Returns the refreshed project view.
    """
    session = _get_or_load_session(name)
    with session.lock:
        flag = bool(suppress)
        session.metadata["suppress_pre_encode_prompt"] = flag
        _write_metadata(name, session.metadata)
        view = session.to_dict()
    return view
def commit_project(name: str) -> Dict[str, Any]:
    """Flush the in-memory session to disk as the canonical SA3 dataset.

    Wipes pre-encoded latents first (re-encoding is explicit, via the
    post-commit dialog or the Training-tab button), overwrites every
    sidecar, marks all current audio as committed, persists the metadata,
    and deletes any draft. Returns the refreshed project view.
    """
    _invalidate_latents(name)
    session = _get_or_load_session(name)
    project_dir = project_path(name)
    with session.lock:
        # Every clip gets its sidecar rewritten, changed or not, so that the
        # on-disk state after Commit is exactly the in-memory state.
        for clip in session.clips.values():
            _write_sidecar(project_dir / clip.file_name, clip.prompt)
            clip.committed_prompt = clip.prompt
            clip.committed = True
        session.metadata.update(
            committed_files=sorted(session.clips.keys()),
            committed_at=time.time(),
        )
        _write_metadata(name, session.metadata)
        _delete_draft(name)
        session.saved_at = None
        session.last_save_snapshot = {}
        return session.to_dict()
def delete_project(name: str) -> None:
    """Permanently remove a project: folder, sidecars, drafts, session.

    Destructive: there is no recovery path. Caller should confirm with the
    user before invoking.

    Raises:
        ValueError: if `name` is not a plain folder name. The name is joined
            onto the projects root and removed recursively, so `""`, `..`,
            an absolute path, or anything containing a separator would
            delete a directory other than a single project folder.
        FileNotFoundError: if the project folder does not exist.
    """
    if (
        not isinstance(name, str)
        or name in ("", ".", "..")
        or Path(name).name != name
    ):
        raise ValueError(f"Invalid project name: {name!r}")
    proj_path = project_path(name)
    if not proj_path.exists():
        raise FileNotFoundError(f"Project not found: {name}")
    # Cancel any in-flight annotate first, drop the session, then remove the
    # folder. Order matters: something may still hold a file handle inside
    # the folder while it is removed, but with the session gone no fresh
    # writes are started through it.
    with _sessions_lock:
        existing = _sessions.pop(name, None)
    if existing is not None:
        existing.cancel_event.set()
    shutil.rmtree(proj_path, ignore_errors=True)
    if proj_path.exists():
        # Removal stays best-effort, but a leftover folder should not go
        # unnoticed.
        logger.warning("Project folder %s could not be fully removed.", proj_path)
def discard_project(name: str) -> Dict[str, Any]:
    """Throw away all uncommitted work and return the rebuilt project view.

    - Delete the draft.
    - Delete audio files added since the last commit (and their sidecars).
    - Drop the in-memory session so the next GET rebuilds from disk.
    """
    session = _get_or_load_session(name)
    proj_path = project_path(name)
    with session.lock:
        # Cancel any in-flight annotate before state is torn apart.
        session.cancel_event.set()
        uncommitted = [c.file_name for c in session.clips.values() if not c.committed]
        for file_name in uncommitted:
            audio_path = proj_path / file_name
            # Unlink first and tolerate absence: `exists()` follows
            # symlinks, so a dangling symlink (symlink ingest whose source
            # has gone) would otherwise survive the discard and block
            # re-ingesting the same file name.
            for target in (audio_path, _sidecar_for(audio_path)):
                try:
                    target.unlink()
                except FileNotFoundError:
                    pass
        _delete_draft(name)
    _drop_session(name)
    return get_project(name)
+# ---------- Annotate cancellation handle ------------------------------------
def get_session_handle(name: str) -> ProjectSession:
    """Return the live session for `name`, loading it from disk if needed.

    Used by the annotate endpoint to share a cancel handle and the clip dict.
    """
    handle = _get_or_load_session(name)
    return handle
def reset_cancel(session: ProjectSession) -> None:
    """Clear the session's cancel flag."""
    cancel_flag = session.cancel_event
    cancel_flag.clear()
+# ---------- Prompt template -------------------------------------------------
# Matches a `{identifier}` placeholder and captures the identifier.
_TEMPLATE_VAR_RE = re.compile(r"\{([a-zA-Z_][a-zA-Z0-9_]*)\}")


def _render_value(name: str, raw: Any) -> str:
    """Stringify one template variable value; an empty result means missing.

    `None` renders as empty. Lists and tuples are joined with ", " after
    dropping `None` items and items that are blank once stripped, so a
    `None` inside a list counts as missing just like a scalar `None` and is
    not rendered as the text "None". Any other value is `str()`-ed and
    stripped. `name` is accepted for interface symmetry and is not used.
    """
    if raw is None:
        return ""
    if isinstance(raw, (list, tuple)):
        stripped = (str(item).strip() for item in raw if item is not None)
        return ", ".join(part for part in stripped if part)
    return str(raw).strip()


def apply_template(template: str, attributes: Dict[str, Any]) -> str:
    """Segment-based templating with graceful missing-value handling.

    The template is split on ',' into segments. For each segment, every
    {var} placeholder is resolved against `attributes`. If any placeholder
    in the segment resolves to empty/missing, the whole segment is dropped,
    so a missing key/BPM/whatever does not leave dangling punctuation.
    Segments without placeholders (e.g. "TrackType: Music") always appear.
    An empty template yields an empty string.
    """
    if not template:
        return ""
    out_segments: List[str] = []
    for raw_segment in template.split(","):
        segment = raw_segment.strip()
        if not segment:
            continue
        var_names = _TEMPLATE_VAR_RE.findall(segment)
        if var_names:
            resolved = {n: _render_value(n, attributes.get(n)) for n in var_names}
            if not all(resolved.values()):
                continue  # drop the segment: one of its vars is missing
            segment = _TEMPLATE_VAR_RE.sub(lambda m: resolved[m.group(1)], segment)
        out_segments.append(segment)
    return ", ".join(out_segments)
def resolve_prompt_template(session: "ProjectSession") -> str:
    """Return the template string of the project's selected preset.

    A missing/empty preset id, or one that is not a known preset (e.g. after
    someone hand-edited .project.json to a bad value), resolves to the
    default preset's template.
    """
    chosen_id = (
        session.metadata.get("prompt_template_preset")
        or DEFAULT_PROMPT_TEMPLATE_PRESET
    )
    chosen = PROMPT_TEMPLATE_PRESETS.get(chosen_id)
    if chosen is not None:
        return chosen["template"]
    return PROMPT_TEMPLATE_PRESETS[DEFAULT_PROMPT_TEMPLATE_PRESET]["template"]
def update_project_template_preset(name: str, preset_id: str) -> Dict[str, Any]:
    """Persist the user-selected preset id and return updated project state.

    Raises:
        ValueError: if `preset_id` is not a string naming a known preset.
    """
    is_known = isinstance(preset_id, str) and preset_id in PROMPT_TEMPLATE_PRESETS
    if not is_known:
        valid = ", ".join(PROMPT_TEMPLATE_PRESETS.keys())
        raise ValueError(f"Unknown preset id: {preset_id!r}. Valid: {valid}")
    session = _get_or_load_session(name)
    with session.lock:
        metadata = session.metadata
        metadata["prompt_template_preset"] = preset_id
        # Remove the legacy free-form field so there are not two parallel
        # ways to configure annotation shape.
        metadata.pop("prompt_template", None)
        _write_metadata(name, metadata)
    return get_project(name)
+# ---------- Waveform peaks --------------------------------------------------
def _compute_peaks(audio_path: Path, n: int) -> Tuple[List[float], float]:
    """Return `n` normalized peak amplitudes plus the duration in seconds.

    Reads `n` short blocks at evenly spaced offsets via soundfile's seek
    instead of decoding the whole file, so I/O stays bounded regardless of
    file length. Each peak is the largest absolute sample across all
    channels of its block; the peaks are then scaled so the largest is 1.0
    (left untouched if the signal is all zeros).

    Falls back to a librosa-based decode for formats soundfile cannot open
    on this build (typically m4a/aac without ffmpeg-libsndfile).

    Raises:
        ValueError: if `n` is less than 1.
    """
    if n < 1:
        # Fail fast with a clear message instead of an incidental error deep
        # inside the fallback path.
        raise ValueError(f"n must be a positive integer, got {n!r}")
    import numpy as np
    try:
        import soundfile as sf
        with sf.SoundFile(str(audio_path)) as src:
            total = src.frames
            sr = src.samplerate
            if total == 0:
                return ([0.0] * n, 0.0)
            duration = float(total / sr)
            # ~6 buckets-worth of samples per probe gives stable peaks
            # without devolving into "read the whole file."
            block = max(256, total // (n * 6))
            peaks = np.zeros(n, dtype="float32")
            for i in range(n):
                center = int((i + 0.5) * total / n)
                start = max(0, center - block // 2)
                src.seek(start)
                data = src.read(block, dtype="float32", always_2d=False)
                if data.size:
                    # Take the magnitude before reducing. Collapsing the
                    # channels with a signed max first would hide negative
                    # peaks, e.g. (-0.9, 0.1) would report 0.1.
                    peaks[i] = float(np.abs(data).max())
            max_peak = float(peaks.max())
            if max_peak > 0:
                peaks = peaks / max_peak
            return (peaks.tolist(), duration)
    except Exception as exc:
        logger.debug("soundfile peak path failed for %s (%s); falling back to librosa", audio_path.name, exc)
    # Fallback: a full-file decode + resample through librosa. Slower, used
    # only when the soundfile path above raised.
    import librosa
    y, sr = librosa.load(str(audio_path), sr=8000, mono=True)
    if len(y) == 0:
        return ([0.0] * n, 0.0)
    duration = float(len(y) / sr)
    chunks = np.array_split(y, n)
    peaks = np.array([float(np.abs(c).max()) if len(c) else 0.0 for c in chunks])
    max_peak = peaks.max()
    if max_peak > 0:
        peaks = peaks / max_peak
    return (peaks.tolist(), duration)
def get_or_compute_peaks(
    session: ProjectSession,
    file_name: str,
    audio_path: Path,
    n: int = 200,
) -> Tuple[List[float], float]:
    """Return waveform peaks for a clip, memoized on the session.

    Results are stored in `session.peaks_cache` under the key
    `file_name:N`, so each resolution is cached separately.
    """
    key = f"{file_name}:{n}"
    peaks = session.peaks_cache.get(key)
    if peaks is None:
        peaks = _compute_peaks(audio_path, n)
        session.peaks_cache[key] = peaks
    return peaks
+# ---------- Health checks ---------------------------------------------------
def _clip_duration_sec(audio_path: Path) -> Optional[float]:
    """Return the clip duration in seconds from a soundfile.info() probe.

    Returns None when the probe raises for any reason or when the reported
    sample rate is not positive.
    """
    try:
        import soundfile as sf
        header = sf.info(str(audio_path))
        rate = header.samplerate
        return float(header.frames / rate) if rate > 0 else None
    except Exception:
        return None
def compute_health(
    name: str,
    short_threshold_sec: float = 1.0,
) -> Dict[str, Any]:
    """Run per-clip dataset checks so problems surface before training.

    Four checks are reported:
      - ``empty_prompts``: clips whose prompt is missing or whitespace-only.
      - ``too_short``: clips whose probed duration is below
        ``short_threshold_sec`` (default 1s — per the original author's note,
        such clips end up mostly silence-padded in the training window).
        Clips whose duration cannot be probed are not flagged.
      - ``unsupported_format``: clips whose extension is not in
        ``SA3_AUDIO_EXTENSIONS``. Ingest accepts a wider set (.m4a, .aac);
        those files would be silently skipped at train time.
      - ``duplicate_annotations``: clips sharing a non-empty prompt
        (compared stripped and case-insensitively) with at least one other.

    Deliberately NOT flagged (original author's rationale): "too long" clips
    (the SA3 dataloader random-crops per ``__getitem__``), mixed sample rates
    (SA3 resamples in its dataset loader) and loudness (Fragmenta enables
    SA3's -16 LUFS VolumeNorm at train/pre-encode time).

    Args:
        name: Project name, resolved through ``_get_or_load_session``.
        short_threshold_sec: Duration below which a clip counts as too short.

    Returns:
        A dict with ``total_clips`` plus one sub-dict per check, each holding
        a ``count`` and a sorted ``files`` list (and check-specific extras).
    """
    # Single source of truth for what SA3's loader actually accepts.
    from app.core.training.sa3_lora_runner import SA3_AUDIO_EXTENSIONS

    session = _get_or_load_session(name)
    # Snapshot the clip list under the lock; the checks run on the copy.
    with session.lock:
        snapshot = list(session.clips.values())

    missing_prompt: List[str] = []
    undersized: List[str] = []
    bad_extension: List[str] = []
    by_prompt: Dict[str, List[str]] = {}
    durations = session.duration_cache

    for clip in snapshot:
        fname = clip.file_name

        # Prompt presence + grouping by normalized prompt text.
        text = (clip.prompt or "").strip()
        if text:
            by_prompt.setdefault(text.lower(), []).append(fname)
        else:
            missing_prompt.append(fname)

        if Path(fname).suffix.lower() not in SA3_AUDIO_EXTENSIONS:
            bad_extension.append(fname)

        # Duration (header-only, ~free) — only used for the too-short check.
        length = durations.get(fname)
        if length is None:
            length = _clip_duration_sec(Path(clip.path))
            if length is None:
                # Could not probe: nothing to cache, nothing to judge.
                continue
            durations[fname] = length
        if length < short_threshold_sec:
            undersized.append(fname)

    # Duplicate annotations: any non-empty prompt shared by 2+ clips.
    shared_groups = [group for group in by_prompt.values() if len(group) > 1]
    dup_files = sorted({fname for group in shared_groups for fname in group})

    missing_prompt = sorted(missing_prompt)
    undersized = sorted(undersized)
    bad_extension = sorted(bad_extension)

    return {
        "total_clips": len(snapshot),
        "empty_prompts": {"count": len(missing_prompt), "files": missing_prompt},
        "too_short": {
            "count": len(undersized),
            "threshold_sec": short_threshold_sec,
            "files": undersized,
        },
        "unsupported_format": {
            "count": len(bad_extension),
            "accepted": sorted(SA3_AUDIO_EXTENSIONS),
            "files": bad_extension,
        },
        "duplicate_annotations": {
            "count": len(dup_files),
            "group_count": len(shared_groups),
            "files": dup_files,
        },
    }
+# ---------- Slicing ---------------------------------------------------------
def slice_clip(
    name: str,
    file_name: str,
    target_sec: float,
    overlap_sec: float,
    strategy: str,
) -> Dict[str, Any]:
    """Split one clip into N children. Disk-level — happens immediately.

    The parent file (and its sidecar) is deleted. Each child:
      - lives in the project folder as `<stem>__NNN.wav`
      - inherits the parent's in-memory prompt verbatim
      - is uncommitted (so Discard rolls it back)
      - keeps `parent=<parent_file_name>` in its session state
    Discard cannot recover the parent file from children — same rule as
    delete_clip. Commit makes the slice permanent.

    Args:
        name: Project name; resolves both the session and the project folder.
        file_name: Name of the clip inside the project folder to slice.
        target_sec: Target duration of each child, forwarded to plan_slices.
        overlap_sec: Overlap between children, forwarded to plan_slices.
        strategy: Slicing strategy name, forwarded to plan_slices.

    Returns:
        A dict with the `parent` file name, a `children` list of
        `{file_name, start_sec, end_sec}` entries, and the refreshed
        `project` as returned by get_project(name).

    Raises:
        FileNotFoundError: the clip is not present in the project folder.
        ValueError: planning yields one child or fewer (nothing to slice);
            plan_slices may also raise ValueError for bad arguments.
        RuntimeError: write_slices returned no children.
    """
    # Imported lazily, at call time rather than at module import.
    from app.backend.data.slicing import plan_slices, write_slices
    session = _get_or_load_session(name)
    proj_path = project_path(name)
    audio_path = proj_path / file_name
    if not audio_path.exists():
        raise FileNotFoundError(f"Clip not on disk: {file_name}")
    # Planning and writing both happen before the session lock is taken and
    # before anything is deleted: if either raises, the parent file and the
    # session are left untouched.
    plans = plan_slices(audio_path, target_sec, overlap_sec, strategy)
    if len(plans) <= 1:
        raise ValueError(
            f"{file_name} is shorter than the target duration "
            f"({target_sec:.1f}s); nothing to slice."
        )
    stem = audio_path.stem
    children = write_slices(audio_path, plans, proj_path, stem)
    if not children:
        raise RuntimeError("Slice produced no children — check the audio file.")
    with session.lock:
        # Capture the prompt before the parent entry is dropped.
        parent_clip = session.clips.get(file_name)
        inherited_prompt = parent_clip.prompt if parent_clip else ""
        # Remove the parent from session + disk.
        session.clips.pop(file_name, None)
        # Peak cache keys look like "<file_name>:<n>"; drop every resolution
        # cached for the parent. Iterate over a copy because we delete.
        for key in list(session.peaks_cache):
            if key.startswith(f"{file_name}:"):
                del session.peaks_cache[key]
        session.duration_cache.pop(file_name, None)
        sidecar = _sidecar_for(audio_path)
        if audio_path.exists():
            audio_path.unlink()
        if sidecar.exists():
            sidecar.unlink()
        # The parent no longer exists, so it must not stay listed as committed.
        committed = session.metadata.get("committed_files") or []
        if file_name in committed:
            session.metadata["committed_files"] = [f for f in committed if f != file_name]
        # Register children as uncommitted clips with parent linkage.
        for child_path in children:
            session.clips[child_path.name] = ClipState(
                file_name=child_path.name,
                path=str(child_path),
                prompt=inherited_prompt,
                committed_prompt="",
                committed=False,
                parent=file_name,
            )
    # Slicing replaces the parent's audio with N children → any cached
    # latents reference the deleted parent and are now misaligned.
    _invalidate_latents(name)
    # NOTE(review): zip() pairs children with plans positionally. write_slices
    # skips plans that yield an empty frame range, so if any plan was skipped
    # the reported start/end times shift onto the wrong child — confirm
    # whether that can happen in practice.
    return {
        "parent": file_name,
        "children": [
            {"file_name": p.name, "start_sec": pl.start_sec, "end_sec": pl.end_sec}
            for p, pl in zip(children, plans)
        ],
        "project": get_project(name),
    }

app/backend/data/slicing.py ADDED Viewed

	@@ -0,0 +1,183 @@

+"""Audio slicing for the Dataset Workbench.
+Splits one audio file into N children. Three strategies:
+  hard       — uniform cuts every `target_duration` seconds.
+  transient  — uniform anchor points, each snapped to the nearest onset
+               (librosa.onset.onset_detect).
+  silence    — uniform anchor points, each snapped to the nearest low-RMS
+               window (cleanest splice between phrases).
+All three honor `overlap_sec`, applied as a head-overlap on every child
+after the first: child i starts at (end of child i-1) - overlap_sec.
+Writes WAV regardless of source format (lossless, no codec deps). Parent
+prompt is inherited verbatim; the user edits children individually after.
+"""
+from __future__ import annotations
+import logging
+from dataclasses import dataclass
+from pathlib import Path
+from typing import List, Literal, Tuple
# Module-level logger; write_slices uses it to warn about skipped slices.
logger = logging.getLogger(__name__)
# Type alias for the strategy argument of plan_slices.
SliceStrategy = Literal["hard", "transient", "silence"]
# Runtime counterpart of SliceStrategy: plan_slices validates against this.
VALID_STRATEGIES = ("hard", "transient", "silence")
# How far a snap is allowed to move from the uniform anchor. Beyond this we
# just take the anchor — better a tidy cut than a wildly off-target chunk.
# Expressed as a fraction of the target duration: the snap helpers multiply
# it by target_sec to get the search radius in seconds.
SNAP_WINDOW_FRAC = 0.35
@dataclass
class SlicePlan:
    """One child's location inside the parent. Times are in seconds.

    Produced by plan_slices and consumed by write_slices, which converts the
    times to frame offsets using the source file's sample rate.
    """
    index: int          # 1-based; also used as the NNN suffix in the child file name
    start_sec: float    # where the child begins inside the parent, in seconds
    end_sec: float      # where the child ends inside the parent, in seconds
+def _uniform_anchors(duration_sec: float, target_sec: float, overlap_sec: float) -> List[Tuple[float, float]]:
+    """Return [(start, end), ...] for uniform cuts, before any snapping."""
+    if target_sec <= 0:
+        raise ValueError("target_duration must be positive")
+    if overlap_sec < 0 or overlap_sec >= target_sec:
+        raise ValueError("overlap_sec must be >= 0 and < target_duration")
+    step = target_sec - overlap_sec
+    anchors: List[Tuple[float, float]] = []
+    start = 0.0
+    while start < duration_sec - 0.05:  # don't emit a sub-50ms tail
+        end = min(start + target_sec, duration_sec)
+        anchors.append((start, end))
+        if end >= duration_sec:
+            break
+        start += step
+    return anchors
def _snap_to_onsets(anchors: List[Tuple[float, float]], y, sr: int, target_sec: float) -> List[Tuple[float, float]]:
    """Snap each interior cut to the nearest detected onset, keeping the overlap.

    The start of every child after the first is moved to the closest onset
    found within ``target_sec * SNAP_WINDOW_FRAC`` of its uniform anchor (and
    strictly before that child's own end, so a child can never become empty).
    The previous child's end is then rebuilt from the snapped start plus the
    head-overlap the caller laid out, so the overlap implied by ``anchors``
    survives the snap instead of being collapsed to zero.

    Args:
        anchors: Uniform ``(start, end)`` windows in seconds, in order.
        y: Audio samples passed to ``librosa.onset.onset_detect``.
        sr: Sample rate of ``y``.
        target_sec: Target child duration; scales the snap radius.

    Returns:
        ``(start, end)`` windows of the same length as ``anchors``. The
        input list is returned unchanged when there are fewer than two
        windows or no onsets are detected.
    """
    import librosa
    import numpy as np

    if len(anchors) < 2:
        # A single window has no interior cut to move.
        return anchors

    onsets = np.asarray(
        librosa.onset.onset_detect(y=y, sr=sr, units="time", backtrack=True),
        dtype=float,
    )
    if onsets.size == 0:
        return anchors

    snap_window = target_sec * SNAP_WINDOW_FRAC

    # Pass 1: choose a start for every child. The first child always starts
    # where the caller put it.
    starts: List[float] = [anchors[0][0]]
    for s, e in anchors[1:]:
        in_reach = onsets[
            (onsets >= s - snap_window) & (onsets <= s + snap_window) & (onsets < e)
        ]
        if in_reach.size:
            s = float(in_reach[np.argmin(np.abs(in_reach - s))])
        starts.append(s)

    # Pass 2: rebuild ends so that child i ends exactly `overlap` after child
    # i+1 starts. The overlap is recovered from the uniform layout itself
    # (end of i minus start of i+1). Ends are clamped to the end of the last
    # window so a late snap cannot point past the audio.
    clip_end = anchors[-1][1]
    last = len(anchors) - 1
    out: List[Tuple[float, float]] = []
    for i, (_, e) in enumerate(anchors):
        if i < last:
            overlap = max(0.0, e - anchors[i + 1][0])
            e = min(starts[i + 1] + overlap, clip_end)
        out.append((starts[i], e))
    return out
def _snap_to_silence(anchors: List[Tuple[float, float]], y, sr: int, target_sec: float) -> List[Tuple[float, float]]:
    """Snap each interior cut to the quietest nearby frame, keeping the overlap.

    Frame-level RMS is computed at a ~20 ms hop. The start of every child
    after the first moves to the lowest-RMS frame within
    ``target_sec * SNAP_WINDOW_FRAC`` of its uniform anchor (and strictly
    before that child's own end). The previous child's end is then rebuilt
    from the snapped start plus the head-overlap laid out by the caller, so
    moving a cut neither drops audio between two children nor changes the
    requested overlap.

    Args:
        anchors: Uniform ``(start, end)`` windows in seconds, in order.
        y: Audio samples passed to ``librosa.feature.rms``.
        sr: Sample rate of ``y``.
        target_sec: Target child duration; scales the snap radius.

    Returns:
        ``(start, end)`` windows of the same length as ``anchors``. The
        input list is returned unchanged when there are fewer than two
        windows or no RMS frames are produced.
    """
    import librosa
    import numpy as np

    if len(anchors) < 2:
        # A single window has no interior cut to move.
        return anchors

    # Frame-level RMS at ~20ms hop.
    hop = max(1, sr // 50)
    rms = librosa.feature.rms(y=y, frame_length=hop * 2, hop_length=hop)[0]
    if len(rms) == 0:
        return anchors
    frame_times = librosa.frames_to_time(np.arange(len(rms)), sr=sr, hop_length=hop)
    snap_window = target_sec * SNAP_WINDOW_FRAC

    # Pass 1: choose a start for every child. The first child always starts
    # where the caller put it.
    starts: List[float] = [anchors[0][0]]
    for s, e in anchors[1:]:
        mask = (
            (frame_times >= s - snap_window)
            & (frame_times <= s + snap_window)
            & (frame_times < e)
        )
        if mask.any():
            # argmin indexes the masked view; map it back to absolute time.
            s = float(frame_times[mask][int(np.argmin(rms[mask]))])
        starts.append(s)

    # Pass 2: rebuild ends so that child i ends exactly `overlap` after child
    # i+1 starts. The overlap is recovered from the uniform layout itself
    # (end of i minus start of i+1). Ends are clamped to the end of the last
    # window so a late snap cannot point past the audio.
    clip_end = anchors[-1][1]
    last = len(anchors) - 1
    out: List[Tuple[float, float]] = []
    for i, (_, e) in enumerate(anchors):
        if i < last:
            overlap = max(0.0, e - anchors[i + 1][0])
            e = min(starts[i + 1] + overlap, clip_end)
        out.append((starts[i], e))
    return out
def plan_slices(
    audio_path: Path,
    target_sec: float,
    overlap_sec: float,
    strategy: SliceStrategy,
) -> List[SlicePlan]:
    """Work out where each child starts and ends; nothing is written.

    The file is decoded to mono at 22050 Hz purely to measure its duration
    and to feed boundary detection. Uniform windows come from
    ``_uniform_anchors``; the "transient" and "silence" strategies then snap
    them with ``_snap_to_onsets`` / ``_snap_to_silence``.

    Args:
        audio_path: Source audio file.
        target_sec: Target child duration in seconds.
        overlap_sec: Requested overlap between neighbouring children.
        strategy: One of ``VALID_STRATEGIES``.

    Returns:
        One ``SlicePlan`` per child, numbered from 1. A file shorter than
        ``target_sec`` yields a single plan covering the whole file.

    Raises:
        ValueError: unknown strategy, zero-length audio, or invalid
            ``target_sec`` / ``overlap_sec`` (raised by ``_uniform_anchors``).
    """
    if strategy not in VALID_STRATEGIES:
        raise ValueError(f"Unknown strategy: {strategy}")

    import librosa

    # Use mono for boundary detection only; final write uses the original.
    samples, rate = librosa.load(str(audio_path), sr=22050, mono=True)
    total = float(len(samples) / rate) if len(samples) else 0.0
    if total <= 0:
        raise ValueError(f"{audio_path.name} has zero duration")

    if total < target_sec:
        # Single child = the whole file. Skip the slice loop entirely.
        spans = [(0.0, total)]
    else:
        spans = _uniform_anchors(total, target_sec, overlap_sec)
        if strategy == "transient":
            spans = _snap_to_onsets(spans, samples, rate, target_sec)
        elif strategy == "silence":
            spans = _snap_to_silence(spans, samples, rate, target_sec)

    plans: List[SlicePlan] = []
    for number, (begin, finish) in enumerate(spans, start=1):
        plans.append(SlicePlan(index=number, start_sec=begin, end_sec=finish))
    return plans
def write_slices(
    audio_path: Path,
    plans: List[SlicePlan],
    out_dir: Path,
    stem: str,
) -> List[Path]:
    """Write children as `<stem>__001.wav`, `<stem>__002.wav`, ... in `out_dir`.

    Children are written with soundfile at the source's native sample rate
    and channel count. Sources stored at more than 16 bits (``PCM_24``,
    ``PCM_32``, ``FLOAT``, ``DOUBLE``) keep their sample format so the copy
    does not quantize them; everything else — 8/16-bit PCM and decoded
    compressed formats — is written as ``PCM_16``.

    Existing files are never overwritten: if a child name is already taken,
    a numeric suffix (`_2`, `_3`, ...) is appended until a free name is found.
    Plans that map to an empty frame range are skipped with a warning, so the
    returned list can be shorter than `plans`.

    Args:
        audio_path: Source audio file to cut.
        plans: Child locations, in seconds, as produced by plan_slices.
        out_dir: Directory receiving the children.
        stem: Base name used for every child file.

    Returns:
        Paths of the files actually written, in plan order.
    """
    import soundfile as sf

    info = sf.info(str(audio_path))
    sr = info.samplerate
    total_frames = info.frames

    # Preserve wide sample formats instead of forcing everything to 16 bits.
    wide_subtypes = ("PCM_24", "PCM_32", "FLOAT", "DOUBLE")
    subtype = info.subtype if info.subtype in wide_subtypes else "PCM_16"
    # float32 cannot carry 32-bit integer or double precision, so those two
    # are read as float64.
    read_dtype = "float64" if subtype in ("PCM_32", "DOUBLE") else "float32"

    written: List[Path] = []
    # Zero-pad the index to at least 3 digits, more if there are 1000+ plans.
    width = max(3, len(str(len(plans))))
    with sf.SoundFile(str(audio_path)) as src:
        for plan in plans:
            start_frame = max(0, int(plan.start_sec * sr))
            end_frame = min(total_frames, int(plan.end_sec * sr))
            if end_frame <= start_frame:
                logger.warning("Skipping empty slice %s [%.2f-%.2f]", plan.index, plan.start_sec, plan.end_sec)
                continue
            src.seek(start_frame)
            data = src.read(end_frame - start_frame, dtype=read_dtype, always_2d=True)
            child_name = f"{stem}__{plan.index:0{width}d}.wav"
            child_path = out_dir / child_name
            if child_path.exists():
                # Don't silently overwrite; bump the suffix until free.
                k = 2
                while True:
                    candidate = out_dir / f"{stem}__{plan.index:0{width}d}_{k}.wav"
                    if not candidate.exists():
                        child_path = candidate
                        break
                    k += 1
            sf.write(str(child_path), data, sr, subtype=subtype)
            written.append(child_path)
    return written

app/core/audio/midi_input.py ADDED Viewed

	@@ -0,0 +1,172 @@

+"""Native MIDI input.
+Reads hardware MIDI via python-rtmidi (CoreMIDI on macOS, WinMM on Windows,
+ALSA on Linux) so MIDI works regardless of the web engine the OS gives us —
+WKWebView has no Web MIDI, WebView2's is flaky. Same pattern as the native
+Ableton Link binding in link_sync.py: wrap an optional native lib and no-op
+gracefully if it isn't importable.
+The backend owns the *transport* only: it enumerates input ports, opens one,
+and broadcasts incoming messages to subscribers (drained by the SSE endpoint
+in app.py). All mapping / learn / takeover logic stays in the frontend
+MidiContext — it just consumes these events instead of Web MIDI.
+"""
+from __future__ import annotations
+import ctypes
+import glob
+import os
+import queue
+import sys
+import threading
+from typing import Any, Dict, List, Optional
+def _preload_bundled_jack() -> None:
+    """Work around a broken RPATH in python-rtmidi's manylinux wheel.
+    The wheel bundles libjack as `python_rtmidi/libjack-<hash>.so.*`, but the
+    `_rtmidi` extension's RPATH points at a directory that doesn't exist
+    (`$ORIGIN/../python_rtmidi.` — note the stray trailing dot), so the loader
+    can't find it and `import rtmidi` dies with
+    `ImportError: libjack-<hash>.so...: cannot open shared object file`.
+    The bundled lib's soname matches the extension's DT_NEEDED exactly, so
+    dlopen'ing it with RTLD_GLOBAL first lets the loader satisfy the dependency
+    from the already-loaded object. Doing it here (rather than patching the
+    venv) survives a pip reinstall and needs no patchelf/root. Linux-only; a
+    no-op everywhere the glob finds nothing.
+    """
+    if not sys.platform.startswith("linux"):
+        return
+    for base in sys.path:
+        if not base or not os.path.isdir(base):
+            continue
+        for lib in glob.glob(os.path.join(base, "python_rtmidi*", "libjack-*.so*")):
+            try:
+                ctypes.CDLL(lib, mode=ctypes.RTLD_GLOBAL)
+            except OSError:
+                pass
# Optional-dependency guard: try the plain import first, then one retry
# after pre-loading the bundled libjack. If both attempts fail the module
# still imports, with `rtmidi` set to None and `_RTMIDI_OK` False, and the
# public functions below check `_RTMIDI_OK` before touching `rtmidi`.
try:
    import rtmidi  # python-rtmidi
    _RTMIDI_OK = True
except Exception:  # pragma: no cover - import guard
    # Most likely the bundled-libjack RPATH bug — preload it and retry once.
    try:
        _preload_bundled_jack()
        import rtmidi
        _RTMIDI_OK = True
    except Exception:
        rtmidi = None
        _RTMIDI_OK = False
# Guards the three module-level variables below; they are shared between
# callers of this module and the rtmidi callback thread.
_lock = threading.Lock()
_midi_in: Any = None                 # the open rtmidi.MidiIn, or None
_current_port: Optional[str] = None  # name of the open port, or None
# One bounded queue per subscriber (see subscribe()); _on_message fans
# every incoming message out to all of them.
_subscribers: List["queue.Queue"] = []
def is_available() -> bool:
    """True if the native MIDI backend is importable.

    Reports the module-level `_RTMIDI_OK` flag, which is set once at import
    time by the guarded `import rtmidi` above.
    """
    return _RTMIDI_OK
def list_inputs() -> List[Dict[str, Any]]:
    """List the MIDI input ports currently visible to rtmidi.

    Each entry is ``{"id", "name", "index"}``: `id` and `name` are both the
    port name (stable across index shuffles) and `index` is the port's
    current rtmidi index. Returns an empty list when rtmidi is unavailable.

    A throwaway ``MidiIn`` is created just for the enumeration and is always
    deleted afterwards, even if ``get_ports()`` raises.
    """
    ports: List[Dict[str, Any]] = []
    if _RTMIDI_OK:
        probe = rtmidi.MidiIn()
        try:
            port_names = probe.get_ports()
        finally:
            probe.delete()
        for position, label in enumerate(port_names):
            ports.append({"id": label, "name": label, "index": position})
    return ports
def current_port() -> Optional[str]:
    """Return the name of the currently open input port, or None.

    Reads `_current_port` while holding `_lock`, the same lock open_input()
    and close_input() hold when they update it.
    """
    with _lock:
        return _current_port
def _on_message(event, _data=None) -> None:
    """rtmidi callback: fan one incoming message out to every subscriber.

    Runs on rtmidi's own thread. `event` is a `(message, delta)` pair; only
    the raw status/data bytes are forwarded, as `{"data": [...]}`, so the
    frontend can reuse its existing Web-MIDI-shaped dispatcher unchanged.
    Subscribers whose queue is full simply miss the message.
    """
    raw_bytes, _elapsed = event
    packet = {"data": list(raw_bytes)}
    # Copy the subscriber list under the lock, deliver outside it.
    with _lock:
        listeners = _subscribers[:]
    for mailbox in listeners:
        try:
            mailbox.put_nowait(packet)
        except queue.Full:
            # slow consumer — drop rather than block the MIDI thread
            continue
def close_input() -> None:
    """Close the currently open MIDI input, if any.

    The module state (`_midi_in`, `_current_port`) is cleared under `_lock`
    first; the native handle is then torn down outside the lock in three
    independent best-effort steps — cancel the callback, close the port,
    delete the instance. A failure in one step is ignored so the remaining
    steps still run.
    """
    global _midi_in, _current_port
    with _lock:
        handle, _midi_in, _current_port = _midi_in, None, None
    if handle is None:
        return
    for step in ("cancel_callback", "close_port", "delete"):
        try:
            getattr(handle, step)()
        except Exception:
            pass
def open_input(port_id: Optional[str]) -> bool:
    """Open the input port whose name == port_id.

    Any currently open port is closed first. A falsy `port_id` just closes
    the current port.

    Args:
        port_id: Name of the port to open, as reported by list_inputs().

    Returns:
        True on success (or on a pure close); False when rtmidi is
        unavailable or no port with that name exists.

    Raises:
        Whatever rtmidi raises while enumerating or opening the port. The
        freshly created MidiIn is released before the exception propagates,
        so a failed open does not leak a native handle.
    """
    global _midi_in, _current_port
    if not _RTMIDI_OK:
        return False
    close_input()
    if not port_id:
        return True

    mi = rtmidi.MidiIn()
    ready = False
    try:
        idx = next(
            (i for i, name in enumerate(mi.get_ports()) if name == port_id),
            None,
        )
        if idx is None:
            return False
        mi.open_port(idx)
        # Drop sysex / timing-clock / active-sensing so the stream stays to the
        # control messages the mapper cares about (CC + notes).
        mi.ignore_types(sysex=True, timing=True, active_sense=True)
        mi.set_callback(_on_message)
        ready = True
    finally:
        if not ready:
            # Port not found, or setup failed part-way: release the handle.
            # Each step is best-effort so cleanup never masks the real error.
            for step in ("close_port", "delete"):
                try:
                    getattr(mi, step)()
                except Exception:
                    pass

    # Publish the handle only once it is fully configured.
    with _lock:
        _midi_in = mi
        _current_port = port_id
    return True
def subscribe() -> "queue.Queue":
    """Register a new listener and return its queue.

    The queue is bounded at 512 entries; `_on_message` drops messages for a
    subscriber whose queue is full. Pass the queue to unsubscribe() when
    done.
    """
    mailbox: "queue.Queue" = queue.Queue(512)
    with _lock:
        _subscribers.append(mailbox)
    return mailbox
def unsubscribe(q: "queue.Queue") -> None:
    """Remove a queue previously returned by subscribe().

    Unknown or already-removed queues are ignored.
    """
    with _lock:
        try:
            _subscribers.remove(q)
        except ValueError:
            # Not registered (any more) — nothing to do.
            pass

app/core/config.py CHANGED Viewed

@@ -4,6 +4,7 @@ from pathlib import Path
 from typing import Dict, Any, Optional
 import json
 class ProjectConfig:
     def __init__(self, project_root: Optional[Path] = None) -> None:
@@ -18,11 +19,11 @@ class ProjectConfig:
                 self.user_data_dir = Path.home() / "Library" / "Application Support" / "FragmentaDesktop"
             else:
                 self.user_data_dir = Path.home() / ".local" / "share" / "FragmentaDesktop"
             self.user_data_dir.mkdir(parents=True, exist_ok=True)
             print(f"Running in frozen mode. Project root: {self.project_root}")
             print(f"User data directory: {self.user_data_dir}")
         else:
             self.frozen = False
             if project_root is None:
@@ -37,123 +38,52 @@ class ProjectConfig:
                             break
                     else:
                         project_root = config_file_dir
             self.project_root: Path = Path(project_root).resolve()
             self.user_data_dir = self.project_root
         fine_tuned_override = os.environ.get("FRAGMENTA_FINE_TUNED_DIR")
         fine_tuned_dir = Path(fine_tuned_override) if fine_tuned_override else self.user_data_dir / "models" / "fine_tuned"
-        data_override = os.environ.get("FRAGMENTA_DATA_DIR")
-        data_dir = Path(data_override) if data_override else self.user_data_dir / "data"
         self.paths: Dict[str, Path] = {
             "models": self.user_data_dir / "models",
             "models_config": self.user_data_dir / "models" / "config",
             "models_pretrained": self.user_data_dir / "models" / "pretrained",
             "models_fine_tuned": fine_tuned_dir,
-            "data": data_dir,
             "logs": self.user_data_dir / "logs",
             "output": self.user_data_dir / "output",
             "application": self.project_root,
             "backend": self.project_root / "app" / "backend",
             "frontend": self.project_root / "app" / "frontend",
-            "stable_audio_tools": self.project_root / "vendor" / "stable-audio-tools",
-            "loraw_vendor": self.project_root / "vendor" / "loraw_vendor",
             "venv": self.project_root / "venv",
         }
         self._ensure_directories()
-        self.model_configs: Dict[str, Dict[str, str]
-                                 ] = self._load_model_configs()
     def _ensure_directories(self) -> None:
         for path_name, path in self.paths.items():
-            if path_name.endswith(('_fine_tuned', 'data')):
                 path.mkdir(parents=True, exist_ok=True)
-    def _load_model_configs(self) -> Dict[str, Dict[str, str]]:
-        return {
-            "stable-audio-open-1.0": {
-                "config": str(self.paths["models_config"] / "model_config.json"),
-                "ckpt": str(self.paths["models_pretrained"] / "stable-audio-open-model.safetensors")
-            },
-            "stable-audio-open-small": {
-                "config": str(self.paths["models_config"] / "model_config_small.json"),
-                "ckpt": str(self.paths["models_pretrained"] / "stable-audio-open-small-model.safetensors")
-            },
-            "custom": {
-                "config": str(self.paths["models_config"] / "model_config_small.json"),
-                "ckpt": str(self.paths["models_pretrained"] / "stable-audio-open-small-model.safetensors")
-            }
-        }
     def get_path(self, path_name: str) -> Path:
         if path_name not in self.paths:
             raise ValueError(f"Unknown path name: {path_name}")
         return self.paths[path_name]
-    def get_model_config(self, model_name: str) -> Dict[str, str]:
-        if model_name not in self.model_configs:
-            raise ValueError(f"Unknown model: {model_name}")
-        return self.model_configs[model_name]
-    def get_dataset_config_path(self) -> str:
-        return str(self.paths["models_config"] / "dataset-config.json")
-    def get_custom_metadata_path(self) -> str:
-        return str(self.project_root / "vendor" / "stable-audio-tools" / "custom_metadata.py")
-    def get_metadata_json_path(self) -> str:
-        return str(self.paths["data"] / "metadata.json")
-    def update_dataset_config(self) -> None:
-        from app.backend.data.simple_audio_processor import SimpleAudioProcessor
-        try:
-            processor = SimpleAudioProcessor(
-                model_config_path=self.paths["models_config"] / "model_config.json"
-            )
-            result = processor.create_dataset_config(
-                input_dir=self.paths["data"],
-                output_dir=self.paths["data"]
-            )
-            target_config = self.paths["models_config"] / "dataset-config.json"
-            with open(target_config, 'w') as f:
-                json.dump(result["dataset_config"], f, indent=4)
-            print(f"Updated dataset config: {target_config}")
-            print(f"Points to {result['file_count']} original audio files")
-            print(f"Sample size: {result['sample_size']} samples ({result['sample_size']/result['sample_rate']:.1f}s)")
-            print(f"Random cropping during training (correct!)")
-        except Exception as e:
-            print(f"Failed to update dataset config: {e}")
-            print("Falling back to basic dataset config...")
-            dataset_config: Dict[str, Any] = {
-                "dataset_type": "audio_dir",
-                "datasets": [
-                    {
-                        "id": "fine_tune_data",
-                        "path": str(self.paths["data"]),
-                        "custom_metadata_module": "custom_metadata"
-                    }
-                ],
-                "random_crop": True
-            }
-            config_path = self.paths["models_config"] / "dataset-config.json"
-            with open(config_path, 'w') as f:
-                json.dump(dataset_config, f, indent=4)
-            print(f"Updated fallback dataset config: {config_path}")
     def to_dict(self) -> Dict[str, Any]:
         return {
             "project_root": str(self.project_root),

 from typing import Dict, Any, Optional
 import json
 class ProjectConfig:
     def __init__(self, project_root: Optional[Path] = None) -> None:
                 self.user_data_dir = Path.home() / "Library" / "Application Support" / "FragmentaDesktop"
             else:
                 self.user_data_dir = Path.home() / ".local" / "share" / "FragmentaDesktop"
             self.user_data_dir.mkdir(parents=True, exist_ok=True)
             print(f"Running in frozen mode. Project root: {self.project_root}")
             print(f"User data directory: {self.user_data_dir}")
         else:
             self.frozen = False
             if project_root is None:
                             break
                     else:
                         project_root = config_file_dir
             self.project_root: Path = Path(project_root).resolve()
             self.user_data_dir = self.project_root
         fine_tuned_override = os.environ.get("FRAGMENTA_FINE_TUNED_DIR")
         fine_tuned_dir = Path(fine_tuned_override) if fine_tuned_override else self.user_data_dir / "models" / "fine_tuned"
+        # Scratch area for browser folder uploads (/api/upload-folder). The
+        # SA2-era "data" dataset directory is gone in 0.2.0 — datasets are now
+        # Dataset Workbench projects under projects/.
+        uploads_override = os.environ.get("FRAGMENTA_UPLOADS_DIR")
+        uploads_dir = Path(uploads_override) if uploads_override else self.user_data_dir / "uploads"
         self.paths: Dict[str, Path] = {
             "models": self.user_data_dir / "models",
             "models_config": self.user_data_dir / "models" / "config",
             "models_pretrained": self.user_data_dir / "models" / "pretrained",
             "models_fine_tuned": fine_tuned_dir,
+            "uploads": uploads_dir,
             "logs": self.user_data_dir / "logs",
             "output": self.user_data_dir / "output",
             "application": self.project_root,
             "backend": self.project_root / "app" / "backend",
             "frontend": self.project_root / "app" / "frontend",
+            "stable_audio_3": self.project_root / "vendor" / "stable-audio-3",
             "venv": self.project_root / "venv",
         }
         self._ensure_directories()
+        # The SA3 catalog lives in app/core/model_manager.py. This dict stays
+        # empty; it's retained only because to_dict()/print_paths() and the
+        # config validator still reference it.
+        self.model_configs: Dict[str, Dict[str, str]] = {}
     def _ensure_directories(self) -> None:
         for path_name, path in self.paths.items():
+            if path_name.endswith(('_fine_tuned', 'uploads')):
                 path.mkdir(parents=True, exist_ok=True)
     def get_path(self, path_name: str) -> Path:
         if path_name not in self.paths:
             raise ValueError(f"Unknown path name: {path_name}")
         return self.paths[path_name]
     def to_dict(self) -> Dict[str, Any]:
         return {
             "project_root": str(self.project_root),

app/core/generation/audio_generator.py CHANGED Viewed

@@ -1,519 +1,536 @@
-import torch
-import soundfile as sf
-import numpy as np
-from pathlib import Path
-from typing import Dict, Any, Optional, List, Tuple
-import logging
 import re
 import sys
 import threading
 import time
 import warnings
-from datetime import datetime
-class GenerationStopped(Exception):
-    """Raised by the per-step callback when a stop has been requested."""
-    pass
-def _slugify_prompt(text: str, max_len: int = 40) -> str:
-    s = re.sub(r'[^a-zA-Z0-9]+', '_', text.strip().lower())
-    s = re.sub(r'_+', '_', s).strip('_')
-    return s[:max_len] or 'untitled'
-sys.path.append(
-    str(Path(__file__).parent.parent.parent.parent / "vendor" / "stable-audio-tools"))
-# LoRAW lives at <project>/vendor/loraw_vendor; expose its `loraw` package for inference.
-sys.path.append(
-    str(Path(__file__).parent.parent.parent.parent / "vendor" / "loraw_vendor"))
-warnings.filterwarnings(
-    "ignore",
-    message=r"pkg_resources is deprecated as an API.*",
-    category=UserWarning,
-)
-from stable_audio_tools.models.utils import load_ckpt_state_dict
-from stable_audio_tools.inference.generation import generate_diffusion_cond
-from stable_audio_tools.models import create_model_from_config
-from loraw.network import create_lora_from_config
-logger = logging.getLogger(__name__)
 class AudioGenerator:
-    def __init__(self, config):
-        self.config = config
-        self.model = None
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
-        self.current_model_name = None
-        self.current_model_path = None
-        self.current_model_key = None
-        self.is_distilled_small = False
-        self.is_fine_tuned = False
-        # LoRA state. `lora` holds the LoRAWrapper instance when one is active;
-        # `_active_lora_path` / `_active_lora_multiplier` are used (along with
-        # the base-model identifier) in `current_model_key` so the cache
-        # invalidates whenever the LoRA selection changes — forcing a fresh
-        # base reload because LoRAW's `activate()` is not reversible in-place.
-        self.lora = None
-        self._active_lora_path = None
-        self._active_lora_multiplier = 1.0
-        self._stop_event = threading.Event()
-        logger.info(f"Using device: {self.device}")
-    def _apply_lora(self, lora_path: str, lora_config: Dict[str, Any], multiplier: float = 1.0):
-        """Wrap the currently-loaded base model with a LoRA from LoRAW.
-        Caller is responsible for ensuring the base model is fresh (no prior
-        LoRA injected) — typically by routing through `generate_audio`'s cache
-        invalidation, which reloads the base when the LoRA selection changes.
-        """
-        if self.model is None:
-            raise RuntimeError("Base model must be loaded before applying a LoRA")
-        # torch.compile wraps in OptimizedModule, which prefixes named_modules()
-        # with `_orig_mod/`. LoRAW's saved state has no such prefix (training
-        # didn't compile). Operate on the underlying module so scan_model keys
-        # match the checkpoint exactly. The compiled wrapper still dispatches
-        # forward through this same module, so the LoRA stays active.
-        target = getattr(self.model, "_orig_mod", self.model)
-        full_config = {
-            "model_type": getattr(target, "model_type", "diffusion_cond"),
-            "lora": lora_config,
-        }
-        self.lora = create_lora_from_config(full_config, target)
-        state = torch.load(lora_path, map_location=self.device)
-        self.lora.load_weights(state, multiplier=multiplier)
-        self.lora.activate()
-        self._active_lora_path = lora_path
-        self._active_lora_multiplier = multiplier
-        logger.info(f"LoRA applied: {Path(lora_path).name} (multiplier={multiplier})")
     def request_stop(self) -> bool:
-        """Signal the in-flight diffusion loop (if any) to abort at the next step."""
-        already_set = self._stop_event.is_set()
-        self._stop_event.set()
-        return not already_set
-    def load_local_base_model(self, model_name: str = "stable-audio-open-small") -> bool:
-        try:
-            logger.info(f"Loading local base model: {model_name}")
-            self.current_model_name = model_name
-            from stable_audio_tools.models.factory import create_model_from_config
-            from stable_audio_tools.models.utils import load_ckpt_state_dict
-            if "small" in model_name:
-                config_file = "model_config_small.json"
-            else:
-                config_file = "model_config.json"
-            self.is_distilled_small = "small" in model_name.lower()
-            self.is_fine_tuned = False
-            config_path = Path(__file__).parent.parent.parent.parent / "models" / "config" / config_file
-            logger.info(f"Using config file: {config_path}")
-            with open(config_path, 'r') as f:
-                import json
-                model_config = json.load(f)
-            self.model = create_model_from_config(model_config)
-            if model_name == 'stable-audio-open-small':
-                model_file_name = 'stable-audio-open-small-model.safetensors'
-            elif model_name == 'stable-audio-open-1.0':
-                model_file_name = 'stable-audio-open-model.safetensors'
-            else:
-                model_file_name = f"{model_name}-model.safetensors"
-            model_file = Path(__file__).parent.parent.parent.parent / "models" / "pretrained" / model_file_name
-            self.current_model_path = str(model_file)
-            logger.info(f"Loading weights from: {model_file}")
-            if not model_file.exists():
-                raise FileNotFoundError(f"Local model file not found: {model_file}")
-            state_dict = load_ckpt_state_dict(str(model_file))
-            self.model.load_state_dict(state_dict, strict=False)
-            self.model = self.model.to(self.device)
-            self.model.eval()
-            self.model.requires_grad_(False)
-            if self.device.startswith("cuda"):
-                self.model = torch.compile(self.model, mode="reduce-overhead")
-            logger.info("Local base model loaded successfully")
-            return True
-        except Exception as e:
-            logger.error(f"Failed to load local base model: {e}")
             return False
-    def load_model(self, model_path: Optional[Path] = None) -> bool:
-        try:
-            print(f"Loading model from {model_path}")
-            if model_path is None:
-                return self.load_local_base_model("stable-audio-open-small")
-            else:
-                safetensors_files = list(model_path.glob("*.safetensors"))
-                if safetensors_files:
-                    unwrapped_path = str(safetensors_files[0])
-                    print(f"Found safetensors file: {unwrapped_path}")
-                    return self.load_unwrapped_model(unwrapped_path)
-                else:
-                    print(f"No safetensors files found in {model_path}, using local base model")
-                    return self.load_local_base_model("stable-audio-open-small")
-        except Exception as e:
-            print(f"Failed to load model: {e}")
-            return False
-    def load_unwrapped_model(self, unwrapped_model_path: str, config_file: str = None) -> bool:
-        try:
-            print(f"Loading unwrapped model from {unwrapped_model_path}")
-            self.current_model_path = unwrapped_model_path
-            from stable_audio_tools.models.factory import create_model_from_config
-            from stable_audio_tools.models.utils import load_ckpt_state_dict
-            if config_file is None:
-                config_file = "model_config_small.json"
-            self.is_distilled_small = "small" in config_file.lower()
-            metadata_path = Path(unwrapped_model_path).parent.parent / "training_metadata.json"
-            self.is_fine_tuned = metadata_path.exists()
-            if self.is_fine_tuned:
-                logger.info(
-                    f"Detected fine-tuned model via {metadata_path}; "
-                    f"using full diffusion sampler recipe instead of distilled 8-step pingpong"
                 )
-            config_path = Path(__file__).parent.parent.parent.parent / \
-                "models" / "config" / config_file
-            print(f"Using config file: {config_path}")
-            with open(config_path, 'r') as f:
                 import json
-                model_config = json.load(f)
-            self.model = create_model_from_config(model_config)
-            state_dict = load_ckpt_state_dict(unwrapped_model_path)
-            self.model.load_state_dict(state_dict, strict=False)
-            self.model = self.model.to(self.device)
-            self.model.eval()
-            self.model.requires_grad_(False)
-            if self.device.startswith("cuda"):
-                self.model = torch.compile(self.model, mode="reduce-overhead")
-            print(f"AUDIO GENERATOR: Unwrapped model loaded successfully")
-            return True
-        except Exception as e:
-            print(f"Failed to load unwrapped model: {e}")
             return False
     def generate_audio(
         self,
         prompt: str,
-        model_path: Optional[Path] = None,
-        unwrapped_model_path: Optional[str] = None,
-        config_file: Optional[str] = None,
         duration: float = 10.0,
-        cfg_scale: float = 7.0,
-        steps: int = 250,
         seed: int = -1,
-        output_path: Optional[Path] = None,
-        batch_index: int = 1,
-        batch_total: int = 1,
-        loop_mode: bool = False,
-        lora_path: Optional[str] = None,
-        lora_config: Optional[Dict[str, Any]] = None,
-        lora_multiplier: float = 1.0,
     ) -> Path:
-        print(f"\nAUDIO GENERATOR: generate_audio called")
-        print(f"   - Prompt: '{prompt}'")
-        print(f"   - Duration: {duration}s")
-        if lora_path:
-            print(f"   - LoRA: {lora_path} (×{lora_multiplier})")
-        # The cache key includes LoRA selection so the base reloads whenever
-        # the LoRA changes (LoRAW's activate() is not reversible in-place;
-        # the only safe way to drop or swap a LoRA is to reload the base).
-        lora_signature = (lora_path, lora_multiplier) if lora_path else (None, 1.0)
-        if unwrapped_model_path:
-            target_key = ('unwrapped', str(unwrapped_model_path), lora_signature)
-        elif model_path:
-            target_key = ('path', str(model_path), lora_signature)
-        else:
-            target_key = ('default', 'stable-audio-open-small', lora_signature)
-        if self.model is not None and self.current_model_key == target_key:
-            print(f"AUDIO GENERATOR: Reusing already-loaded model")
-        else:
-            print(f"AUDIO GENERATOR: Loading new model")
-            # Reset any prior LoRA state — load_*_model rebuilds self.model fresh.
-            self.lora = None
-            self._active_lora_path = None
-            self._active_lora_multiplier = 1.0
-            if unwrapped_model_path:
-                print(f"AUDIO GENERATOR: Loading unwrapped model from {unwrapped_model_path}")
-                if not self.load_unwrapped_model(unwrapped_model_path, config_file):
-                    raise ValueError(f"Failed to load unwrapped model from {unwrapped_model_path}")
-            elif model_path:
-                model_path_str = str(model_path)
-                print(f"AUDIO GENERATOR: Checking model path: {model_path_str}")
-                if "stable-audio-open-small" in model_path_str:
-                    print(f"AUDIO GENERATOR: Loading local small base model")
-                    if not self.load_local_base_model("stable-audio-open-small"):
-                        raise ValueError("Failed to load local small base model")
-                elif "stable-audio-open-model" in model_path_str:
-                    print(f"AUDIO GENERATOR: Loading local large base model")
-                    if not self.load_local_base_model("stable-audio-open-1.0"):
-                        raise ValueError("Failed to load local large base model")
-                else:
-                    print(f"AUDIO GENERATOR: Loading fine-tuned model from {model_path}")
-                    if not self.load_model(model_path):
-                        raise ValueError(f"Failed to load model from {model_path}")
-            else:
-                print(f"AUDIO GENERATOR: Loading default local small base model")
-                if not self.load_local_base_model("stable-audio-open-small"):
-                    raise ValueError("Failed to load default local base model")
-            # Attach the LoRA (if requested) onto the freshly loaded base.
-            if lora_path:
-                if not lora_config:
-                    raise ValueError("lora_config required when lora_path is set")
-                self._apply_lora(lora_path, lora_config, lora_multiplier)
-            self.current_model_key = target_key
-        print(f"AUDIO GENERATOR: Model loaded successfully")
-        self._stop_event.clear()
-        def _stop_callback(state):
-            if self._stop_event.is_set():
-                raise GenerationStopped("Stop requested mid-diffusion")
-        try:
-            # Three recipes, picked by what the loaded weights actually are:
-            #   1. Original distilled small — rectified-flow + CFG distillation
-            #      baked in. Requires pingpong / 8 steps / CFG 1.0.
-            #   2. Fine-tuned small — distillation destroyed by SFT but the
-            #      objective is still rectified-flow, so the sampler name must
-            #      come from the rectified-flow family (euler|rk4|dpmpp|pingpong),
-            #      NOT from the v-diffusion family. Use external CFG.
-            #   3. Large model — standard v-diffusion, accepts dpmpp-3m-sde.
-            use_distilled_recipe = self.is_distilled_small and not self.is_fine_tuned
-            if use_distilled_recipe:
-                effective_sampler = "pingpong"
-                effective_steps = 8
-                effective_cfg = 1.0
-                sigma_kwargs = {}
-            elif self.is_distilled_small:
-                effective_sampler = "dpmpp"
-                effective_steps = steps
-                effective_cfg = cfg_scale
-                sigma_kwargs = {"sigma_max": 1.0}
-            else:
-                effective_sampler = "dpmpp-3m-sde"
-                effective_steps = steps
-                effective_cfg = cfg_scale
-                sigma_kwargs = {"sigma_min": 0.03, "sigma_max": 1000}
-            print(f"Generating audio for prompt: '{prompt}'")
-            recipe_note = ""
-            if use_distilled_recipe:
-                recipe_note = " (distilled small overrides applied)"
-            elif self.is_fine_tuned and self.is_distilled_small:
-                recipe_note = " (fine-tuned small: rectified-flow dpmpp + external CFG)"
-            print(
-                f"Duration: {duration}s, CFG scale: {effective_cfg}, "
-                f"Steps: {effective_steps}, Sampler: {effective_sampler}"
-                + recipe_note
-            )
-            requested_sample_size = int(duration * self.model.sample_rate)
-            max_sample_size = None
-            try:
-                max_sample_size = self.model.sample_size
-            except AttributeError:
-                if hasattr(self.model, 'model') and hasattr(self.model.model, 'sample_size'):
-                    max_sample_size = self.model.model.sample_size
-                else:
-                    config_path = Path(__file__).parent.parent.parent.parent / "models" / "config"
-                    if hasattr(self, 'current_model_name') and self.current_model_name:
-                        if 'small' in self.current_model_name:
-                            config_file = config_path / "model_config_small.json"
-                        else:
-                            config_file = config_path / "model_config.json"
-                    else:
-                        if hasattr(self, 'current_model_path') and self.current_model_path:
-                            model_file = Path(self.current_model_path)
-                            if model_file.exists():
-                                file_size_gb = model_file.stat().st_size / (1024**3)
-                                if file_size_gb < 2.0:
-                                    config_file = config_path / "model_config_small.json"
-                                else:
-                                    config_file = config_path / "model_config.json"
-                            else:
-                                config_file = config_path / "model_config_small.json"
-                        else:
-                            config_file = config_path / "model_config_small.json"
-                    if config_file.exists():
-                        with open(config_file, 'r') as f:
-                            import json
-                            config_data = json.load(f)
-                            max_sample_size = config_data.get('sample_size', 44100 * 10)
-                    else:
-                        max_sample_size = 44100 * 10
-            if max_sample_size and requested_sample_size > max_sample_size:
-                print(f"Requested duration {duration}s exceeds model maximum. Truncating.")
-                requested_sample_size = max_sample_size
-                duration = requested_sample_size / self.model.sample_rate
-            if seed == -1:
-                import numpy as np
-                seed = np.random.randint(0, 2**32 - 1, dtype=np.int64)
-            print(f"Using seed: {seed}")
-            if loop_mode and max_sample_size:
-                song_seconds = max(int(duration),
-                                   int(max_sample_size / self.model.sample_rate))
-            else:
-                song_seconds = int(duration)
-            conditioning = [{
-                "prompt": prompt,
-                "seconds_start": 0,
-                "seconds_total": song_seconds,
-            }]
-            device = next(self.model.parameters()).device
-            print(f"Using device: {device}")
-            with warnings.catch_warnings():
-                # Known torchsde float-boundary chatter from dpmpp-3m-sde.
-                warnings.filterwarnings(
-                    "ignore",
-                    message=r"Should have tb<=t1 but got tb=.*",
-                    category=UserWarning,
-                    module=r"torchsde\._brownian\.brownian_interval",
                 )
-                warnings.filterwarnings(
-                    "ignore",
-                    message=r"Should have ta>=t0 but got ta=.*",
-                    category=UserWarning,
-                    module=r"torchsde\._brownian\.brownian_interval",
-                )
-                audio = generate_diffusion_cond(
-                    model=self.model,
-                    steps=effective_steps,
-                    cfg_scale=effective_cfg,
-                    conditioning=conditioning,
-                    batch_size=1,
-                    sample_size=requested_sample_size,
-                    seed=seed,
-                    device=str(device),
-                    sampler_type=effective_sampler,
-                    callback=_stop_callback,
-                    **sigma_kwargs,
                 )
-            print(f"Generation complete, audio shape: {audio.shape}")
-            from einops import rearrange
-            audio = rearrange(audio, "b d n -> d (b n)").to(torch.float32)
-            audio = audio / audio.abs().max()
-            audio_int16 = (audio.clamp(-1, 1) * 32767).to(torch.int16).cpu()
-            if output_path is None:
-                output_dir = Path(__file__).parent.parent.parent.parent / "output"
-                output_dir.mkdir(exist_ok=True)
-                ts = datetime.now().strftime('%Y%m%d_%H%M%S')
-                slug = _slugify_prompt(prompt)
-                suffix = f"_{batch_index}" if batch_total > 1 else ""
-                output_path = output_dir / f"fragmenta_{ts}_{slug}{suffix}.wav"
-            self.save_audio(audio_int16, output_path, self.model.sample_rate)
-            print(f"AUDIO GENERATOR: Generation complete")
-            print(f"   - Output file: {output_path}")
-            print(f"   - Output file size: {output_path.stat().st_size} bytes")
-            return output_path
         except GenerationStopped:
-            print("AUDIO GENERATOR: Generation stopped by user request")
             raise
-        except Exception as e:
-            print(f"AUDIO GENERATOR: Error during generation: {str(e)}")
-            import traceback
-            traceback.print_exc()
             raise
-        finally:
-            self._stop_event.clear()
-    def generate_batch(
-        self,
-        prompts: List[str],
-        duration: float = 10.0,
-        cfg_scale: float = 6.0,
-        steps: int = 250,
-        seed: int = -1,
-        output_dir: Optional[Path] = None
-    ) -> List[Path]:
-        results = []
-        for i, prompt in enumerate(prompts):
-            print(f"Generating audio {i+1}/{len(prompts)}")
-            current_seed = seed if seed != -1 else seed + i
-            output_path = None
-            if output_dir:
-                output_dir.mkdir(exist_ok=True, parents=True)
-                output_path = output_dir / f"generated_{i+1:03d}.wav"
-            try:
-                output_path = self.generate_audio(
-                    prompt=prompt,
-                    duration=duration,
-                    cfg_scale=cfg_scale,
-                    steps=steps,
-                    seed=current_seed,
-                    output_path=output_path
-                )
-                results.append(output_path)
-            except Exception as e:
-                print(f"Failed to generate audio for prompt {i+1}: {e}")
-                results.append(None)
-        return results
-    def save_audio(self, audio: torch.Tensor, output_path: Path, sample_rate: int):
-        output_path.parent.mkdir(exist_ok=True, parents=True)
-        audio_np = audio.detach().cpu().transpose(0, 1).numpy()
-        sf.write(str(output_path), audio_np, sample_rate, subtype="PCM_16")
-    def get_model_info(self) -> Dict[str, Any]:
-        if self.model is None:
-            return {"status": "no_model_loaded"}
-        return {
-            "status": "loaded",
-            "sample_rate": self.model.sample_rate,
-            "device": str(self.device),
-            "model_type": getattr(self.model, 'model_type', 'unknown'),
-            "io_channels": getattr(self.model, 'io_channels', 'unknown')
-        }

+"""SA3 inference engine.
+Thin wrapper around stable_audio_3.StableAudioModel.from_pretrained() that
+caches the loaded model between requests (eviction on model_id change),
+auto-detects the device, and writes 44.1 kHz stereo int16 WAV.
+Cancellation is wired via `request_stop()` for API parity, but SA3's
+generate() doesn't expose a per-step callback yet — the flag is checked
+between calls, not inside them. A finer-grained cancel hook is a Phase
+3.1 follow-up.
+"""
+import os
+import platform
 import re
 import sys
 import threading
 import time
 import warnings
+from pathlib import Path
+from typing import Any, Callable, Dict, Optional, Tuple
+import numpy as np
+import soundfile as sf
+import torch
+from utils.logger import get_logger
+logger = get_logger("AudioGenerator")
+# Live progress from the SA3 sampler. SA3's `model.generate(**sampler_kwargs)`
+# forwards `callback=fn` into the sampler, which fires it per ODE step with
+# `{'i': step_index, ...}`. We mirror that into this dict so the frontend can
+# poll real progress instead of a fake ticker. Reset on each new generation.
+# NOTE(review): the module docstring states that SA3's generate() does not
+# expose a per-step callback yet. One of the two descriptions is stale;
+# confirm against the vendored SA3 sampler and reconcile.
+_generation_state: Dict[str, Any] = {
+    "is_generating": False,
+    # idle | loading | sampling | decoding | complete | failed
+    "phase": "idle",
+    "step": 0,
+    "total_steps": 0,
+    "progress": 0,          # 0-100, derived
+    "batch_index": 0,
+    "batch_total": 0,
+    "started_at": None,
+    "ended_at": None,
+    "error": None,
+}
+# Guards every read and write of _generation_state done by
+# get_generation_progress(), _set_progress() and _reset_progress().
+_generation_state_lock = threading.Lock()
+def get_generation_progress() -> Dict[str, Any]:
+    """Snapshot of the current generation's live progress. Cheap to call.
+    Returns a shallow copy of the module-level `_generation_state`, taken
+    while holding `_generation_state_lock`, so reassigning top-level keys on
+    the returned dict does not touch the shared state.
+    """
+    with _generation_state_lock:
+        return dict(_generation_state)
+def _set_progress(**kwargs: Any) -> None:
+    """Merge fields into _generation_state under the lock.
+    `progress` is re-derived on every call from the stored `step` and
+    `total_steps` (a rounded integer percentage, or 0 when `total_steps` is
+    0 or unset), so any `progress` value passed in `kwargs` is overwritten.
+    """
+    with _generation_state_lock:
+        _generation_state.update(kwargs)
+        # `or 0` maps a stored None (or other falsy value) to 0 before int().
+        total = int(_generation_state.get("total_steps") or 0)
+        step = int(_generation_state.get("step") or 0)
+        # NOTE(review): the result is not clamped; a `step` larger than
+        # `total_steps` yields a value above 100.
+        _generation_state["progress"] = (
+            int(round(100 * step / total)) if total > 0 else 0
+        )
+def _reset_progress() -> None:
+    """Restore every field of _generation_state to its idle default.
+    Writes the same ten keys, with the same values, as the initial
+    module-level dict, while holding `_generation_state_lock`.
+    """
+    with _generation_state_lock:
+        _generation_state.update({
+            "is_generating": False, "phase": "idle",
+            "step": 0, "total_steps": 0, "progress": 0,
+            "batch_index": 0, "batch_total": 0,
+            "started_at": None, "ended_at": None, "error": None,
+        })
+# Vendored SA3 lives at <repo>/vendor/stable-audio-3 — put it on sys.path so
+# `import stable_audio_3` resolves without a global pip install.
+# parents[3] climbs generation/ -> core/ -> app/ -> repo root from this file.
+_SA3_VENDOR = Path(__file__).resolve().parents[3] / "vendor" / "stable-audio-3"
+# Insert at the front, and only once, so repeated imports don't grow sys.path.
+if str(_SA3_VENDOR) not in sys.path:
+    sys.path.insert(0, str(_SA3_VENDOR))
+# model_id -> (sa3_name passed to StableAudioModel.from_pretrained,
+#              kind tag ("post" or "base"), max duration seconds).
+# Kept in sync manually with _SA3_CATALOG in app/core/model_manager.py.
+_MODEL_INFO: Dict[str, Tuple[str, str, int]] = {
+    "sa3-small-music":      ("small-music",      "post", 120),
+    "sa3-small-sfx":        ("small-sfx",        "post", 120),
+    "sa3-medium":           ("medium",           "post", 380),
+    "sa3-small-music-base": ("small-music-base", "base", 120),
+    "sa3-small-sfx-base":   ("small-sfx-base",   "base", 120),
+    "sa3-medium-base":      ("medium-base",      "base", 380),
+}
+class GenerationStopped(Exception):
+    """Raised when an in-flight generation is interrupted by a stop request."""
+    # Plain Exception subclass with no extra state; it carries only whatever
+    # message the raiser passes.
+def _slugify(text: str, max_len: int = 40) -> str:
+    """Turn free text into a short lowercase slug.
+    Each run of characters outside [a-zA-Z0-9_-] becomes a single "_"; the
+    result is cut to `max_len` characters, stripped of leading/trailing "_",
+    and lowercased. Falsy input (empty string or None), or input that
+    reduces to nothing, yields "audio".
+    """
+    # `text or ""` lets a None argument pass through re.sub without raising.
+    s = re.sub(r"[^a-zA-Z0-9_-]+", "_", text or "")
+    # Truncate before stripping so a cut that lands on "_" is cleaned up too.
+    return s[:max_len].strip("_").lower() or "audio"
+def _autodetect_device() -> str:
+    """cuda → mps → cpu, with FRAGMENTA_FORCE_DEVICE override."""
+    # A non-empty override is returned verbatim; it is not validated here.
+    override = os.environ.get("FRAGMENTA_FORCE_DEVICE")
+    if override:
+        return override
+    if torch.cuda.is_available():
+        return "cuda"
+    # getattr guard: skip the MPS probe on torch builds where
+    # torch.backends has no `mps` attribute.
+    if getattr(torch.backends, "mps", None) and torch.backends.mps.is_available():
+        return "mps"
+    return "cpu"
 class AudioGenerator:
+    """One-model warm cache. Reload only when model_id changes."""
+    def __init__(self, config: Any) -> None:
+        # `config` must expose get_path(); _ensure_model() calls
+        # self.config.get_path("models_pretrained") to locate the HF hub dir.
+        self.config = config
+        # Nothing is loaded at construction: model, model id and device all
+        # start as None and are compared by _ensure_model()'s cache check.
+        self.model: Any = None
+        self._model_id: Optional[str] = None
+        self._device: Optional[str] = None
+        # Set to True by request_stop().
+        self._stop_requested: bool = False
+        # Tracks LoRAs currently injected into self.model. List of
+        # {"path": str, "strength": float}. Empty when no LoRAs are active.
+        self._loaded_loras: list = []
+    # --- cooperative cancel ---------------------------------------------------
     def request_stop(self) -> bool:
+        """Flag a stop request.
+        Returns True if this call set the flag, False if a stop was already
+        pending.
+        """
+        # NOTE(review): nothing in the visible part of the class clears
+        # _stop_requested again — confirm it is reset before each generation.
+        if self._stop_requested:
             return False
+        self._stop_requested = True
+        return True
+    # --- model load -----------------------------------------------------------
+    def _ensure_model(
+        self,
+        model_id: str,
+        device: Optional[str] = None,
+        half: bool = True,
+    ) -> None:
+        if model_id not in _MODEL_INFO:
+            raise ValueError(f"Unknown SA3 model_id: {model_id}")
+        sa3_name, _kind, _max_dur = _MODEL_INFO[model_id]
+        if model_id in ("sa3-medium", "sa3-medium-base"):
+            # Medium normally requires Flash Attention 2 for its long-form (up
+            # to 380s) sliding-window attention. FRAGMENTA_MEDIUM_NO_FLASH=1 is
+            # the Path-B validation switch: it lets medium load WITHOUT
+            # flash_attn and fall back to PyTorch-native attention
+            # (flex_attention -> chunked-halo SDPA -> masked SDPA; see
+            # transformer.apply_attn). Output is math-equivalent, but VRAM is
+            # higher and sampling slower at long durations. Off by default, so
+            # the shipped behaviour is unchanged until the fallback is validated.
+            allow_no_flash = os.environ.get("FRAGMENTA_MEDIUM_NO_FLASH") == "1"
+            try:
+                import flash_attn  # noqa: F401
+                have_flash = True
+            except ImportError as err:
+                have_flash = False
+                _flash_err = err
+            if not have_flash and not allow_no_flash:
+                if platform.system() == "Windows":
+                    raise RuntimeError(
+                        "sa3-medium requires Flash Attention 2, which doesn't "
+                        "have Windows wheels. Use sa3-small-music / sa3-small-sfx, "
+                        "run Fragmenta via Docker on WSL2, or set "
+                        "FRAGMENTA_MEDIUM_NO_FLASH=1 to run on the (slower, "
+                        "higher-memory) PyTorch attention fallback."
+                    ) from _flash_err
+                raise RuntimeError(
+                    "sa3-medium needs Flash Attention 2 (flash_attn) but the "
+                    f"current install is unusable: {_flash_err}.\n"
+                    "Pick the wheel matching your torch+ABI+Python+CUDA from\n"
+                    "  https://github.com/Dao-AILab/flash-attention/releases\n"
+                    "and install with `pip install --no-deps <wheel-url>`. "
+                    "See the note next to flash-attn in requirements.txt for an example.\n"
+                    "Or set FRAGMENTA_MEDIUM_NO_FLASH=1 to use the PyTorch "
+                    "attention fallback."
+                ) from _flash_err
+            if not have_flash:
+                logger.warning(
+                    "sa3-medium loading WITHOUT Flash Attention 2 "
+                    "(FRAGMENTA_MEDIUM_NO_FLASH=1). Using the PyTorch-native "
+                    "attention fallback — expect higher VRAM and slower sampling "
+                    "at long durations. Validate memory headroom before "
+                    "generating long-form (up to 380s) clips."
                 )
+        device = device or _autodetect_device()
+        if (
+            self.model is not None
+            and self._model_id == model_id
+            and self._device == device
+        ):
+            return  # warm cache hit
+        if self.model is not None:
+            del self.model
+            self.model = None
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+        # Two layouts to support during the unification transition:
+        #   1. Canonical (post-Phase 5c): HF cache layout rooted at
+        #      <app>/models/pretrained/sa3/hub/. model_manager sets
+        #      HF_HUB_CACHE to that path, so StableAudioModel.from_pretrained
+        #      finds files there without going to ~/.cache/huggingface.
+        #   2. Legacy: <app>/models/pretrained/sa3/<model_id>/ flat layout
+        #      from earlier downloads. We fall back to direct load so
+        #      pre-existing users don't have to re-download.
+        #
+        # Defense-in-depth: re-force the HF cache vars here too. model_manager
+        # sets them at construction, but if generation is reached via an
+        # alternate code path or the env was clobbered later, we still
+        # guarantee resolution into <pretrained>/sa3/hub/.
+        hub_dir = self.config.get_path("models_pretrained") / "sa3" / "hub"
+        hf_env_keys = ("HF_HUB_CACHE", "HUGGINGFACE_HUB_CACHE",
+                       "TRANSFORMERS_CACHE", "HF_HUB_OFFLINE")
+        prev_env = {k: os.environ.get(k) for k in hf_env_keys}
+        os.environ["HF_HUB_CACHE"] = str(hub_dir)
+        os.environ["HUGGINGFACE_HUB_CACHE"] = str(hub_dir)
+        os.environ["TRANSFORMERS_CACHE"] = str(hub_dir)
+        os.environ["HF_HUB_OFFLINE"] = "1"
+        # huggingface_hub captures HF_HUB_CACHE and HF_HUB_OFFLINE as
+        # module-level constants AT IMPORT TIME. The Flask backend imports
+        # huggingface_hub (transitively, via model_manager.py) before we ever
+        # set these env vars, so the constants point at ~/.cache/huggingface/
+        # and offline=False. Setting os.environ now has no effect on already-
+        # captured constants. We have to monkey-patch them directly.
+        # Same trick we used for the CLAP loader.
+        prev_hub_constants = {}
+        try:
+            import huggingface_hub.constants as _hf_const
+            prev_hub_constants = {
+                "HF_HUB_CACHE": _hf_const.HF_HUB_CACHE,
+                "HF_HUB_OFFLINE": _hf_const.HF_HUB_OFFLINE,
+            }
+            _hf_const.HF_HUB_CACHE = str(hub_dir)
+            _hf_const.HF_HUB_OFFLINE = True
+        except Exception:
+            _hf_const = None
+        try:
+            try:
+                from stable_audio_3 import StableAudioModel
+                with warnings.catch_warnings():
+                    warnings.simplefilter("ignore")
+                    self.model = StableAudioModel.from_pretrained(
+                        sa3_name, device=device, model_half=half,
+                    )
+            except (FileNotFoundError, OSError) as primary_err:
+                # HF cache miss — fall back to flat layout.
+                legacy_dir = self.config.get_path("models_pretrained") / "sa3" / model_id
+                config_path = legacy_dir / "model_config.json"
+                ckpt_path = legacy_dir / "model.safetensors"
+                if not (config_path.exists() and ckpt_path.exists()):
+                    raise FileNotFoundError(
+                        f"Checkpoint '{model_id}' not found in HF cache "
+                        f"({os.environ.get('HF_HUB_CACHE')}) or legacy flat "
+                        f"layout ({legacy_dir}). Download it from the "
+                        f"Checkpoint Manager."
+                    ) from primary_err
                 import json
+                with open(config_path) as fh:
+                    model_config = json.load(fh)
+                from stable_audio_3.loading_utils import load_diffusion_cond
+                with warnings.catch_warnings():
+                    warnings.simplefilter("ignore")
+                    inner = load_diffusion_cond(
+                        model_config, str(ckpt_path),
+                        device=device, model_half=half,
+                    )
+                    inner.use_lora = False
+                    inner.lora_names = []
+                    self.model = StableAudioModel(inner, model_config, device, half)
+        finally:
+            for k, v in prev_env.items():
+                if v is None:
+                    os.environ.pop(k, None)
+                else:
+                    os.environ[k] = v
+            # Restore the patched constants so we don't permanently alter
+            # global huggingface_hub state for anything else in-process.
+            if _hf_const is not None and prev_hub_constants:
+                _hf_const.HF_HUB_CACHE = prev_hub_constants["HF_HUB_CACHE"]
+                _hf_const.HF_HUB_OFFLINE = prev_hub_constants["HF_HUB_OFFLINE"]
+        self._model_id = model_id
+        self._device = device
+    # --- LoRA stack -----------------------------------------------------------
+    def _apply_loras(self, loras: list) -> None:
+        """Make the LoRA stack on ``self.model`` match ``loras`` (idempotent).
+        loras: [{"path": str, "strength": float}, ...] in stack order; an
+        empty list unloads every adapter.
+        Strategy:
+          * Same paths in same order → just update strengths in place.
+          * Different paths → remove all, load fresh.
+        No-op when no model is loaded. ``self._loaded_loras`` is cleared as
+        soon as the old adapters are dropped, so a failed reload/load cannot
+        leave the previous stack recorded as still attached.
+        Raises RuntimeError if adapters must be loaded but no model could be
+        restored; errors from the model's own LoRA calls propagate unchanged.
+        """
+        if self.model is None:
+            return
+        # Private copies: set_lora_strength() edits these dicts in place and
+        # must not write through to the caller's request payload.
+        stack = [dict(entry) for entry in loras]
+        new_paths = [entry["path"] for entry in stack]
+        cur_paths = [entry["path"] for entry in self._loaded_loras]
+        if new_paths == cur_paths:
+            # Path-set unchanged; only strengths may have moved.
+            for i, entry in enumerate(stack):
+                self.model.set_lora_strength(entry["strength"], lora_index=i)
+            self._loaded_loras = stack
+            return
+        # Path-set changed. Remove any currently loaded, then load the new set.
+        if cur_paths:
+            try:
+                from stable_audio_3.models.lora import remove_lora
+                # SA3 applies LoRA to the DiffusionCond's DiT (.model) and
+                # conditioner (.conditioner) — mirror StableAudioModel's own
+                # set_lora_strength which iterates both submodules.
+                # `self.model` is StableAudioModel; `self.model.model` is the
+                # inner DiffusionCond.
+                #
+                # remove_lora() strips *every* LoRA parametrization in one
+                # pass. We use it instead of remove_lora_by_index(..., 0) in a
+                # loop: removal does NOT renumber the remaining adapters, so
+                # repeatedly popping index 0 only ever clears the first LoRA
+                # and leaves indices 1..n-1 stranded — stale adapters then
+                # contaminate every later generation with a different stack.
+                inner = self.model.model
+                remove_lora(inner.model)
+                remove_lora(inner.conditioner)
+            except Exception as exc:
+                # If removal fails (e.g. an upstream API change), force a
+                # base-model reload so we don't carry stale adapters. KEEP
+                # _model_id intact — _ensure_model needs it to know what to
+                # reload. (Previous code zeroed it; the reload then raised
+                # "Unknown SA3 model_id: None".)
+                logger.warning(
+                    "LoRA removal failed (%s); reloading base model %s",
+                    exc, self._model_id,
+                )
+                self.model = None
+        # From here on the old adapters are gone (stripped, or the model was
+        # dropped). Record that BEFORE anything else can raise; otherwise a
+        # later call with the old paths would hit the "unchanged" fast path
+        # against a model that no longer carries them.
+        self._loaded_loras = []
+        if self.model is None and self._model_id is not None:
+            # Forced full reload (only if remove failed above).
+            self._ensure_model(self._model_id, device=self._device, half=True)
+        if not stack:
+            return
+        if self.model is None:
+            raise RuntimeError(
+                "Cannot load LoRAs: adapter removal failed and there is no "
+                "model id to reload the base model from."
+            )
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore")
+            self.model.load_lora(new_paths)
+        for i, entry in enumerate(stack):
+            self.model.set_lora_strength(entry["strength"], lora_index=i)
+        self._loaded_loras = stack
+    def set_lora_strength(self, index: int, strength: float) -> bool:
+        """Live-update one loaded slot's strength.
+        Returns False (changing nothing) when no model is loaded or ``index``
+        is outside the currently loaded stack; True after the update.
+        """
+        # Explicit None check, as in _apply_loras: truthiness of the model
+        # object is not a reliable "is a model loaded" signal.
+        if self.model is None or not 0 <= index < len(self._loaded_loras):
             return False
+        value = float(strength)
+        self.model.set_lora_strength(value, lora_index=index)
+        self._loaded_loras[index]["strength"] = value
+        return True
+    # --- public entry ---------------------------------------------------------
     def generate_audio(
         self,
         prompt: str,
+        *,
+        model_id: str,
         duration: float = 10.0,
+        steps: Optional[int] = None,
+        cfg_scale: Optional[float] = None,
         seed: int = -1,
+        negative_prompt: Optional[str] = None,
+        batch_size: int = 1,
+        device: Optional[str] = None,
+        half: bool = True,
+        chunked_decode: Optional[bool] = None,
+        loop_mode: bool = False,                 # bars-mode passthrough
+        loras: Optional[list] = None,            # [{path, strength}, ...]
+        # Phase 7: audio-to-audio + inpainting -----------------------------
+        init_audio_path: Optional[str] = None,
+        init_noise_level: float = 1.0,
+        inpaint_audio_path: Optional[str] = None,
+        inpaint_starts: Optional[list] = None,   # list[float], seconds
+        inpaint_ends: Optional[list] = None,
+        # Phase 7: seamless looping ----------------------------------------
+        loop_stitch: Optional[str] = None,       # "inpaint" | "crossfade" | None
+        loop_bars: Optional[int] = None,
+        loop_bpm: Optional[float] = None,
+        **_ignored_legacy_kwargs: Any,
     ) -> Path:
+        """Generate one clip with SA3 and return the path of the written WAV.
+        Loads/reuses the model for ``model_id``, applies the requested LoRA
+        stack, samples, then hands the result to ``_finalize``. Progress is
+        published through ``_set_progress`` and always ends in a terminal
+        state (``complete``, ``idle`` after a stop, or ``failed``).
+        Notes on arguments:
+          * ``duration`` is clamped to ``[1.0, max duration of the model]``.
+          * ``steps`` defaults to 50 for base models and 8 otherwise.
+          * ``cfg_scale`` is honoured only for base models (default 7.0);
+            every other kind is forced to 1.0.
+          * ``loop_mode``, ``loop_stitch``, ``loop_bars``, ``loop_bpm`` and
+            any extra keyword arguments are accepted but not used here.
+        Raises GenerationStopped when a stop is requested during the run; any
+        other exception is re-raised after progress is marked ``failed``.
+        """
+        # Reset the cancellation flag for this run. A stop check placed
+        # directly after this reset could never fire, so there is none; stops
+        # arriving from here on are caught before sampling and per ODE step.
+        self._stop_requested = False
+        # `loop_stitch` / `loop_bars` / `loop_bpm` are accepted for API
+        # compatibility but ignored — the seamless-loop pipeline was
+        # removed because user A/B testing showed it degraded audio
+        # quality on every prompt class. We deliver raw model output.
+        _set_progress(
+            is_generating=True, phase="loading",
+            step=0, total_steps=0, error=None,
+            started_at=time.time(), ended_at=None,
+        )
+        # One guarded region from loading through writing, so no exit path
+        # can leave the progress state stuck at is_generating=True.
+        try:
+            self._ensure_model(model_id, device=device, half=half)
+            self._apply_loras(loras or [])
+            init_audio = self._load_audio(init_audio_path) if init_audio_path else None
+            inpaint_audio = self._load_audio(inpaint_audio_path) if inpaint_audio_path else None
+            _, kind, max_dur = _MODEL_INFO[model_id]
+            is_base = (kind == "base")
+            # Defaults differ by model kind. Post-trained models distilled CFG
+            # away; we force cfg=1.0 there even if the caller overrides.
+            effective_steps = int(steps) if steps else (50 if is_base else 8)
+            effective_cfg = float(cfg_scale) if (cfg_scale is not None and is_base) else (
+                7.0 if is_base else 1.0
+            )
+            duration = float(min(max(1.0, float(duration)), float(max_dur)))
+            if self._stop_requested:              # last check before the heavy call
+                raise GenerationStopped()
+            # Sampler callback — fires per ODE step. Also gives us a cheap
+            # cancellation hook: raising mid-callback aborts the sampler.
+            def _cb(info: Dict[str, Any]) -> None:
+                if self._stop_requested:
+                    raise GenerationStopped()
+                i = info.get("i")
+                if isinstance(i, int):
+                    _set_progress(step=min(i + 1, effective_steps))
+            _set_progress(phase="sampling", total_steps=effective_steps, step=0)
+            gen_kwargs = dict(
+                prompt=prompt,
+                negative_prompt=negative_prompt or None,
+                duration=duration,
+                steps=effective_steps,
+                cfg_scale=effective_cfg,
+                seed=int(seed),
+                batch_size=int(batch_size),
+                chunked_decode=chunked_decode,
+                callback=_cb,
+            )
+            if init_audio is not None:
+                gen_kwargs["init_audio"] = init_audio
+                gen_kwargs["init_noise_level"] = float(init_noise_level)
+            if inpaint_audio is not None:
+                gen_kwargs["inpaint_audio"] = inpaint_audio
+                if inpaint_starts is not None and len(inpaint_starts) > 0:
+                    # SA3 accepts a single float or a list for multi-region.
+                    gen_kwargs["inpaint_mask_start_seconds"] = (
+                        list(inpaint_starts) if len(inpaint_starts) > 1 else float(inpaint_starts[0])
+                    )
+                if inpaint_ends is not None and len(inpaint_ends) > 0:
+                    gen_kwargs["inpaint_mask_end_seconds"] = (
+                        list(inpaint_ends) if len(inpaint_ends) > 1 else float(inpaint_ends[0])
+                    )
+            audio = self.model.generate(**gen_kwargs)
+            # audio: torch.Tensor[B, channels=2, samples] in [-1, 1] @ 44.1 kHz
+            #
+            # Seamless-loop processing (quantize, inpaint, crossfade) was
+            # removed: the user A/B-compared raw SA3 output against the full
+            # pipeline and confirmed the post-processing made every prompt
+            # worse — silence-at-start on percussion, smeared transients,
+            # off-grid anchoring. We now deliver the raw model output. The
+            # `loop_stitch` / `loop_bars` / `loop_bpm` parameters are still
+            # accepted from the frontend for API compatibility but are
+            # ignored. Performance-Bars looping will have an audible click
+            # at the wrap point and multi-channel stacks will not be
+            # sample-aligned — both acceptable trade-offs vs. the artifacts
+            # the quantizer was introducing.
+            _set_progress(phase="decoding", step=effective_steps)
+            out_path = self._finalize(audio, prompt=prompt, model_id=model_id)
+        except GenerationStopped:
+            _set_progress(phase="idle", is_generating=False, ended_at=time.time())
+            raise
+        except Exception as exc:
+            _set_progress(phase="failed", is_generating=False,
+                          error=str(exc), ended_at=time.time())
+            raise
+        # Only a fully written file counts as "complete".
+        _set_progress(phase="complete", is_generating=False,
+                      step=effective_steps, ended_at=time.time())
+        return out_path
+    # --- audio loader (a2a + inpaint inputs) ----------------------------------
+    @staticmethod
+    def _load_audio(path: str):
+        """Read an audio file and return ``(sample_rate, waveform)`` for SA3.
+        The waveform comes straight from ``torchaudio.load`` (channels first)
+        and is forced to two channels: a mono file is duplicated onto both
+        channels, anything wider keeps only its first two. No resampling is
+        done here; the file's own sample rate is returned as an int.
+        """
+        import torchaudio
+        waveform, sample_rate = torchaudio.load(str(path))
+        n_channels = waveform.shape[0]
+        if n_channels > 2:
+            waveform = waveform[:2]          # drop surplus channels
+        elif n_channels == 1:
+            waveform = waveform.repeat(2, 1)  # mono → identical L/R
+        return int(sample_rate), waveform
+    # --- output --------------------------------------------------------------
+    def _finalize(self, audio: torch.Tensor, *, prompt: str, model_id: str) -> Path:
+        """Write the first batch item of an SA3 result as a 16-bit 44.1 kHz WAV.
+        ``audio`` must be 3-D (indexed as batch, channels, samples); only
+        ``audio[0]`` is written, further batch items are discarded. The file
+        lands in the configured ``output`` directory as
+        ``<timestamp>_<model_id>_<slugified prompt>.wav``.
+        Raises RuntimeError when ``audio`` is not 3-D.
+        """
+        # Validate before doing any work on the tensor.
+        if audio.ndim != 3:
+            raise RuntimeError(f"Unexpected SA3 output shape {tuple(audio.shape)}")
+        # Convert to float32 on CPU before scaling: a half-precision result
+        # (possible when the model runs with half=True — TODO confirm) would
+        # otherwise be scaled to PCM in float16, and bfloat16 cannot be turned
+        # into a NumPy array at all. clamp() is out-of-place so the caller's
+        # tensor is never modified.
+        first = (
+            audio[0].detach().to(device="cpu", dtype=torch.float32).clamp(-1.0, 1.0)
+        )                                          # [C, samples]
+        pcm = (first.numpy() * 32767.0).astype(np.int16).T  # → [samples, C]
+        out_dir = self.config.get_path("output")
+        out_dir.mkdir(parents=True, exist_ok=True)
+        ts = time.strftime("%Y%m%d_%H%M%S")
+        out_path = out_dir / f"{ts}_{model_id}_{_slugify(prompt)}.wav"
+        sf.write(str(out_path), pcm, 44100, subtype="PCM_16")
+        return out_path

app/core/generation/audio_post_process.py CHANGED Viewed

@@ -1,9 +1,40 @@
 """Beat-align and tempo-conform a generated WAV to a target BPM and bar count.
 """
 from __future__ import annotations
 import logging
 from pathlib import Path
 from typing import Optional, Tuple
@@ -14,35 +45,429 @@ import soundfile as sf
 logger = logging.getLogger(__name__)
-# Safe range for phase-vocoder time-stretching. Wider than the previous
-# [0.7, 1.4] so we actually warp in more cases — librosa's vocoder produces
-# acceptable audio across this range for music, and the alternative
-# (no warp at all) drifts off the grid completely on loop.
 _STRETCH_SAFE_MIN = 0.6
 _STRETCH_SAFE_MAX = 1.7
 def align_to_grid(
     input_path: Path,
     target_bpm: float,
     target_bars: int,
     beats_per_bar: int = 4,
 ) -> Path:
     audio, sr = sf.read(str(input_path), always_2d=True)
     audio = audio.astype(np.float32, copy=False)
-    target_samples = int(round(target_bars * beats_per_bar * 60.0 / target_bpm * sr))
     mono = audio.mean(axis=1) if audio.shape[1] > 1 else audio[:, 0]
-    # Pass target_bpm as a prior to librosa — biases the beat tracker away
-    # from half-time / double-time interpretations of the same grid.
-    detected_bpm, first_beat = _detect_grid_anchor(mono, sr, start_bpm=target_bpm)
     head_offset = 0
-    if first_beat is not None and 0 < first_beat < sr * 1.5:
-        head_offset = first_beat
-        logger.info(f"align_to_grid: trimmed {head_offset / sr * 1000:.1f} ms to first beat")
-    elif first_beat is None:
         head_offset = _detect_first_onset_sample(mono, sr)
         if head_offset > 0:
             logger.info(f"align_to_grid: trimmed {head_offset / sr * 1000:.1f} ms (onset fallback)")
@@ -50,12 +475,26 @@ def align_to_grid(
     if head_offset > 0:
         audio = audio[head_offset:]
         mono = mono[head_offset:]
     if detected_bpm is not None:
-        rate, effective_bpm = _best_stretch_rate(detected_bpm, target_bpm)
-        if rate is not None:
-            if abs(rate - 1.0) > 1e-3:
-                audio = _time_stretch_multichannel(audio, rate)
             interp_note = (
                 f" (interpreted as {effective_bpm:.2f} BPM, "
                 f"octave={effective_bpm / detected_bpm:.2f}×)"
@@ -66,33 +505,267 @@ def align_to_grid(
                 f"align_to_grid: detected {detected_bpm:.2f} BPM{interp_note}, "
                 f"stretched by {rate:.4f} to match target {target_bpm:.2f} BPM"
             )
         else:
             logger.info(
                 f"align_to_grid: detected {detected_bpm:.2f} BPM has no safe "
-                f"interpretation vs target {target_bpm:.2f}; skipping warp"
             )
     else:
         logger.info("align_to_grid: no usable tempo detected; skipping warp")
     if audio.shape[0] > target_samples:
-        audio = audio[:target_samples]
-        # 8ms tail fade prevents the click at the loop boundary when the
-        # truncation point lands mid-waveform.
-        fade_samples = min(int(0.008 * sr), audio.shape[0])
-        if fade_samples > 1:
-            fade = np.linspace(1.0, 0.0, fade_samples, dtype=audio.dtype)
-            audio[-fade_samples:] *= fade[:, np.newaxis] if audio.ndim > 1 else fade
-    elif audio.shape[0] < target_samples:
-        pad = np.zeros((target_samples - audio.shape[0], audio.shape[1]), dtype=audio.dtype)
-        audio = np.concatenate([audio, pad], axis=0)
     sf.write(str(input_path), audio, sr, subtype="PCM_16")
     return input_path
 def _best_stretch_rate(
     detected_bpm: float,
     target_bpm: float,
 ) -> Tuple[Optional[float], float]:
     """Pick the time-stretch rate that maps detected → target, considering
     half-time and double-time interpretations of the detected tempo. Returns
@@ -101,27 +774,20 @@ def _best_stretch_rate(
     nothing safe is available.
     Order of preference:
-      1. Detected as-is, if it lands inside the safe stretch range.
       2. Octave-corrected (detected × 0.5 or × 2.0), only when the as-is
          interpretation is out of range. This is the librosa half-/double-
          time error recovery path.
-    This biases the algorithm toward honesty: only re-interpret the
-    detector's reading when it can't otherwise produce a usable stretch.
     """
-    # First, try the detector's reading at face value.
     rate_asis = target_bpm / detected_bpm
-    if _STRETCH_SAFE_MIN <= rate_asis <= _STRETCH_SAFE_MAX:
         return rate_asis, detected_bpm
-    # As-is is out of safe range — almost certainly a librosa octave error.
-    # Try the half-time and double-time reinterpretations and pick whichever
-    # is closest to a no-op stretch.
     candidates = []
     for octave_factor in (0.5, 2.0):
         interpreted = detected_bpm * octave_factor
         rate = target_bpm / interpreted
-        if _STRETCH_SAFE_MIN <= rate <= _STRETCH_SAFE_MAX:
             candidates.append((abs(rate - 1.0), rate, interpreted))
     if not candidates:
         return None, detected_bpm
@@ -130,6 +796,7 @@ def _best_stretch_rate(
     return best_rate, best_interp
 def _detect_first_onset_sample(mono: np.ndarray, sr: int) -> int:
     """Return the sample index of the first detected onset, or 0 if none found."""
     try:
@@ -147,15 +814,16 @@ def _detect_first_onset_sample(mono: np.ndarray, sr: int) -> int:
     return first
-def _detect_grid_anchor(
     mono: np.ndarray,
     sr: int,
     start_bpm: Optional[float] = None,
-) -> Tuple[Optional[float], Optional[int]]:
-    """Run librosa beat tracking with the target tempo as a prior. Passing
-    start_bpm reduces (but doesn't eliminate) half-time / double-time errors.
-    The octave-correction in _best_stretch_rate handles whatever librosa
-    still gets wrong."""
     try:
         kwargs = {"y": mono, "sr": sr, "units": "samples"}
         if start_bpm is not None and start_bpm > 0:
@@ -169,9 +837,10 @@ def _detect_grid_anchor(
     bpm = float(np.atleast_1d(tempo).flatten()[0])
     if not (40.0 <= bpm <= 240.0):
         return None, None
-    return bpm, int(beats[0])
 def _time_stretch_multichannel(audio: np.ndarray, rate: float) -> np.ndarray:
     """Phase-vocoder time stretch, applied per channel and re-stacked."""
     stretched = librosa.effects.time_stretch(audio.T, rate=rate)

 """Beat-align and tempo-conform a generated WAV to a target BPM and bar count.
+DEPRECATED — this entire module is superseded by ``app/core/loop_quantizer/``
+(see ``task_1.md`` and ``AUDIT.md`` §9 "Scheduled for removal"). The legacy
+``align_to_grid`` / ``align_for_loop`` path and the gated ``_stage_a_v2`` path
+both live here until the new module passes acceptance; once it does, every
+public symbol below will be removed and this file deleted. Do NOT add new
+callers, do NOT extend the v1 helpers, and prefer adding work directly under
+``app/core/loop_quantizer/`` for any new behaviour.
+SA3 generates at the exact requested duration via variable-length flow
+matching, so the post-processor's role is **drift correction**, not length
+control: it only nudges the audio when librosa detects that the realised
+tempo has drifted from the target. The tempo-conform gate is intentionally
+tight — `|rate - 1| > 5%` AND `rate in [0.85, 1.15]` — so we never warp
+audibly when SA3 was already close.
+Pipeline (in order):
+  1. Detect tempo + beat grid via librosa (with target BPM as prior).
+  2. Head-trim to the first detected beat (or first onset as fallback),
+     followed by a 3 ms equal-power fade-in to mask the trim seam.
+  3. Tempo-conform via phase-vocoder time-stretch, ONLY when the detected
+     tempo drifts >5% from target AND the resulting stretch lies inside
+     the safe range [0.85, 1.15]. Outside this window we leave the audio
+     alone and let the user re-roll.
+  4. End-anchored truncation: snap the cut to the nearest detected beat
+     within ±½ beat of the mathematical target sample count, so loops
+     don't end mid-note. Followed by a short equal-power fade-out
+     (``_TAIL_FADE_SEC``, 3 ms) so the loop seam doesn't click.
+  5. Zero-pad if the audio came out shorter than the target.
 """
 from __future__ import annotations
 import logging
+import os
+import warnings
 from pathlib import Path
 from typing import Optional, Tuple
 logger = logging.getLogger(__name__)
+# Spellings (after strip + lower-casing) that switch an env feature flag on.
+_ENV_FLAG_ON = ("1", "true", "yes", "on")
+def _env_flag_enabled(name: str) -> bool:
+    """Return True when env var ``name`` holds an "on" spelling; unset = off."""
+    raw = os.environ.get(name, "0")
+    return raw.strip().lower() in _ENV_FLAG_ON
+# DEPRECATED: flag goes away with the v1/v2 split (AUDIT.md §9d).
+def beatsync_v2_enabled() -> bool:
+    """Feature gate for the hardened Stage A pipeline (sample-exact length,
+    first-transient-to-zero alignment, transient-preserving stretch).
+    Off by default: with the flag unset, every Stage A function takes its
+    legacy code path, so Bars-mode output is byte-identical to pre-flag
+    builds and Seconds mode (which never enters Stage A at all) is unaffected.
+    Enable with ``FRAGMENTA_BEATSYNC_V2=1``.
+    """
+    return _env_flag_enabled("FRAGMENTA_BEATSYNC_V2")
+# DEPRECATED: flag goes away with the v1/v2 split (AUDIT.md §9d).
+def _warp_enabled() -> bool:
+    """Per-beat (Ableton 'Beats'-style) warp gate — OFF by default.
+    The warp is only as reliable as librosa's per-beat detection; on real audio
+    a single mis-detected beat scrambles the groove. Anchor + exact-crop already
+    lands real loops at ~3 ms, so the warp is opt-in for experimentation only.
+    Enable with ``FRAGMENTA_BEATSYNC_WARP=1``."""
+    return _env_flag_enabled("FRAGMENTA_BEATSYNC_WARP")
+# Liberal module-default range for `_best_stretch_rate`. Kept wide so any
+# future force-warp caller has room; the bars-mode drift-correction path
+# (`align_to_grid`) overrides with tighter bounds below.
+# Both bounds are stretch ratios (target_bpm / detected_bpm), not BPM values.
 _STRETCH_SAFE_MIN = 0.6
 _STRETCH_SAFE_MAX = 1.7
+# Bars-mode drift correction. SA3 hits the requested duration exactly via
+# variable-length generation, so the post-processor only kicks in when the
+# detected tempo of the generated audio drifts from the requested target.
+# Tight gates avoid audible vocoder artifacts when SA3 was already close.
+_BARS_MODE_STRETCH_MIN = 0.85
+_BARS_MODE_STRETCH_MAX = 1.15
+_BARS_MODE_DEADBAND = 0.05
+# Loop-mode (Phase 7) is stricter — a 5% tempo slack compounds visibly when
+# multiple loop channels run side-by-side, even though loop iteration
+# lengths are sample-exact. 0.5% is below librosa's noise floor for beat
+# detection on rhythmic content, so we won't be acting on noise, but we
+# WILL correct anything detectable that the looser bars-mode would skip.
+_LOOP_MODE_DEADBAND = 0.005
+# Fade durations applied at trim points. Kept very short — the fade is
+# click-prevention, not a perceptible ramp. Performance Mode loops these
+# clips, and longer fades audibly "duck" the loop seam.
+_HEAD_FADE_SEC = 0.003   # mask click at the trimmed head
+_TAIL_FADE_SEC = 0.003   # mask click at a mid-note truncation; skipped on beats
+# Trailing-silence detection. SA3 occasionally pads a generation with low-
+# level tail; the post-processor used to keep that and fade over it, which
+# produced perceptible "silence + duck" at the loop point.
+_SILENCE_THRESHOLD_DB = -50.0          # anything below is silence
+_SILENCE_WINDOW_SEC = 0.05             # RMS window granularity
+_SILENCE_TAIL_KEEP_SEC = 0.010         # leave a tiny natural decay
+# v2 first-transient search: a downbeat lands within the first bar or two of
+# generated content, so we never hunt past this window for the musical "1".
+_V2_TRANSIENT_SEARCH_SEC = 1.5
+_V2_STRONG_RATIO = 0.30                 # candidate must reach 30% of peak
+_V2_RISE_RATIO = 0.15                   # rising-edge threshold for refinement
+_V2_REFINE_WIN_SEC = 0.03               # +/- window for sample-accurate refine
+# Grid confidence. librosa's beat tracker emits a tempo for ANY input — on
+# ambient/textural content it is essentially noise (measured: 49-161 BPM on a
+# 120-BPM target, 130+ ms intra-beat drift). Warping toward a wrong detected
+# tempo is worse than not warping, so we only tempo-conform when the detected
+# grid is trustworthy: beats evenly spaced (low interval CV) AND a clear pulse
+# in the onset envelope. Below the threshold we trust the *requested* grid and
+# skip the stretch (still doing the safe, tempo-independent transient@0 + crop).
+# Calibrated on real fixtures: clean drum/bass loops score 0.76-0.88, pure
+# pads 0.00 (no trackable beat), and ambiguous textures 0.44-0.57 — often with
+# a wrong detected tempo. 0.65 sits in that gap. (The safe-range gate in
+# _best_stretch_rate independently rejects octave-wrong tempos like 49/161 BPM.)
+_GRID_CONFIDENCE_MIN = 0.65
+_CV_MAX = 0.20                          # interval CV at which regularity -> 0
+# Beat-synchronous warp (Ableton "Beats"-style). Measured: real drum loops are
+# already coherent to ~3-6 ms, where anchor+exact-crop alone lands single-digit
+# ms — so a global/elastic warp there only adds phase-vocoder jitter for no gain.
+# We therefore warp ONLY when a confident grid still drifts past this threshold,
+# and need enough beats to define segments.
+# _WARP_DRIFT_MIN_MS is in milliseconds; _stage_a_v2 converts it to samples
+# (× sr / 1000) before comparing. _WARP_MIN_BEATS is also the minimum beat
+# count for _stage_a_v2 to trust the detected grid at all, warp or not.
+_WARP_DRIFT_MIN_MS = 15.0
+_WARP_MIN_BEATS = 6
+# === Stage A v2 (FRAGMENTA_BEATSYNC_V2) ====================================
+# DEPRECATED: every symbol in this section is scheduled for relocation into
+# `app/core/loop_quantizer` (see AUDIT.md §9c). Port the logic, then delete
+# the originals here. Do NOT add new callers to anything below.
+# A single hardened core shared by both align entry points. It enforces the
+# locked invariants directly instead of relying on librosa's beat[0] for
+# phase and on end-snap/silence-trim for length:
+#   * tempo conform with a bounded phase-vocoder stretch (_conform_stretch) —
+#     gen-time warp only, no live tracking (decision: v1);
+#   * align the first STRONG transient to sample 0 (rotate-free head trim) so
+#     two independently-correct clips share a downbeat with zero per-clip code;
+#   * crop to the exact target sample count — overgenerate-then-trim, never
+#     zero-pad in the common path (pad only as a logged last resort).
+def _stage_a_v2(
+    audio: np.ndarray,
+    sr: int,
+    *,
+    target_samples: int,
+    target_bpm: float,
+    deadband: float,
+) -> np.ndarray:
+    """Hardened Stage A core. Input: ``[T, C]`` audio; returns float32 ``[T, C]``
+    of exactly ``target_samples`` rows.
+    Decides per clip how to land it on the grid:
+      * untrusted grid (no tempo detected, confidence below
+        ``_GRID_CONFIDENCE_MIN``, or fewer than ``_WARP_MIN_BEATS`` beats) ->
+        place as-is: no anchor trim and no warp, only the exact-length
+        crop/pad (trust the requested grid — Ableton likewise won't warp a
+        pulse-less texture);
+      * trusted, per-beat warp enabled and drift above ``_WARP_DRIFT_MIN_MS``
+        -> beat-synchronous warp (each inter-beat segment stretched onto the
+        exact grid, Ableton "Beats" warp);
+      * trusted otherwise -> anchor + (optional) whole-loop tempo nudge when
+        the stretch rate differs from 1 by more than ``deadband``; the
+        measured workhorse path (single-digit ms on real loops).
+    Trusted clips are anchored first: the first tracked beat, refined to its
+    transient, is moved to sample 0. Every path ends with the exact crop.
+    """
+    mono = audio.mean(axis=1) if audio.shape[1] > 1 else audio[:, 0]
+    detected_bpm, beats = _detect_grid(mono, sr, start_bpm=target_bpm)
+    confidence = _grid_confidence(mono, sr, beats)
+    spb = sr * 60.0 / target_bpm
+    trusted = (
+        detected_bpm is not None
+        and confidence >= _GRID_CONFIDENCE_MIN
+        and beats is not None
+        and len(beats) >= _WARP_MIN_BEATS
+    )
+    if not trusted:
+        # Report the condition that actually failed; a grid rejected for
+        # having too few beats is not a "low confidence" grid.
+        if detected_bpm is None or beats is None:
+            reason = "no usable grid"
+        elif confidence < _GRID_CONFIDENCE_MIN:
+            reason = "low grid confidence (%.2f < %.2f)" % (
+                confidence, _GRID_CONFIDENCE_MIN)
+        else:
+            reason = "too few beats (%d < %d)" % (len(beats), _WARP_MIN_BEATS)
+        logger.info(
+            "stage_a_v2: %s; trusting requested %.2f BPM grid, exact-length only",
+            reason, target_bpm,
+        )
+        return _exact_len(audio, target_samples, sr)
+    # --- anchor the musical "1" to sample 0 (INV#4, enables INV#9) --------
+    # Anchor to the first TRACKED beat, not the "first loud onset": the tracked
+    # beat is the same metrical position across clips, so two loops coincide;
+    # "first loud onset" lands on whatever transient happens to be loudest and
+    # differs per clip (measured: 200+ ms apart). Refine beats[0] to the exact
+    # rising edge for sample accuracy.
+    anchor = _refine_to_transient(mono, int(beats[0]), sr)
+    if anchor > 0:
+        # The mono mix-down is not needed past this point, so it is not
+        # recomputed for the trimmed audio.
+        audio = audio[anchor:]
+    beats = np.asarray(beats, dtype=np.int64) - anchor
+    beats = beats[beats >= 0]
+    drift = _grid_drift_samples(beats)
+    if (_warp_enabled() and drift > _WARP_DRIFT_MIN_MS * sr / 1000.0
+            and len(beats) >= 2):
+        # OFF BY DEFAULT (FRAGMENTA_BEATSYNC_WARP). Per-beat warp is only as good
+        # as librosa's beat detection — when detection is even slightly off it
+        # warps the wrong points onto the grid and SCRAMBLES the groove on real
+        # audio. Measured gain on clean drift was marginal (it merely halved it
+        # and added jitter), while anchor + exact-crop already lands real loops
+        # at ~3 ms. So it's opt-in for experiments, not the default path.
+        audio = _beat_sync_warp(audio, beats, spb)
+        logger.info("stage_a_v2: anchored + beat-sync warp (intra-loop drift "
+                    "%.1f ms)", drift / sr * 1000)
+    else:
+        # Already coherent: a single global stretch is sufficient (and cleaner
+        # than per-segment warping) when the overall tempo is off; otherwise
+        # the anchor + exact crop is all that's needed.
+        rate, _ = _best_stretch_rate(
+            detected_bpm, target_bpm,
+            safe_min=_BARS_MODE_STRETCH_MIN, safe_max=_BARS_MODE_STRETCH_MAX,
+        )
+        if rate is not None and abs(rate - 1.0) > deadband:
+            audio = _conform_stretch(audio, rate, sr)
+            logger.info("stage_a_v2: anchored + global tempo conform x%.4f "
+                        "(detected %.2f -> %.2f)", rate, detected_bpm, target_bpm)
+        else:
+            logger.info("stage_a_v2: anchored only (low drift, on-tempo)")
+    return _exact_len(audio, target_samples, sr)
+def _exact_len(audio: np.ndarray, target_samples: int, sr: int) -> np.ndarray:
+    """Crop to exactly target_samples (INV#2/#3). Pads only as a logged last
+    resort — the generation overshoots duration so trimming is the norm."""
+    if audio.shape[0] >= target_samples:
+        return np.ascontiguousarray(audio[:target_samples], dtype=np.float32)
+    pad = target_samples - audio.shape[0]
+    logger.warning(
+        "stage_a_v2: content short by %d samp (%.0f ms) — padding as a last "
+        "resort; raise generation headroom or re-roll", pad, pad / sr * 1000,
+    )
+    return np.ascontiguousarray(
+        np.concatenate([audio, np.zeros((pad, audio.shape[1]), np.float32)], 0),
+        dtype=np.float32,
+    )
+def _grid_drift_samples(beats: Optional[np.ndarray]) -> float:
+    """Std of detected-beat residuals vs a uniform least-squares grid (samples).
+    A coherent loop sits near 0; tempo wobble shows up as a large residual."""
+    if beats is None or len(beats) < 4:
+        return 0.0
+    idx = np.arange(len(beats))
+    A = np.vstack([idx, np.ones_like(idx)]).T
+    slope, icpt = np.linalg.lstsq(A, beats.astype(float), rcond=None)[0]
+    resid = beats.astype(float) - (slope * idx + icpt)
+    return float(np.std(resid))
+def _refine_to_transient(mono: np.ndarray, approx: int, sr: int,
+                         win_sec: float = 0.015) -> int:
+    """Snap a frame-resolution beat sample to the exact rising edge of the
+    transient AT that beat. librosa picks WHICH transient is the beat (good);
+    this gives it sample accuracy (INV#4). The window is deliberately tight
+    (~15 ms): wide enough to cover beat-tracker frame jitter, narrow enough not
+    to jump to a neighbouring transient (which would desync clips, INV#9)."""
+    n = len(mono)
+    if n == 0:
+        return 0
+    approx = int(max(0, min(approx, n - 1)))
+    lo = max(0, approx - int(sr * win_sec))
+    hi = min(n, approx + int(sr * win_sec))
+    if hi - lo < 2:
+        return approx
+    seg = np.abs(mono[lo:hi])
+    pk = float(seg.max())
+    if pk <= 1e-6:
+        return approx
+    above = np.flatnonzero(seg >= _V2_RISE_RATIO * pk)
+    return int(lo + above[0]) if len(above) else approx
+def _beat_sync_warp(audio: np.ndarray, beats: np.ndarray, spb: float) -> np.ndarray:
+    """Ableton 'Beats'-style warp: stretch each inter-beat segment to exactly
+    round(spb) samples. Output starts at the first detected beat and has a
+    perfectly uniform grid, so two clips at the same tempo become sample-for-
+    sample periodic (INV#9). Phase-vocoder per segment; only invoked when drift
+    is high enough to be worth the boundary jitter."""
+    beats = np.asarray(beats, dtype=np.int64)
+    beats = beats[(beats >= 0) & (beats < audio.shape[0])]
+    if len(beats) < 2:
+        return audio
+    target_spb = int(round(spb))
+    segs = []
+    for i in range(len(beats) - 1):
+        s, e = int(beats[i]), int(beats[i + 1])
+        seg = audio[s:e]
+        if seg.shape[0] < 16:
+            continue
+        rate = float(np.clip(seg.shape[0] / spb, 0.5, 2.0))
+        w = librosa.effects.time_stretch(seg.T, rate=rate).T
+        if w.shape[0] >= target_spb:
+            w = w[:target_spb]
+        else:
+            w = np.concatenate(
+                [w, np.zeros((target_spb - w.shape[0], w.shape[1]), np.float32)], 0)
+        segs.append(np.ascontiguousarray(w, dtype=np.float32))
+    return np.concatenate(segs, 0) if segs else audio
+def _grid_confidence(
+    mono: np.ndarray, sr: int, beats: Optional[np.ndarray]
+) -> float:
+    """Trustworthiness of the detected beat grid, in [0, 1].
+    Two evidence sources, averaged:
+      * regularity — how evenly spaced the detected beats are (1 - interval
+        coefficient of variation, clamped); a locked tracker gives near-even
+        intervals, ambient content gives erratic ones;
+      * pulse clarity — the strongest off-zero peak of the onset-envelope
+        autocorrelation relative to lag 0; high when there is a real periodic
+        pulse, low for drones/pads.
+    """
+    if beats is None or len(beats) < 4:
+        return 0.0
+    intervals = np.diff(beats.astype(np.float64))
+    mean_i = float(np.mean(intervals)) if len(intervals) else 0.0
+    if mean_i <= 0:
+        return 0.0
+    cv = float(np.std(intervals) / mean_i)
+    regularity = max(0.0, min(1.0, 1.0 - cv / _CV_MAX))
+    clarity = 0.0
+    try:
+        oenv = librosa.onset.onset_strength(y=mono, sr=sr)
+        oenv = oenv - float(np.mean(oenv))
+        ac = librosa.autocorrelate(oenv)
+        if len(ac) > 4 and ac[0] > 0:
+            clarity = float(np.max(ac[4:]) / ac[0])
+            clarity = max(0.0, min(1.0, clarity))
+    except Exception as exc:
+        logger.warning("grid-confidence clarity failed: %s", exc)
+    return 0.5 * regularity + 0.5 * clarity
def _first_strong_transient(mono: np.ndarray, sr: int) -> int:
    """Locate the first strong transient and refine it to its rising edge.

    Only the first ``_V2_TRANSIENT_SEARCH_SEC`` seconds are considered. The
    search runs in two stages:
      1. librosa onset candidates are scanned in order; the first one whose
         following 50 ms reaches ``_V2_STRONG_RATIO`` of the search-window
         peak is chosen. If none qualifies (or onset detection fails), the
         first sample crossing that same level is returned directly.
      2. Around the chosen onset, within ``_V2_REFINE_WIN_SEC`` on each
         side, the first sample reaching ``_V2_RISE_RATIO`` of the local
         peak is returned.

    Args:
        mono: 1-D signal to search.
        sr: Sample rate of ``mono``.

    Returns:
        Sample index of the transient, or ``0`` when the search window is
        empty or its peak is at or below ``1e-6``.
    """
    total = len(mono)
    search = min(total, int(sr * _V2_TRANSIENT_SEARCH_SEC))
    if search <= 0:
        return 0
    head = np.abs(mono[:search])
    peak = float(np.max(head))
    if peak <= 1e-6:
        return 0
    strong_level = _V2_STRONG_RATIO * peak

    try:
        onsets = librosa.onset.onset_detect(
            y=mono, sr=sr, units="samples", backtrack=True
        )
    except Exception as exc:
        logger.warning("v2 onset detection failed: %s", exc)
        onsets = None

    cand: Optional[int] = None
    if onsets is not None and len(onsets) > 0:
        look = int(sr * 0.05)
        for onset in np.asarray(onsets, dtype=np.int64):
            if onset >= search:
                break
            begin = int(onset)
            finish = min(total, begin + look)
            if float(np.max(np.abs(mono[begin:finish]))) >= strong_level:
                cand = begin
                break

    if cand is None:
        # No qualifying onset: take the first sample that reaches the strong
        # level inside the search window (covers smooth/pad content).
        crossing = np.flatnonzero(head >= strong_level)
        return int(crossing[0]) if len(crossing) else 0

    win = int(sr * _V2_REFINE_WIN_SEC)
    lo = max(0, cand - win)
    hi = min(total, cand + win)
    window = np.abs(mono[lo:hi])
    # A zero local peak falls back to the search-window peak.
    local_peak = float(np.max(window)) or peak
    rising = np.flatnonzero(window >= _V2_RISE_RATIO * local_peak)
    if len(rising) == 0:
        return cand
    return int(lo + rising[0])
+def _conform_stretch(audio: np.ndarray, rate: float, sr: int) -> np.ndarray:
+    """Tempo-conform time-stretch — the INV#5 "justified equivalent".
+    We use the librosa phase vocoder (no external binary to ship) rather than
+    RubberBand's transient mode, justified by three properties that keep
+    transient smearing perceptually negligible here:
+      1. Bounded rate. This only runs inside the safe range [0.85, 1.15] — at
+         most a 15% stretch — where phase-vocoder transient blur is minor.
+      2. Rare path. It fires only on high grid-confidence, off-by->0.5%-tempo
+         loops; SA3 usually hits the target at gen-time and skips it entirely.
+      3. The perceptually critical transient — the downbeat — is positioned by
+         the sample-accurate trim in `_stage_a_v2`, NOT by this stretch, so the
+         musical "1" is never vocoded.
+    `sr` is accepted for call-site symmetry (the phase vocoder is rate-only)."""
+    if abs(rate - 1.0) < 1e-9:
+        return audio
+    return _time_stretch_multichannel(audio, rate)
+# DEPRECATED: superseded by app/core/loop_quantizer (see task_1.md / AUDIT.md §9a).
+# Public entry; emits DeprecationWarning at runtime. Scheduled for removal once
+# the new module passes acceptance.
 def align_to_grid(
     input_path: Path,
     target_bpm: float,
     target_bars: int,
     beats_per_bar: int = 4,
 ) -> Path:
+    warnings.warn(
+        "align_to_grid is deprecated and will be removed once "
+        "app/core/loop_quantizer ships (see task_1.md / AUDIT.md §9a).",
+        DeprecationWarning,
+        stacklevel=2,
+    )
     audio, sr = sf.read(str(input_path), always_2d=True)
     audio = audio.astype(np.float32, copy=False)
+    samples_per_beat = sr * 60.0 / float(target_bpm)
+    target_samples = int(round(target_bars * beats_per_bar * samples_per_beat))
+    if beatsync_v2_enabled():
+        out = _stage_a_v2(
+            np.ascontiguousarray(audio), sr,
+            target_samples=target_samples, target_bpm=float(target_bpm),
+            deadband=_BARS_MODE_DEADBAND,
+        )
+        # 3 ms head fade-in masks any click at the new sample-0 transient.
+        _apply_fade(out, _HEAD_FADE_SEC, sr, fade_in=True)
+        sf.write(str(input_path), out, sr, subtype="PCM_16")
+        logger.info("align_to_grid[v2]: %d samples (exact target %d)",
+                    out.shape[0], target_samples)
+        return input_path
     mono = audio.mean(axis=1) if audio.shape[1] > 1 else audio[:, 0]
+    detected_bpm, beat_samples = _detect_grid(mono, sr, start_bpm=target_bpm)
+    # --- Head trim ---------------------------------------------------------
     head_offset = 0
+    if beat_samples is not None and len(beat_samples) > 0:
+        first_beat = int(beat_samples[0])
+        if 0 < first_beat < sr * 1.5:
+            head_offset = first_beat
+            logger.info(f"align_to_grid: trimmed {head_offset / sr * 1000:.1f} ms to first beat")
+    elif beat_samples is None:
         head_offset = _detect_first_onset_sample(mono, sr)
         if head_offset > 0:
             logger.info(f"align_to_grid: trimmed {head_offset / sr * 1000:.1f} ms (onset fallback)")
     if head_offset > 0:
         audio = audio[head_offset:]
         mono = mono[head_offset:]
+        if beat_samples is not None:
+            shifted = np.asarray(beat_samples, dtype=np.int64) - head_offset
+            beat_samples = shifted[shifted > 0]
+        # Head fade-in: 3 ms equal-power so the trim seam doesn't click.
+        _apply_fade(audio, _HEAD_FADE_SEC, sr, fade_in=True)
+    # --- Tempo conform -----------------------------------------------------
     if detected_bpm is not None:
+        rate, effective_bpm = _best_stretch_rate(
+            detected_bpm,
+            target_bpm,
+            safe_min=_BARS_MODE_STRETCH_MIN,
+            safe_max=_BARS_MODE_STRETCH_MAX,
+        )
+        if rate is not None and abs(rate - 1.0) > _BARS_MODE_DEADBAND:
+            audio = _time_stretch_multichannel(audio, rate)
+            # Beats have moved — re-detect from the warped audio so the
+            # end-snap step below sees current beat positions.
+            mono = audio.mean(axis=1) if audio.shape[1] > 1 else audio[:, 0]
+            _, beat_samples = _detect_grid(mono, sr, start_bpm=target_bpm)
             interp_note = (
                 f" (interpreted as {effective_bpm:.2f} BPM, "
                 f"octave={effective_bpm / detected_bpm:.2f}×)"
                 f"align_to_grid: detected {detected_bpm:.2f} BPM{interp_note}, "
                 f"stretched by {rate:.4f} to match target {target_bpm:.2f} BPM"
             )
+        elif rate is not None:
+            logger.info(
+                f"align_to_grid: detected {detected_bpm:.2f} BPM is within "
+                f"{_BARS_MODE_DEADBAND * 100:.0f}% of target {target_bpm:.2f}; "
+                f"skipping stretch to preserve transients"
+            )
         else:
             logger.info(
                 f"align_to_grid: detected {detected_bpm:.2f} BPM has no safe "
+                f"interpretation vs target {target_bpm:.2f} within "
+                f"[{_BARS_MODE_STRETCH_MIN:.2f}, {_BARS_MODE_STRETCH_MAX:.2f}]; "
+                f"skipping warp (user re-roll recommended)"
             )
     else:
         logger.info("align_to_grid: no usable tempo detected; skipping warp")
+    # --- Trim trailing silence --------------------------------------------
+    # Done before end-snap so the snap operates on real audio, not on
+    # beats that happen to fall inside a quiet tail.
+    new_len = _trailing_audio_end(audio, sr)
+    if new_len < audio.shape[0]:
+        trimmed_ms = (audio.shape[0] - new_len) / sr * 1000
+        logger.info(f"align_to_grid: trimmed {trimmed_ms:.0f} ms trailing silence")
+        audio = audio[:new_len]
+        if beat_samples is not None:
+            beat_samples = beat_samples[beat_samples < new_len]
+    # --- End-anchored truncation ------------------------------------------
     if audio.shape[0] > target_samples:
+        end = _snap_to_beat(target_samples, beat_samples, samples_per_beat, audio.shape[0])
+        cut_on_beat = beat_samples is not None and end in beat_samples.tolist()
+        audio = audio[:end]
+        if not cut_on_beat:
+            # Mid-note cut — short fade hides the click. On a clean beat
+            # boundary the cut is on a natural transient edge, so the fade
+            # would only "duck" the start of the next beat at the loop
+            # seam without preventing any audible click.
+            _apply_fade(audio, _TAIL_FADE_SEC, sr, fade_in=False)
+    # If we came in shorter than target, return the actual audio without
+    # zero-padding. A 7.5-bar clip that loops cleanly beats an 8-bar clip
+    # with 0.5 bars of silence at the loop seam.
     sf.write(str(input_path), audio, sr, subtype="PCM_16")
     return input_path
+# --- Phase 7 loop alignment -----------------------------------------------
+# DEPRECATED: superseded by app/core/loop_quantizer (see task_1.md / AUDIT.md §9a).
+# Public entry; emits DeprecationWarning at runtime. Scheduled for removal once
+# the new module passes acceptance.
def align_for_loop(
    audio: np.ndarray,
    sr: int,
    *,
    target_samples: int,
    target_bpm: float,
) -> np.ndarray:
    """Align a clip for seamless looping at an exact sample length.

    DEPRECATED — superseded by ``app/core/loop_quantizer`` (see ``task_1.md`` /
    ``AUDIT.md`` §9a); a ``DeprecationWarning`` is emitted on every call.

    When ``beatsync_v2_enabled()`` is true the work is handed to
    ``_stage_a_v2`` using the loop-mode deadband. Otherwise the legacy
    in-memory pipeline runs:
      1. Detect tempo and beat grid.
      2. Stretch uniformly when the required rate differs from 1.0 by more
         than ``_LOOP_MODE_DEADBAND`` and a safe rate exists; the grid is
         then re-detected on the stretched audio. With no safe rate the
         tempo is left as-is.
      3. Head-trim to the first detected beat if it lies within the first
         1.5 s; otherwise fall back to the first detected onset, again only
         within 1.5 s. This puts the loop's start at sample 0.
      4. Crop or zero-pad to exactly ``target_samples``. No end-snap and no
         fade are applied.

    Args:
        audio: 1-D or 2-D ``[sample, channel]`` array.
        sr: Sample rate of ``audio``.
        target_samples: Exact output length in samples.
        target_bpm: Tempo the clip should conform to.

    Returns:
        Array with ``target_samples`` rows; 1-D if the input was 1-D.
    """
    warnings.warn(
        "align_for_loop is deprecated and will be removed once "
        "app/core/loop_quantizer ships (see task_1.md / AUDIT.md §9a).",
        DeprecationWarning,
        stacklevel=2,
    )

    was_1d = audio.ndim == 1
    if was_1d:
        audio = audio[:, np.newaxis]
    audio = np.ascontiguousarray(audio, dtype=np.float32)

    def _restore(arr: np.ndarray) -> np.ndarray:
        # Hand back the dimensionality the caller passed in.
        return arr.squeeze(1) if was_1d else arr

    def _downmix(arr: np.ndarray) -> np.ndarray:
        # Channel mean for multi-channel input, the lone channel otherwise.
        return arr.mean(axis=1) if arr.shape[1] > 1 else arr[:, 0]

    if beatsync_v2_enabled():
        return _restore(_stage_a_v2(
            audio, sr,
            target_samples=target_samples, target_bpm=float(target_bpm),
            deadband=_LOOP_MODE_DEADBAND,
        ))

    mono = _downmix(audio)
    detected_bpm, beat_samples = _detect_grid(mono, sr, start_bpm=target_bpm)

    # --- 1+2: tempo conform ---------------------------------------------
    if detected_bpm is None:
        logger.info("align_for_loop: no usable tempo detected; skipping stretch")
    else:
        rate, effective_bpm = _best_stretch_rate(
            detected_bpm,
            target_bpm,
            safe_min=_BARS_MODE_STRETCH_MIN,
            safe_max=_BARS_MODE_STRETCH_MAX,
        )
        if rate is None:
            logger.info(
                "align_for_loop: detected %.2f BPM has no safe stretch to "
                "%.2f target within [%.2f, %.2f]; leaving tempo as-is",
                detected_bpm, target_bpm,
                _BARS_MODE_STRETCH_MIN, _BARS_MODE_STRETCH_MAX,
            )
        elif abs(rate - 1.0) > _LOOP_MODE_DEADBAND:
            audio = _time_stretch_multichannel(audio, rate)
            # Beats moved with the stretch, so detect the grid again.
            mono = _downmix(audio)
            _, beat_samples = _detect_grid(mono, sr, start_bpm=target_bpm)
            if abs(effective_bpm - detected_bpm) > 1e-2:
                interp = f" (interpreted as {effective_bpm:.2f} BPM)"
            else:
                interp = ""
            logger.info(
                "align_for_loop: detected %.2f BPM%s, stretched by %.4f to "
                "match %.2f target",
                detected_bpm, interp, rate, target_bpm,
            )
        else:
            logger.info(
                "align_for_loop: detected %.2f BPM within %.2f%% of %.2f target; "
                "no stretch",
                detected_bpm, _LOOP_MODE_DEADBAND * 100, target_bpm,
            )

    # --- 3: head-trim to first beat / onset (phase alignment) -----------
    trim_limit = sr * 1.5
    head_offset = 0
    if beat_samples is not None and len(beat_samples) > 0:
        first_beat = int(beat_samples[0])
        if 0 < first_beat < trim_limit:
            head_offset = first_beat
    if head_offset == 0:
        # No usable beat offset: fall back to the first onset so the clip
        # at least starts on a transient.
        head_offset = _detect_first_onset_sample(mono, sr)
        if head_offset >= trim_limit:
            head_offset = 0
    if head_offset > 0:
        audio = audio[head_offset:]
        logger.info(
            "align_for_loop: head-trimmed %.1f ms to first beat/onset",
            head_offset / sr * 1000,
        )

    # --- 4: crop or pad to exact target_samples -------------------------
    shortfall = target_samples - audio.shape[0]
    if shortfall < 0:
        audio = audio[:target_samples]
    elif shortfall > 0:
        silence = np.zeros((shortfall, audio.shape[1]), dtype=audio.dtype)
        audio = np.concatenate([audio, silence], axis=0)
    return _restore(audio)
+# --- helpers ---------------------------------------------------------------
+# DEPRECATED: legacy v1 helper; delete with this module (AUDIT.md §9b).
def _trailing_audio_end(audio: np.ndarray, sr: int) -> int:
    """Return the sample index just past the last audible content.

    The clip is scanned backwards in non-overlapping windows of
    ``_SILENCE_WINDOW_SEC``. The first window (from the end) whose mean
    squared amplitude exceeds the ``_SILENCE_THRESHOLD_DB`` level marks the
    end of the audible content; the result is the end of that window plus
    ``_SILENCE_TAIL_KEEP_SEC`` of decay tail, capped at the clip length.

    Args:
        audio: 1-D signal or 2-D ``[sample, channel]`` array.
        sr: Sample rate of ``audio``.

    Returns:
        The cut index in samples. The full clip length is returned when the
        clip is no longer than one window, when every window is below the
        threshold, or when the window rounds down to zero samples.
    """
    n = audio.shape[0]
    window = int(sr * _SILENCE_WINDOW_SEC)
    # A zero-length window would leave the backward scan stuck on the same
    # position forever, so it is treated like a clip too short to analyse.
    if window <= 0 or n <= window:
        return n
    mono = audio.mean(axis=1) if audio.ndim > 1 else audio
    # Squared amplitudes — comparing to threshold² is equivalent to RMS vs
    # threshold but avoids a sqrt per window.
    sq = mono ** 2
    thresh_sq = (10.0 ** (_SILENCE_THRESHOLD_DB / 20.0)) ** 2
    tail_keep = int(sr * _SILENCE_TAIL_KEEP_SEC)
    end = n
    while end > 0:
        start = max(0, end - window)
        if float(sq[start:end].mean()) > thresh_sq:
            return min(n, end + tail_keep)
        end = start
    # Whole clip is below threshold — leave as-is rather than truncate to 0.
    return n
+# DEPRECATED: legacy v1 helper; delete with this module (AUDIT.md §9b).
+def _snap_to_beat(
+    target_samples: int,
+    beat_samples: Optional[np.ndarray],
+    samples_per_beat: float,
+    audio_len: int,
+) -> int:
+    """Return the cut point: the nearest detected beat within ±½ beat of
+    target_samples, falling back to target_samples itself if no beat is in
+    range. Never overshoots audio length."""
+    fallback = min(target_samples, audio_len)
+    if beat_samples is None or len(beat_samples) == 0:
+        return fallback
+    tol = samples_per_beat * 0.5
+    valid = beat_samples[(beat_samples > 0) & (beat_samples <= audio_len)]
+    if len(valid) == 0:
+        return fallback
+    diffs = np.abs(valid - target_samples)
+    idx = int(np.argmin(diffs))
+    if diffs[idx] <= tol:
+        return int(valid[idx])
+    return fallback
+# DEPRECATED: superseded by loop_quantizer (AUDIT.md §9b); may be ported if reused.
+def _apply_fade(audio: np.ndarray, duration_sec: float, sr: int, *, fade_in: bool) -> None:
+    """In-place equal-power fade on the head (fade_in=True) or tail."""
+    n = min(int(duration_sec * sr), audio.shape[0])
+    if n <= 1:
+        return
+    ramp = _equal_power_ramp(n, fade_in=fade_in, dtype=audio.dtype)
+    if audio.ndim > 1:
+        ramp = ramp[:, np.newaxis]
+    if fade_in:
+        audio[:n] *= ramp
+    else:
+        audio[-n:] *= ramp
+# DEPRECATED: superseded by loop_quantizer (AUDIT.md §9b); may be ported if reused.
+def _equal_power_ramp(n: int, *, fade_in: bool, dtype) -> np.ndarray:
+    """Cosine-shaped equal-power fade. Energy at the midpoint is preserved
+    when summing fade-out + fade-in of complementary segments, avoiding the
+    perceptible 'duck' that linear ramps produce at loop seams."""
+    t = np.linspace(0.0, np.pi / 2.0, n).astype(dtype, copy=False)
+    return np.sin(t) if fade_in else np.cos(t)
+# DEPRECATED: legacy v1 helper; delete with this module (AUDIT.md §9b).
 def _best_stretch_rate(
     detected_bpm: float,
     target_bpm: float,
+    *,
+    safe_min: float = _STRETCH_SAFE_MIN,
+    safe_max: float = _STRETCH_SAFE_MAX,
 ) -> Tuple[Optional[float], float]:
     """Pick the time-stretch rate that maps detected → target, considering
     half-time and double-time interpretations of the detected tempo. Returns
     nothing safe is available.
     Order of preference:
+      1. Detected as-is, if it lands inside [safe_min, safe_max].
       2. Octave-corrected (detected × 0.5 or × 2.0), only when the as-is
          interpretation is out of range. This is the librosa half-/double-
          time error recovery path.
     """
     rate_asis = target_bpm / detected_bpm
+    if safe_min <= rate_asis <= safe_max:
         return rate_asis, detected_bpm
     candidates = []
     for octave_factor in (0.5, 2.0):
         interpreted = detected_bpm * octave_factor
         rate = target_bpm / interpreted
+        if safe_min <= rate <= safe_max:
             candidates.append((abs(rate - 1.0), rate, interpreted))
     if not candidates:
         return None, detected_bpm
     return best_rate, best_interp
+# DEPRECATED: legacy v1 helper; delete with this module (AUDIT.md §9b).
 def _detect_first_onset_sample(mono: np.ndarray, sr: int) -> int:
     """Return the sample index of the first detected onset, or 0 if none found."""
     try:
     return first
+# DEPRECATED: superseded by loop_quantizer detector (AUDIT.md §9c); port or replace.
+def _detect_grid(
     mono: np.ndarray,
     sr: int,
     start_bpm: Optional[float] = None,
+) -> Tuple[Optional[float], Optional[np.ndarray]]:
+    """Run librosa beat tracking with the target tempo as a prior. Returns
+    (bpm, beat_samples_array). Passing start_bpm reduces (but doesn't
+    eliminate) half-time / double-time errors; the octave-correction in
+    _best_stretch_rate handles whatever librosa still gets wrong."""
     try:
         kwargs = {"y": mono, "sr": sr, "units": "samples"}
         if start_bpm is not None and start_bpm > 0:
     bpm = float(np.atleast_1d(tempo).flatten()[0])
     if not (40.0 <= bpm <= 240.0):
         return None, None
+    return bpm, np.asarray(beats, dtype=np.int64)
+# DEPRECATED: legacy v1 helper; delete with this module (AUDIT.md §9b).
 def _time_stretch_multichannel(audio: np.ndarray, rate: float) -> np.ndarray:
     """Phase-vocoder time stretch, applied per channel and re-stacked."""
     stretched = librosa.effects.time_stretch(audio.T, rate=rate)

app/core/model_manager.py CHANGED Viewed

@@ -1,478 +1,669 @@
-import os
 import json
 import shutil
-from pathlib import Path
-from typing import Dict, List, Optional, Callable
 from datetime import datetime
-import requests
-from huggingface_hub import snapshot_download, hf_hub_download
-import hashlib
 class ModelManager:
-    def __init__(self, config):
         self.config = config
-        self.models_dir = config.get_path("models_pretrained")
         self.models_dir.mkdir(exist_ok=True, parents=True)
-        # Use fragmenta-models repo on HF Spaces, Stability AI models elsewhere
-        use_custom_repo = os.getenv('FRAGMENTA_USE_CUSTOM_MODELS', '').lower() == 'true'
-        if use_custom_repo:
-            models_repo = 'MazCodes/fragmenta-models'
-            small_file = 'stable-audio-open-small-model.safetensors'
-            large_file = 'stable-audio-open-model.safetensors'
-        else:
-            models_repo_small = 'stabilityai/stable-audio-open-small'
-            models_repo_large = 'stabilityai/stable-audio-open-1.0'
-            small_file = 'model.safetensors'
-            large_file = 'model.safetensors'
-        self.available_models = {
-            'stable-audio-open-small': {
-                'name': 'Stable Audio Open Small',
-                'repo': models_repo if use_custom_repo else models_repo_small,
-                'files': [small_file],
-                'size': '2.1 GB',
-                'description': 'Fast generation, good quality, lower memory usage',
-                'best_for': 'Beginners, quick experiments, limited GPU',
-                'license': 'Stability AI License',
-                'checksum': 'sha256:abc123...'
-            },
-            'stable-audio-open-1.0': {
-                'name': 'Stable Audio Open 1.0',
-                'repo': models_repo if use_custom_repo else models_repo_large,
-                'files': [large_file],
-                'size': '8.2 GB',
-                'description': 'Highest quality, more detailed audio',
-                'best_for': 'Professional use, high-end GPUs',
-                'license': 'Stability AI License',
-                'checksum': 'sha256:def456...'
-            }
         }
-        self.terms_file = Path("config/terms_accepted.json")
-        self.terms_file.parent.mkdir(exist_ok=True)
-    def get_available_models(self) -> List[Dict]:
-        models = []
-        for model_id, info in self.available_models.items():
-            is_downloaded = self.is_model_downloaded(model_id)
-            downloaded_size = None
-            if is_downloaded:
-                if model_id == 'stable-audio-open-small':
-                    model_file = self.models_dir / 'stable-audio-open-small-model.safetensors'
-                    downloaded_size = self._get_file_size(
-                        model_file) if model_file.exists() else None
-                elif model_id == 'stable-audio-open-1.0':
-                    model_file = self.models_dir / 'stable-audio-open-model.safetensors'
-                    downloaded_size = self._get_file_size(
-                        model_file) if model_file.exists() else None
-                else:
-                    model_path = self.models_dir / model_id
-                    downloaded_size = self._get_downloaded_size(
-                        model_path) if model_path.exists() else None
-            models.append({
-                'id': model_id,
-                'name': info['name'],
-                'size': info['size'],
-                'description': info['description'],
-                'best_for': info['best_for'],
-                'license': info['license'],
-                'downloaded': is_downloaded,
-                'downloaded_size': downloaded_size,
-                'terms_accepted': self.is_terms_accepted(model_id)
-            })
-        return models
-    def _get_file_size(self, file_path: Path) -> str:
-        if not file_path.exists() or not file_path.is_file():
-            return "0 B"
-        size = file_path.stat().st_size
-        return self._bytes_to_human(size)
-    def _get_downloaded_size(self, model_path: Path) -> str:
-        if not model_path.exists():
-            return "0 B"
-        total_size = 0
-        for file_path in model_path.rglob("*"):
-            if file_path.is_file():
-                total_size += file_path.stat().st_size
-        for unit in ['B', 'KB', 'MB', 'GB']:
-            if total_size < 1024.0:
-                return f"{total_size:.1f} {unit}"
-            total_size /= 1024.0
-        return f"{total_size:.1f} TB"
-    def get_model_info(self, model_id: str) -> Optional[Dict]:
-        if model_id not in self.available_models:
-            return None
-        info = self.available_models[model_id].copy()
-        info['id'] = model_id
-        info['downloaded'] = self.is_model_downloaded(model_id)
-        info['terms_accepted'] = self.is_terms_accepted(model_id)
-        return info
     def is_model_downloaded(self, model_id: str) -> bool:
-        if model_id == 'stable-audio-open-small':
-            model_file = self.models_dir / 'stable-audio-open-small-model.safetensors'
-            return model_file.exists() and model_file.is_file()
-        elif model_id == 'stable-audio-open-1.0':
-            model_file = self.models_dir / 'stable-audio-open-model.safetensors'
-            return model_file.exists() and model_file.is_file()
-        else:
-            model_path = self.models_dir / model_id
-            if model_path.exists() and model_path.is_dir():
-                return any(model_path.iterdir())
-            pattern = f"*{model_id}*.safetensors"
-            matching_files = list(self.models_dir.glob(pattern))
-            return len(matching_files) > 0
-    def is_terms_accepted(self, model_id: str) -> bool:
-        if not self.terms_file.exists():
             return False
-        try:
-            with open(self.terms_file, 'r') as f:
-                terms_data = json.load(f)
-            return terms_data.get(model_id, {}).get('accepted', False)
-        except:
             return False
-    def accept_terms(self, model_id: str) -> bool:
-        if model_id not in self.available_models:
             return False
-        terms_data = {}
-        if self.terms_file.exists():
-            try:
-                with open(self.terms_file, 'r') as f:
-                    terms_data = json.load(f)
-            except:
-                terms_data = {}
-        terms_data[model_id] = {
-            'accepted': True,
-            'accepted_at': datetime.now().isoformat(),
-            'model_name': self.available_models[model_id]['name'],
-            'license': self.available_models[model_id]['license']
-        }
         try:
-            with open(self.terms_file, 'w') as f:
-                json.dump(terms_data, f, indent=2)
-            return True
-        except Exception as e:
-            print(f"Error saving terms acceptance: {e}")
             return False
-    def download_model(self, model_id: str, progress_callback: Optional[Callable] = None) -> bool:
-        if model_id not in self.available_models:
             return False
-        if not self.is_terms_accepted(model_id):
-            print(f"Terms not accepted for {model_id}")
-            self.accept_terms(model_id)
-            print(f"Automatically accepted terms for {model_id}")
-        model_info = self.available_models[model_id]
-        target_dir = self.models_dir
-        target_dir.mkdir(exist_ok=True, parents=True)
-        try:
-            print(f"Downloading {model_info['name']} to {target_dir}")
-            if progress_callback:
-                progress_callback(
-                    0, f"Starting download of {model_info['name']}...")
-            from huggingface_hub import HfApi
-            api = HfApi()
             try:
-                user = api.whoami()
-                print(f"Authenticated as: {user}")
                 if progress_callback:
-                    progress_callback(10, "Authentication verified...")
-            except Exception as auth_error:
-                print(f"Not authenticated with Hugging Face: {auth_error}")
-                if progress_callback:
-                    progress_callback(0, "Authentication required...")
-                print("To download models, you need to:")
-                print(
-                    "1. Visit https://huggingface.co/stabilityai/stable-audio-open-small")
-                print("2. Accept the terms and conditions")
-                print("3. Log in to your Hugging Face account")
-                print(
-                    "4. Get your access token from https://huggingface.co/settings/tokens")
-                print("5. Use the in-app Hugging Face login dialog")
                 if progress_callback:
-                    progress_callback(0, "Please authenticate in the app first")
-                return False
-            if progress_callback:
-                progress_callback(20, "Starting file download...")
-            try:
-                from huggingface_hub import hf_hub_download
-                import shutil
-                from tqdm import tqdm
-                import sys
-                class TqdmToCallback:
-                    def __init__(self, callback, file_index, total_files):
-                        self.callback = callback
-                        self.file_index = file_index
-                        self.total_files = total_files
-                        self.last_percent = 0
-                    def __call__(self, t):
-                        def inner(bytes_amount=1):
-                            if t.total:
-                                file_progress = (t.n / t.total)
-                                overall_progress = (self.file_index + file_progress) / self.total_files
-                                percent = 20 + int(overall_progress * 70)
-                                if percent != self.last_percent:
-                                    self.last_percent = percent
-                                    downloaded_mb = t.n / (1024 * 1024)
-                                    total_mb = t.total / (1024 * 1024)
-                                    if self.callback:
-                                        self.callback(
-                                            percent,
-                                            f"Downloading: {downloaded_mb:.1f}MB / {total_mb:.1f}MB"
-                                        )
-                        return inner
-                downloaded_files = []
-                total_files = len(model_info['files'])
-                for i, file_pattern in enumerate(model_info['files']):
                     if progress_callback:
                         progress_callback(
-                            20 + int((i / total_files) * 70),
-                            f"Starting download of {file_pattern}..."
                         )
-                    try:
-                        if file_pattern == 'model.safetensors':
-                            if model_id == 'stable-audio-open-small':
-                                final_filename = 'stable-audio-open-small-model.safetensors'
-                            elif model_id == 'stable-audio-open-1.0':
-                                final_filename = 'stable-audio-open-model.safetensors'
-                            else:
-                                final_filename = f"{model_id}-model.safetensors"
-                        else:
-                            final_filename = f"{model_id}-{file_pattern}"
-                        tqdm_callback = TqdmToCallback(progress_callback, i, total_files)
-                        # hf_hub_download drives its own tqdm — monkey-patch its init/update so we
-                        # forward byte progress to progress_callback without a second progress bar.
-                        original_tqdm_init = tqdm.__init__
-                        def patched_tqdm_init(self, *args, **kwargs):
-                            original_tqdm_init(self, *args, **kwargs)
-                            original_update = self.update
-                            def new_update(n=1):
-                                result = original_update(n)
-                                if progress_callback and self.total:
-                                    file_progress = (self.n / self.total)
-                                    overall_progress = (i + file_progress) / total_files
-                                    percent = 20 + int(overall_progress * 70)
-                                    downloaded_mb = self.n / (1024 * 1024)
-                                    total_mb = self.total / (1024 * 1024)
-                                    progress_callback(
-                                        percent,
-                                        f"Downloading: {downloaded_mb:.1f}MB / {total_mb:.1f}MB"
-                                    )
-                                return result
-                            self.update = new_update
-                        tqdm.__init__ = patched_tqdm_init
-                        try:
-                            downloaded_file = hf_hub_download(
-                                repo_id=model_info['repo'],
-                                filename=file_pattern,
-                                resume_download=True
-                            )
-                        finally:
-                            tqdm.__init__ = original_tqdm_init
-                        downloaded_path = Path(downloaded_file)
-                        final_path = target_dir / final_filename
-                        final_path.parent.mkdir(parents=True, exist_ok=True)
-                        shutil.copy2(str(downloaded_path), str(final_path))
-                        print(f"Saved as {final_filename}")
-                        downloaded_files.append(str(final_path))
-                        if progress_callback:
-                            progress_callback(
-                                20 + int(((i + 1) / total_files) * 70),
-                                f"Completed {file_pattern}"
-                            )
-                    except Exception as file_error:
-                        print(
-                            f"Failed to download {file_pattern}: {file_error}")
-                        if progress_callback:
-                            progress_callback(
-                                0, f"Failed to download {file_pattern}")
-                        continue
-                print(f"Downloaded {len(downloaded_files)} files")
-                if progress_callback:
-                    progress_callback(
-                        95, "Download completed, verifying files...")
-            except Exception as download_error:
-                print(f"Error during download: {download_error}")
-                if progress_callback:
-                    progress_callback(
-                        0, f"Download failed: {str(download_error)}")
-                return False
             if progress_callback:
-                progress_callback(95, "Verifying download...")
-            expected_files = []
-            if model_id == 'stable-audio-open-small':
-                expected_files.append(
-                    'stable-audio-open-small-model.safetensors')
-            elif model_id == 'stable-audio-open-1.0':
-                expected_files.append('stable-audio-open-model.safetensors')
-            else:
-                expected_files.append(f"{model_id}-model.safetensors")
-            files_exist = any((target_dir / expected_file).exists()
-                              for expected_file in expected_files)
-            if files_exist:
-                if progress_callback:
-                    progress_callback(100, "Download complete!")
-                print(f"Successfully downloaded {model_info['name']}")
-                return True
-            else:
-                if progress_callback:
-                    progress_callback(0, "Download verification failed")
-                print(f"Expected files not found: {expected_files}")
-                return False
-        except Exception as e:
-            print(f"Error downloading {model_info['name']}: {e}")
-            if progress_callback:
-                progress_callback(0, f"Error: {str(e)}")
-            if "403" in str(e) and "gated repositories" in str(e).lower():
-                print("Token permission issue detected!")
-                print(
-                    "Your Hugging Face token needs 'Read access to public gated repositories'")
-                print("Please:")
-                print("1. Go to https://huggingface.co/settings/tokens")
-                print("2. Edit your token or create a new one")
-                print("3. Enable 'Read access to public gated repositories'")
-                print("4. Try the download again")
-            elif "401" in str(e) or "restricted" in str(e).lower():
-                print("This model requires Hugging Face authentication.")
-                print("Please visit the model page and accept terms first:")
-                print(f"https://huggingface.co/{model_info['repo']}")
-            return False
     def delete_model(self, model_id: str) -> bool:
-        deleted_something = False
-        if model_id == 'stable-audio-open-small':
-            model_file = self.models_dir / 'stable-audio-open-small-model.safetensors'
-            config_file = self.models_dir / 'stable-audio-open-small-config.json'
-        elif model_id == 'stable-audio-open-1.0':
-            model_file = self.models_dir / 'stable-audio-open-model.safetensors'
-            config_file = self.models_dir / 'stable-audio-open-1.0-config.json'
-        else:
-            model_file = self.models_dir / f"{model_id}-model.safetensors"
-            config_file = self.models_dir / f"{model_id}-config.json"
-        for file_path in [model_file, config_file]:
-            if file_path.exists():
-                try:
-                    file_path.unlink()
-                    print(f"Deleted {file_path.name}")
-                    deleted_something = True
-                except Exception as e:
-                    print(f"Error deleting {file_path.name}: {e}")
-        model_path = self.models_dir / model_id
-        if model_path.exists() and model_path.is_dir():
-            try:
-                shutil.rmtree(model_path)
-                print(f"Deleted {model_id} directory")
-                deleted_something = True
-            except Exception as e:
-                print(f"Error deleting {model_id} directory: {e}")
-        if deleted_something:
-            print(f"Deleted {model_id}")
-            return True
-        else:
-            print(f"No files found for {model_id}")
             return False
-    def get_download_progress(self, model_id: str) -> Dict:
-        return {
-            'model_id': model_id,
-            'downloaded': self.is_model_downloaded(model_id),
-            'size': self.available_models.get(model_id, {}).get('size', 'Unknown')
-        }
-    def get_storage_info(self) -> Dict:
-        total_size = 0
-        model_count = 0
-        if self.models_dir.exists():
-            for model_id in self.available_models.keys():
-                if self.is_model_downloaded(model_id):
-                    model_count += 1
-            for file_path in self.models_dir.rglob("*"):
-                if file_path.is_file():
-                    total_size += file_path.stat().st_size
         return {
-            'total_size_bytes': total_size,
-            'total_size_human': self._bytes_to_human(total_size),
-            'model_count': model_count,
-            'models_dir': str(self.models_dir)
         }
-    def _bytes_to_human(self, bytes_value: int) -> str:
-        for unit in ['B', 'KB', 'MB', 'GB']:
-            if bytes_value < 1024.0:
-                return f"{bytes_value:.1f} {unit}"
-            bytes_value /= 1024.0
-        return f"{bytes_value:.1f} TB"

+"""Checkpoint Manager — SA3 catalog, HF downloads, license + auth.
+Phase 2a in SA3_INTEGRATION_PLAN.md. Replaces the SA2-era SAO catalog.
+Nine downloadable artifacts (3 post-trained + 3 base + 2 autoencoders +
+1 CLAP tagger). The SA3 repos are fetched via
+`huggingface_hub.snapshot_download` with cooperative cancel + progress
+reporting; the tagger is a single-file download.
+The Phase 2b frontend (CheckpointManagerWindow.js) consumes the JSON shapes
+returned by the `/api/checkpoints/*` endpoints in `app/backend/app.py`.
+"""
 import json
+import os
 import shutil
+import threading
+import uuid
+from dataclasses import dataclass, field
 from datetime import datetime
+from pathlib import Path
+from typing import Any, Callable, Dict, List, Optional
+from huggingface_hub import get_token, snapshot_download, whoami
+from huggingface_hub.errors import GatedRepoError, RepositoryNotFoundError
+# --- Catalog ------------------------------------------------------------------
+# Approximate sizes; the frontend can refine these by hitting
+# `huggingface_hub.HfApi().model_info(repo_id)` lazily. Numbers come from the
+# parameter counts on the HF model cards (× bytes/param, rounded).
+_SA3_CATALOG: Dict[str, Dict[str, Any]] = {
+    # --- Generation models (post-trained) ----------------------------------
+    "sa3-small-music": {
+        "user_visible": True,
+        "kind": "post-trained",
+        "name": "Small - Music",
+        "sa3_name": "small-music",
+        "repo": "stabilityai/stable-audio-3-small-music",
+        "size_bytes": 2_270_000_000,
+        "hardware": "cpu",                       # CPU / MPS / CUDA all work
+        "max_duration_sec": 120,
+        "description": "Fast distilled music generation. Locked to 8 steps, cfg 1.0.",
+    },
+    "sa3-small-sfx": {
+        "user_visible": True,
+        "kind": "post-trained",
+        "name": "Small - SFX",
+        "sa3_name": "small-sfx",
+        "repo": "stabilityai/stable-audio-3-small-sfx",
+        "size_bytes": 2_270_000_000,
+        "hardware": "cpu",
+        "max_duration_sec": 120,
+        "description": "Fast distilled SFX/foley generation. Locked to 8 steps, cfg 1.0.",
+    },
+    "sa3-medium": {
+        "user_visible": True,
+        "kind": "post-trained",
+        "name": "Medium",
+        "sa3_name": "medium",
+        "repo": "stabilityai/stable-audio-3-medium",
+        "size_bytes": 9_220_000_000,
+        "hardware": "cuda+flash-attn",
+        "max_duration_sec": 380,
+        "description": "Fast distilled hi-fi generation, up to 380s. Locked to 8 steps, cfg 1.0.",
+    },
+    # --- Base checkpoints (full artist control) ----------------------------
+    # These are the CFG-aware pre-distillation models. Slower (~50 steps,
+    # cfg ~7), but the user controls cfg_scale, steps, and the inference
+    # trajectory. Also the canonical targets for LoRA training.
+    "sa3-small-music-base": {
+        "user_visible": True,
+        "kind": "base",
+        "name": "Small - Music (Base)",
+        "sa3_name": "small-music-base",
+        "repo": "stabilityai/stable-audio-3-small-music-base",
+        "size_bytes": 2_270_000_000,
+        "hardware": "cpu",
+        "max_duration_sec": 120,
+        "description": "CFG-aware base. Full control over cfg_scale, steps. Slower than distilled.",
+    },
+    "sa3-small-sfx-base": {
+        "user_visible": True,
+        "kind": "base",
+        "name": "Small - SFX (Base)",
+        "sa3_name": "small-sfx-base",
+        "repo": "stabilityai/stable-audio-3-small-sfx-base",
+        "size_bytes": 2_270_000_000,
+        "hardware": "cpu",
+        "max_duration_sec": 120,
+        "description": "CFG-aware base. Full control over cfg_scale, steps. Slower than distilled.",
+    },
+    "sa3-medium-base": {
+        "user_visible": True,
+        "kind": "base",
+        "name": "Medium (Base)",
+        "sa3_name": "medium-base",
+        "repo": "stabilityai/stable-audio-3-medium-base",
+        "size_bytes": 9_220_000_000,
+        "hardware": "cuda+flash-attn",
+        "max_duration_sec": 380,
+        "description": "CFG-aware base. Full control over cfg_scale, steps. Slower than distilled.",
+    },
+    # Standalone autoencoders: the AE is bundled INSIDE each DiT repo
+    # already (StableAudioModel.from_pretrained loads it from there), so
+    # we don't surface SAME-S / SAME-L in the manager. They remain
+    # downloadable via /api/checkpoints?include=all for advanced uses
+    # (autoencoder-only workflows, pre-encoding datasets for training).
+    "sa3-same-s": {
+        "user_visible": False,
+        "kind": "autoencoder",
+        "name": "SAME-S",
+        "sa3_name": "same-s",
+        "repo": "stabilityai/SAME-S",
+        "size_bytes": 530_000_000,
+        "hardware": "cpu",
+        "description": "Standalone autoencoder (266M). Already bundled with the small-* DiTs.",
+    },
+    "sa3-same-l": {
+        "user_visible": False,
+        "kind": "autoencoder",
+        "name": "SAME-L",
+        "sa3_name": "same-l",
+        "repo": "stabilityai/SAME-L",
+        "size_bytes": 3_400_000_000,
+        "hardware": "cuda",
+        "description": "Standalone autoencoder (1.7B). Already bundled with medium.",
+    },
+    # --- Auto-annotation tools ---------------------------------------------
+    # Single-file HF download, lives under <models_pretrained>/clap/.
+    # `is_model_downloaded` and `_run_download` special-case kind=="tagger".
+    "clap-music": {
+        "user_visible": True,
+        "kind": "tagger",
+        "name": "LAION-CLAP (music)",
+        "sa3_name": "clap-music",
+        "repo": "lukewys/laion_clap",
+        "filename": "music_audioset_epoch_15_esc_90.14.pt",
+        # ~2.35 GB .pt + ~1.4 GB of text-encoder snapshots (roberta-base,
+        # bert-base-uncased, facebook/bart-base) that laion_clap loads at
+        # construction. download_clap_checkpoint pulls all of them.
+        "size_bytes": 3_800_000_000,
+        "hardware": "cpu",
+        "description": (
+            "Zero-shot tagger used by the dataset prep's rich-tier annotation. "
+            "Scores each clip against your genre / mood / instrument vocabulary."
+        ),
+    },
+}
+# --- Job state for in-flight downloads ----------------------------------------
+@dataclass
+class _DownloadJob:
+    """In-memory record of one download attempt.
+
+    Holds the progress counters and lifecycle timestamps for a single
+    download of one catalog model. `to_dict()` gives the JSON-friendly
+    view; the cancel flag and thread handle are not part of that view.
+    """
+    job_id: str
+    model_id: str
+    status: str = "queued"             # queued | running | complete | failed | cancelled
+    downloaded_bytes: int = 0
+    total_bytes: int = 0
+    error: Optional[str] = None
+    # Timestamps are stored as strings; None until set.
+    # NOTE(review): presumably ISO-8601 from datetime — confirm at the writer.
+    started_at: Optional[str] = None
+    finished_at: Optional[str] = None
+    # default_factory gives every job its own Event instead of a shared one.
+    _cancel_flag: threading.Event = field(default_factory=threading.Event)
+    _thread: Optional[threading.Thread] = None
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the public fields as a plain dict.
+
+        `_cancel_flag` and `_thread` are deliberately left out.
+        """
+        return {
+            "job_id": self.job_id,
+            "model_id": self.model_id,
+            "status": self.status,
+            "downloaded_bytes": self.downloaded_bytes,
+            "total_bytes": self.total_bytes,
+            "error": self.error,
+            "started_at": self.started_at,
+            "finished_at": self.finished_at,
+        }
+class _DownloadCancelled(Exception):
+    """Raised inside the tqdm hook when a job's cancel flag fires.
+
+    Carries no extra state. NOTE(review): presumably caught by the download
+    worker so the job ends as "cancelled" rather than "failed" — confirm.
+    """
+# --- ModelManager -------------------------------------------------------------
 class ModelManager:
+    """Owns the SA3 catalog and the on-disk pretrained directory."""
+    def __init__(self, config: Any) -> None:
+        """Set up the pretrained directories, HF cache env vars and job table.
+
+        Args:
+            config: object exposing `get_path("models_pretrained")`; the
+                returned value is used as a `Path` (mkdir, `/` joins).
+
+        Side effects: creates the pretrained dir and `<pretrained>/sa3/hub`
+        on disk, and overwrites three Hugging Face cache environment
+        variables for the whole process.
+        """
         self.config = config
+        self.models_dir: Path = config.get_path("models_pretrained")
         self.models_dir.mkdir(exist_ok=True, parents=True)
+        # Project-wide policy: every HF download lands inside
+        # <app>/models/pretrained/. SA3 generation + training uses
+        # <pretrained>/sa3/hub/; CLAP text deps use <pretrained>/clap/hub/.
+        # Both are HF cache layout so snapshot_download / hf_hub_download /
+        # from_pretrained resolve there transparently.
+        self.hub_dir: Path = self.models_dir / "sa3" / "hub"
+        self.hub_dir.mkdir(exist_ok=True, parents=True)
+        # Hard-force the resolution vars — never let an external env leak
+        # downloads into ~/.cache/huggingface or anywhere else outside the
+        # app folder. Covers huggingface_hub (current + legacy name) and
+        # transformers (which still consults TRANSFORMERS_CACHE).
+        # These are unconditional assignments, so any value already present
+        # in the environment is replaced.
+        os.environ["HF_HUB_CACHE"] = str(self.hub_dir)
+        os.environ["HUGGINGFACE_HUB_CACHE"] = str(self.hub_dir)
+        os.environ["TRANSFORMERS_CACHE"] = str(self.hub_dir)
+        # available_models is exposed for backwards compat with the existing
+        # /api/models/available endpoint. New code should use get_catalog().
+        # Each entry is a shallow copy, so edits to these dicts do not touch
+        # the top-level keys of _SA3_CATALOG.
+        self.available_models: Dict[str, Dict] = {
+            mid: dict(meta) for mid, meta in _SA3_CATALOG.items()
         }
+        # job_id -> job record; guarded by _jobs_lock.
+        self._jobs: Dict[str, _DownloadJob] = {}
+        self._jobs_lock = threading.Lock()
+    # --- Catalog --------------------------------------------------------------
+    def get_catalog(self, include_hidden: bool = False) -> List[Dict[str, Any]]:
+        """Checkpoint Manager catalog with per-item state.
+
+        By default returns only entries whose `user_visible` flag is truthy
+        in `_SA3_CATALOG` (the post-trained and base generation models plus
+        the CLAP tagger). `include_hidden=True` returns every entry, which
+        adds the standalone SAME-S / SAME-L autoencoders.
+
+        Each item is the dict built by `_catalog_entry`, in catalog order.
+
+        NOTE(review): the earlier docstring said the Phase 5 training
+        subprocess relies on `include_hidden=True` to find base variants;
+        base entries are now user-visible — confirm against callers.
+        """
+        return [
+            self._catalog_entry(mid)
+            for mid, info in _SA3_CATALOG.items()
+            if include_hidden or info.get("user_visible")
+        ]
+    def _catalog_entry(self, model_id: str) -> Dict[str, Any]:
+        """Build the JSON-ready catalog record for one model.
+
+        Combines the static catalog metadata with live state: whether the
+        model is on disk, how many bytes it occupies, and the newest
+        queued/running download job (or None).
+
+        Raises:
+            KeyError: if `model_id` is not in `_SA3_CATALOG`.
+        """
+        info = _SA3_CATALOG[model_id]
+        downloaded = self.is_model_downloaded(model_id)
+        bytes_total = 0
+        if downloaded:
+            # Sum both the hub-cache dir and the legacy flat dir when present.
+            # NOTE(review): the catalog says tagger artifacts live under
+            # <models_pretrained>/clap/, which neither directory covers, so
+            # downloaded_bytes is probably 0 for kind == "tagger" — confirm.
+            for d in (self._hub_cache_dir_for(model_id), self._legacy_flat_dir_for(model_id)):
+                if d.exists():
+                    bytes_total += self._dir_size(d)
+        # Surface the most recent in-flight job for this model so the
+        # frontend can resume the progress bar after the Checkpoint Manager
+        # dialog is closed and reopened. The job lives on the backend; only
+        # the polling died with the dismissed UI.
+        active_job = None
+        with self._jobs_lock:
+            in_flight = [
+                j for j in self._jobs.values()
+                if j.model_id == model_id and j.status in ("queued", "running")
+            ]
+            if in_flight:
+                # Newest first; jobs with no started_at sort as "" (last).
+                in_flight.sort(key=lambda j: j.started_at or "", reverse=True)
+                active_job = in_flight[0].to_dict()
+        return {
+            "id": model_id,
+            "kind": info.get("kind"),
+            "name": info["name"],
+            "sa3_name": info["sa3_name"],
+            "repo": info["repo"],
+            "size_bytes": info["size_bytes"],
+            "hardware": info["hardware"],
+            # .get: autoencoder and tagger entries define no max duration.
+            "max_duration_sec": info.get("max_duration_sec"),
+            "description": info["description"],
+            "user_visible": info.get("user_visible", False),
+            "downloaded": downloaded,
+            "downloaded_bytes": bytes_total,
+            "active_job": active_job,
+        }
+    def get_model_info(self, model_id: str) -> Optional[Dict[str, Any]]:
+        """Return the catalog record for `model_id`, or None if unknown.
+
+        Hidden (non-user-visible) entries are returned too; only ids missing
+        from `_SA3_CATALOG` yield None.
+        """
+        if model_id not in _SA3_CATALOG:
+            return None
+        return self._catalog_entry(model_id)
+    # --- Filesystem layout ----------------------------------------------------
+    def _hub_cache_dir_for(self, model_id: str) -> Path:
+        """HF-cache-shaped directory inside the app folder.
+
+        Returns `<hub_dir>/models--<org>--<name>`, derived from the catalog
+        entry's `repo` by replacing "/" with "--". Unknown ids map to
+        `<hub_dir>/_unknown` instead of raising. Nothing is created on disk.
+        """
+        info = _SA3_CATALOG.get(model_id)
+        if info is None:
+            return self.hub_dir / "_unknown"
+        safe = "models--" + info["repo"].replace("/", "--")
+        return self.hub_dir / safe
+    def _legacy_flat_dir_for(self, model_id: str) -> Path:
+        """Pre-unification per-model dir. Read-only fallback for migration."""
+        return self.models_dir / "sa3" / model_id
+    def _local_dir_for(self, model_id: str) -> Path:
+        """Public: returns the canonical (HF cache) directory for a model."""
+        return self._hub_cache_dir_for(model_id)
     def is_model_downloaded(self, model_id: str) -> bool:
+        if model_id not in _SA3_CATALOG:
             return False
+        info = _SA3_CATALOG[model_id]
+        if info.get("kind") == "tagger":
+            # Single-file artifacts live in <models_pretrained>/<group>/<filename>.
+            # auto_annotator owns the exact path for CLAP, so we delegate.
+            from app.backend.data.auto_annotator import clap_checkpoint_available
+            return clap_checkpoint_available(self.models_dir)
+        # Canonical: HF cache layout under <app>/models/pretrained/sa3/hub/.
+        # Look for the *top-level* model.safetensors only — NOT recursive —
+        # because a sibling repo may have only its conditioner subfolder
+        # downloaded (e.g. via the eager T5Gemma companion fetch when the
+        # user installed the matching *-base), and that doesn't make the
+        # post-trained model "downloaded".
+        main_present = False
+        hub = self._hub_cache_dir_for(model_id)
+        if hub.is_dir():
+            snaps = hub / "snapshots"
+            if snaps.is_dir():
+                for sub in snaps.iterdir():
+                    if any(sub.glob("*.safetensors")):
+                        main_present = True
+                        break
+        if not main_present:
+            # Fallback: legacy flat layout (predates the unification). Counts
+            # as downloaded for inference purposes; trainer will re-stage into
+            # hub.
+            legacy = self._legacy_flat_dir_for(model_id)
+            if legacy.is_dir() and any(legacy.glob("*.safetensors")):
+                main_present = True
+        if not main_present:
             return False
+        # Base models need a T5Gemma conditioner that lives in a subfolder
+        # of the *post-trained sibling* repo. "Installed" must mean "ready
+        # to train / generate" — without the companion the first run blocks
+        # for 30s+ on an HF fetch.
+        return self._is_companion_present(model_id)
+    def _is_companion_present(self, model_id: str) -> bool:
+        from app.core.training.sa3_lora_runner import SA3_T5GEMMA_SIBLINGS
+        sibling = SA3_T5GEMMA_SIBLINGS.get(model_id)
+        if not sibling:
+            return True  # nothing to check (post-trained / autoencoder / tagger)
+        sib_repo, sib_subfolder = sibling
+        safe = "models--" + sib_repo.replace("/", "--")
+        sib_hub = self.hub_dir / safe
+        snaps = sib_hub / "snapshots"
+        if not snaps.is_dir():
             return False
+        for sub in snaps.iterdir():
+            if (sub / sib_subfolder).is_dir():
+                # Any non-empty file presence is good enough — the eager
+                # fetch always pulls the tokenizer + config + safetensors.
+                if any((sub / sib_subfolder).iterdir()):
+                    return True
+        return False
+    # --- HF auth --------------------------------------------------------------
+    @staticmethod
+    def hf_auth_status() -> Dict[str, Any]:
+        token = get_token()
+        if not token:
+            return {"signed_in": False, "username": None}
         try:
+            user = whoami(token=token)
+            return {"signed_in": True, "username": user.get("name") or user.get("fullname")}
+        except Exception as err:
+            return {"signed_in": False, "username": None, "error": str(err)}
+    # --- Downloads ------------------------------------------------------------
+    def start_download(
+        self,
+        model_id: str,
+        progress_callback: Optional[Callable[[int, str], None]] = None,
+    ) -> Dict[str, Any]:
+        """Spawn a background download job. Returns the job descriptor."""
+        if model_id not in _SA3_CATALOG:
+            return {"error": f"Unknown checkpoint: {model_id}"}
+        job = _DownloadJob(
+            job_id=str(uuid.uuid4()),
+            model_id=model_id,
+            total_bytes=_SA3_CATALOG[model_id]["size_bytes"],
+        )
+        with self._jobs_lock:
+            self._jobs[job.job_id] = job
+        thread = threading.Thread(
+            target=self._run_download,
+            args=(job, progress_callback),
+            daemon=True,
+            name=f"sa3-download:{model_id}",
+        )
+        job._thread = thread
+        thread.start()
+        return job.to_dict()
+    def get_job(self, job_id: str) -> Optional[Dict[str, Any]]:
+        with self._jobs_lock:
+            job = self._jobs.get(job_id)
+        return job.to_dict() if job else None
+    def list_jobs(self) -> List[Dict[str, Any]]:
+        with self._jobs_lock:
+            return [j.to_dict() for j in self._jobs.values()]
+    def cancel_job(self, job_id: str) -> bool:
+        with self._jobs_lock:
+            job = self._jobs.get(job_id)
+        if not job:
             return False
+        if job.status not in ("queued", "running"):
             return False
+        job._cancel_flag.set()
+        return True
+    def _run_download(
+        self,
+        job: _DownloadJob,
+        progress_callback: Optional[Callable[[int, str], None]],
+    ) -> None:
+        info = _SA3_CATALOG[job.model_id]
+        job.status = "running"
+        job.started_at = datetime.now().isoformat()
+        # Tagger kind (e.g. CLAP) is a .pt file plus auxiliary HF snapshots
+        # living outside the sa3/hub layout. Multi-phase: 1 hf_hub_download
+        # for the audio .pt, then N sequential snapshot_downloads for the
+        # text encoders. Each spawns its own tqdm bars, so we use the
+        # cumulative hook to accumulate bytes across phases, and a phase_cb
+        # to prefix the message with which step the user is on.
+        if info.get("kind") == "tagger":
             try:
+                from app.backend.data.auto_annotator import download_clap_checkpoint
                 if progress_callback:
+                    progress_callback(0, f"Downloading {info['name']}…")
+                # Pin total to the catalog estimate so the % stays anchored
+                # even before tqdm reports any file's size.
+                job.total_bytes = info["size_bytes"]
+                current_phase = {"label": ""}
+                def phase_cb(idx: int, total: int, label: str) -> None:
+                    current_phase["label"] = f"[{idx}/{total}] {label}"
+                    if progress_callback:
+                        pct = (int(job.downloaded_bytes / job.total_bytes * 100)
+                               if job.total_bytes else 0)
+                        progress_callback(pct, current_phase["label"])
+                with _cumulative_tqdm_hook(job, progress_callback, current_phase):
+                    download_clap_checkpoint(self.models_dir, phase_cb=phase_cb)
+                job.downloaded_bytes = job.total_bytes
+                job.status = "complete"
+                job.finished_at = datetime.now().isoformat()
                 if progress_callback:
+                    progress_callback(100, f"Downloaded {info['name']}")
+            except _DownloadCancelled:
+                job.status = "cancelled"
+                job.error = "Cancelled by user"
+                job.finished_at = datetime.now().isoformat()
+            except Exception as err:
+                job.status = "failed"
+                job.error = f"{type(err).__name__}: {err}"
+                job.finished_at = datetime.now().isoformat()
+            return
+        cache_dir = self._hub_cache_dir_for(job.model_id).parent  # = self.hub_dir
+        cache_dir.mkdir(exist_ok=True, parents=True)
+        target = self._hub_cache_dir_for(job.model_id)
+        token = get_token()
+        try:
+            with _tqdm_progress_hook(job, progress_callback):
+                # Write into hub/ in HF cache layout. snapshot_download in
+                # hf-hub 1.x populates `<cache_dir>/models--<org>--<name>/`
+                # with the blobs/refs/snapshots structure that
+                # hf_hub_download() and StableAudioModel.from_pretrained()
+                # both consume.
+                snapshot_download(
+                    repo_id=info["repo"],
+                    cache_dir=str(cache_dir),
+                    token=token,
+                    allow_patterns=[
+                        "*.safetensors", "*.json", "*.txt", "*.model",
+                        "tokenizer*", "*.tiktoken",
+                    ],
+                )
+                # Companion fetch: base models reference their T5Gemma
+                # conditioner in a subfolder of the *post-trained sibling*
+                # repo. Without it the training subprocess crashes at
+                # AutoTokenizer.from_pretrained, and inference can't build
+                # the conditioner either. Pull it eagerly so "Installed"
+                # actually means "ready to use".
+                from app.core.training.sa3_lora_runner import SA3_T5GEMMA_SIBLINGS
+                sibling = SA3_T5GEMMA_SIBLINGS.get(job.model_id)
+                if sibling:
+                    sib_repo, sib_subfolder = sibling
                     if progress_callback:
                         progress_callback(
+                            min(99, int(job.downloaded_bytes / max(1, job.total_bytes) * 100)),
+                            f"Fetching T5Gemma conditioner from {sib_repo}…",
                         )
+                    snapshot_download(
+                        repo_id=sib_repo,
+                        cache_dir=str(cache_dir),
+                        token=token,
+                        allow_patterns=[f"{sib_subfolder}/*"],
+                    )
+            job.status = "complete"
+            job.downloaded_bytes = self._dir_size(target)
             if progress_callback:
+                progress_callback(100, f"Downloaded {info['name']}")
+        except _DownloadCancelled:
+            job.status = "cancelled"
+            job.error = "Cancelled by user"
+            shutil.rmtree(target, ignore_errors=True)
+        except GatedRepoError as err:
+            job.status = "failed"
+            job.error = f"hf_auth_required: {err}"
+        except RepositoryNotFoundError as err:
+            job.status = "failed"
+            job.error = f"Repository not found: {err}"
+        except Exception as err:
+            job.status = "failed"
+            job.error = str(err)
+        finally:
+            job.finished_at = datetime.now().isoformat()
+    # --- Delete ---------------------------------------------------------------
     def delete_model(self, model_id: str) -> bool:
+        if model_id not in _SA3_CATALOG:
             return False
+        # Remove both the canonical hub copy and the legacy flat copy if
+        # they exist. Either being present is enough to consider the
+        # model "downloaded", so both must be cleaned for the row to
+        # flip back to "Get".
+        hub = self._hub_cache_dir_for(model_id)
+        legacy = self._legacy_flat_dir_for(model_id)
+        any_existed = hub.exists() or legacy.exists()
+        if hub.exists():
+            shutil.rmtree(hub, ignore_errors=True)
+        if legacy.exists():
+            shutil.rmtree(legacy, ignore_errors=True)
+        return any_existed and not (hub.exists() or legacy.exists())
+    # --- Storage --------------------------------------------------------------
+    def get_storage_info(self) -> Dict[str, Any]:
+        per_model: List[Dict[str, Any]] = []
+        total_used = 0
+        for mid in _SA3_CATALOG:
+            bytes_ = 0
+            for d in (self._hub_cache_dir_for(mid), self._legacy_flat_dir_for(mid)):
+                if d.exists():
+                    bytes_ += self._dir_size(d)
+            per_model.append({
+                "id": mid,
+                "downloaded": self.is_model_downloaded(mid),
+                "bytes": bytes_,
+            })
+            total_used += bytes_
         return {
+            "total_used_bytes": total_used,
+            "total_free_bytes": shutil.disk_usage(self.models_dir).free,
+            "per_model": per_model,
         }
+    # --- Helpers --------------------------------------------------------------
+    @staticmethod
+    def _dir_size(path: Path) -> int:
+        if not path.exists():
+            return 0
+        return sum(p.stat().st_size for p in path.rglob("*") if p.is_file())
+# --- tqdm hook ----------------------------------------------------------------
+import contextlib
+@contextlib.contextmanager
+def _tqdm_progress_hook(
+    job: _DownloadJob,
+    progress_callback: Optional[Callable[[int, str], None]],
+):
+    """Monkey-patch tqdm so snapshot_download updates flow into the job state.
+    `snapshot_download` doesn't expose a progress callback. tqdm is its
+    internal progress bar — we wrap `update` to update job state and raise
+    `_DownloadCancelled` when the job's cancel flag fires.
+    While the context is active, `tqdm.__init__` (the class from `tqdm.auto`)
+    is replaced; the previous `__init__` is put back on exit, including when
+    the body raises.
+    """
+    from tqdm.auto import tqdm
+    original_init = tqdm.__init__
+    # NOTE(review): the patch is installed on the class, so it affects every
+    # bar created anywhere in the process while the context is open. Two
+    # overlapping contexts (e.g. two download threads) would each restore the
+    # `__init__` they captured, so the exit order decides what stays installed
+    # — confirm downloads never overlap, or serialise them.
+    def patched_init(self, *args: Any, **kwargs: Any) -> None:
+        original_init(self, *args, **kwargs)
+        original_update = self.update
+        def new_update(n: int = 1) -> Any:
+            # Cancellation is checked before the bar advances.
+            if job._cancel_flag.is_set():
+                raise _DownloadCancelled()
+            result = original_update(n)
+            # Bars without a (non-zero) total are ignored entirely.
+            # NOTE(review): assumes every bar with a total counts bytes — TODO confirm.
+            if self.total:
+                # max() keeps the job counter from moving backwards when a
+                # new bar starts below the previous bar's position.
+                job.downloaded_bytes = max(job.downloaded_bytes, self.n)
+                # Grow the expected size if a bar reports a larger total.
+                if job.total_bytes < self.total:
+                    job.total_bytes = self.total
+                if progress_callback:
+                    # Percent and MB figures are relative to this bar alone.
+                    pct = int(self.n / self.total * 100) if self.total else 0
+                    mb_done = self.n / (1024 * 1024)
+                    mb_total = self.total / (1024 * 1024)
+                    progress_callback(pct, f"Downloading: {mb_done:.1f}MB / {mb_total:.1f}MB")
+            return result
+        # Shadow the bound method on this instance only.
+        self.update = new_update  # type: ignore[method-assign]
+    tqdm.__init__ = patched_init  # type: ignore[method-assign]
+    try:
+        yield
+    finally:
+        tqdm.__init__ = original_init  # type: ignore[method-assign]
+@contextlib.contextmanager
+def _cumulative_tqdm_hook(
+    job: _DownloadJob,
+    progress_callback: Optional[Callable[[int, str], None]],
+    current_phase: Dict[str, str],
+):
+    """Like _tqdm_progress_hook, but sums bytes across sequential bars.
+    Each tqdm bar reports `self.n` cumulative within ITS file. The single-bar
+    hook uses max() which freezes the UI when a fresh bar starts smaller than
+    the previous bar's total. Here we track the previous `self.n` per bar id
+    and add only the delta to job.downloaded_bytes — so progress climbs
+    monotonically across all phases.
+    `current_phase` is read on every update: its "label" entry, when
+    non-empty, prefixes the progress message. `job.total_bytes` is never
+    modified here, and the reported percentage is capped at 99.
+    """
+    from tqdm.auto import tqdm
+    original_init = tqdm.__init__
+    # Last seen `n` per bar, keyed by id(bar); reset to 0 whenever a bar is
+    # constructed, so a recycled id starts from a clean slate.
+    prev_n: Dict[int, int] = {}
+    # NOTE(review): as with _tqdm_progress_hook, the patch is class-wide and
+    # overlapping contexts would restore `__init__` in exit order — confirm
+    # this never runs concurrently with another hook.
+    def patched_init(self, *args: Any, **kwargs: Any) -> None:
+        original_init(self, *args, **kwargs)
+        original_update = self.update
+        prev_n[id(self)] = 0
+        def new_update(n: int = 1) -> Any:
+            # Cancellation is checked before the bar advances.
+            if job._cancel_flag.is_set():
+                raise _DownloadCancelled()
+            result = original_update(n)
+            prev = prev_n.get(id(self), 0)
+            delta = self.n - prev
+            prev_n[id(self)] = self.n
+            # Only forward movement is accumulated.
+            if delta > 0:
+                job.downloaded_bytes += delta
+                if progress_callback and job.total_bytes:
+                    # Capped at 99: the caller reports 100 itself on completion.
+                    pct = min(int(job.downloaded_bytes / job.total_bytes * 100), 99)
+                    mb_done = job.downloaded_bytes / (1024 * 1024)
+                    mb_total = job.total_bytes / (1024 * 1024)
+                    label = current_phase.get("label", "")
+                    msg = (f"{label} · {mb_done:.0f} MB / {mb_total:.0f} MB"
+                           if label else f"{mb_done:.0f} MB / {mb_total:.0f} MB")
+                    progress_callback(pct, msg)
+            return result
+        # Shadow the bound method on this instance only.
+        self.update = new_update  # type: ignore[method-assign]
+    tqdm.__init__ = patched_init  # type: ignore[method-assign]
+    try:
+        yield
+    finally:
+        tqdm.__init__ = original_init  # type: ignore[method-assign]

app/core/training/hyperparam_suggester.py CHANGED Viewed

@@ -1,76 +1,67 @@
-"""Heuristic hyperparameter suggester for the Training tab's "Suggest" button.
-Given the dataset on disk and the current hardware, returns a config that
-trades off "small dataset, needs more updates per epoch" vs "big dataset,
-batch up for throughput", plus the practical VRAM ceilings of the LoRA path
-on Stable Audio Open 1.0. Returns the same shape the frontend `trainingConfig`
-uses, so Apply can spread the result into state directly.
 """
 from __future__ import annotations
-import json
-import os
-import subprocess
 from pathlib import Path
-from typing import Any, Dict, List, Optional
-AUDIO_EXTS = {".wav", ".mp3", ".flac", ".m4a"}
-# Cache file for total-duration measurement. ffprobe across 500 files takes
-# 10-30s; we don't want to pay that on every button click. Cache key is the
-# (file_count, max_mtime_int) pair — invalidates automatically when files
-# are added/removed/touched.
-_DURATION_CACHE_NAME = ".duration_cache.json"
 def _list_audio_files(data_dir: Path) -> List[Path]:
     if not data_dir.exists():
         return []
     return [
         p for p in data_dir.iterdir()
-        if p.is_file() and p.suffix.lower() in AUDIO_EXTS
     ]
-def _measure_total_duration(audio_files: List[Path], cache_path: Path) -> float:
-    if not audio_files:
-        return 0.0
-    file_count = len(audio_files)
-    max_mtime = int(max(p.stat().st_mtime for p in audio_files))
-    cache_key = f"{file_count}:{max_mtime}"
-    if cache_path.exists():
-        try:
-            cached = json.loads(cache_path.read_text())
-            if cached.get("key") == cache_key:
-                return float(cached["duration_sec"])
-        except Exception:
-            pass
-    total = 0.0
     for f in audio_files:
-        try:
-            out = subprocess.check_output(
-                ["ffprobe", "-v", "error", "-show_entries", "format=duration",
-                 "-of", "default=noprint_wrappers=1:nokey=1", str(f)],
-                text=True, timeout=10,
-            ).strip()
-            total += float(out)
-        except Exception:
-            # Skip files ffprobe can't read; better to under-report than crash.
-            continue
-    try:
-        cache_path.write_text(json.dumps({
-            "key": cache_key,
-            "duration_sec": total,
-        }))
-    except Exception:
-        pass
-    return total
 def _detect_vram_gb() -> Optional[float]:
@@ -83,6 +74,9 @@ def _detect_vram_gb() -> Optional[float]:
     return None
 def _bucket(file_count: int) -> str:
     if file_count < 20:
         return "tiny"
@@ -93,66 +87,136 @@ def _bucket(file_count: int) -> str:
     return "large"
-def _heuristic(file_count: int, vram_gb: Optional[float], mode: str) -> Dict[str, Any]:
-    """The rules-of-thumb. Same shape regardless of mode; the frontend ignores
-    LoRA-specific keys when mode='full'."""
     bucket = _bucket(file_count)
-    has_vram = vram_gb is not None
-    constrained = (has_vram and vram_gb < 12)
-    # Target total weight updates. Sublinear with dataset size so tiny sets
-    # still get enough gradient steps, while large sets don't run forever.
-    target_steps_by_bucket = {
-        "tiny":   2500,
-        "small":  2000,
-        "medium": 1500,
-        "large":  3000,
-    }
-    target_steps = target_steps_by_bucket[bucket]
-    # Rank/LR/alpha scale with how much "capacity per data point" the run needs.
-    # Small dataset trick: keep rank moderate (16) and conservative LR (1e-4 —
-    # 2e-4 caused overshoot/flat loss in testing), but boost alpha so the
-    # LoRA delta trains at higher effective voltage (scaling = alpha/rank).
-    # This produces a stronger imprint without the parameter bloat of rank=32
-    # or the instability of higher LR.
-    if bucket in ("tiny", "small"):
-        rank, alpha, lr = 16, 32, 1e-4
-    else:
-        rank, alpha, lr = 16, 16, 1e-4
-    # Batch size: smaller on small datasets (more updates per epoch + better
-    # gradient noise); larger on medium/large for throughput. VRAM caps the top.
-    if bucket == "tiny":
-        batch = 1 if constrained else 2
-    elif bucket == "small":
-        # Hold batch=2 even on roomy VRAM — the noise benefit on a small
-        # dataset outweighs the throughput win, and it keeps the epoch
-        # count to a reasonable display number.
-        batch = 2
-    elif bucket == "medium":
-        batch = 2 if constrained else 4
-    else:
-        batch = 4 if constrained else 8
-    steps_per_epoch = max(1, file_count // batch)
-    epochs = max(20, round(target_steps / steps_per_epoch))
     return {
         "batchSize": batch,
-        "learningRate": lr,
-        "epochs": epochs,
-        "loraRank": rank,
-        "loraAlpha": alpha,
-        "loraDropout": 0,
-        "loraMultiplier": 1.0,
         "_meta": {
             "bucket": bucket,
-            "target_steps": target_steps,
-            "steps_per_epoch": steps_per_epoch,
-            "total_steps": steps_per_epoch * epochs,
             "vram_constrained": constrained,
         },
     }
@@ -166,68 +230,162 @@ def _format_duration(seconds: float) -> str:
     return f"{m}m {s}s"
-def _compose_rationale(file_count: int, duration_sec: float, vram_gb: Optional[float],
-                       mode: str, meta: Dict[str, Any]) -> List[str]:
-    """Human-readable explanation, returned as a list of bullet strings."""
-    bullets = []
     bullets.append(
-        f"Dataset: {file_count} audio file{'s' if file_count != 1 else ''}, "
-        f"total {_format_duration(duration_sec)} → "
-        f"\"{meta['bucket']}\" bucket."
     )
     if vram_gb is not None:
-        constraint = "VRAM-constrained" if meta["vram_constrained"] else "comfortable VRAM headroom"
-        bullets.append(f"Detected GPU with {vram_gb:.1f} GB ({constraint}).")
     else:
-        bullets.append("No GPU detected — assuming consumer-class constraints.")
-    bullets.append(
-        f"Targeting ~{meta['target_steps']} weight updates total; with batch_size "
-        f"the dataset gives {meta['steps_per_epoch']} steps/epoch, so "
-        f"{meta['total_steps']} steps over the recommended epoch count."
-    )
-    if meta["bucket"] in ("tiny", "small"):
         bullets.append(
-            "Small dataset → conservative 1e-4 LR + rank=16 for stability, "
-            "but alpha=32 (alpha/rank = 2.0) so the LoRA delta trains at "
-            "double voltage. Stronger imprint without overshoot risk."
         )
     else:
         bullets.append(
-            "Larger dataset → moderate batch + standard 1e-4 LR. Rank=16 has "
-            "plenty of capacity for the prompt distribution this size implies."
         )
-    return bullets
-def suggest(data_dir: Path, mode: str = "lora") -> Dict[str, Any]:
-    """Public entry point. Returns the suggestion + a rationale + raw stats."""
     audio_files = _list_audio_files(data_dir)
     file_count = len(audio_files)
     if file_count == 0:
         return {
             "ok": False,
-            "error": f"No audio files found in {data_dir}",
         }
-    cache_path = data_dir / _DURATION_CACHE_NAME
-    duration_sec = _measure_total_duration(audio_files, cache_path)
     vram_gb = _detect_vram_gb()
-    suggestion = _heuristic(file_count, vram_gb, mode)
     meta = suggestion.pop("_meta")
-    rationale = _compose_rationale(file_count, duration_sec, vram_gb, mode, meta)
     return {
         "ok": True,
         "stats": {
             "file_count": file_count,
-            "duration_sec": duration_sec,
-            "duration_human": _format_duration(duration_sec),
             "vram_gb": round(vram_gb, 2) if vram_gb is not None else None,
             "bucket": meta["bucket"],
-            "steps_per_epoch": meta["steps_per_epoch"],
-            "total_steps": meta["total_steps"],
         },
         "config": suggestion,
-        "rationale": rationale,
     }

+"""SA3 LoRA hyperparameter suggester for the Training tab's "Suggest" button.
+Reads a Dataset Workbench project directly — counts SA3-compatible audio
+files, measures their durations via the same `soundfile.info()` header-only
+probe used elsewhere in the app, factors in the user's picked base model
+and detected GPU VRAM, and returns a config that:
+  * matches the upstream SA3 LoRA docs as the starting point
+    (see vendor/stable-audio-3/docs/workflows/lora.md)
+  * sets `--include transformer.layers` and `--exclude seconds_total
+    to_local_embed` by default (documented best practices, prevents the
+    "conditioner hijacking" failure mode on small datasets)
+  * picks a `-XS` adapter family when VRAM is tight for the chosen base
+  * proposes a `duration` derived from the actual clip lengths in the
+    project — not a hardcoded 30s
+  * warns when the dataset is below SA3's documented minimum (~20 clips)
+    or when clips are too short to learn from
+Returns the same shape the frontend `trainingConfig` uses, so Apply can
+spread the result into state directly.
 """
 from __future__ import annotations
+import math
 from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+from app.backend.data.projects import _clip_duration_sec
+from app.core.training.sa3_lora_runner import SA3_AUDIO_EXTENSIONS, SA3_BASE_MODELS
+# --- Discovery -------------------------------------------------------------
 def _list_audio_files(data_dir: Path) -> List[Path]:
+    """Files SA3's loader would actually train on. Mirrors the loader's filter."""
     if not data_dir.exists():
         return []
     return [
         p for p in data_dir.iterdir()
+        if p.is_file() and p.suffix.lower() in SA3_AUDIO_EXTENSIONS
     ]
+def _duration_stats(audio_files: List[Path]) -> Dict[str, Optional[float]]:
+    """Header-only duration probe + summary stats. None-safe for unreadable files."""
+    durations: List[float] = []
     for f in audio_files:
+        d = _clip_duration_sec(f)
+        if d is not None and d > 0:
+            durations.append(d)
+    if not durations:
+        return {"count": 0, "total": 0.0, "median": None, "p95": None, "max": None, "min": None}
+    durations.sort()
+    n = len(durations)
+    return {
+        "count": n,
+        "total": float(sum(durations)),
+        "median": float(durations[n // 2]),
+        "p95": float(durations[min(n - 1, int(math.ceil(0.95 * n)) - 1)]),
+        "max": float(durations[-1]),
+        "min": float(durations[0]),
+    }
 def _detect_vram_gb() -> Optional[float]:
     return None
+# --- Bucketing & sizing ----------------------------------------------------
 def _bucket(file_count: int) -> str:
     if file_count < 20:
         return "tiny"
     return "large"
+# SA3's documented quick-start: --steps 1000, with no dataset-size caveat.
+# (vendor/stable-audio-3/docs/workflows/lora.md, "Standard (recommended starting point)".)
+# SA3 trains by *windows seen*, not epochs, so a 5h dataset doesn't need more
+# steps than a 30min one — it just produces more diverse sampling per step.
+# We keep the SA3 default for tiny/small, and bump modestly only when a
+# dataset is large enough that 1000 steps won't see all unique windows.
+# `_heuristic` indexes this table directly with the name returned by
+# `_bucket`, so a bucket name without an entry here raises KeyError.
+_STEPS_BY_BUCKET: Dict[str, int] = {
+    "tiny":   1000,
+    "small":  1000,
+    "medium": 2000,
+    "large":  4000,
+}
+# Per-base-model VRAM table from SA3 docs. (standard_gb, xs_bf16_gb)
+# Source: docs/workflows/lora.md memory table.
+# `_pick_adapter` reads only the first figure of each pair and falls back to
+# (2.5, 2.0) for base models that have no entry here.
+_VRAM_REQ: Dict[str, Tuple[float, float]] = {
+    "sa3-small-music-base": (2.5, 2.0),
+    "sa3-small-sfx-base":   (2.5, 2.0),
+    "sa3-medium-base":      (6.5, 5.5),
+}
+def _pick_adapter(base_model: Optional[str], vram_gb: Optional[float]) -> Tuple[str, bool]:
+    """Choose adapter family. Returns (adapter_type, vram_constrained_flag).
+    SA3 docs recommend the `-xs` family + bf16 base precision for VRAM-limited
+    hosts. Headroom rule: standard_gb + 4 GB activations is the comfort target;
+    below that we pick the xs family.
+    """
+    default = "dora-rows"
+    if base_model is None or vram_gb is None:
+        return default, False
+    std_gb, _xs_gb = _VRAM_REQ.get(base_model, (2.5, 2.0))
+    comfort = std_gb + 4.0
+    constrained = vram_gb < comfort
+    return ("dora-rows-xs" if constrained else default), constrained
+def _model_max_window_sec(base_model: Optional[str]) -> float:
+    """SA3's native training length for the base, from its model config
+    sample_size / sample_rate: medium-base ≈380s, small bases ≈120s. The
+    `seconds_total` conditioner caps at 384s, so 380 is the safe medium ceiling.
+    Longer windows aren't a model limit below these — they're VRAM/time bound.
+    """
+    if base_model and "medium" in base_model:
+        return 380.0
+    return 120.0
+def _pick_duration(p95_clip_sec: Optional[float], base_model: Optional[str]) -> float:
+    """Set training window from the project's actual p95 clip length.
+    Floors at 5s; caps at — and defaults to — the model's native length
+    (≈120s small / ≈380s medium) rather than an arbitrary 30s. SA3 random-crops
+    longer files, so the only real limits are the model's sequence length and
+    VRAM. Rounds up p95 with 2s headroom so the window isn't cropping the tails
+    of typical clips. With no duration data, defaults to the model max.
+    """
+    model_max = _model_max_window_sec(base_model)
+    if p95_clip_sec is None or p95_clip_sec <= 0:
+        return model_max
+    suggested = math.ceil(p95_clip_sec + 2.0)
+    return float(max(5, min(model_max, suggested)))
+def _pick_batch_size(bucket: str, vram_gb: Optional[float]) -> int:
+    """SA3 examples all use batch 1. Only go higher on roomy hardware + big data.
+    24 GB threshold for batch 2 leaves enough headroom for medium-base + bf16
+    activations across two samples. Going beyond batch 2 hits diminishing
+    returns and risks OOM mid-run.
+    """
+    if vram_gb is None or vram_gb < 24:
+        return 1
+    if bucket in ("medium", "large"):
+        return 2
+    return 1
+# Filter pattern straight from SA3 docs:
+#   --include transformer.layers --exclude seconds_total to_local_embed
+# "Everything except local embedding and seconds_total conditioner" — prevents
+# the conditioner-hijacking failure mode that bites small datasets hardest.
+# `_heuristic` hands out copies (`list(...)`) of both lists, so a caller that
+# mutates a returned config does not alter these defaults.
+_INCLUDE_DEFAULT: List[str] = ["transformer.layers"]
+_EXCLUDE_DEFAULT: List[str] = ["seconds_total", "to_local_embed"]
+# --- Suggestion + rationale ------------------------------------------------
+def _heuristic(
+    file_count: int,
+    dur_stats: Dict[str, Optional[float]],
+    base_model: Optional[str],
+    vram_gb: Optional[float],
+) -> Dict[str, Any]:
+    """Assemble the suggested training config from dataset size, clip lengths and hardware.
+    Only the "p95" entry of ``dur_stats`` is read here. Learning rate, rank,
+    alpha and precision are fixed values; steps, adapter, window duration,
+    batch size, dropout and checkpoint cadence are derived. The returned dict
+    also carries a ``_meta`` sub-dict (bucket, target steps, VRAM flags)
+    alongside the config keys.
+    """
     bucket = _bucket(file_count)
+    steps = _STEPS_BY_BUCKET[bucket]
+    adapter, constrained = _pick_adapter(base_model, vram_gb)
+    duration = _pick_duration(dur_stats.get("p95"), base_model)
+    batch = _pick_batch_size(bucket, vram_gb)
+    # Mild dropout for tiny datasets only — extra regularization where overfit
+    # is most likely. SA3 default is 0.0; we deviate intentionally.
+    dropout = 0.05 if bucket == "tiny" else 0.0
+    # Checkpoint cadence: ~10 checkpoints per run, but keep within sane bounds
+    # so we don't write a checkpoint every 50 steps on tiny runs or sit on a
+    # 2K-step gap on long ones.
+    # The 250-step floor wins whenever steps < 2500, so 1000- and 2000-step
+    # runs checkpoint every 250 steps rather than every steps // 10.
+    checkpoint_every = max(250, min(1000, steps // 10))
     return {
+        "steps": steps,
         "batchSize": batch,
+        "learningRate": 1e-4,
+        "loraRank": 16,
+        "loraAlpha": 16,
+        "loraDropout": dropout,
+        "adapterType": adapter,
+        "precision": "bf16",
+        "duration": duration,
+        "checkpointSteps": checkpoint_every,
+        # Copies, so the module-level defaults cannot be mutated via the result.
+        "include": list(_INCLUDE_DEFAULT),
+        "exclude": list(_EXCLUDE_DEFAULT),
         "_meta": {
             "bucket": bucket,
+            "target_steps": steps,
             "vram_constrained": constrained,
+            # Same flag as "vram_constrained": the -xs adapter is picked
+            # exactly when the host is VRAM-constrained.
+            "picked_adapter_for_vram": constrained,
         },
     }
     return f"{m}m {s}s"
def _compose_rationale(
    file_count: int,
    dur_stats: Dict[str, Optional[float]],
    base_model: Optional[str],
    vram_gb: Optional[float],
    config: Dict[str, Any],
    meta: Dict[str, Any],
) -> Tuple[List[str], List[str]]:
    """Explain a suggested config as human-readable bullets plus warnings.

    Args:
        file_count: Number of audio clips in the dataset.
        dur_stats: Duration statistics; the "total", "p95" and "median" keys
            are read here and any of them may be missing or None.
        base_model: Model id used for the VRAM cross-check. Ids that are not
            keys of ``_VRAM_REQ`` skip that check.
        vram_gb: Detected GPU memory in GB, or None when no GPU was detected.
        config: The suggested training config being explained.
        meta: Bookkeeping dict; "bucket", "target_steps" and
            "vram_constrained" are read.

    Returns:
        ``(bullets, warnings)``. Warnings are surfaced separately in the UI.
    """
    bullets: List[str] = []
    warnings: List[str] = []

    total = dur_stats.get("total") or 0.0
    bullets.append(
        f"Dataset: {file_count} clip{'s' if file_count != 1 else ''}, "
        f"total {_format_duration(total)} → \"{meta['bucket']}\" bucket."
    )

    p95 = dur_stats.get("p95")
    median = dur_stats.get("median")
    if p95 is not None and median is not None:
        bullets.append(
            f"Clip durations: median {median:.1f}s, p95 {p95:.1f}s. "
            f"Training window set to {config['duration']:.0f}s."
        )

    if vram_gb is not None:
        bullets.append(
            f"Detected GPU: {vram_gb:.1f} GB"
            + (" (tight for the chosen base — switched adapter to a -XS variant)."
               if meta["vram_constrained"] else " (comfortable headroom).")
        )
    else:
        bullets.append("No CUDA GPU detected — adapter defaults to dora-rows; "
                       "training will run on CPU/MPS where supported.")

    if meta["target_steps"] == 1000:
        bullets.append(
            "Target 1 000 optimizer steps — SA3's documented quick-start. "
            "LoRAs typically overfit well before this; watch the loss curve."
        )
    else:
        bullets.append(
            f"Target {meta['target_steps']:,} optimizer steps — modest bump "
            f"above SA3's 1 000-step default for larger datasets to see more "
            "unique sampling windows."
        )

    # Show every include/exclude pattern, not just the first include entry;
    # joining also copes with an empty or missing list instead of raising.
    include_patterns = config.get("include") or []
    exclude_patterns = config.get("exclude") or []
    bullets.append(
        f"Layer filter: include `{' '.join(include_patterns)}`, exclude "
        f"`{' '.join(exclude_patterns)}`. "
        "Documented SA3 default — prevents conditioner-hijacking on small sets."
    )

    # Report the rank/alpha that are actually in the config so this bullet
    # cannot drift from the suggestion; 16 remains the fallback, and a
    # missing alpha mirrors the rank.
    rank = config.get("loraRank", 16)
    alpha = config.get("loraAlpha")
    if alpha is None:
        alpha = rank
    bullets.append(
        f"Adapter `{config['adapterType']}` · rank {rank} · α {alpha} · "
        f"dropout {config['loraDropout']} · {config['precision']} base."
    )

    # --- Warnings (separate channel) ---------------------------------------
    if file_count < 20:
        warnings.append(
            f"{file_count} clips is below SA3's documented minimum of ~20. "
            "Expect heavy overfit and poor generalization — add more data if you can."
        )
    if median is not None and median < 2.0:
        warnings.append(
            f"Median clip is only {median:.1f}s — most of the training window "
            f"({config['duration']:.0f}s) will be silence-padded. "
            "Re-slice the source material to longer chunks for better signal."
        )
    if config["duration"] > 45:
        warnings.append(
            f"Training window is {config['duration']:.0f}s. Longer windows use "
            "markedly more VRAM and step time (DiT attention scales with length). "
            "If you hit OOM, lower the window or pre-encode the dataset first."
        )

    # VRAM × base model crosscheck
    if base_model in _VRAM_REQ:
        std_gb, xs_gb = _VRAM_REQ[base_model]
        if vram_gb is None:
            if base_model == "sa3-medium-base":
                warnings.append(
                    "No CUDA GPU detected, but you picked Medium-Base. "
                    "Medium-base needs CUDA + Flash-Attn 2 (Linux) and ≥5.5 GB VRAM. "
                    "Consider Small-Music-Base or Small-SFX-Base for CPU/MPS hosts."
                )
        elif vram_gb < xs_gb:
            warnings.append(
                f"GPU has {vram_gb:.1f} GB; even {base_model} with bf16+lora-xs needs "
                f"~{xs_gb:.1f} GB. Training will likely OOM. Pick a smaller base."
            )
        elif vram_gb < std_gb:
            warnings.append(
                f"GPU has {vram_gb:.1f} GB; {base_model} standard config needs "
                f"~{std_gb:.1f} GB. The -XS adapter (selected) brings it to ~{xs_gb:.1f} GB."
            )
    return bullets, warnings
def _clip_has_caption(audio_path: Path) -> bool:
    """Return True when the clip's ``.txt`` sidecar exists and is not blank."""
    sidecar = audio_path.with_suffix(".txt")
    try:
        text = sidecar.read_text(encoding="utf-8", errors="ignore")
    except OSError:
        # Missing, unreadable, or not a regular file: for the purposes of the
        # coverage warning this is the same as having no caption.
        return False
    return bool(text.strip())


def suggest(data_dir: Path, base_model: Optional[str] = None) -> Dict[str, Any]:
    """Public entry point. SA3 is LoRA-only; no `mode` switch.

    Scans ``data_dir`` for audio, derives a suggested training config and
    explains it.

    Returns:
        ``{"ok": False, "error": ...}`` when no compatible audio is found;
        otherwise ``{"ok": True, "stats": ..., "config": ...,
        "rationale": [...], "warnings": [...]}``.
    """
    audio_files = _list_audio_files(data_dir)
    file_count = len(audio_files)
    if file_count == 0:
        return {
            "ok": False,
            "error": (
                f"No SA3-compatible audio in {data_dir}. SA3's loader accepts "
                + ", ".join(SA3_AUDIO_EXTENSIONS) + "."
            ),
        }

    dur_stats = _duration_stats(audio_files)
    vram_gb = _detect_vram_gb()
    suggestion = _heuristic(file_count, dur_stats, base_model, vram_gb)
    meta = suggestion.pop("_meta")
    bullets, warnings = _compose_rationale(
        file_count, dur_stats, base_model, vram_gb, suggestion, meta
    )

    # Caption coverage: SA3 trains on audio + matching .txt sidecars, and
    # silently drops clips whose prompt is blank. Surface missing captions so
    # the user isn't unknowingly training on a fraction of the dataset.
    uncaptioned = sum(1 for p in audio_files if not _clip_has_caption(p))
    if uncaptioned:
        warnings.insert(0, (
            f"{uncaptioned} of {file_count} clip{'s' if file_count != 1 else ''} "
            f"{'has' if uncaptioned == 1 else 'have'} no annotation. "
            "SA3 silently skips un-captioned clips at train "
            "time — annotate them first or they won't contribute to the LoRA."
        ))

    total_sec = dur_stats.get("total") or 0.0
    return {
        "ok": True,
        "stats": {
            "file_count": file_count,
            "duration_sec": total_sec,
            "duration_human": _format_duration(total_sec),
            "median_clip_sec": dur_stats.get("median"),
            "p95_clip_sec": dur_stats.get("p95"),
            "max_clip_sec": dur_stats.get("max"),
            "min_clip_sec": dur_stats.get("min"),
            "vram_gb": round(vram_gb, 2) if vram_gb is not None else None,
            "bucket": meta["bucket"],
            "total_steps": meta["target_steps"],
            "base_model": base_model,
        },
        "config": suggestion,
        "rationale": bullets,
        "warnings": warnings,
    }

app/core/training/sa3_lora_runner.py ADDED Viewed

	@@ -0,0 +1,331 @@

+"""Helpers for the SA3 LoRA training pipeline.
+Responsibilities:
+  * Pre-stage the base model in an app-folder HF cache so the training
+    subprocess finds it without falling back to ~/.cache/huggingface.
+  * Build the train_lora.py subprocess command + env.
+  * Convert PyTorch Lightning .ckpt LoRA outputs to SA3-native .safetensors
+    with the base_model and run name embedded in the metadata header.
+"""
+from __future__ import annotations
+import json
+import os
+import sys
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+# SA3 model_id → (sa3_name passed to train_lora.py --model, HF repo id)
+# Only `*-base` variants are valid LoRA targets — SA3 won't train against
+# the post-trained / distilled checkpoints.
+SA3_BASE_MODELS: Dict[str, Tuple[str, str]] = {
+    "sa3-small-music-base": ("small-music-base", "stabilityai/stable-audio-3-small-music-base"),
+    "sa3-small-sfx-base":   ("small-sfx-base",   "stabilityai/stable-audio-3-small-sfx-base"),
+    "sa3-medium-base":      ("medium-base",      "stabilityai/stable-audio-3-medium-base"),
+}
+# Each *-base config references its T5Gemma conditioner at a subfolder of the
+# *post-trained sibling* repo (e.g., medium-base's t5gemma lives at
+# stabilityai/stable-audio-3-medium / t5gemma-b-b-ul2/). Without that subtree
+# in the cache, training crashes inside the conditioner constructor when SA3
+# does `AutoTokenizer.from_pretrained(repo_id, subfolder=...)`.
+# Keep in sync with model_config.json's `conditioning.configs[0].config.repo_id`.
+SA3_T5GEMMA_SIBLINGS: Dict[str, Tuple[str, str]] = {
+    "sa3-small-music-base": ("stabilityai/stable-audio-3-small-music", "t5gemma-b-b-ul2"),
+    "sa3-small-sfx-base":   ("stabilityai/stable-audio-3-small-sfx",   "t5gemma-b-b-ul2"),
+    "sa3-medium-base":      ("stabilityai/stable-audio-3-medium",      "t5gemma-b-b-ul2"),
+}
+# Extensions SA3's training data loader actually accepts.
+# Source: vendor/stable-audio-3/stable_audio_3/data/dataset.py:91.
+# Single source of truth — both the health check and the hyperparam suggester
+# use this so what we count matches what the loader will train on.
+SA3_AUDIO_EXTENSIONS: Tuple[str, ...] = (".wav", ".mp3", ".flac", ".ogg", ".aif", ".opus")
+# --- Base model pre-staging -------------------------------------------------
def _snapshot_cache_first(
    repo_id: str,
    hub_dir: Path,
    token: Optional[str],
    allow_patterns: List[str],
    on_cache_miss: Optional[Any] = None,
) -> Tuple[str, bool]:
    """Resolve a repo snapshot from `hub_dir`, going online only on a miss.

    Returns ``(snapshot_path, served_from_cache)``. ``on_cache_miss`` (if
    given) is called with no arguments right before the online fetch.
    """
    from huggingface_hub import snapshot_download

    common = dict(
        repo_id=repo_id,
        cache_dir=str(hub_dir),
        token=token,
        allow_patterns=allow_patterns,
    )
    # Prefer cache. snapshot_download otherwise phones home on every run to
    # check the model's revision — wasteful and noisy when the user just
    # downloaded the weights through the Checkpoint Manager.
    try:
        return snapshot_download(local_files_only=True, **common), True
    except Exception:
        # Deliberately broad: whatever made the offline lookup fail, the
        # recovery is the same — try an online fetch.
        if on_cache_miss:
            on_cache_miss()
        return snapshot_download(**common), False


def prestage_base_model(
    sa3_model_id: str,
    hub_dir: Path,
    token: Optional[str] = None,
    progress_callback: Optional[Any] = None,
) -> Path:
    """Ensure the base model is in `hub_dir` (HF-cache layout, inside app folder).

    train_lora.py calls `model_cfg.resolve()` which is hf_hub_download under
    the hood — it reads from the HF cache root. We point it at hub_dir via
    the HF_HUB_CACHE env var on the subprocess; for that to actually find
    files we need to download into hub_dir using snapshot_download with
    `cache_dir=hub_dir`.

    The T5Gemma conditioner subfolder of the post-trained sibling repo is
    staged too, because SA3's *-base configs point the prompt conditioner
    there rather than at the base repo.

    Args:
        sa3_model_id: A key of ``SA3_BASE_MODELS``.
        hub_dir: Cache root to stage into; created if missing.
        token: Optional Hugging Face token forwarded to the download calls.
        progress_callback: Optional callable invoked as
            ``progress_callback(percent, message)``.

    Returns:
        Path of the base model's snapshot directory.

    Raises:
        ValueError: If ``sa3_model_id`` is not a known LoRA base.

    Cache-first: a snapshot that already resolves locally is returned
    without contacting the Hub.
    """
    if sa3_model_id not in SA3_BASE_MODELS:
        raise ValueError(
            f"'{sa3_model_id}' is not a valid LoRA base. Pick one of "
            f"{list(SA3_BASE_MODELS)} (only *-base variants are CFG-aware)."
        )
    sa3_name, repo_id = SA3_BASE_MODELS[sa3_model_id]
    hub_dir.mkdir(parents=True, exist_ok=True)

    def report(percent: int, message: str) -> None:
        if progress_callback:
            progress_callback(percent, message)

    report(5, f"Staging {sa3_name} base model in {hub_dir.name}/...")
    local_snap, from_cache = _snapshot_cache_first(
        repo_id,
        hub_dir,
        token,
        [
            "*.safetensors", "*.json", "*.txt", "*.model",
            "tokenizer*", "*.tiktoken",
        ],
        on_cache_miss=lambda: report(8, "Cache miss — fetching from HuggingFace…"),
    )
    report(15, "Base model ready (from cache)." if from_cache else "Base model ready.")

    # Pre-stage the T5Gemma conditioner from the post-trained sibling repo.
    # Without this subtree in the cache, the training subprocess
    # (HF_HUB_OFFLINE=1) crashes when AutoTokenizer.from_pretrained tries
    # to phone home.
    sibling = SA3_T5GEMMA_SIBLINGS.get(sa3_model_id)
    if sibling:
        sib_repo, sib_subfolder = sibling
        report(16, f"Staging T5Gemma conditioner from {sib_repo}…")
        _, sib_from_cache = _snapshot_cache_first(
            sib_repo,
            hub_dir,
            token,
            [f"{sib_subfolder}/*"],
            on_cache_miss=lambda: report(
                17, f"T5Gemma cache miss — fetching from {sib_repo}…"
            ),
        )
        report(
            18,
            "T5Gemma conditioner ready (from cache)."
            if sib_from_cache else "T5Gemma conditioner ready.",
        )
    return Path(local_snap)
+# --- Subprocess command builder ---------------------------------------------
def build_train_command(
    *,
    venv_python: str,
    sa3_vendor_dir: Path,
    sa3_model_name: str,
    data_dir: Path,
    encoded_dir: Optional[Path] = None,
    svd_bases_path: Optional[Path] = None,
    save_dir: Path,
    rank: int = 16,
    lora_alpha: Optional[int] = None,
    adapter_type: str = "dora-rows",
    dropout: float = 0.0,
    lr: float = 1e-4,
    steps: int = 5000,
    batch_size: int = 1,
    duration: float = 30.0,
    base_precision: str = "bf16",
    include: Optional[List[str]] = None,
    exclude: Optional[List[str]] = None,
    seed: int = 42,
    checkpoint_every: int = 500,
    log_every: int = 50,
    num_workers: int = 2,
    name: str = "fragmenta-lora",
) -> List[str]:
    """Assemble the argv list that launches SA3's ``train_lora.py``.

    The mandatory flags are always emitted in a fixed order; ``--encoded_dir``,
    ``--svd_bases_path``, ``--lora_alpha``, ``--include`` and ``--exclude``
    are appended only when their inputs are set.

    ``log_every`` defaults to 50, SA3's example value. Diffusion loss is
    intrinsically noisy (each step samples a random timestep), so logging
    every step gives a chart whose trend is hard to read; every 50 steps
    yields ~20 points for a 1000-step run.
    """
    entry_script = sa3_vendor_dir / "scripts" / "train_lora.py"
    always: List[Tuple[str, str]] = [
        ("--model", sa3_model_name),
        ("--data_dir", str(data_dir)),
        ("--save_dir", str(save_dir)),
        ("--rank", str(int(rank))),
        ("--adapter_type", adapter_type),
        ("--dropout", str(float(dropout))),
        ("--lr", str(float(lr))),
        ("--steps", str(int(steps))),
        ("--batch_size", str(int(batch_size))),
        ("--duration", str(float(duration))),
        ("--base_precision", base_precision),
        ("--seed", str(int(seed))),
        ("--checkpoint_every", str(int(checkpoint_every))),
        ("--log_every", str(int(log_every))),
        ("--num_workers", str(int(num_workers))),
        ("--name", name),
        ("--logger", "csv"),
        # Fragmenta's training monitor doesn't surface demo audio, so push
        # the demo cadence far beyond any real run length.
        ("--demo_every", "1000000"),
    ]

    argv: List[str] = [venv_python, str(entry_script)]
    for flag, value in always:
        argv.extend((flag, value))

    if encoded_dir is not None:
        # Pre-encoded latents: SA3 then uses PreEncodedDataset instead of
        # SampleDataset and skips the autoencoder pass on every step.
        argv.extend(("--encoded_dir", str(encoded_dir)))

    wants_svd_bases = svd_bases_path is not None and adapter_type.endswith("-xs")
    if wants_svd_bases:
        # Only the -XS adapters factor weights against precomputed SVD
        # bases. SA3 reads bases from this path but never writes them;
        # without the flag it recomputes the SVD per layer on device.
        argv.extend(("--svd_bases_path", str(svd_bases_path)))

    if lora_alpha is not None:
        argv.extend(("--lora_alpha", str(int(lora_alpha))))
    if include:
        argv.append("--include")
        argv.extend(include)
    if exclude:
        argv.append("--exclude")
        argv.extend(exclude)
    return argv
+# --- Checkpoint conversion (.ckpt → .safetensors with base_model metadata) ---
def convert_run_checkpoints_to_safetensors(
    run_dir: Path,
    base_model: str,
    model_name: Optional[str] = None,
    delete_originals: bool = True,
) -> List[Path]:
    """Convert PyTorch Lightning .ckpt files in a run's checkpoints/ directory
    to SA3's native .safetensors LoRA format, with `base_model` injected into
    the safetensors metadata header so /api/loras can filter by it.

    Why: SA3's `train_lora.py` writes Lightning .ckpt files. The inference
    LoRA picker (/api/loras) globs for *.safetensors only. Without this
    conversion, every trained LoRA is functionally orphaned — saved
    correctly to disk but invisible to the inference loader.

    Idempotent: skips any .ckpt whose .safetensors sibling already exists
    with a non-zero size. Each file is written to a temporary name and then
    moved into place, so an interrupted save cannot leave a truncated
    .safetensors that this check would later accept as complete.

    Args:
        run_dir: Run directory; checkpoints are looked up in
            ``run_dir / "checkpoints"``.
        base_model: Stored under the ``base_model`` metadata key.
        model_name: Optional run name stored under ``model_name``.
        delete_originals: Remove each .ckpt after a successful conversion.

    Returns:
        The sorted list of .safetensors paths (freshly written or already
        present). Empty when the checkpoints directory does not exist.
    """
    ckpt_dir = run_dir / "checkpoints"
    if not ckpt_dir.exists():
        return []

    import logging

    log = logging.getLogger(__name__)

    # Imports deferred so this module can be imported without the SA3 vendor
    # being on sys.path (e.g., during pure orchestrator construction).
    from app.core.config import get_config
    sa3_vendor = get_config().get_path("stable_audio_3")
    saved_sys_path = sys.path[:]
    if str(sa3_vendor) not in saved_sys_path:
        sys.path.insert(0, str(sa3_vendor))
    try:
        from stable_audio_3.models.lora.utils import load_lora_checkpoint
        from safetensors.torch import save_file as st_save_file
    finally:
        # Don't permanently mutate sys.path from a helper call.
        if sys.path != saved_sys_path:
            sys.path[:] = saved_sys_path

    written: List[Path] = []
    for ckpt_path in sorted(ckpt_dir.glob("*.ckpt")):
        out_path = ckpt_path.with_suffix(".safetensors")
        if out_path.exists() and out_path.stat().st_size > 0:
            # Already converted (older artifact or a previous pass). Just
            # bookkeep so the caller sees it in the return list.
            written.append(out_path)
            continue
        try:
            state_dict, lora_config = load_lora_checkpoint(ckpt_path)
        except Exception as exc:
            # Corrupt or truncated ckpt — skip rather than crash the
            # post-training pass, but leave a trace of what was skipped.
            log.warning("Skipping unreadable checkpoint %s: %s", ckpt_path, exc)
            continue

        # Top-level metadata is what /api/loras' safetensors reader inspects
        # directly. We also keep the canonical `lora_config` JSON blob so
        # SA3's own load_lora_checkpoint() can parse the file as-is.
        metadata = {
            "lora_config": json.dumps(lora_config or {}),
            "base_model": base_model,
        }
        if model_name:
            metadata["model_name"] = model_name

        # Cast fp16 to keep file sizes consistent with SA3's standard format.
        fp16_dict = {k: (v.half() if v.is_floating_point() else v)
                     for k, v in state_dict.items()}

        # Write under a name neither the *.ckpt nor the *.safetensors globs
        # match, then move it into place in one step.
        tmp_path = out_path.with_name(out_path.name + ".tmp")
        try:
            st_save_file(fp16_dict, str(tmp_path), metadata=metadata)
            os.replace(tmp_path, out_path)
        finally:
            # After a successful replace the temp name is gone; after a
            # failed save this removes the partial file.
            try:
                tmp_path.unlink()
            except OSError:
                pass

        if delete_originals:
            try:
                ckpt_path.unlink()
            except OSError:
                pass
        written.append(out_path)
    return sorted(written)
def build_train_env(sa3_vendor_dir: Path, hub_dir: Path) -> Dict[str, str]:
    """Build the training subprocess environment from the current one.

    The copy gets the SA3 vendor dir prepended to PYTHONPATH, every HF cache
    variable pointed at ``hub_dir``, HF/Transformers forced offline, and
    progress bars and WANDB switched off.
    """
    env = dict(os.environ)

    # Make `import stable_audio_3` work without pip-installing the package.
    vendor = str(sa3_vendor_dir)
    inherited = env.get("PYTHONPATH", "")
    env["PYTHONPATH"] = os.pathsep.join((vendor, inherited)) if inherited else vendor

    hub = str(hub_dir)
    env.update({
        # Pin the HF cache to the app-folder hub dir; otherwise
        # train_lora.py's model_cfg.resolve() would write into
        # ~/.cache/huggingface/hub. The legacy and transformers names are
        # set too, as defense-in-depth.
        "HF_HUB_CACHE": hub,
        "HUGGINGFACE_HUB_CACHE": hub,
        "TRANSFORMERS_CACHE": hub,
        "HF_HUB_DISABLE_PROGRESS_BARS": "1",
        "WANDB_DISABLED": "1",
        # The base model was pre-staged by prestage_base_model(), so any
        # remaining network call from the SA3 internals would only be a
        # noisy revision check against a cache we know is current.
        "HF_HUB_OFFLINE": "1",
        "TRANSFORMERS_OFFLINE": "1",
    })
    return env

app/core/training/sa3_trainer.py ADDED Viewed

	@@ -0,0 +1,839 @@

+"""SA3 LoRA training orchestrator — Phase 5.
+Public surface (matches what app/backend/app.py imports):
+    start_training(config)        -> dict
+    get_training_status()         -> dict
+    stop_training()               -> dict
+    preview_training_plan(config) -> dict
+    class SA3Trainer
+Training is dispatched as a subprocess running
+`vendor/stable-audio-3/scripts/train_lora.py`. Progress comes back through
+two channels:
+  * stdout/stderr from the subprocess (parsed for tqdm "step X/Y" lines)
+  * metrics.csv that train_lora.py writes under --save_dir
+Config shape (from the frontend training form):
+{
+    "modelName":       "my-lora",            # used for run dir name
+    "baseModel":       "sa3-medium-base",    # must end in -base
+    "projectName":     "my_first_track",     # Dataset Workbench project name
+    "steps":           5000,
+    "checkpointSteps": 500,                  # checkpoint cadence
+    "batchSize":       1,
+    "learningRate":    1.0e-4,
+    "duration":        30.0,                 # max clip seconds per sample
+    "loraRank":        16,
+    "loraAlpha":       16,                   # null → defaults to rank
+    "loraDropout":     0.0,
+    "adapterType":     "dora-rows",
+    "precision":       "bf16",               # bf16|fp16
+    "seed":            42,
+    "include":         null,                 # list[str] or null
+    "exclude":         null
+}
+"""
+from __future__ import annotations
+import csv
+import json
+import os
+import re
+import shlex
+import signal
+import subprocess
+import sys
+import threading
+import time
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+from app.backend.data.projects import project_path
+from app.core.config import get_config
+from app.core.training.sa3_lora_runner import (
+    SA3_BASE_MODELS,
+    build_train_command,
+    build_train_env,
+    convert_run_checkpoints_to_safetensors,
+    prestage_base_model,
+)
+from utils.logger import get_logger
+logger = get_logger("SA3Trainer")
+# --- Defaults --------------------------------------------------------------
+DEFAULT_STEPS = 5000
+DEFAULT_CHECKPOINT_STEPS = 500
+DEFAULT_BATCH_SIZE = 1
+DEFAULT_LR = 1e-4
+DEFAULT_DURATION = 30.0
+DEFAULT_RANK = 16
+DEFAULT_ADAPTER = "dora-rows"
+DEFAULT_PRECISION = "bf16"
+# --- SA3Trainer singleton --------------------------------------------------
+class SA3Trainer:
+    def __init__(self, config: Dict[str, Any]) -> None:
+        self.config: Dict[str, Any] = config or {}
+        self.process: Optional[subprocess.Popen] = None
+        self.run_dir: Optional[Path] = None
+        self.metrics_csv: Optional[Path] = None
+        self._monitor_thread: Optional[threading.Thread] = None
+        self.status: Dict[str, Any] = {
+            "is_training": False,
+            "status": "idle",
+            "step": 0,
+            "total_steps": 0,
+            "loss": None,
+            "message": "",
+            "started_at": None,
+            "ended_at": None,
+            "log_tail": [],          # last ~50 stdout lines
+            "checkpoints": [],       # safetensors written so far
+            "error": None,
+        }
+    # --- Public API --------------------------------------------------------
+    def start(self) -> Dict[str, Any]:
+        # Fresh run on this trainer — clear any stop flag from a prior run.
+        self._stop_requested = False
+        # Mark training as in-flight BEFORE any blocking work. /api/start-training
+        # can block for tens of seconds (T5Gemma sibling fetch, base-model
+        # prestaging) — during that window the frontend polls
+        # /api/training-status and would otherwise see is_training=False from
+        # the __init__ default and interpret it as "training complete".
+        self.status.update({
+            "is_training": True,
+            "status": "staging",
+            "started_at": time.time(),
+            "ended_at": None,
+            "step": 0,
+            "total_steps": int(self.config.get("steps") or DEFAULT_STEPS),
+            "loss": None,
+            "error": None,
+            "checkpoints": [],
+            # Surface the concrete seed (the backend rolls a random one when the
+            # UI requests it) so the user can reproduce a run they liked.
+            "seed": (int(self.config["seed"]) if self.config.get("seed") is not None else None),
+            "message": "Preparing dataset and base model…",
+        })
+        try:
+            self._maybe_wipe_run_dir()
+            self._resolve_paths()
+            self._stage_dataset()
+            self._stage_base_model()
+            cmd, env = self._build_invocation()
+            self._spawn(cmd, env)
+            logger.info(
+                "Training started · project=%s · base=%s · adapter=%s · "
+                "rank=%s · steps=%s · batch=%s · lr=%s · duration=%ss",
+                self.config.get("projectName"),
+                self.config.get("baseModel"),
+                self.config.get("adapterType") or DEFAULT_ADAPTER,
+                self.config.get("loraRank") or DEFAULT_RANK,
+                self.config.get("steps") or DEFAULT_STEPS,
+                self.config.get("batchSize") or DEFAULT_BATCH_SIZE,
+                self.config.get("learningRate") or DEFAULT_LR,
+                self.config.get("duration") or DEFAULT_DURATION,
+            )
+            return {"success": True, "run_dir": str(self.run_dir)}
+        except Exception as e:
+            self.status["error"] = str(e)
+            self.status["status"] = "failed"
+            self.status["is_training"] = False
+            self.status["ended_at"] = time.time()
+            logger.error("Training failed to start: %s", e)
+            return {"error": str(e)}
+    def get_status(self) -> Dict[str, Any]:
+        # Snapshot + add a few derived fields the frontend already reads, so
+        # the polling loop in App.js doesn't have to know about both names.
+        # SA3 is step-based; we no longer expose `current_epoch`.
+        # If the on-disk checkpoint count looks stale (run finished, glob
+        # ran with the old filter, no live files surfaced), rescan once
+        # lazily so the UI catches up without needing a backend restart.
+        if not self.status.get("checkpoints") and self.run_dir is not None:
+            ckpt_dir = self.run_dir / "checkpoints"
+            if ckpt_dir.exists() and any(ckpt_dir.glob("*.ckpt")):
+                self._scan_checkpoints()
+        s = dict(self.status)
+        total = int(s.get("total_steps") or 0)
+        step = int(s.get("step") or 0)
+        s["current_step"] = step
+        s["progress"] = int(round(100 * step / total)) if total > 0 else 0
+        s["checkpoints_saved"] = len(s.get("checkpoints") or [])
+        return s
+    def stop(self) -> Dict[str, Any]:
+        if not self.process or self.process.poll() is not None:
+            return {"error": "Nothing to stop — no active training run."}
+        try:
+            # Flag the stop so the monitor thread labels the exit "stopped"
+            # rather than "failed" — SIGINT doesn't yield a stable rc==-2.
+            self._stop_requested = True
+            self.process.send_signal(signal.SIGINT)
+            try:
+                self.process.wait(timeout=10)
+            except subprocess.TimeoutExpired:
+                self.process.terminate()
+                try:
+                    self.process.wait(timeout=5)
+                except subprocess.TimeoutExpired:
+                    self.process.kill()
+            self.status["status"] = "stopped"
+            self.status["is_training"] = False
+            self.status["ended_at"] = time.time()
+            return {"success": True}
+        except Exception as e:
+            return {"error": str(e)}
+    def preview_plan(self) -> Dict[str, Any]:
+        try:
+            self._resolve_paths(create_dirs=False)
+        except FileNotFoundError as e:
+            return {"error": str(e)}
+        steps = int(self.config.get("steps") or DEFAULT_STEPS)
+        ckpt_every = int(self.config.get("checkpointSteps") or DEFAULT_CHECKPOINT_STEPS)
+        ckpts = max(1, steps // max(1, ckpt_every))
+        proj_name = self.config.get("projectName") or self.config.get("project_name")
+        data_dir = str(project_path(proj_name)) if proj_name else None
+        return {
+            "model_name": self.config.get("modelName", "fragmenta-lora"),
+            "base_model": self.config.get("baseModel"),
+            "project_name": proj_name,
+            "data_dir": data_dir,
+            "save_dir": str(self.run_dir / "checkpoints") if self.run_dir else None,
+            "steps": steps,
+            "checkpoint_every": ckpt_every,
+            "expected_checkpoints": ckpts,
+            "rank": int(self.config.get("loraRank") or DEFAULT_RANK),
+            "alpha": int(self.config.get("loraAlpha") or self.config.get("loraRank") or DEFAULT_RANK),
+            "adapter_type": self.config.get("adapterType") or DEFAULT_ADAPTER,
+            "batch_size": int(self.config.get("batchSize") or DEFAULT_BATCH_SIZE),
+            "lr": float(self.config.get("learningRate") or DEFAULT_LR),
+            "duration": float(self.config.get("duration") or DEFAULT_DURATION),
+            "precision": self.config.get("precision") or DEFAULT_PRECISION,
+        }
+    # --- Internals ---------------------------------------------------------
+    def _resolve_paths(self, create_dirs: bool = True) -> None:
+        cfg = get_config()
+        run_name = self._safe_name(self.config.get("modelName") or "lora-run")
+        self.run_dir = cfg.get_path("models_fine_tuned") / run_name
+        # Lightning's CSVLogger writes metrics.csv under
+        # `<save_dir>/lightning_logs/version_X/metrics.csv`. We don't know X
+        # upfront, so leave this unset and let _scrape_loss_history /
+        # _scrape_csv_loss rglob for it the first time they're called.
+        self.metrics_csv = None
+        if create_dirs:
+            self.run_dir.mkdir(parents=True, exist_ok=True)
+            (self.run_dir / "checkpoints").mkdir(exist_ok=True)
+    @classmethod
+    def existing_run_info(cls, model_name: str) -> Optional[Dict[str, Any]]:
+        """Look up an existing run dir for a given LoRA name. Returns a dict
+        of countable artifacts if the dir exists with content, else None.
+        Used by /api/start-training to refuse a same-name run unless the
+        caller explicitly opts in to overwrite. Counts only *.ckpt and
+        *.safetensors so a half-set-up dir with only a metadata file
+        doesn't trip the prompt.
+        """
+        import shutil  # noqa: F401  # ensures shutil resolves if user calls _maybe_wipe later
+        cfg = get_config()
+        run_name = cls._safe_name(model_name or "lora-run")
+        run_dir = cfg.get_path("models_fine_tuned") / run_name
+        if not run_dir.exists():
+            return None
+        ckpt_dir = run_dir / "checkpoints"
+        files = []
+        if ckpt_dir.exists():
+            for ext in ("*.safetensors", "*.ckpt"):
+                files.extend(ckpt_dir.glob(ext))
+        if not files and not (run_dir / "training.log").exists():
+            return None
+        return {
+            "run_dir": str(run_dir),
+            "run_name": run_name,
+            "checkpoint_count": len(files),
+            "has_log": (run_dir / "training.log").exists(),
+        }
+    def _maybe_wipe_run_dir(self) -> None:
+        """Honor the `overwrite` flag — wipe the run dir before staging."""
+        if not self.config.get("overwrite"):
+            return
+        cfg = get_config()
+        run_name = self._safe_name(self.config.get("modelName") or "lora-run")
+        run_dir = cfg.get_path("models_fine_tuned") / run_name
+        if run_dir.exists():
+            import shutil
+            shutil.rmtree(run_dir)
+            logger.info("Cleared existing run dir before restart: %s", run_dir)
+    def _stage_dataset(self) -> None:
+        """Resolve --data_dir from a Dataset Workbench project.
+        Training reads the committed `.txt` sidecars sitting next to each
+        audio file inside `<projects_dir>/<projectName>/`. The Workbench's
+        "Create Dataset" action materialised those sidecars; we don't
+        rewrite anything here.
+        """
+        project_name = self.config.get("projectName") or self.config.get("project_name")
+        if not project_name:
+            raise FileNotFoundError(
+                "projectName is required. Pick a project in the Training "
+                "tab's Dataset picker before starting a run."
+            )
+        proj_dir = project_path(project_name)
+        if not proj_dir.exists():
+            raise FileNotFoundError(f"project not found: {project_name}")
+        sidecars = list(proj_dir.glob("*.txt"))
+        if not sidecars:
+            raise RuntimeError(
+                f"project “{project_name}” has no committed prompts yet — "
+                "annotate the clips and click Create Dataset, then retry."
+            )
+        # SA3's caption_metadata_fn rejects clips whose sidecar is empty,
+        # so they silently drop out of the training set. Count them upfront
+        # so the user knows what they're actually training on (and refuse
+        # to start if NONE have prompts — that would just waste GPU hours).
+        non_empty = [p for p in sidecars if p.read_text(encoding="utf-8").strip()]
+        if not non_empty:
+            raise RuntimeError(
+                f"project “{project_name}” has {len(sidecars)} clip(s) but every "
+                "sidecar is empty — SA3 will reject all of them. Annotate at "
+                "least one clip and re-commit before training."
+            )
+        blank = len(sidecars) - len(non_empty)
+        if blank > 0:
+            logger.warning(
+                "%d of %d clip(s) in project '%s' have empty prompts — "
+                "SA3 will silently drop them. Training on %d clip(s).",
+                blank, len(sidecars), project_name, len(non_empty),
+            )
+            self.status["log_tail"].append(
+                f"Warning: {blank}/{len(sidecars)} clips have empty prompts and "
+                "will be dropped by SA3's data loader."
+            )
+        self.status["log_tail"].append(
+            f"Dataset: project '{project_name}' · {len(non_empty)} usable clip(s) · {proj_dir}"
+        )
+        self._data_dir = proj_dir
+        # Phase 6 — opt into pre-encoded latents if a compatible .latents/
+        # cache exists. SA3's `train_lora.py --encoded_dir` then skips the
+        # autoencoder pass per step. The cache is AE-bound (same-s vs
+        # same-l) so we verify the manifest matches the picked base before
+        # using it — otherwise we'd feed the DiT mis-shaped latents.
+        self._encoded_dir: Optional[Path] = None
+        try:
+            from app.backend.data.pre_encoder import (
+                latents_dir, latents_count, latents_match_base,
+            )
+            ldir = latents_dir(project_name)
+            base_model = self.config.get("baseModel")
+            if ldir.exists() and latents_count(project_name) > 0:
+                if latents_match_base(project_name, base_model):
+                    self._encoded_dir = ldir
+                    self.status["log_tail"].append(
+                        f"Using pre-encoded latents: {latents_count(project_name)} "
+                        f"file(s) · {ldir}"
+                    )
+                    logger.info(
+                        "Pre-encoded latents detected for project '%s' (%d files) — "
+                        "skipping SAME autoencoder per step.",
+                        project_name, latents_count(project_name),
+                    )
+                else:
+                    logger.warning(
+                        "Pre-encoded latents exist for project '%s' but were "
+                        "produced by a different autoencoder than the chosen "
+                        "base (%s) — falling back to live encoding.",
+                        project_name, base_model,
+                    )
+                    self.status["log_tail"].append(
+                        f"Note: project has cached latents but they're for a "
+                        f"different autoencoder than {base_model}. Training "
+                        "will re-encode audio per step."
+                    )
+        except Exception as exc:
+            logger.warning("Pre-encoded latents probe failed: %s", exc)
+    def _stage_base_model(self) -> None:
+        cfg = get_config()
+        base_model = self.config.get("baseModel")
+        if base_model not in SA3_BASE_MODELS:
+            raise ValueError(
+                f"baseModel must be one of {list(SA3_BASE_MODELS)}. "
+                "Post-trained checkpoints (no -base suffix) can't be used "
+                "as a LoRA training base — CFG distillation has collapsed "
+                "the gradient signal LoRAs target."
+            )
+        hub_dir = cfg.get_path("models_pretrained") / "sa3" / "hub"
+        try:
+            from huggingface_hub import get_token
+            token = get_token()
+        except Exception:
+            token = None
+        def _cb(pct: int, msg: str) -> None:
+            self.status["message"] = msg
+            self.status["log_tail"].append(f"[stage] {msg}")
+            # Mirror to the project logger so the terminal shows what's
+            # happening during long blocking operations (e.g. first-time
+            # T5Gemma sibling fetch can take ~30s on medium-base).
+            logger.info("[stage] %s", msg)
+        prestage_base_model(base_model, hub_dir, token=token, progress_callback=_cb)
+        self._hub_dir = hub_dir
+    def _build_invocation(self):
+        cfg = get_config()
+        sa3_vendor = cfg.get_path("stable_audio_3")
+        sa3_name, _repo = SA3_BASE_MODELS[self.config["baseModel"]]
+        # Use the Fragmenta venv's python so we share installed packages.
+        venv_python = sys.executable
+        precision_raw = (self.config.get("precision") or DEFAULT_PRECISION).lower()
+        precision = "bf16" if precision_raw in ("bf16", "bfloat16", "auto", "") else "fp16"
+        include = self.config.get("include")
+        if include and isinstance(include, str):
+            include = shlex.split(include)
+        exclude = self.config.get("exclude")
+        if exclude and isinstance(exclude, str):
+            exclude = shlex.split(exclude)
+        adapter_type = self.config.get("adapterType") or DEFAULT_ADAPTER
+        # -XS adapters can reuse a precomputed SVD-bases cache keyed by base
+        # model, skipping the per-layer SVD at startup. SA3 only loads (never
+        # writes) this file, so we pass it only when present; population is a
+        # manual/precompute step. Ensure the dir exists so it's discoverable.
+        svd_bases_path = None
+        if adapter_type.endswith("-xs"):
+            svd_cache_dir = get_config().get_path("models_fine_tuned") / ".svd_cache"
+            svd_cache_dir.mkdir(parents=True, exist_ok=True)
+            candidate = svd_cache_dir / f"{self.config['baseModel']}.pt"
+            if candidate.exists():
+                svd_bases_path = candidate
+        cmd = build_train_command(
+            venv_python=venv_python,
+            sa3_vendor_dir=sa3_vendor,
+            sa3_model_name=sa3_name,
+            data_dir=self._data_dir,
+            encoded_dir=getattr(self, "_encoded_dir", None),
+            svd_bases_path=svd_bases_path,
+            save_dir=self.run_dir / "checkpoints",
+            rank=int(self.config.get("loraRank") or DEFAULT_RANK),
+            lora_alpha=self.config.get("loraAlpha"),
+            adapter_type=adapter_type,
+            dropout=float(self.config.get("loraDropout") or 0.0),
+            lr=float(self.config.get("learningRate") or DEFAULT_LR),
+            steps=int(self.config.get("steps") or DEFAULT_STEPS),
+            batch_size=int(self.config.get("batchSize") or DEFAULT_BATCH_SIZE),
+            # Default to AND clamp at the base model's native training length
+            # (medium ≈380s, small ≈120s) — SA3's DiT tops out at 4096 latent
+            # tokens, so a longer window would exceed the model, not just cost
+            # VRAM. A missing duration defaults to the model max.
+            duration=min(
+                float(self.config.get("duration") or (380.0 if "medium" in sa3_name else 120.0)),
+                380.0 if "medium" in sa3_name else 120.0,
+            ),
+            base_precision=precision,
+            include=include,
+            exclude=exclude,
+            seed=(int(self.config["seed"]) if self.config.get("seed") is not None else 42),
+            checkpoint_every=int(self.config.get("checkpointSteps") or DEFAULT_CHECKPOINT_STEPS),
+            name=self.config.get("modelName") or "fragmenta-lora",
+        )
+        env = build_train_env(sa3_vendor, self._hub_dir)
+        return cmd, env
+    def _spawn(self, cmd: List[str], env: Dict[str, str]) -> None:
+        log_path = self.run_dir / "training.log"
+        rank = int(self.config.get("loraRank") or DEFAULT_RANK)
+        alpha_cfg = self.config.get("loraAlpha")
+        alpha = int(alpha_cfg) if alpha_cfg not in (None, "") else rank
+        # Stamp training_metadata.json so /api/loras can find the base_model
+        # if the embedded safetensors metadata is missing it (legacy paths).
+        (self.run_dir / "training_metadata.json").write_text(json.dumps({
+            "mode": "lora",
+            "engine": "sa3",
+            "base_model": self.config.get("baseModel"),
+            "model_name": self.config.get("modelName"),
+            "started_at": time.time(),
+            "lora_config": {
+                "rank": rank,
+                "alpha": alpha,
+                "adapter_type": self.config.get("adapterType") or DEFAULT_ADAPTER,
+                "dropout": float(self.config.get("loraDropout") or 0.0),
+            },
+            "steps": int(self.config.get("steps") or DEFAULT_STEPS),
+            "lr": float(self.config.get("learningRate") or DEFAULT_LR),
+            "batch_size": int(self.config.get("batchSize") or DEFAULT_BATCH_SIZE),
+        }, indent=2))
+        self.status.update({
+            "is_training": True,
+            "status": "running",
+            "step": 0,
+            "total_steps": int(self.config.get("steps") or DEFAULT_STEPS),
+            "loss": None,
+            "error": None,
+            "started_at": time.time(),
+            "ended_at": None,
+            "checkpoints": [],
+            "message": "Starting training subprocess...",
+        })
+        self.process = subprocess.Popen(
+            cmd,
+            cwd=str(get_config().project_root),
+            env=env,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            text=True,
+            bufsize=1,
+        )
+        self._monitor_thread = threading.Thread(
+            target=self._monitor,
+            args=(log_path,),
+            daemon=True,
+            name=f"sa3-train-monitor:{self.run_dir.name}",
+        )
+        self._monitor_thread.start()
+    def _monitor(self, log_path: Path) -> None:
+        """Pull stdout, parse PyTorch Lightning progress, scrape loss, watch checkpoints.
+        SA3 trains via PL whose default progress bar emits *per-epoch* step
+        counts ("Epoch 6: 50%|...| 25/50 [00:07<00:07, 3.36it/s, train/loss=0.559]").
+        We derive the global step as `epoch * batches_per_epoch + step_in_epoch`,
+        capture `batches_per_epoch` from the first such line (it's stable across
+        epochs since SampleDataset returns a fixed length), and clamp the
+        result to the configured max_steps so the percentage doesn't go past
+        100 if the final epoch overruns.
+        Runs on the monitor thread started by `_spawn`. Every stdout line is
+        copied to `log_path` and to `self.status["log_tail"]`. Once the
+        stream ends, the final status ("complete", "stopped" or "failed")
+        is set, checkpoints are converted to .safetensors on a best-effort
+        basis, and a summary line is logged.
+        """
+        epoch_pat = re.compile(r"Epoch\s+(\d+):")
+        in_epoch_pat = re.compile(r"\|\s*(\d+)/(\d+)\b")  # tqdm's "current/total"
+        loss_pat = re.compile(r"train/loss=([\d.eE+\-]+)")
+        speed_pat = re.compile(r"([\d.]+)it/s")
+        last_log_flush = time.time()
+        last_ckpt_scan = 0.0
+        last_terminal_log = 0.0
+        last_logged_step = -1
+        prev_ckpt_count = 0
+        current_epoch = 0
+        batches_per_epoch = 0
+        try:
+            # Mode "w" truncates any earlier training.log in this run dir.
+            with open(log_path, "w") as logf:
+                if self.process and self.process.stdout:
+                    for line in self.process.stdout:
+                        line = line.rstrip()
+                        logf.write(line + "\n")
+                        # Flush the log file at most about once per second.
+                        if time.time() - last_log_flush > 1:
+                            logf.flush()
+                            last_log_flush = time.time()
+                        self.status["log_tail"].append(line)
+                        # Cap the in-memory tail: once it passes 80 lines,
+                        # keep only the newest 50.
+                        if len(self.status["log_tail"]) > 80:
+                            self.status["log_tail"] = self.status["log_tail"][-50:]
+                        # Only parse the step counter on lines that ARE the
+                        # training progress bar (prefixed with "Epoch N:"),
+                        # so unrelated tqdm bars during startup (e.g.
+                        # "Loading checkpoint shards: 9/9") don't pollute
+                        # batches_per_epoch.
+                        m_epoch = epoch_pat.search(line)
+                        if m_epoch:
+                            current_epoch = int(m_epoch.group(1))
+                            m_step = in_epoch_pat.search(line)
+                            if m_step:
+                                cur_in_epoch = int(m_step.group(1))
+                                per_epoch = int(m_step.group(2))
+                                if per_epoch > 0 and batches_per_epoch == 0:
+                                    batches_per_epoch = per_epoch
+                                if batches_per_epoch > 0:
+                                    global_step = current_epoch * batches_per_epoch + cur_in_epoch
+                                    max_steps = self.status.get("total_steps") or 0
+                                    if max_steps > 0:
+                                        global_step = min(global_step, max_steps)
+                                    # Never move the reported step backwards.
+                                    if global_step > self.status.get("step", 0):
+                                        self.status["step"] = global_step
+                        m_loss = loss_pat.search(line)
+                        if m_loss:
+                            try:
+                                self.status["loss"] = float(m_loss.group(1))
+                            except ValueError:
+                                pass
+                        # Live checkpoint enumeration + loss history scrape.
+                        # Lightning writes *.ckpt every N steps; we want the
+                        # count to climb in the UI as files appear, not only
+                        # at end-of-run. Bucketed to ~2s so we don't pound
+                        # the FS. The loss history scrape backfills step
+                        # 0..49 from metrics.csv since PL's stdout postfix
+                        # doesn't show train/loss until end-of-epoch-0.
+                        now = time.time()
+                        if now - last_ckpt_scan > 2.0:
+                            last_ckpt_scan = now
+                            self._scan_checkpoints()
+                            self._scrape_loss_history()
+                            cur_ckpt_count = len(self.status.get("checkpoints") or [])
+                            if cur_ckpt_count > prev_ckpt_count:
+                                logger.info(
+                                    "Checkpoint saved · %d total · run=%s",
+                                    cur_ckpt_count, self.run_dir.name,
+                                )
+                                prev_ckpt_count = cur_ckpt_count
+                        # Throttled progress to the backend terminal log.
+                        # Lightning emits step lines ~3× per second; we
+                        # condense to one tidy summary every 5s. Omit the
+                        # loss segment when we don't have a value yet (the
+                        # CSV scrape runs every 2s but PL may not have
+                        # logged anything during the very first second).
+                        cur_step = self.status.get("step") or 0
+                        if (cur_step > last_logged_step
+                                and now - last_terminal_log >= 5.0):
+                            total = self.status.get("total_steps") or 0
+                            loss = self.status.get("loss")
+                            pct = round(100 * cur_step / total) if total > 0 else 0
+                            speed_m = speed_pat.search(line)
+                            parts = [f"step {cur_step}/{total} ({pct}%)"]
+                            if isinstance(loss, (int, float)):
+                                parts.append(f"loss {loss:.4f}")
+                            if speed_m:
+                                parts.append(f"{speed_m.group(1)} it/s")
+                            logger.info(" · ".join(parts))
+                            last_terminal_log = now
+                            last_logged_step = cur_step
+                # stdout reached EOF: collect the exit code (1 when no
+                # process was ever attached).
+                rc = self.process.wait() if self.process else 1
+        except Exception as e:
+            # Any failure while reading or parsing is recorded and treated
+            # as a non-zero exit (rc = -1).
+            self.status["error"] = str(e)
+            rc = -1
+        self.status["ended_at"] = time.time()
+        self.status["is_training"] = False
+        # A user-requested stop wins regardless of the exit code (SIGINT can
+        # surface as various negative/non-zero codes across platforms).
+        if getattr(self, "_stop_requested", False):
+            self.status["status"] = "stopped"
+        else:
+            self.status["status"] = "complete" if rc == 0 else "failed"
+        if self.status["status"] == "failed" and not self.status.get("error"):
+            self.status["error"] = f"train_lora.py exited with code {rc}"
+        # Convert PyTorch Lightning .ckpt files to SA3's native .safetensors
+        # LoRA format — the inference loader (/api/loras) only sees
+        # .safetensors, so unconverted .ckpt files would be functionally
+        # orphaned. We also inject `base_model` into the safetensors header
+        # so /api/loras' metadata filter passes without a JSON fallback.
+        # Best-effort: failure here doesn't fail the run.
+        if self.status["status"] in ("complete", "stopped") and self.run_dir:
+            try:
+                produced = convert_run_checkpoints_to_safetensors(
+                    self.run_dir,
+                    base_model=self.config.get("baseModel"),
+                    model_name=self.config.get("modelName"),
+                )
+                if produced:
+                    logger.info(
+                        "Converted %d checkpoint(s) to .safetensors · run=%s",
+                        len(produced), self.run_dir.name,
+                    )
+            except Exception as exc:
+                logger.warning("Checkpoint conversion failed: %s", exc)
+        # Final pass: enumerate written checkpoints + full loss history +
+        # latest single-value loss.
+        self._scan_checkpoints()
+        self._scrape_loss_history()
+        self._scrape_csv_loss()
+        final_step = self.status.get("step") or 0
+        final_total = self.status.get("total_steps") or 0
+        final_loss = self.status.get("loss")
+        final_ckpts = len(self.status.get("checkpoints") or [])
+        loss_str = f"{final_loss:.4f}" if isinstance(final_loss, (int, float)) else "—"
+        if self.status["status"] == "complete":
+            logger.info(
+                "Training complete · %d/%d steps · final loss %s · %d checkpoint(s) · run=%s",
+                final_step, final_total, loss_str, final_ckpts, self.run_dir.name,
+            )
+        elif self.status["status"] == "stopped":
+            logger.info(
+                "Training stopped at step %d/%d · %d checkpoint(s) · run=%s",
+                final_step, final_total, final_ckpts, self.run_dir.name,
+            )
+        else:
+            logger.error(
+                "Training failed (exit %s) · %d/%d steps · error: %s · run=%s",
+                rc, final_step, final_total, self.status.get("error"), self.run_dir.name,
+            )
+    def _scrape_loss_history(self) -> None:
+        """Refresh self.status['loss_history'] from Lightning's metrics.csv.
+        PL's tqdm postfix only surfaces `train/loss=` *after* the first
+        metrics flush (typically end-of-epoch-0), so step 0..49 of a fresh
+        run never appear in stdout. metrics.csv, on the other hand, has
+        per-step rows from step 0 — we just need to read it.
+        Cheap: even at 10K steps a CSV scan is sub-10ms. Skipped silently
+        if the file hasn't been created yet (early in the run, before PL's
+        CSVLogger flushes anything).
+        """
+        if not self.metrics_csv or not self.metrics_csv.exists():
+            # CSVLogger writes under <save_dir>/lightning_logs/version_*/
+            if self.run_dir:
+                for p in (self.run_dir / "checkpoints").rglob("metrics.csv"):
+                    self.metrics_csv = p
+                    break
+        if not self.metrics_csv or not self.metrics_csv.exists():
+            return
+        try:
+            with open(self.metrics_csv) as f:
+                rows = list(csv.DictReader(f))
+        except Exception:
+            return
+        points: List[Dict[str, Any]] = []
+        loss_keys = ("train/loss", "loss", "train_loss")
+        for row in rows:
+            step_raw = row.get("step")
+            if step_raw in (None, ""):
+                continue
+            try:
+                step = int(step_raw)
+            except ValueError:
+                continue
+            for k in loss_keys:
+                v = row.get(k)
+                if v not in (None, ""):
+                    try:
+                        points.append({"step": step, "loss": float(v)})
+                    except ValueError:
+                        pass
+                    break
+        # Dedupe: csv can have multiple rows per step (different metric flush
+        # boundaries) — keep the last loss seen for each step.
+        by_step: Dict[int, float] = {}
+        for p in points:
+            by_step[p["step"]] = p["loss"]
+        ordered = sorted(by_step.items())
+        self.status["loss_history"] = [{"step": s, "loss": l} for s, l in ordered]
+        # Also surface the most recent loss as the scalar so the terminal
+        # log and "Current Loss" field don't show "—" until end-of-epoch-0.
+        # PL's tqdm postfix is async; the CSV row lands a beat ahead.
+        if ordered:
+            self.status["loss"] = ordered[-1][1]
+    def _scan_checkpoints(self) -> None:
+        """Update self.status['checkpoints'] from on-disk artifacts.
+        SA3's train_lora.py uses PyTorch Lightning's ModelCheckpoint, which
+        writes `.ckpt` files (Lightning pickle format). The diffusion wrapper's
+        `on_save_checkpoint` hook strips the state_dict to LoRA-only weights
+        plus the embedded `lora_config`, so each .ckpt IS a LoRA checkpoint.
+        We also accept .safetensors for forward-compat with a future export
+        path or manual conversion.
+        """
+        if not self.run_dir:
+            return
+        ckpt_dir = self.run_dir / "checkpoints"
+        if not ckpt_dir.exists():
+            return
+        found = []
+        for ext in ("*.safetensors", "*.ckpt"):
+            found.extend(ckpt_dir.glob(ext))
+        # Lightning writes nested lightning_logs/version_X/* — those aren't
+        # the user-facing artifacts; skip recursion.
+        project_root = get_config().project_root
+        self.status["checkpoints"] = sorted(
+            str(p.relative_to(project_root)) for p in found
+        )
+    def _scrape_csv_loss(self) -> None:
+        if not self.metrics_csv or not self.metrics_csv.exists():
+            # train_lora.py writes its CSV under the lightning logger dir,
+            # which is `<save_dir>/<name>/version_*/metrics.csv`. Walk to
+            # find it.
+            ckpt_dir = self.run_dir / "checkpoints"
+            for p in ckpt_dir.rglob("metrics.csv"):
+                self.metrics_csv = p
+                break
+        if not self.metrics_csv or not self.metrics_csv.exists():
+            return
+        try:
+            with open(self.metrics_csv) as f:
+                rows = list(csv.DictReader(f))
+            for row in reversed(rows):
+                for k in ("train/loss", "loss", "train_loss"):
+                    v = row.get(k)
+                    if v not in (None, ""):
+                        try:
+                            self.status["loss"] = float(v)
+                            return
+                        except ValueError:
+                            pass
+        except Exception:
+            pass
+    @staticmethod
+    def _safe_name(s: str) -> str:
+        return re.sub(r"[^a-zA-Z0-9_-]+", "_", s).strip("_") or "lora-run"
+# --- Module-level singleton + public functions -----------------------------
+# The trainer created by the most recent start_training() call; None until
+# the first run is started.
+_active: Optional[SA3Trainer] = None
+# Held by start_training() so its in-progress check and the replacement of
+# _active happen together.
+_lock = threading.Lock()
+def get_trainer() -> Optional[SA3Trainer]:
+    """Return the module-level trainer, or None if no run was ever started."""
+    return _active
+def start_training(config: Dict[str, Any]) -> Dict[str, Any]:
+    global _active
+    with _lock:
+        if _active and _active.status.get("is_training"):
+            return {"error": "A training run is already in progress."}
+        _active = SA3Trainer(config)
+        return _active.start()
+def get_training_status() -> Dict[str, Any]:
+    if _active is None:
+        return {
+            "is_training": False,
+            "status": "idle",
+            "message": "No training run has been started yet.",
+            "progress": 0,
+            "current_step": 0,
+            "total_steps": 0,
+            "checkpoints_saved": 0,
+            "loss": None,
+        }
+    return _active.get_status()
+def stop_training() -> Dict[str, Any]:
+    if _active is None:
+        return {"error": "No training run to stop."}
+    return _active.stop()
+def preview_training_plan(config: Dict[str, Any]) -> Dict[str, Any]:
+    """Return `preview_plan()` from a throwaway SA3Trainer built from `config`.
+    The trainer is not stored in the module-level singleton.
+    """
+    return SA3Trainer(config).preview_plan()

app/frontend/index.html CHANGED Viewed

@@ -7,15 +7,38 @@
     <meta name="theme-color" content="#000000" />
     <meta
       name="description"
-      content="Fragmenta Desktop - Stable Audio Fine-Tuning Application"
     />
     <link rel="manifest" href="/manifest.json" />
-    <link rel="preconnect" href="https://fonts.googleapis.com">
-    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
-    <link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@300;400;500;600&family=Space+Mono:wght@400;700&family=IBM+Plex+Mono:wght@300;400;500;600&display=swap" rel="stylesheet">
     <style>
       @font-face {
         font-family: 'Bitcount Single';
         src: url('/BitcountSingle-VariableFont_CRSV,ELSH,ELXP,slnt,wght.ttf') format('truetype');
@@ -25,7 +48,7 @@
       }
     </style>
-    <title>Fragmenta Desktop</title>
   </head>
   <body>
     <noscript>You need to enable JavaScript to run this app.</noscript>

     <meta name="theme-color" content="#000000" />
     <meta
       name="description"
+      content="Fragmenta — Stable Audio Fine-Tuning Application"
     />
     <link rel="manifest" href="/manifest.json" />
     <style>
+      /* Layout floor — channel grid + master strip need 1300px
+         horizontally to sit side-by-side, and the vertical layout (top
+         bar + channels + bottom bar) gets cramped below 830px. Below
+         either floor, scrollbars appear so the layout stays intact
+         instead of collapsing. The launcher (start.py) opens Chromium
+         at 1300×830 so the fresh-launch experience lands exactly at
+         the floor. */
+      html, body {
+        min-width: 1300px;
+        min-height: 830px;
+      }
+      /* Local variable fonts — ship with the app, no network dependency. */
+      @font-face {
+        font-family: 'Bricolage Grotesque';
+        src: url('/BricolageGrotesque-VariableFont_opsz,wdth,wght.ttf') format('truetype');
+        font-weight: 200 800;
+        font-style: normal;
+        font-display: swap;
+      }
+      @font-face {
+        font-family: 'Inter Tight';
+        src: url('/InterTight-VariableFont_wght.ttf') format('truetype');
+        font-weight: 100 900;
+        font-style: normal;
+        font-display: swap;
+      }
       @font-face {
         font-family: 'Bitcount Single';
         src: url('/BitcountSingle-VariableFont_CRSV,ELSH,ELXP,slnt,wght.ttf') format('truetype');
       }
     </style>
+    <title>Fragmenta</title>
   </head>
   <body>
     <noscript>You need to enable JavaScript to run this app.</noscript>

app/frontend/logs/fragmenta_20260525.log ADDED Viewed

	@@ -0,0 +1,8 @@

+2026-05-25 11:21:33 | [92mINFO[0m | FragmentaLogger | setup_logging:105 | Logging system initialized (Level: INFO)
+2026-05-25 11:21:33 | [92mINFO[0m | FragmentaLogger | setup_logging:107 | Log file: logs/fragmenta_20260525.log
+2026-05-25 11:44:54 | [92mINFO[0m | FragmentaLogger | setup_logging:105 | Logging system initialized (Level: INFO)
+2026-05-25 11:44:54 | [92mINFO[0m | FragmentaLogger | setup_logging:107 | Log file: logs/fragmenta_20260525.log
+2026-05-25 13:55:04 | [92mINFO[0m | FragmentaLogger | setup_logging:105 | Logging system initialized (Level: INFO)
+2026-05-25 13:55:04 | [92mINFO[0m | FragmentaLogger | setup_logging:107 | Log file: logs/fragmenta_20260525.log
+2026-05-25 13:55:05 | [92mINFO[0m | FragmentaLogger | setup_logging:105 | Logging system initialized (Level: INFO)
+2026-05-25 13:55:05 | [92mINFO[0m | FragmentaLogger | setup_logging:107 | Log file: logs/fragmenta_20260525.log

app/frontend/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
     "name": "fragmenta-desktop",
-    "version": "0.1.2",
-    "description": "Fragmenta Desktop",
     "type": "module",
     "scripts": {
         "dev": "vite",

 {
     "name": "fragmenta-desktop",
+    "version": "0.2.0",
+    "description": "Fragmenta",
     "type": "module",
     "scripts": {
         "dev": "vite",

app/frontend/public/BricolageGrotesque-VariableFont_opsz,wdth,wght.ttf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:31b91d15aae398699fae58363dbc8ca1167faffe7d2cd62e68c716dcaa7d5fdd
+size 407844

app/frontend/public/InterTight-VariableFont_wght.ttf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4b8ef9ed255ebe7341aa566554c0f3e87ee10ce06d2085f07ccf66f41ef96c28
+size 580572

app/frontend/public/fragmenta_background.png CHANGED Viewed

Git LFS Details

SHA256: 048aea503935f9763e76db3f5d1fcd6d561d3db9aeac415605c46527a3d6631b
Pointer size: 131 Bytes
Size of remote file: 133 kB

Git LFS Details

SHA256: f7c5c50356c595570f790621b89da04b93680b2be43803810b33a165111e8600
Pointer size: 131 Bytes
Size of remote file: 162 kB

app/frontend/public/interface.png CHANGED Viewed

Git LFS Details

SHA256: 00d2730e2f53440597b018538ed30200928e26d1034c51ec8ef7a95fc0477e98
Pointer size: 132 Bytes
Size of remote file: 1.81 MB

Git LFS Details

SHA256: a05704da0c9b7ea812b44d94186d81fe969a3963bf11cca6c79fbadf5d33f645
Pointer size: 132 Bytes
Size of remote file: 1.59 MB

app/frontend/src/App.js CHANGED Viewed

The diff for this file is too large to render. See raw diff

app/frontend/src/api.js CHANGED Viewed

@@ -37,6 +37,7 @@ const api = {
     get: (url, config) => request('GET', url, null, config),
     post: (url, body, config) => request('POST', url, body, config),
     put: (url, body, config) => request('PUT', url, body, config),
     delete: (url, config) => request('DELETE', url, null, config),
 };

     get: (url, config) => request('GET', url, null, config),
     post: (url, body, config) => request('POST', url, body, config),
     put: (url, body, config) => request('PUT', url, body, config),
+    patch: (url, body, config) => request('PATCH', url, body, config),
     delete: (url, config) => request('DELETE', url, null, config),
 };

app/frontend/src/components/AboutDialog.js ADDED Viewed

	@@ -0,0 +1,130 @@

+import React from 'react';
+import {
+    Box,
+    Button,
+    Dialog,
+    DialogActions,
+    DialogContent,
+    DialogTitle,
+    Typography,
+} from '@mui/material';
+import {
+    Info as InfoIcon,
+    BookOpen as BookOpenIcon,
+} from 'lucide-react';
+import { appStyles } from '../theme';
+import { APP_VERSION } from '../version';
+/**
+ * "About Fragmenta" dialog — logo + title, short intro, three doc buttons
+ * (About / Documentation / Tutorials), and the Stability AI Community
+ * License attribution footer.
+ *
+ * Props:
+ *   open:                       bool
+ *   onClose:                    () => void
+ *   onOpenDocumentation:        ('about' | 'documentation') => void
+ *   isOpeningDocumentation:     bool — disables the doc buttons while a
+ *                               native open-file call is in flight
+ */
+export default function AboutDialog({
+    open,
+    onClose,
+    onOpenDocumentation,
+    isOpeningDocumentation,
+}) {
+    return (
+        <Dialog
+            open={open}
+            onClose={onClose}
+            aria-labelledby="about-documentation-dialog-title"
+            maxWidth="sm"
+            fullWidth
+        >
+            <DialogTitle id="about-documentation-dialog-title">
+                <Box sx={{ display: 'flex', flexDirection: 'column', alignItems: 'center', gap: 1 }}>
+                    <Box sx={{
+                        ...appStyles.logo,
+                        width: 52, height: 52,
+                        border: 'none',
+                        boxShadow: 'none',
+                        filter: 'none',
+                    }} />
+                    <Typography variant="h5" component="span" sx={appStyles.title}>
+                        Fragmenta
+                    </Typography>
+                    <Typography variant="caption" color="text.secondary" sx={{ fontSize: '0.7rem', letterSpacing: '0.04em' }}>
+                        v{APP_VERSION}
+                    </Typography>
+                </Box>
+            </DialogTitle>
+            <DialogContent>
+                <Typography sx={appStyles.infoDialogIntro}>
+                    Fragmenta is an open source, local-first suit to prepare datasets, train, generate and perform with text-to-audio diffusion models.
+                    Made by the composer and researcher Misagh Azimi.
+                </Typography>
+                <Box sx={appStyles.infoDialogActionStack}>
+                    <Button
+                        variant="contained"
+                        size="small"
+                        startIcon={<InfoIcon size={16} />}
+                        onClick={() => onOpenDocumentation('about')}
+                        disabled={isOpeningDocumentation}
+                        sx={appStyles.infoDocButton}
+                    >
+                        About
+                    </Button>
+                    <Button
+                        variant="outlined"
+                        size="small"
+                        startIcon={<BookOpenIcon size={16} />}
+                        onClick={() => onOpenDocumentation('documentation')}
+                        disabled={isOpeningDocumentation}
+                        sx={appStyles.infoDocButton}
+                    >
+                        Documentation
+                    </Button>
+                    <Button
+                        variant="outlined"
+                        size="small"
+                        disabled
+                        sx={appStyles.infoDocButton}
+                    >
+                        Tutorials (Coming soon...)
+                    </Button>
+                </Box>
+                <Box sx={{ mt: 3, pt: 1.5, borderTop: '1px solid', borderColor: 'divider', textAlign: 'center' }}>
+                    <Typography variant="caption" color="textSecondary" sx={{ display: 'block', fontStyle: 'italic', fontSize: '0.6rem', lineHeight: 1.5 }}>
+                        Powered by{' '}
+                        <Typography
+                            component="a"
+                            variant="caption"
+                            href="https://github.com/Stability-AI/stable-audio-3"
+                            target="_blank"
+                            rel="noopener noreferrer"
+                            sx={{ color: 'primary.main', textDecoration: 'underline', fontStyle: 'italic', fontSize: '0.6rem' }}
+                        >
+                            Stable Audio 3
+                        </Typography>{' '}by Stability AI. "This Stability AI Model is licensed under the{' '}
+                        <Typography
+                            component="a"
+                            variant="caption"
+                            href="https://stability.ai/license"
+                            target="_blank"
+                            rel="noopener noreferrer"
+                            sx={{ color: 'primary.main', textDecoration: 'underline', fontStyle: 'italic', fontSize: '0.6rem' }}
+                        >
+                            Stability AI Community License
+                        </Typography>,{' '}
+                        Copyright © Stability AI Ltd. All Rights Reserved"
+                    </Typography>
+                </Box>
+            </DialogContent>
+            <DialogActions>
+                <Button onClick={onClose}>Close</Button>
+            </DialogActions>
+        </Dialog>
+    );
+}

app/frontend/src/components/AudioWaveform.js ADDED Viewed

	@@ -0,0 +1,258 @@

+import React, { useEffect, useRef, useState, useCallback } from 'react';
+import { Box, Typography } from '@mui/material';
+/**
+ * Canvas waveform with a single draggable region (for SA3 inpaint UX).
+ *
+ * Decodes the supplied File via the Web Audio API (no network round-trip),
+ * computes per-pixel min/max peaks once per (file, width) pair, and renders
+ * a region overlay + two draggable handles. Region drag in three modes:
+ *   - drag the left handle  → adjust start
+ *   - drag the right handle → adjust end
+ *   - drag the body         → shift the whole region in place
+ *
+ * Region is controlled: parent owns `start` / `end` in seconds.
+ *
+ * Props:
+ *   file:            File | null    — source audio
+ *   duration:        number          — clip length in seconds (must be passed; we
+ *                                       don't infer it from decoded length so the
+ *                                       caller can drive a probe before decode
+ *                                       finishes)
+ *   start, end:      number          — region in seconds
+ *   onRegionChange:  (start, end) => void
+ *   minRegionSec:    number          — default 0.1
+ *   height:          number          — canvas height in px (default 96)
+ *   color:           CSS color       — waveform peak color (default theme accent)
+ *   regionColor:     CSS color       — fill for the region rect
+ */
+export default function AudioWaveform({
+    file,
+    duration,
+    start,
+    end,
+    onRegionChange,
+    minRegionSec = 0.1,
+    height = 96,
+    color = '#279FBB',
+    regionColor = 'rgba(253, 162, 43, 0.28)',
+}) {
+    const canvasRef = useRef(null);
+    const containerRef = useRef(null);
+    const [width, setWidth] = useState(0);
+    const [peaks, setPeaks] = useState(null);
+    const [decoding, setDecoding] = useState(false);
+    const [decodeError, setDecodeError] = useState(null);
+    // Drag state lives in a ref to avoid re-renders during pointer move.
+    const dragRef = useRef(null);
+    // --- responsive width via ResizeObserver -----------------------------
+    useEffect(() => {
+        const el = containerRef.current;
+        if (!el) return;
+        const ro = new ResizeObserver((entries) => {
+            const w = Math.max(1, Math.floor(entries[0].contentRect.width));
+            setWidth(w);
+        });
+        ro.observe(el);
+        return () => ro.disconnect();
+    }, []);
+    // --- decode + peak computation ---------------------------------------
+    useEffect(() => {
+        if (!file || !width) return;
+        let cancelled = false;
+        setDecoding(true);
+        setDecodeError(null);
+        (async () => {
+            try {
+                const buf = await file.arrayBuffer();
+                if (cancelled) return;
+                // Reuse one AudioContext where possible. Safari and Chrome both
+                // permit creating an offline one for pure decode without user
+                // gesture, which is what we want.
+                const Ctx = window.OfflineAudioContext || window.webkitOfflineAudioContext;
+                const tmpCtx = Ctx
+                    ? new Ctx(1, 44100, 44100)
+                    : new (window.AudioContext || window.webkitAudioContext)();
+                const audio = await tmpCtx.decodeAudioData(buf.slice(0));
+                if (cancelled) return;
+                // Average across channels into mono peaks, then bucket into
+                // `width` columns. Each column gets (min, max) in [-1, 1].
+                const ch0 = audio.getChannelData(0);
+                const ch1 = audio.numberOfChannels > 1 ? audio.getChannelData(1) : null;
+                const totalSamples = ch0.length;
+                const bucketSize = Math.max(1, Math.floor(totalSamples / width));
+                const out = new Float32Array(width * 2);
+                for (let i = 0; i < width; i++) {
+                    const s = i * bucketSize;
+                    const e = Math.min(totalSamples, s + bucketSize);
+                    let mn = 0, mx = 0;
+                    for (let j = s; j < e; j++) {
+                        const v = ch1 ? (ch0[j] + ch1[j]) * 0.5 : ch0[j];
+                        if (v < mn) mn = v;
+                        if (v > mx) mx = v;
+                    }
+                    out[i * 2] = mn;
+                    out[i * 2 + 1] = mx;
+                }
+                setPeaks(out);
+            } catch (err) {
+                setDecodeError(err.message || 'Failed to decode audio');
+            } finally {
+                if (!cancelled) setDecoding(false);
+            }
+        })();
+        return () => { cancelled = true; };
+    }, [file, width]);
+    // --- canvas drawing --------------------------------------------------
+    const draw = useCallback(() => {
+        const canvas = canvasRef.current;
+        if (!canvas || !width || !height) return;
+        const dpr = window.devicePixelRatio || 1;
+        canvas.width = width * dpr;
+        canvas.height = height * dpr;
+        const ctx = canvas.getContext('2d');
+        ctx.setTransform(dpr, 0, 0, dpr, 0, 0);
+        ctx.clearRect(0, 0, width, height);
+        // Background: faint center line so empty audio still shows scale.
+        ctx.fillStyle = 'rgba(255, 255, 255, 0.05)';
+        ctx.fillRect(0, height / 2 - 0.5, width, 1);
+        // Peaks
+        if (peaks) {
+            ctx.fillStyle = color;
+            const mid = height / 2;
+            const scale = (height - 4) / 2;
+            for (let i = 0; i < width; i++) {
+                const mn = peaks[i * 2];
+                const mx = peaks[i * 2 + 1];
+                const y0 = mid - mx * scale;
+                const y1 = mid - mn * scale;
+                ctx.fillRect(i, y0, 1, Math.max(1, y1 - y0));
+            }
+        }
+        // Region overlay
+        if (duration > 0 && Number.isFinite(start) && Number.isFinite(end)) {
+            const sPx = Math.max(0, Math.min(width, (start / duration) * width));
+            const ePx = Math.max(0, Math.min(width, (end / duration) * width));
+            const rectW = Math.max(1, ePx - sPx);
+            ctx.fillStyle = regionColor;
+            ctx.fillRect(sPx, 0, rectW, height);
+            // Handles
+            ctx.fillStyle = '#FDA22B';
+            ctx.fillRect(sPx - 1, 0, 2, height);
+            ctx.fillRect(ePx - 1, 0, 2, height);
+        }
+    }, [width, height, peaks, color, regionColor, start, end, duration]);
+    useEffect(() => { draw(); }, [draw]);
+    // --- pointer interaction --------------------------------------------
+    const HIT_PX = 8;
+    const pxToSec = useCallback((px) => {
+        return Math.max(0, Math.min(duration, (px / width) * duration));
+    }, [width, duration]);
+    const onPointerDown = (e) => {
+        if (!duration || !width) return;
+        const rect = canvasRef.current.getBoundingClientRect();
+        const px = e.clientX - rect.left;
+        const sPx = (start / duration) * width;
+        const ePx = (end / duration) * width;
+        let mode;
+        if (Math.abs(px - sPx) <= HIT_PX) mode = 'start';
+        else if (Math.abs(px - ePx) <= HIT_PX) mode = 'end';
+        else if (px > sPx && px < ePx) mode = 'body';
+        else mode = 'new'; // start a new region by drag
+        dragRef.current = {
+            mode,
+            startPx: px,
+            origStart: start,
+            origEnd: end,
+        };
+        canvasRef.current.setPointerCapture(e.pointerId);
+        if (mode === 'new') {
+            const t = pxToSec(px);
+            onRegionChange?.(t, Math.min(duration, t + minRegionSec));
+            dragRef.current.mode = 'end';
+            dragRef.current.origStart = t;
+            dragRef.current.origEnd = t + minRegionSec;
+        }
+    };
+    const onPointerMove = (e) => {
+        const d = dragRef.current;
+        if (!d) return;
+        const rect = canvasRef.current.getBoundingClientRect();
+        const px = e.clientX - rect.left;
+        const delta = pxToSec(px) - pxToSec(d.startPx);
+        let s = d.origStart;
+        let en = d.origEnd;
+        if (d.mode === 'start') {
+            s = Math.max(0, Math.min(d.origEnd - minRegionSec, d.origStart + delta));
+        } else if (d.mode === 'end') {
+            en = Math.max(d.origStart + minRegionSec, Math.min(duration, d.origEnd + delta));
+        } else if (d.mode === 'body') {
+            const span = d.origEnd - d.origStart;
+            s = Math.max(0, Math.min(duration - span, d.origStart + delta));
+            en = s + span;
+        }
+        onRegionChange?.(s, en);
+    };
+    const onPointerUp = (e) => {
+        if (dragRef.current) {
+            canvasRef.current.releasePointerCapture(e.pointerId);
+            dragRef.current = null;
+        }
+    };
+    // --- render ----------------------------------------------------------
+    return (
+        <Box ref={containerRef} sx={{ width: '100%', position: 'relative' }}>
+            <canvas
+                ref={canvasRef}
+                style={{
+                    width: '100%',
+                    height,
+                    display: 'block',
+                    cursor: dragRef.current ? 'grabbing' : 'crosshair',
+                    touchAction: 'none',
+                    borderRadius: 4,
+                    background: 'rgba(255,255,255,0.02)',
+                }}
+                onPointerDown={onPointerDown}
+                onPointerMove={onPointerMove}
+                onPointerUp={onPointerUp}
+                onPointerCancel={onPointerUp}
+            />
+            {(decoding || decodeError || !file) && (
+                <Box
+                    sx={{
+                        position: 'absolute',
+                        inset: 0,
+                        display: 'flex',
+                        alignItems: 'center',
+                        justifyContent: 'center',
+                        pointerEvents: 'none',
+                    }}
+                >
+                    <Typography variant="caption" color="text.secondary">
+                        {decodeError
+                            ? `decode failed: ${decodeError}`
+                            : !file
+                                ? 'no source loaded'
+                                : 'decoding…'}
+                    </Typography>
+                </Box>
+            )}
+        </Box>
+    );
+}

app/frontend/src/components/ChannelFragmentHistory.js ADDED Viewed

	@@ -0,0 +1,217 @@

+import React, { useState } from 'react';
+import {
+    Box,
+    IconButton,
+    Dialog,
+    DialogTitle,
+    DialogContent,
+    DialogContentText,
+    DialogActions,
+    Button,
+} from '@mui/material';
+import { TIPS } from '../tooltips';
+import Tooltip from './Tooltip';
+import {
+    Play as PlayIcon,
+    Square as StopIcon,
+    Star as StarIcon,
+    Trash2 as DeleteIcon,
+    Check as CommitIcon,
+    Eraser as ClearAllIcon,
+} from 'lucide-react';
+import { performanceChannelStyles as styles } from '../theme';
+import { MidiMappable } from './MidiContext';
+/**
+ * Per-channel rolling fragment history. Always visible (empty-state included)
+ * so the user knows the strip exists. Chronological order — oldest at
+ * the top, newest at the bottom; scrolls vertically when the list grows
+ * past ~4 visible rows.
+ *
+ * Each row exposes four actions, all visible by default (no hover-reveal —
+ * Performance use is fast, can't afford the discoverability tax):
+ *   • Cue ▶/■   — audition through the cue output (separate from main mix)
+ *   • Star ★/☆ — mark as a keeper. Starred fragments survive the cap
+ *                  eviction; unstarred get dropped FIFO when over cap.
+ *   • Delete ⌫  — remove this fragment from history (cancellable confirm not
+ *                  shown for single deletes — the entry can be regenerated
+ *                  or audition can be retriggered after a quick re-tap).
+ *   • Load ✓   — commit this fragment to the channel strip (becomes the
+ *                  audio the channel plays). Disabled while already loaded.
+ *
+ * Props:
+ *   fragments:      [{ id, audioUrl, blob, prompt, duration, createdAt,
+ *                     starred, number }]
+ *   color:          channel accent color
+ *   auditioningId:  the id currently playing through cue, or null
+ *   committedId:    the id currently loaded into the channel strip, or null
+ *   maxFragments:   cap, default 50 (informational; eviction lives in parent)
+ *   on{Audition,Commit,ToggleStar,Delete}:  (fragmentId) => void
+ *   onClearAll:     () => void  (parent confirms separately — we still show
+ *                   a confirm dialog here for the trash-everything action)
+ */
+export default function ChannelFragmentHistory({
+    fragments,
+    color,
+    channelIndex,
+    auditioningId,
+    committedId,
+    maxFragments = 50,
+    onAudition,
+    onCommit,
+    onToggleStar,
+    onDelete,
+    onClearAll,
+}) {
+    const [clearConfirmOpen, setClearConfirmOpen] = useState(false);
+    // Channel-scoped MIME type for drag-and-drop. The waveform drop target on
+    // this same channel listens for this exact type — cross-channel drags
+    // won't highlight or accept because the mime won't match.
+    const dragMime = `application/x-fragmenta-fragment-ch${channelIndex}`;
+    return (
+        <Box sx={styles.fragmentHistoryPanel}>
+            <Box sx={styles.fragmentHistoryHeader}>
+                <Box component="span" sx={styles.fragmentHistoryHeaderText}>
+                    Fragments
+                </Box>
+                {fragments.length > 0 && (
+                    <IconButton
+                        size="small"
+                        onClick={() => setClearConfirmOpen(true)}
+                        sx={styles.fragmentHistoryHeaderBtn}
+                        aria-label="Clear all fragments"
+                    >
+                        <ClearAllIcon size={12} />
+                    </IconButton>
+                )}
+            </Box>
+            {fragments.length === 0 ? (
+                <Box sx={styles.fragmentHistoryEmpty}>Empty</Box>
+            ) : (
+                <Box sx={styles.fragmentHistoryList}>
+                    {fragments.map((fragment) => {
+                        const isAuditioning = auditioningId === fragment.id;
+                        const isCommitted = committedId === fragment.id;
+                        return (
+                            <Box
+                                key={fragment.id}
+                                draggable
+                                onDragStart={(e) => {
+                                    e.dataTransfer.setData(dragMime, fragment.id);
+                                    e.dataTransfer.effectAllowed = 'copy';
+                                }}
+                                sx={{
+                                    ...styles.fragmentRow(color, isCommitted, isAuditioning),
+                                    cursor: 'grab',
+                                    '&:active': { cursor: 'grabbing' },
+                                }}
+                            >
+                                <MidiMappable
+                                    id={`channel.${channelIndex}.fragment.${fragment.id}.audition`}
+                                    label={`Ch ${channelIndex + 1} · Fragment ${fragment.number} audition`}
+                                    kind="trigger"
+                                    onChange={() => onAudition(fragment.id)}
+                                >
+                                    <Tooltip
+                                        title={TIPS.fragments.audition(isAuditioning)}
+                                        placement="top"
+                                        arrow
+                                        enterDelay={300}
+                                    >
+                                        <IconButton
+                                            size="small"
+                                            onClick={() => onAudition(fragment.id)}
+                                            sx={styles.fragmentIconBtn(color, isAuditioning, true)}
+                                            aria-label={isAuditioning ? 'Stop cue' : 'Audition'}
+                                        >
+                                            {isAuditioning
+                                                ? <StopIcon size={12} />
+                                                : <PlayIcon size={12} />}
+                                        </IconButton>
+                                    </Tooltip>
+                                </MidiMappable>
+                                <Box sx={styles.fragmentMeta}>
+                                    <Box component="span" sx={styles.fragmentOrdinal}>
+                                        F{fragment.number}
+                                    </Box>
+                                </Box>
+                                <Tooltip
+                                    title={TIPS.fragments.star(fragment.starred)}
+                                    placement="top"
+                                    arrow
+                                    enterDelay={300}
+                                >
+                                    <IconButton
+                                        size="small"
+                                        onClick={() => onToggleStar(fragment.id)}
+                                        sx={styles.fragmentIconBtn(color, fragment.starred)}
+                                        aria-label={fragment.starred ? 'Unstar fragment' : 'Star fragment'}
+                                    >
+                                        <StarIcon
+                                            size={12}
+                                            fill={fragment.starred ? color : 'none'}
+                                            strokeWidth={2}
+                                        />
+                                    </IconButton>
+                                </Tooltip>
+                                <IconButton
+                                    size="small"
+                                    onClick={() => onDelete(fragment.id)}
+                                    sx={styles.fragmentDeleteBtn}
+                                    aria-label="Delete fragment"
+                                >
+                                    <DeleteIcon size={12} />
+                                </IconButton>
+                                <Tooltip
+                                    title={TIPS.fragments.commit(isCommitted)}
+                                    placement="top"
+                                    arrow
+                                    enterDelay={300}
+                                >
+                                    <span>
+                                        <IconButton
+                                            size="small"
+                                            onClick={() => onCommit(fragment.id)}
+                                            disabled={isCommitted}
+                                            sx={styles.fragmentIconBtn(color, isCommitted, true)}
+                                            aria-label="Load fragment into channel"
+                                        >
+                                            <CommitIcon size={12} strokeWidth={isCommitted ? 3 : 2} />
+                                        </IconButton>
+                                    </span>
+                                </Tooltip>
+                            </Box>
+                        );
+                    })}
+                </Box>
+            )}
+            <Dialog open={clearConfirmOpen} onClose={() => setClearConfirmOpen(false)}>
+                <DialogTitle>Clear fragment history?</DialogTitle>
+                <DialogContent>
+                    <DialogContentText>
+                        Removes all {fragments.length} fragments from this channel's history,
+                        including starred ones. The currently loaded clip stays loaded
+                        — only the history entries are dropped.
+                    </DialogContentText>
+                </DialogContent>
+                <DialogActions>
+                    <Button onClick={() => setClearConfirmOpen(false)}>Cancel</Button>
+                    <Button
+                        onClick={() => { setClearConfirmOpen(false); onClearAll?.(); }}
+                        color="error"
+                        variant="contained"
+                    >
+                        Clear all
+                    </Button>
+                </DialogActions>
+            </Dialog>
+        </Box>
+    );
+}

app/frontend/src/components/CheckpointManagerWindow.js ADDED Viewed

	@@ -0,0 +1,243 @@

+import React, { useCallback, useEffect, useState } from 'react';
+import {
+    Dialog,
+    DialogTitle,
+    DialogContent,
+    DialogActions,
+    Box,
+    Typography,
+    Button,
+    IconButton,
+    Stack,
+    Alert,
+    TextField,
+    LinearProgress,
+} from '@mui/material';
+import {
+    X as CloseIcon,
+    HardDrive as StorageIcon,
+    LogIn as LoginIcon,
+    LogOut as LogoutIcon,
+} from 'lucide-react';
+import api from '../api';
+import CheckpointRow from './CheckpointRow';
+import StorageDrilldown from './StorageDrilldown';
+const fmtBytes = (n) => {
+    if (!n && n !== 0) return '—';
+    const units = ['B', 'KB', 'MB', 'GB', 'TB'];
+    let v = n;
+    let u = 0;
+    while (v >= 1000 && u < units.length - 1) { v /= 1000; u += 1; }
+    return `${v.toFixed(v < 10 ? 2 : 1)} ${units[u]}`;
+};
+export default function CheckpointManagerWindow({ open, onClose }) {
+    const [catalog, setCatalog] = useState([]);
+    const [storage, setStorage] = useState(null);
+    const [env, setEnv] = useState(null);
+    const [hfAuth, setHfAuth] = useState({ signed_in: false, username: null });
+    const [tokenDraft, setTokenDraft] = useState('');
+    const [showTokenInput, setShowTokenInput] = useState(false);
+    const [authError, setAuthError] = useState(null);
+    const [showStorage, setShowStorage] = useState(false);
+    const [loading, setLoading] = useState(false);
+    const [error, setError] = useState(null);
+    const refresh = useCallback(async () => {
+        setLoading(true);
+        setError(null);
+        try {
+            const [cat, store, auth, environment] = await Promise.all([
+                api.get('/api/checkpoints'),
+                api.get('/api/checkpoints/storage'),
+                api.get('/api/hf-auth/status'),
+                api.get('/api/environment'),
+            ]);
+            setCatalog(cat.data.checkpoints);
+            setStorage(store.data);
+            setHfAuth(auth.data);
+            setEnv(environment.data);
+        } catch (e) {
+            setError(e.response?.data?.error || e.message);
+        } finally {
+            setLoading(false);
+        }
+    }, []);
+    useEffect(() => {
+        if (open) refresh();
+    }, [open, refresh]);
+    const submitToken = async () => {
+        setAuthError(null);
+        try {
+            await api.post('/api/hf-auth', { token: tokenDraft.trim() });
+            setTokenDraft('');
+            setShowTokenInput(false);
+            refresh();
+        } catch (e) {
+            setAuthError(e.response?.data?.error || e.message);
+        }
+    };
+    const logout = async () => {
+        try {
+            await api.delete('/api/hf-auth');
+            refresh();
+        } catch (e) {
+            setAuthError(e.response?.data?.error || e.message);
+        }
+    };
+    const anyInstalled = catalog.some(c => c.downloaded);
+    return (
+        <>
+            <Dialog open={open} onClose={onClose} maxWidth="md" fullWidth scroll="paper">
+                <DialogTitle sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
+                    <Box sx={{ flex: 1 }}>Checkpoint Manager</Box>
+                    <IconButton size="small" onClick={onClose}><CloseIcon size={18} /></IconButton>
+                </DialogTitle>
+                <DialogContent dividers>
+                    <Box sx={{ mb: 2 }}>
+                        <Stack direction="row" alignItems="center" spacing={2} flexWrap="wrap">
+                            <Button
+                                size="small"
+                                variant="text"
+                                startIcon={<StorageIcon size={14} />}
+                                onClick={() => setShowStorage(true)}
+                                disabled={!storage}
+                            >
+                                {storage
+                                    ? `${fmtBytes(storage.total_used_bytes)} used · ${fmtBytes(storage.total_free_bytes)} free`
+                                    : '—'}
+                            </Button>
+                            <Box sx={{ flex: 1 }} />
+                            {hfAuth.signed_in ? (
+                                <Stack direction="row" alignItems="center" spacing={1}>
+                                    <Typography variant="caption" color="text.secondary">
+                                        HuggingFace: signed in as <strong>{hfAuth.username}</strong>
+                                    </Typography>
+                                    <Button
+                                        size="small"
+                                        variant="text"
+                                        startIcon={<LogoutIcon size={14} />}
+                                        onClick={logout}
+                                    >
+                                        Sign out
+                                    </Button>
+                                </Stack>
+                            ) : showTokenInput ? (
+                                <Stack direction="row" alignItems="center" spacing={1}>
+                                    <TextField
+                                        size="small"
+                                        placeholder="hf_..."
+                                        value={tokenDraft}
+                                        onChange={(e) => setTokenDraft(e.target.value)}
+                                        type="password"
+                                        sx={{ width: 240 }}
+                                    />
+                                    <Button size="small" variant="contained" onClick={submitToken}>
+                                        Sign in
+                                    </Button>
+                                    <Button size="small" onClick={() => { setShowTokenInput(false); setTokenDraft(''); }}>
+                                        Cancel
+                                    </Button>
+                                </Stack>
+                            ) : (
+                                <Button
+                                    size="small"
+                                    variant="outlined"
+                                    startIcon={<LoginIcon size={14} />}
+                                    onClick={() => setShowTokenInput(true)}
+                                >
+                                    Sign in to HuggingFace
+                                </Button>
+                            )}
+                        </Stack>
+                        {authError && <Alert severity="error" sx={{ mt: 1 }}>{authError}</Alert>}
+                    </Box>
+                    {!hfAuth.signed_in ? (
+                        <Alert severity="info" sx={{ mb: 2 }}>
+                            SA3 checkpoints are gated on HuggingFace. You need a{' '}
+                            <a href="https://huggingface.co/join" target="_blank" rel="noreferrer">
+                                HuggingFace account
+                            </a>
+                            {' '}to continue. Then{' '}
+                            <a href="https://huggingface.co/settings/tokens" target="_blank" rel="noreferrer">
+                                create a Read access token
+                            </a>
+                            {' '}and sign in above.
+                        </Alert>
+                    ) : (
+                        <Alert severity="info" sx={{ mb: 2 }}>
+                            You're signed in. Each model is gated — click its name below to open the
+                            HuggingFace page and accept the model's terms before downloading.
+                        </Alert>
+                    )}
+                    {error && <Alert severity="error" sx={{ mb: 2 }}>{error}</Alert>}
+                    {loading && <LinearProgress sx={{ mb: 2 }} />}
+                    {!loading && !anyInstalled && catalog.length > 0 && (
+                        <Box sx={{
+                            p: 2, mb: 2, borderRadius: 1, bgcolor: 'action.hover',
+                        }}>
+                            <Typography variant="body2" fontWeight={500}>
+                                Pick a model to get started.
+                            </Typography>
+                            <Typography variant="caption" color="text.secondary">
+                                Small - Music (1.2 GB) is a good first choice on a laptop or any GPU.
+                            </Typography>
+                        </Box>
+                    )}
+                    {[
+                        { kind: 'post-trained', label: 'Distilled (fast)', hint: '8 steps, cfg locked at 1.0. Prompt, duration and seed only.' },
+                        { kind: 'base', label: 'Base (full control)', hint: 'CFG-aware. ~50 steps, cfg ~7. Cfg-scale and steps are live controls.' },
+                        { kind: 'tagger', label: 'Auto-annotation tools', hint: 'Optional helpers for dataset prep. CLAP scores audio against your vocabulary.' },
+                    ].map(group => {
+                        const rows = catalog.filter(c => c.kind === group.kind);
+                        if (!rows.length) return null;
+                        return (
+                            <Box key={group.kind} sx={{ mb: 2 }}>
+                                <Typography variant="subtitle2" sx={{ mb: 0.25 }}>{group.label}</Typography>
+                                <Typography variant="caption" color="text.secondary" sx={{ display: 'block', mb: 0.75 }}>
+                                    {group.hint}
+                                </Typography>
+                                <Box sx={{ border: '1px solid', borderColor: 'divider', borderRadius: 1 }}>
+                                    {rows.map(c => (
+                                        <CheckpointRow
+                                            key={c.id}
+                                            checkpoint={c}
+                                            env={env}
+                                            onAuthRequired={() => setShowTokenInput(true)}
+                                            onChanged={refresh}
+                                        />
+                                    ))}
+                                </Box>
+                            </Box>
+                        );
+                    })}
+                </DialogContent>
+                <DialogActions>
+                    <Button onClick={refresh} disabled={loading}>Refresh</Button>
+                    <Button onClick={onClose} variant="contained">Close</Button>
+                </DialogActions>
+            </Dialog>
+            <StorageDrilldown
+                open={showStorage}
+                onClose={() => setShowStorage(false)}
+                storage={storage}
+                catalog={catalog}
+            />
+        </>
+    );
+}

app/frontend/src/components/CheckpointRow.js ADDED Viewed

	@@ -0,0 +1,270 @@

+import React, { useEffect, useRef, useState } from 'react';
+import {
+    Box,
+    Typography,
+    Button,
+    Chip,
+    LinearProgress,
+    Stack,
+    IconButton,
+} from '@mui/material';
+import { TIPS } from '../tooltips';
+import Tooltip from './Tooltip';
+import {
+    CloudDownload as DownloadIcon,
+    Trash2 as DeleteIcon,
+    X as CancelIcon,
+} from 'lucide-react';
+import api from '../api';
+const fmtBytes = (n) => {
+    if (!n && n !== 0) return '—';
+    const units = ['B', 'KB', 'MB', 'GB', 'TB'];
+    let v = n;
+    let u = 0;
+    while (v >= 1000 && u < units.length - 1) { v /= 1000; u += 1; }
+    return `${v.toFixed(v < 10 ? 2 : 1)} ${units[u]}`;
+};
+const hardwareLabel = (hw) => ({
+    'cpu': 'CPU / GPU',
+    'cuda': 'CUDA',
+    'cuda+flash-attn': 'CUDA + Flash-Attn',
+}[hw] || hw);
+// Why this host can't run a given model, or null if it can. Mirrors the gate
+// in audio_generator._ensure_model. `env` comes from GET /api/environment.
+const hostIncompatReason = (hw, env) => {
+    if (!env) return null;  // capabilities unknown — don't block
+    if (hw === 'cuda+flash-attn') {
+        if (!env.cuda_available) {
+            return 'Requires an NVIDIA CUDA GPU. Use a Small model — those run on CPU, Apple Silicon, or any GPU.';
+        }
+        // Gate on the real capability, not the platform: Windows works once a
+        // matching flash-attn wheel is installed (Blackwell/Ampere + cu12x).
+        // No wheel → guide the user to install one (or use Docker on WSL2).
+        if (!env.flash_attn_available) {
+            return env.platform === 'Windows'
+                ? 'Requires Flash Attention 2 (flash-attn). No official Windows wheel — install a matching prebuilt/built wheel for your torch+CUDA, or run via Docker on WSL2.'
+                : 'Requires Flash Attention 2 (flash-attn) — not installed. Install it, or use a Small model.';
+        }
+    }
+    if (hw === 'cuda' && !env.cuda_available) {
+        return 'Recommended on an NVIDIA CUDA GPU; this host has none.';
+    }
+    return null;
+};
+export default function CheckpointRow({ checkpoint, env, onAuthRequired, onChanged }) {
+    const [jobId, setJobId] = useState(checkpoint.active_job?.job_id || null);
+    const [job, setJob] = useState(checkpoint.active_job || null);
+    const [error, setError] = useState(null);
+    const [busy, setBusy] = useState(false);
+    const pollTimer = useRef(null);
+    // If the parent's refresh tells us about an in-flight job and we don't
+    // already have one locally (typical case: dialog was closed mid-download
+    // and just got reopened), adopt it. Don't stomp a freshly-started local
+    // job_id with stale catalog data — only sync when the local state is empty
+    // or a *different* job is now active for this checkpoint.
+    useEffect(() => {
+        const incoming = checkpoint.active_job?.job_id || null;
+        if (incoming && incoming !== jobId) {
+            setJobId(incoming);
+            setJob(checkpoint.active_job);
+        }
+    }, [checkpoint.active_job, jobId]);
+    useEffect(() => {
+        if (!jobId) return undefined;
+        const tick = async () => {
+            try {
+                const r = await api.get(`/api/checkpoints/jobs/${jobId}`);
+                setJob(r.data);
+                if (['complete', 'failed', 'cancelled'].includes(r.data.status)) {
+                    if (r.data.status === 'failed' && (r.data.error || '').startsWith('hf_auth_required')) {
+                        onAuthRequired?.();
+                    } else if (r.data.status === 'failed') {
+                        setError(r.data.error);
+                    }
+                    setJobId(null);
+                    onChanged?.();
+                }
+            } catch (e) {
+                setError(e.response?.data?.error || e.message);
+                setJobId(null);
+            }
+        };
+        tick();
+        pollTimer.current = setInterval(tick, 1500);
+        return () => clearInterval(pollTimer.current);
+    }, [jobId, onAuthRequired, onChanged]);
+    const startDownload = async () => {
+        setBusy(true);
+        setError(null);
+        try {
+            const r = await api.post(`/api/checkpoints/${checkpoint.id}/download`);
+            setJobId(r.data.job_id);
+        } catch (e) {
+            setError(e.response?.data?.error || e.message);
+        } finally {
+            setBusy(false);
+        }
+    };
+    const cancelDownload = async () => {
+        try {
+            await api.post(`/api/checkpoints/${checkpoint.id}/cancel-download`);
+        } catch (e) {
+            setError(e.response?.data?.error || e.message);
+        }
+    };
+    const deleteCheckpoint = async () => {
+        if (!window.confirm(`Delete ${checkpoint.name} (${fmtBytes(checkpoint.downloaded_bytes)})?`)) return;
+        setBusy(true);
+        try {
+            await api.delete(`/api/checkpoints/${checkpoint.id}`);
+            onChanged?.();
+        } catch (e) {
+            setError(e.response?.data?.error || e.message);
+        } finally {
+            setBusy(false);
+        }
+    };
+    const downloading = !!jobId && job?.status === 'running';
+    const queued = !!jobId && job?.status === 'queued';
+    const pct = job?.total_bytes ? (job.downloaded_bytes / job.total_bytes) * 100 : 0;
+    const incompatReason = hostIncompatReason(checkpoint.hardware, env);
+    const renderAction = () => {
+        if (downloading || queued) {
+            return (
+                <IconButton size="small" onClick={cancelDownload} aria-label="Cancel download"><CancelIcon size={16} /></IconButton>
+            );
+        }
+        if (checkpoint.downloaded) {
+            return (
+                <IconButton size="small" onClick={deleteCheckpoint} disabled={busy} aria-label="Delete from disk">
+                    <DeleteIcon size={16} />
+                </IconButton>
+            );
+        }
+        if (incompatReason) {
+            return (
+                <Tooltip title={incompatReason}>
+                    {/* span wrapper so the tooltip works on a disabled button */}
+                    <span>
+                        <Button
+                            size="small"
+                            variant="outlined"
+                            startIcon={<DownloadIcon size={14} />}
+                            disabled
+                        >
+                            Get
+                        </Button>
+                    </span>
+                </Tooltip>
+            );
+        }
+        return (
+            <Button
+                size="small"
+                variant="contained"
+                startIcon={<DownloadIcon size={14} />}
+                onClick={startDownload}
+                disabled={busy}
+            >
+                Get
+            </Button>
+        );
+    };
+    return (
+        <Box
+            sx={{
+                py: 1.25,
+                px: 1.5,
+                borderBottom: '1px solid',
+                borderColor: 'divider',
+                '&:last-child': { borderBottom: 'none' },
+            }}
+        >
+            <Stack direction="row" alignItems="center" spacing={2}>
+                <Box sx={{ flex: 1, minWidth: 0, opacity: (incompatReason && !checkpoint.downloaded) ? 0.55 : 1 }}>
+                    <Stack direction="row" alignItems="center" spacing={1}>
+                        <Tooltip title={TIPS.checkpoints.gatedAccess}>
+                            <Typography
+                                component="a"
+                                href={`https://huggingface.co/${checkpoint.repo}`}
+                                target="_blank"
+                                rel="noreferrer"
+                                variant="body2"
+                                sx={{
+                                    fontWeight: 500,
+                                    color: 'inherit',
+                                    textDecoration: 'none',
+                                    borderBottom: '1px dashed',
+                                    borderColor: 'text.disabled',
+                                    '&:hover': { color: 'primary.main', borderColor: 'primary.main' },
+                                }}
+                            >
+                                {checkpoint.name}
+                            </Typography>
+                        </Tooltip>
+                        <Chip
+                            size="small"
+                            label={hardwareLabel(checkpoint.hardware)}
+                            variant="outlined"
+                            sx={{ height: 18, fontSize: 10 }}
+                        />
+                        {checkpoint.downloaded && (
+                            <Chip
+                                size="small"
+                                label="installed"
+                                sx={{
+                                    height: 18,
+                                    fontSize: 10,
+                                    fontWeight: 600,
+                                    bgcolor: 'success.main',
+                                    color: 'common.white',
+                                }}
+                            />
+                        )}
+                    </Stack>
+                    <Typography variant="caption" color="text.secondary">
+                        {fmtBytes(checkpoint.size_bytes)}
+                        {checkpoint.max_duration_sec && ` · up to ${checkpoint.max_duration_sec}s`}
+                    </Typography>
+                    {incompatReason && !checkpoint.downloaded && (
+                        <Typography variant="caption" color="warning.main" sx={{ display: 'block' }}>
+                            Not supported on this machine
+                        </Typography>
+                    )}
+                </Box>
+                <Box>{renderAction()}</Box>
+            </Stack>
+            {(downloading || queued) && (
+                <Box sx={{ mt: 1 }}>
+                    <LinearProgress
+                        variant={queued ? 'indeterminate' : 'determinate'}
+                        value={Math.min(100, pct)}
+                        sx={{ height: 4, borderRadius: 2 }}
+                    />
+                    <Typography variant="caption" color="text.secondary" sx={{ mt: 0.5, display: 'block' }}>
+                        {queued ? 'Queued…' : `${fmtBytes(job?.downloaded_bytes)} / ${fmtBytes(job?.total_bytes)}`}
+                    </Typography>
+                </Box>
+            )}
+            {error && (
+                <Typography variant="caption" color="error" sx={{ mt: 0.5, display: 'block' }}>
+                    {error}
+                </Typography>
+            )}
+        </Box>
+    );
+}

app/frontend/src/components/DatasetPrep.js ADDED Viewed

	@@ -0,0 +1,1823 @@

+import React, { useCallback, useEffect, useRef, useState } from 'react';
+import {
+    Accordion,
+    AccordionDetails,
+    AccordionSummary,
+    Alert,
+    Autocomplete,
+    Box,
+    Button,
+    Checkbox,
+    Chip,
+    Dialog,
+    DialogActions,
+    DialogContent,
+    DialogTitle,
+    FormControl,
+    FormControlLabel,
+    IconButton,
+    InputLabel,
+    LinearProgress,
+    MenuItem,
+    Paper,
+    Portal,
+    Radio,
+    RadioGroup,
+    Select,
+    Snackbar,
+    Stack,
+    Switch,
+    Table,
+    TableBody,
+    TableCell,
+    TableContainer,
+    TableHead,
+    TableRow,
+    TextField,
+    Typography,
+    useTheme,
+} from '@mui/material';
+import { TIPS } from '../tooltips';
+import Tooltip from './Tooltip';
+import {
+    ChevronDown as ChevronDownIcon,
+    FolderOpenIcon,
+    PlusIcon,
+    WandSparkles,
+    SaveIcon,
+    Database as Database,
+    DatabaseZap as DatasetIcon,
+    Square as StopIcon,
+    Trash2 as TrashIcon,
+    Play as PlayIcon,
+    Pause as PauseIcon,
+    Scissors as ScissorsIcon,
+    Music as MusicIcon,
+    Activity as HealthIcon,
+} from 'lucide-react';
+import api from '../api';
+import { appStyles } from '../theme';
+/**
+ * DatasetPrep — sidecar-native dataset surface with a buffered editing model.
+ *
+ * One page, no modes. Pick or create a project. The dataset folder on disk
+ * is the *committed* state. Edits, auto-annotate output, and just-ingested
+ * audio all live in an in-memory session until the user explicitly hits
+ * Save (writes a draft) or Commit (writes .txt sidecars).
+ */
+export default function DatasetPrep({ onOpenCheckpointManager }) {
+    const [projects, setProjects] = useState([]);
+    const [selectedName, setSelectedName] = useState(() => {
+        try { return window.localStorage.getItem('fragmenta.datasetPrep.lastProject') || ''; }
+        catch { return ''; }
+    });
+    const [project, setProject] = useState(null);
+    const [createOpen, setCreateOpen] = useState(false);
+    const [loadOpen, setLoadOpen] = useState(false);
+    const [ingestOpen, setIngestOpen] = useState(false);
+    const [sliceTarget, setSliceTarget] = useState(null);  // file_name or null
+    // Single confirm-dialog state powering destructive actions. Mirrors the
+    // Free GPU / Start Fresh confirm style from App.js — replaces the
+    // browser-native window.confirm() prompts so the UX is consistent.
+    const [confirm, setConfirm] = useState(null);
+    const [confirmBusy, setConfirmBusy] = useState(false);
+    const [error, setError] = useState('');
+    const [errorCode, setErrorCode] = useState('');
+    const [errorExtra, setErrorExtra] = useState(null);
+    const [annotateJob, setAnnotateJob] = useState(null);
+    const [notice, setNotice] = useState(null);  // { severity, message } | null
+    // Phase 6 — pre-encoded latents
+    const [preEncodeJob, setPreEncodeJob] = useState(null);
+    const [preEncodeOffer, setPreEncodeOffer] = useState(false); // post-commit dialog
+    const [tier, setTier] = useState(() => {
+        try { return window.localStorage.getItem('fragmenta.datasetPrep.tier') || 'basic'; }
+        catch { return 'basic'; }
+    });
+    const [skipExisting, setSkipExisting] = useState(true);
+    const pollHandleRef = useRef(null);
+    const preEncodePollRef = useRef(null);
+    const isAnnotating = annotateJob?.state === 'running';
+    const isPreEncoding = preEncodeJob?.state === 'running' || preEncodeJob?.state === 'queued';
+    // --- Multi-row selection (for bulk Slice) -----------------------------
+    // Set<string> of clip file_names. Reset whenever the active project
+    // changes, since selections from a different project are meaningless.
+    const [selectedFiles, setSelectedFiles] = useState(() => new Set());
+    useEffect(() => { setSelectedFiles(new Set()); }, [selectedName]);
+    const toggleSelected = useCallback((fileName) => {
+        setSelectedFiles((prev) => {
+            const next = new Set(prev);
+            if (next.has(fileName)) next.delete(fileName);
+            else next.add(fileName);
+            return next;
+        });
+    }, []);
+    const toggleSelectAll = useCallback((clips) => {
+        setSelectedFiles((prev) => {
+            const allNames = clips.map((c) => c.file_name);
+            const allSelected = allNames.length > 0 && allNames.every((n) => prev.has(n));
+            return allSelected ? new Set() : new Set(allNames);
+        });
+    }, []);
+    const clearSelection = useCallback(() => setSelectedFiles(new Set()), []);
+    // --- Per-row audio preview --------------------------------------------
+    // One <audio> for the whole table. Rows just say "play me" / "pause";
+    // the parent reconciles which file is loaded and where the playhead is.
+    const audioRef = useRef(null);
+    const [playingFile, setPlayingFile] = useState(null);
+    const [playProgress, setPlayProgress] = useState(0);  // 0..1
+    const stopPlayback = useCallback(() => {
+        const audio = audioRef.current;
+        if (audio) { audio.pause(); }
+        setPlayingFile(null);
+        setPlayProgress(0);
+    }, []);
+    const handlePlayToggle = useCallback((fileName) => {
+        if (!selectedName) return;
+        const audio = audioRef.current;
+        if (!audio) return;
+        if (playingFile === fileName) {
+            audio.pause();
+            setPlayingFile(null);
+            return;
+        }
+        const url = `/api/projects/${encodeURIComponent(selectedName)}/clip/${encodeURIComponent(fileName)}/audio`;
+        audio.src = url;
+        setPlayProgress(0);
+        setPlayingFile(fileName);
+        audio.play().catch(() => {
+            setPlayingFile(null);
+        });
+    }, [selectedName, playingFile]);
+    // Stop playback when the project changes — the audio element's src would
+    // suddenly refer to a different project's file.
+    useEffect(() => { stopPlayback(); }, [selectedName, stopPlayback]);
+    const refreshProjects = useCallback(async () => {
+        try {
+            const { data } = await api.get('/api/projects');
+            setProjects(data.projects || []);
+        } catch (e) { setError(extractError(e, 'Failed to list projects')); }
+    }, []);
+    const [health, setHealth] = useState(null);
+    const refreshHealth = useCallback(async (name) => {
+        if (!name) { setHealth(null); return; }
+        try {
+            const { data } = await api.get(`/api/projects/${encodeURIComponent(name)}/health`);
+            setHealth(data);
+        } catch {
+            // Non-fatal — strip just hides until next refresh.
+            setHealth(null);
+        }
+    }, []);
+    const refreshProject = useCallback(async (name) => {
+        if (!name) { setProject(null); setHealth(null); return; }
+        try {
+            const { data } = await api.get(`/api/projects/${encodeURIComponent(name)}`);
+            setProject(data);
+            refreshHealth(name);
+        } catch (e) {
+            if (e?.response?.status === 404) {
+                setSelectedName('');
+                setProject(null);
+                setHealth(null);
+                await refreshProjects();
+                return;
+            }
+            setError(extractError(e, 'Failed to load project'));
+        }
+    }, [refreshProjects, refreshHealth]);
+    useEffect(() => { refreshProjects(); }, [refreshProjects]);
+    const pollAnnotateStatus = useCallback(async function poll(name) {
+        try {
+            const { data } = await api.get(`/api/projects/${encodeURIComponent(name)}/annotate/status`);
+            setAnnotateJob(data.job);
+            if (data.job.state === 'done') {
+                await refreshProject(name);
+                return;
+            }
+            if (data.job.state === 'error') {
+                setError(data.job.error || 'Annotation failed');
+                return;
+            }
+            // Only keep polling while the backend is actively annotating. Other
+            // states ('idle', 'cancelled', missing) terminate the loop so a
+            // freshly-mounted tab doesn't poll forever for a non-existent job.
+            if (data.job.state === 'running') {
+                pollHandleRef.current = window.setTimeout(() => poll(name), 500);
+            }
+        } catch (e) { setError(extractError(e, 'Status poll failed')); }
+    }, [refreshProject]);
+    // Phase 6 — pre-encode polling. Same survives-tab-switch shape as the
+    // annotate poller above.
+    const pollPreEncodeStatus = useCallback(async function poll(name) {
+        try {
+            const { data } = await api.get(`/api/projects/${encodeURIComponent(name)}/pre-encode/status`);
+            setPreEncodeJob(data.job);
+            if (data.job.state === 'complete') {
+                refreshProject(name);
+                return;
+            }
+            if (data.job.state === 'failed') {
+                setError(data.job.error || 'Pre-encoding failed');
+                return;
+            }
+            if (data.job.state === 'running' || data.job.state === 'queued') {
+                preEncodePollRef.current = window.setTimeout(() => poll(name), 750);
+            }
+        } catch (e) { /* non-fatal — bar just freezes */ }
+    }, [refreshProject]);
+    useEffect(() => {
+        if (selectedName) {
+            try { window.localStorage.setItem('fragmenta.datasetPrep.lastProject', selectedName); } catch {}
+            refreshProject(selectedName);
+            // Re-bootstrap progress polling on (re)mount or project switch, so
+            // the progress strip survives tab changes while a job runs.
+            pollAnnotateStatus(selectedName);
+            pollPreEncodeStatus(selectedName);
+        } else {
+            setProject(null);
+            setAnnotateJob(null);
+            setPreEncodeJob(null);
+        }
+        return () => {
+            if (pollHandleRef.current) {
+                window.clearTimeout(pollHandleRef.current);
+                pollHandleRef.current = null;
+            }
+            if (preEncodePollRef.current) {
+                window.clearTimeout(preEncodePollRef.current);
+                preEncodePollRef.current = null;
+            }
+        };
+    }, [selectedName, refreshProject, pollAnnotateStatus, pollPreEncodeStatus]);
+    function changeTier(value) {
+        setTier(value);
+        try { window.localStorage.setItem('fragmenta.datasetPrep.tier', value); } catch {}
+    }
+    function trySelectProject(nextName) {
+        // Confirm before switching if there are unsaved or uncommitted edits.
+        if (project && (project.dirty || project.has_unsaved_changes) && nextName !== project.name) {
+            const ok = window.confirm(
+                `“${project.name}” has unsaved or uncommitted changes. Switch anyway? They'll stay in memory until you reload the project — but a backend restart will lose them.`,
+            );
+            if (!ok) return;
+        }
+        setSelectedName(nextName);
+    }
+    async function handleAnnotate(scope /* "all" | [file_names] */, opts = {}) {
+        if (!project) return;
+        setError(''); setErrorCode(''); setErrorExtra(null);
+        try {
+            await api.post(`/api/projects/${encodeURIComponent(project.name)}/annotate`, {
+                tier,
+                scope: scope ?? 'all',
+                skip_existing: opts.skip_existing ?? skipExisting,
+            });
+            pollAnnotateStatus(project.name);
+        } catch (e) {
+            const body = e?.response?.data || {};
+            setError(extractError(e, 'Failed to start annotation'));
+            setErrorCode(body.code || '');
+            setErrorExtra(body.install_command ? { install_command: body.install_command } : null);
+        }
+    }
+    async function handleCancelAnnotate() {
+        if (!project) return;
+        try {
+            await api.post(`/api/projects/${encodeURIComponent(project.name)}/annotate/cancel`);
+        } catch (e) { setError(extractError(e, 'Cancel failed')); }
+    }
+    async function handleSave() {
+        if (!project) return;
+        setError('');
+        try {
+            const { data } = await api.post(`/api/projects/${encodeURIComponent(project.name)}/save`);
+            setProject(data);
+            setNotice({ severity: 'success', message: `Draft saved · ${data.clip_count} clips` });
+        } catch (e) { setError(extractError(e, 'Save failed')); }
+    }
+    async function handleStartPreEncode() {
+        if (!project) return;
+        setError('');
+        try {
+            const { data } = await api.post(`/api/projects/${encodeURIComponent(project.name)}/pre-encode`);
+            setPreEncodeJob(data.job);
+            pollPreEncodeStatus(project.name);
+        } catch (e) { setError(extractError(e, 'Pre-encode failed to start')); }
+    }
+    async function handleCancelPreEncode() {
+        if (!project) return;
+        try {
+            await api.post(`/api/projects/${encodeURIComponent(project.name)}/pre-encode/cancel`);
+        } catch (e) { setError(extractError(e, 'Cancel failed')); }
+    }
+    async function persistPreEncodeSuppression(suppress) {
+        if (!project) return;
+        try {
+            const { data } = await api.patch(
+                `/api/projects/${encodeURIComponent(project.name)}/pre-encode/prompt`,
+                { suppress: !!suppress },
+            );
+            setProject(data);
+        } catch (e) { /* non-fatal — dialog still closes */ }
+    }
+    async function handleCommit() {
+        if (!project) return;
+        setError('');
+        try {
+            const { data } = await api.post(`/api/projects/${encodeURIComponent(project.name)}/commit`);
+            setProject(data);
+            await refreshProjects();
+            // Phase 6 — post-commit pre-encode prompt.
+            // Open the dialog unless: (a) latents already present (re-commit
+            // wiped them but we still avoid re-asking immediately), or
+            // (b) the user previously chose "Don't ask again".
+            if (!data.suppress_pre_encode_prompt && !data.latents_present && data.clip_count > 0) {
+                setPreEncodeOffer(true);
+            }
+            setNotice({
+                severity: 'success',
+                message: `Dataset created · ${data.clip_count} clips written to disk`,
+            });
+        } catch (e) { setError(extractError(e, 'Create Dataset failed')); }
+    }
+    function handleDiscard() {
+        if (!project) return;
+        setConfirm({
+            title: 'Delete unsaved changes',
+            body: `Delete all changes in “${project.name}” since the last created dataset? Audio files added since then will be removed.`,
+            warning: 'This cannot be undone.',
+            confirmLabel: 'Delete',
+            busyLabel: 'Deleting…',
+            danger: true,
+            onConfirm: async () => {
+                setError('');
+                try {
+                    const { data } = await api.post(`/api/projects/${encodeURIComponent(project.name)}/discard`);
+                    setProject(data);
+                    await refreshProjects();
+                    setNotice({ severity: 'info', message: 'Unsaved changes discarded' });
+                } catch (e) { setError(extractError(e, 'Delete failed')); }
+            },
+        });
+    }
+    function handleDeleteProject(name) {
+        if (!name) return;
+        setConfirm({
+            title: 'Delete project',
+            body: `Permanently delete project “${name}”? Audio files, sidecars, and any drafts will be removed from disk.`,
+            warning: 'This cannot be undone.',
+            confirmLabel: 'Delete',
+            busyLabel: 'Deleting…',
+            danger: true,
+            onConfirm: async () => {
+                setError('');
+                try {
+                    await api.delete(`/api/projects/${encodeURIComponent(name)}`);
+                    if (selectedName === name) {
+                        stopPlayback();
+                        setSelectedName('');
+                        setProject(null);
+                        try { window.localStorage.removeItem('fragmenta.datasetPrep.lastProject'); } catch {}
+                    }
+                    await refreshProjects();
+                } catch (e) { setError(extractError(e, 'Delete project failed')); }
+            },
+        });
+    }
+    async function handleChangeTemplatePreset(presetId) {
+        if (!project) return;
+        try {
+            const { data } = await api.patch(
+                `/api/projects/${encodeURIComponent(project.name)}/template`,
+                { preset: presetId },
+            );
+            setProject(data);
+        } catch (e) {
+            setError(extractError(e, 'Could not update annotation style'));
+        }
+    }
+    function handleClearSelectedAnnotations() {
+        if (!project || selectedFiles.size === 0) return;
+        const count = selectedFiles.size;
+        const files = Array.from(selectedFiles);
+        setConfirm({
+            title: 'Clear',
+            body: `Clear annotations on ${count} clip${count === 1 ? '' : 's'}? Buffered in memory until you Save or Create Dataset.`,
+            warning: 'Use the Delete button to revert; this action itself can’t be undone in place.',
+            confirmLabel: `Clear (${count})`,
+            busyLabel: 'Clearing…',
+            danger: true,
+            onConfirm: async () => {
+                setError('');
+                try {
+                    for (const f of files) {
+                        await api.patch(
+                            `/api/projects/${encodeURIComponent(project.name)}/clip/${encodeURIComponent(f)}`,
+                            { prompt: '' },
+                        );
+                    }
+                    clearSelection();
+                    await refreshProject(project.name);
+                } catch (e) { setError(extractError(e, 'Clear annotations failed')); }
+            },
+        });
+    }
+    async function handleClipPromptChange(fileName, newPrompt) {
+        if (!project) return;
+        try {
+            await api.patch(
+                `/api/projects/${encodeURIComponent(project.name)}/clip/${encodeURIComponent(fileName)}`,
+                { prompt: newPrompt },
+            );
+            // Reload to pick up dirty-state flip in the header.
+            await refreshProject(project.name);
+        } catch (e) { setError(extractError(e, 'Failed to save prompt')); }
+    }
+    async function handleClipDelete(fileName) {
+        if (!project) return;
+        if (!window.confirm(`Remove ${fileName} from this project? (Deletes the audio file from disk immediately — cannot be discarded back.)`)) return;
+        try {
+            await api.delete(
+                `/api/projects/${encodeURIComponent(project.name)}/clip/${encodeURIComponent(fileName)}`,
+            );
+            await refreshProject(project.name);
+        } catch (e) { setError(extractError(e, 'Failed to delete clip')); }
+    }
+    return (
+        <Paper variant="outlined" sx={{ p: { xs: 2.25, sm: 3 }, borderRadius: 2.5 }}>
+        <Stack spacing={2.5}>
+            <Box>
+                <Box sx={{ ...appStyles.sectionCardHeader, mb: 0.5 }}>
+                    <Box component="span" sx={appStyles.sectionCardIcon}>
+                        <Database size={20} />
+                    </Box>
+                    <Typography variant="h6" sx={appStyles.sectionCardTitle}>
+                        Dataset Workbench
+                    </Typography>
+                    <Box sx={{ flex: 1 }} />
+                    <Button
+                        variant="outlined"
+                        size="small"
+                        startIcon={<FolderOpenIcon size={16} />}
+                        onClick={() => setLoadOpen(true)}
+                        disabled={projects.length === 0}
+                    >
+                        Load project
+                    </Button>
+                    <Button
+                        variant="outlined"
+                        size="small"
+                        startIcon={<PlusIcon size={16} />}
+                        onClick={() => setCreateOpen(true)}
+                    >
+                        New project
+                    </Button>
+                </Box>
+                <Typography variant="body2" color="text.secondary">
+                    Create a new dataset or load and edit one.
+                </Typography>
+                <Typography variant="body2" color="text.secondary" paddingBottom={2}>
+                     You can auto-annotate using Librosa and CLAP or annotate everything manually.
+                </Typography>
+            </Box>
+            {error && (
+                <Alert
+                    severity={(errorCode === 'clap_not_available' || errorCode === 'clap_package_missing') ? 'warning' : 'error'}
+                    onClose={() => { setError(''); setErrorCode(''); setErrorExtra(null); }}
+                    action={
+                        errorCode === 'clap_not_available' && onOpenCheckpointManager ? (
+                            <Button
+                                color="inherit"
+                                size="small"
+                                onClick={() => { setError(''); setErrorCode(''); setErrorExtra(null); onOpenCheckpointManager(); }}
+                            >
+                                Open Model Management
+                            </Button>
+                        ) : null
+                    }
+                >
+                    <Box>
+                        <Typography variant="body2">{error}</Typography>
+                        {errorCode === 'clap_package_missing' && errorExtra?.install_command && (
+                            <Box
+                                component="pre"
+                                sx={{
+                                    mt: 1,
+                                    mb: 0,
+                                    p: 1,
+                                    borderRadius: 1,
+                                    bgcolor: 'action.hover',
+                                    fontSize: '0.8rem',
+                                    fontFamily: 'monospace',
+                                    overflowX: 'auto',
+                                }}
+                            >
+                                {errorExtra.install_command}
+                            </Box>
+                        )}
+                    </Box>
+                </Alert>
+            )}
+            {project && (
+                <Stack spacing={2}>
+                    <ProjectHeader
+                        project={project}
+                        onSave={handleSave}
+                        onCommit={handleCommit}
+                        onDiscard={handleDiscard}
+                        onAddAudio={() => setIngestOpen(true)}
+                        disabled={isAnnotating}
+                    />
+                    <HealthStrip
+                        health={health}
+                        onSelectFiles={(files) => setSelectedFiles(new Set(files))}
+                    />
+                    {isAnnotating && annotateJob && (
+                        <Box>
+                            <LinearProgress
+                                variant={annotateJob.total > 0 ? 'determinate' : 'indeterminate'}
+                                value={annotateJob.total > 0 ? (annotateJob.current / annotateJob.total) * 100 : undefined}
+                            />
+                            <Box sx={{ mt: 0.75, display: 'flex', alignItems: 'center', gap: 1.5 }}>
+                                <Typography variant="caption" color="text.secondary" sx={{ flex: 1 }}>
+                                    Annotating {annotateJob.current} / {annotateJob.total}
+                                    {annotateJob.current_file ? ` · ${annotateJob.current_file}` : ''}
+                                </Typography>
+                                <Button
+                                    size="small"
+                                    variant="outlined"
+                                    color="error"
+                                    startIcon={<StopIcon size={14} />}
+                                    onClick={handleCancelAnnotate}
+                                >
+                                    Stop
+                                </Button>
+                            </Box>
+                        </Box>
+                    )}
+                    {isPreEncoding && preEncodeJob && (
+                        <Box>
+                            <LinearProgress
+                                variant={preEncodeJob.total > 0 ? 'determinate' : 'indeterminate'}
+                                value={preEncodeJob.total > 0 ? (preEncodeJob.current / preEncodeJob.total) * 100 : undefined}
+                            />
+                            <Box sx={{ mt: 0.75, display: 'flex', alignItems: 'center', gap: 1.5 }}>
+                                <Typography variant="caption" color="text.secondary" sx={{ flex: 1 }}>
+                                    Pre-encoding latents · {preEncodeJob.current} / {preEncodeJob.total}
+                                    {preEncodeJob.autoencoder ? ` · ${preEncodeJob.autoencoder}` : ''}
+                                </Typography>
+                                <Button
+                                    size="small"
+                                    variant="outlined"
+                                    color="error"
+                                    startIcon={<StopIcon size={14} />}
+                                    onClick={handleCancelPreEncode}
+                                >
+                                    Stop
+                                </Button>
+                            </Box>
+                        </Box>
+                    )}
+                    <ClipTable
+                        projectName={selectedName}
+                        clips={project.clips}
+                        playingFile={playingFile}
+                        playProgress={playProgress}
+                        onPlayToggle={handlePlayToggle}
+                        onPromptChange={handleClipPromptChange}
+                        onAnnotate={(fname) => handleAnnotate([fname], { skip_existing: false })}
+                        onDelete={(fname) => {
+                            if (playingFile === fname) stopPlayback();
+                            return handleClipDelete(fname);
+                        }}
+                        onSlice={(fname) => {
+                            if (playingFile === fname) stopPlayback();
+                            setSliceTarget(fname);
+                        }}
+                        selectedFiles={selectedFiles}
+                        onToggleSelected={toggleSelected}
+                        onToggleSelectAll={() => toggleSelectAll(project.clips)}
+                        disabled={isAnnotating}
+                        toolbar={
+                            <Stack spacing={1}>
+                                <Box sx={{ display: 'flex', alignItems: 'center', flexWrap: 'wrap', gap: 1.5 }}>
+                                    <Button
+                                        variant="contained"
+                                        color="warm"
+                                        size="small"
+                                        startIcon={<WandSparkles size={16} />}
+                                        onClick={() => handleAnnotate('all')}
+                                        disabled={isAnnotating || project.clip_count === 0}
+                                    >
+                                        Auto-annotate all
+                                    </Button>
+                                    <FormControl size="small" sx={{ minWidth: 180 }}>
+                                        <Select
+                                            value={project.prompt_template_preset || 'music'}
+                                            onChange={(e) => handleChangeTemplatePreset(e.target.value)}
+                                            disabled={isAnnotating}
+                                            renderValue={(v) => {
+                                                const p = (project.prompt_template_presets || []).find((x) => x.id === v);
+                                                return p ? p.label : v;
+                                            }}
+                                        >
+                                            {(project.prompt_template_presets || []).map((p) => (
+                                                <MenuItem key={p.id} value={p.id}>
+                                                    <Box>
+                                                        <Typography variant="body2">{p.label}</Typography>
+                                                        <Typography variant="caption" color="text.secondary">
+                                                            {p.description}
+                                                        </Typography>
+                                                    </Box>
+                                                </MenuItem>
+                                            ))}
+                                        </Select>
+                                    </FormControl>
+                                    <Tooltip title={TIPS.dataset.richAnnotate}>
+                                        <FormControlLabel
+                                            control={
+                                                <Switch
+                                                    size="small"
+                                                    checked={tier === 'rich'}
+                                                    onChange={(e) => changeTier(e.target.checked ? 'rich' : 'basic')}
+                                                    disabled={isAnnotating}
+                                                />
+                                            }
+                                            label={<Typography variant="caption" color="text.secondary">Rich annotation</Typography>}
+                                            sx={{ mr: 0 }}
+                                        />
+                                    </Tooltip>
+                                    <Tooltip title={TIPS.dataset.skipAnnotated}>
+                                        <FormControlLabel
+                                            control={
+                                                <Switch
+                                                    size="small"
+                                                    checked={skipExisting}
+                                                    onChange={(e) => setSkipExisting(e.target.checked)}
+                                                    disabled={isAnnotating}
+                                                />
+                                            }
+                                            label={<Typography variant="caption" color="text.secondary">Skip already annotated</Typography>}
+                                            sx={{ mr: 0 }}
+                                        />
+                                    </Tooltip>
+                                    <Box sx={{ flex: 1 }} />
+                                    {selectedFiles.size > 0 && (
+                                        <Button
+                                            variant="outlined"
+                                            color="error"
+                                            size="small"
+                                            startIcon={<TrashIcon size={16} />}
+                                            onClick={handleClearSelectedAnnotations}
+                                            disabled={isAnnotating}
+                                        >
+                                            Clear annotations ({selectedFiles.size})
+                                        </Button>
+                                    )}
+                                </Box>
+                                {tier === 'rich' && (
+                                    <ClapVocabAccordion disabled={isAnnotating} />
+                                )}
+                            </Stack>
+                        }
+                    />
+                    <audio
+                        ref={audioRef}
+                        style={{ display: 'none' }}
+                        onTimeUpdate={(e) => {
+                            const a = e.currentTarget;
+                            if (a.duration && isFinite(a.duration)) {
+                                setPlayProgress(a.currentTime / a.duration);
+                            }
+                        }}
+                        onEnded={() => { setPlayingFile(null); setPlayProgress(0); }}
+                        onError={() => { setPlayingFile(null); setPlayProgress(0); }}
+                    />
+                </Stack>
+            )}
+            <CreateProjectDialog
+                open={createOpen}
+                existingNames={projects.map((p) => p.name)}
+                onClose={() => setCreateOpen(false)}
+                onCreated={async (name) => {
+                    setCreateOpen(false);
+                    await refreshProjects();
+                    setSelectedName(name);
+                }}
+            />
+            <LoadProjectDialog
+                open={loadOpen}
+                projects={projects}
+                currentName={selectedName}
+                onClose={() => setLoadOpen(false)}
+                onLoad={(name) => {
+                    setLoadOpen(false);
+                    trySelectProject(name);
+                }}
+                onDeleteProject={handleDeleteProject}
+            />
+            <IngestDialog
+                open={ingestOpen}
+                projectName={project?.name}
+                onClose={() => setIngestOpen(false)}
+                onIngested={async () => {
+                    setIngestOpen(false);
+                    if (project) await refreshProject(project.name);
+                    await refreshProjects();
+                }}
+            />
+            <SliceDialog
+                open={Boolean(sliceTarget)}
+                projectName={project?.name}
+                fileName={sliceTarget}
+                onClose={() => setSliceTarget(null)}
+                onSliced={async () => {
+                    clearSelection();
+                    if (project) await refreshProject(project.name);
+                    await refreshProjects();
+                }}
+            />
+            <Dialog
+                open={Boolean(confirm)}
+                onClose={confirmBusy ? undefined : () => setConfirm(null)}
+                aria-labelledby="dataset-confirm-title"
+            >
+                <DialogTitle id="dataset-confirm-title">
+                    {confirm?.title}
+                </DialogTitle>
+                <DialogContent>
+                    <Typography sx={appStyles.dialogBodyText}>
+                        {confirm?.body}
+                    </Typography>
+                    {confirm?.warning && (
+                        <Typography variant="body2" color="warning.main" sx={appStyles.dialogErrorText}>
+                            {confirm.warning}
+                        </Typography>
+                    )}
+                </DialogContent>
+                <DialogActions>
+                    <Button onClick={() => setConfirm(null)} disabled={confirmBusy}>
+                        Cancel
+                    </Button>
+                    <Button
+                        onClick={async () => {
+                            if (!confirm?.onConfirm) { setConfirm(null); return; }
+                            setConfirmBusy(true);
+                            try {
+                                await confirm.onConfirm();
+                            } finally {
+                                setConfirmBusy(false);
+                                setConfirm(null);
+                            }
+                        }}
+                        color={confirm?.danger ? 'error' : 'primary'}
+                        variant="contained"
+                        disabled={confirmBusy}
+                    >
+                        {confirmBusy ? (confirm?.busyLabel || 'Working…') : (confirm?.confirmLabel || 'Confirm')}
+                    </Button>
+                </DialogActions>
+            </Dialog>
+            {/* Phase 6 — post-commit pre-encode dialog. Surfaces after a
+                successful Create Dataset commit unless the user previously
+                chose "Don't ask again". */}
+            <Dialog
+                open={preEncodeOffer}
+                onClose={() => setPreEncodeOffer(false)}
+                maxWidth="xs"
+                fullWidth
+            >
+                <DialogTitle>Pre-encode latents?</DialogTitle>
+                <DialogContent>
+                    <Typography variant="body2" sx={{ mb: 1 }}>
+                        Encode your audio into SA3 latents now to speed up training. The
+                        autoencoder runs once up-front instead of every training step.
+                    </Typography>
+                    <Typography variant="caption" color="text.secondary">
+                        Takes a few minutes for ~50 clips. Latents live in
+                        <code> {project?.name}/.latents/</code> and get wiped automatically
+                        when you next commit or edit a clip.
+                    </Typography>
+                </DialogContent>
+                <DialogActions sx={{ flexWrap: 'wrap' }}>
+                    <Button
+                        onClick={() => {
+                            persistPreEncodeSuppression(true);
+                            setPreEncodeOffer(false);
+                        }}
+                        sx={{ mr: 'auto' }}
+                    >
+                        Don't ask again
+                    </Button>
+                    <Button onClick={() => setPreEncodeOffer(false)}>Not now</Button>
+                    <Button
+                        variant="contained"
+                        onClick={() => {
+                            setPreEncodeOffer(false);
+                            handleStartPreEncode();
+                        }}
+                    >
+                        Pre-encode now
+                    </Button>
+                </DialogActions>
+            </Dialog>
+            <Portal>
+                <Snackbar
+                    open={Boolean(notice)}
+                    autoHideDuration={4000}
+                    onClose={() => setNotice(null)}
+                    anchorOrigin={{ vertical: 'bottom', horizontal: 'right' }}
+                >
+                    {notice ? (
+                        <Alert
+                            onClose={() => setNotice(null)}
+                            severity={notice.severity}
+                            variant="filled"
+                            sx={{ width: '100%' }}
+                        >
+                            {notice.message}
+                        </Alert>
+                    ) : undefined}
+                </Snackbar>
+            </Portal>
+        </Stack>
+        </Paper>
+    );
+}
+// ---------- subcomponents --------------------------------------------------
+function ClapVocabAccordion({ disabled }) {
+    const [labels, setLabels] = useState({ genre: [], mood: [], instruments: [] });
+    const [overridden, setOverridden] = useState(false);
+    const [dirty, setDirty] = useState(false);
+    const [busy, setBusy] = useState(false);
+    const [vocabError, setVocabError] = useState('');
+    useEffect(() => {
+        let cancelled = false;
+        (async () => {
+            try {
+                const { data } = await api.get('/api/annotator-labels');
+                if (cancelled) return;
+                setLabels(data.labels || { genre: [], mood: [], instruments: [] });
+                setOverridden(!!data.overridden);
+                setDirty(false);
+            } catch (e) {
+                if (!cancelled) setVocabError(extractError(e, 'Failed to load vocabulary'));
+            }
+        })();
+        return () => { cancelled = true; };
+    }, []);
+    function setCategory(cat, values) {
+        setLabels((prev) => ({ ...prev, [cat]: values }));
+        setDirty(true);
+    }
+    async function save() {
+        setBusy(true);
+        setVocabError('');
+        try {
+            await api.put('/api/annotator-labels', labels);
+            setDirty(false);
+            setOverridden(true);
+        } catch (e) {
+            setVocabError(extractError(e, 'Failed to save vocabulary'));
+        } finally {
+            setBusy(false);
+        }
+    }
+    async function reset() {
+        if (!window.confirm('Reset vocabulary to the built-in defaults? Your custom tags will be lost.')) return;
+        setBusy(true);
+        setVocabError('');
+        try {
+            await api.delete('/api/annotator-labels');
+            const { data } = await api.get('/api/annotator-labels');
+            setLabels(data.labels || { genre: [], mood: [], instruments: [] });
+            setOverridden(false);
+            setDirty(false);
+        } catch (e) {
+            setVocabError(extractError(e, 'Failed to reset vocabulary'));
+        } finally {
+            setBusy(false);
+        }
+    }
+    const tagCount = (labels.genre?.length || 0) + (labels.mood?.length || 0) + (labels.instruments?.length || 0);
+    return (
+        <Accordion
+            disableGutters
+            sx={{ '&, &.Mui-expanded': { mt: 0, mb: 0 } }}
+        >
+            <AccordionSummary
+                expandIcon={<ChevronDownIcon size={18} />}
+                sx={{
+                    minHeight: 48,
+                    '&.Mui-expanded': { minHeight: 48 },
+                    '& .MuiAccordionSummary-content': {
+                        margin: '12px 0',
+                        '&.Mui-expanded': { margin: '12px 0' },
+                    },
+                }}
+            >
+                <Typography variant="subtitle1">CLAP Vocabulary</Typography>
+                <Typography variant="caption" color="text.secondary" sx={{ ml: 1.5, alignSelf: 'center' }}>
+                    {overridden ? 'custom' : 'defaults'} · {tagCount} tags
+                </Typography>
+            </AccordionSummary>
+            <AccordionDetails>
+                <Stack spacing={2}>
+                    <Typography variant="body2" color="text.secondary">
+                        Words CLAP scores each clip against. Empty categories are ignored. Tweak to match your dataset's territory.
+                    </Typography>
+                    <VocabCategory
+                        label="Genre"
+                        values={labels.genre || []}
+                        onChange={(v) => setCategory('genre', v)}
+                        disabled={disabled || busy}
+                    />
+                    <VocabCategory
+                        label="Mood"
+                        values={labels.mood || []}
+                        onChange={(v) => setCategory('mood', v)}
+                        disabled={disabled || busy}
+                    />
+                    <VocabCategory
+                        label="Instruments"
+                        values={labels.instruments || []}
+                        onChange={(v) => setCategory('instruments', v)}
+                        disabled={disabled || busy}
+                    />
+                    {vocabError && <Alert severity="error" onClose={() => setVocabError('')}>{vocabError}</Alert>}
+                    <Box sx={{ display: 'flex', alignItems: 'center', gap: 1.5 }}>
+                        <Button
+                            variant="text"
+                            size="small"
+                            onClick={reset}
+                            disabled={disabled || busy || !overridden}
+                        >
+                            Reset to defaults
+                        </Button>
+                        <Box sx={{ flex: 1 }} />
+                        <Button
+                            variant="contained"
+                            size="small"
+                            onClick={save}
+                            disabled={disabled || busy || !dirty}
+                        >
+                            Save vocabulary
+                        </Button>
+                    </Box>
+                </Stack>
+            </AccordionDetails>
+        </Accordion>
+    );
+}
+function VocabCategory({ label, values, onChange, disabled }) {
+    return (
+        <Autocomplete
+            multiple
+            freeSolo
+            options={[]}
+            value={values}
+            onChange={(_e, newValues) => onChange(newValues)}
+            disabled={disabled}
+            renderTags={(value, getTagProps) =>
+                value.map((option, index) => {
+                    const tagProps = getTagProps({ index });
+                    return (
+                        <Chip
+                            variant="outlined"
+                            size="small"
+                            label={option}
+                            {...tagProps}
+                            key={`${option}-${index}`}
+                        />
+                    );
+                })
+            }
+            renderInput={(params) => (
+                <TextField
+                    {...params}
+                    label={label}
+                    placeholder="Add tag, press Enter"
+                    size="small"
+                />
+            )}
+        />
+    );
+}
+function LoadProjectDialog({ open, projects, currentName, onClose, onLoad, onDeleteProject }) {
+    const [picked, setPicked] = useState(currentName || '');
+    useEffect(() => {
+        if (open) setPicked(currentName || (projects[0]?.name ?? ''));
+    }, [open, currentName, projects]);
+    return (
+        <Dialog open={open} onClose={onClose} maxWidth="sm" fullWidth>
+            <DialogTitle>Load project</DialogTitle>
+            <DialogContent>
+                {projects.length === 0 ? (
+                    <Typography variant="body2" color="text.secondary" sx={{ py: 2 }}>
+                        No projects yet. Create one first.
+                    </Typography>
+                ) : (
+                    <RadioGroup value={picked} onChange={(e) => setPicked(e.target.value)}>
+                        {projects.map((p) => (
+                            <Box
+                                key={p.name}
+                                sx={{ display: 'flex', alignItems: 'center', gap: 1, py: 0.25 }}
+                            >
+                                <FormControlLabel
+                                    value={p.name}
+                                    control={<Radio size="small" />}
+                                    label={
+                                        <Box>
+                                            <Typography variant="body2">{p.name}</Typography>
+                                            <Typography variant="caption" color="text.secondary">
+                                                {p.clip_count} clip{p.clip_count === 1 ? '' : 's'}
+                                                {p.has_draft ? ' · has unsaved draft' : ''}
+                                            </Typography>
+                                        </Box>
+                                    }
+                                    sx={{ alignItems: 'flex-start', flex: 1, mr: 0 }}
+                                />
+                                <Tooltip title={TIPS.dataset.deleteProject}>
+                                    <span>
+                                        <IconButton
+                                            size="small"
+                                            sx={{ color: 'text.disabled', '&:hover': { color: 'error.main', bgcolor: 'action.hover' } }}
+                                            onClick={() => onDeleteProject(p.name)}
+                                        >
+                                            <TrashIcon size={16} />
+                                        </IconButton>
+                                    </span>
+                                </Tooltip>
+                            </Box>
+                        ))}
+                    </RadioGroup>
+                )}
+            </DialogContent>
+            <DialogActions>
+                <Button onClick={onClose}>Cancel</Button>
+                <Button
+                    variant="contained"
+                    onClick={() => onLoad(picked)}
+                    disabled={!picked || projects.length === 0}
+                >
+                    Load
+                </Button>
+            </DialogActions>
+        </Dialog>
+    );
+}
+function ProjectHeader({ project, onSave, onCommit, onDiscard, onAddAudio, disabled }) {
+    const stateLabel = (() => {
+        if (project.dirty && project.has_unsaved_changes) return 'Unsaved changes';
+        if (project.dirty && !project.has_unsaved_changes) return 'Draft saved · dataset not created';
+        if (!project.dirty) return 'Dataset created';
+        return '';
+    })();
+    return (
+        <Box sx={{ display: 'flex', alignItems: 'center', gap: 2, flexWrap: 'wrap' }}>
+            <Stack direction="row" spacing={2} alignItems="center" sx={{ flex: 1, minWidth: 240 }}>
+                <Box>
+                    <Typography variant="h6">Project: &ldquo;{project.name}&rdquo;</Typography>
+                    <Typography variant="body2" color="text.secondary">
+                        {project.clip_count} clip{project.clip_count === 1 ? '' : 's'}
+                        {' · '}{stateLabel}
+                    </Typography>
+                </Box>
+                <Button
+                    variant="outlined"
+                    size="small"
+                    startIcon={<MusicIcon size={16} />}
+                    onClick={onAddAudio}
+                    disabled={disabled}
+                >
+                    Add audio
+                </Button>
+            </Stack>
+            <Stack direction="row" spacing={1}>
+                <Tooltip title={TIPS.dataset.discardChanges}>
+                    <span>
+                        <Button
+                            variant="outlined"
+                            color="error"
+                            size="small"
+                            startIcon={<TrashIcon size={16} />}
+                            onClick={onDiscard}
+                            disabled={disabled || !project.dirty}
+                        >
+                            Delete
+                        </Button>
+                    </span>
+                </Tooltip>
+                <Tooltip title={TIPS.dataset.saveDraft}>
+                    <span>
+                        <Button
+                            variant="outlined"
+                            size="small"
+                            startIcon={<SaveIcon size={16} />}
+                            onClick={onSave}
+                            disabled={disabled || !project.has_unsaved_changes}
+                        >
+                            Save
+                        </Button>
+                    </span>
+                </Tooltip>
+                <Tooltip title={TIPS.dataset.createDataset}>
+                    <span>
+                        <Button
+                            variant="contained"
+                            size="small"
+                            startIcon={<DatasetIcon size={16} />}
+                            onClick={onCommit}
+                            disabled={disabled || !project.dirty}
+                        >
+                            Create Dataset
+                        </Button>
+                    </span>
+                </Tooltip>
+            </Stack>
+        </Box>
+    );
+}
+function HealthStrip({ health, onSelectFiles }) {
+    if (!health || health.total_clips === 0) return null;
+    const empty = health.empty_prompts || { count: 0, files: [] };
+    const tooShort = health.too_short || { count: 0, files: [] };
+    const dups = health.duplicate_annotations || { count: 0, group_count: 0, files: [] };
+    const unsupported = health.unsupported_format || { count: 0, accepted: [], files: [] };
+    const issues = empty.count + tooShort.count
+        + dups.count + unsupported.count;
+    // Three-tier status driven by the share of unique clips touched by any
+    // health check. A single file showing up in multiple categories only
+    // counts once.
+    const affected = new Set([
+        ...empty.files,
+        ...tooShort.files,
+        ...dups.files,
+        ...unsupported.files,
+    ]);
+    const affectedRatio = health.total_clips > 0 ? affected.size / health.total_clips : 0;
+    let status;
+    if (affected.size === 0) status = 'ok';
+    else if (affectedRatio > 0.5) status = 'bad';
+    else status = 'warn';
+    const statusColor = (
+        status === 'ok' ? 'success.main'
+        : status === 'warn' ? 'warm.main'
+        : 'error.main'
+    );
+    const statusText = (
+        status === 'ok'
+            ? `All clean · ${health.total_clips} clip${health.total_clips === 1 ? '' : 's'} ready`
+            : `${affected.size} of ${health.total_clips} clip${health.total_clips === 1 ? '' : 's'} flagged`
+    );
+    return (
+        <Paper variant="outlined" sx={{ borderRadius: 2.5 }}>
+            <Box sx={{ px: 2, py: 1.25, display: 'flex', alignItems: 'center', gap: 1 }}>
+                <Box component="span" sx={appStyles.sectionCardIcon}>
+                    <HealthIcon size={18} />
+                </Box>
+                <Typography variant="subtitle1" sx={{ fontWeight: 500, flex: 1 }}>
+                    Dataset health
+                </Typography>
+                <Box
+                    sx={{
+                        width: 8,
+                        height: 8,
+                        borderRadius: '50%',
+                        bgcolor: statusColor,
+                        // Soft halo so the dot reads as a status indicator,
+                        // not stray decoration.
+                        boxShadow: (theme) =>
+                            `0 0 0 3px ${theme.palette.mode === 'dark' ? 'rgba(255,255,255,0.04)' : 'rgba(0,0,0,0.04)'}`,
+                    }}
+                />
+                <Typography variant="caption" sx={{ color: statusColor }}>
+                    {statusText}
+                </Typography>
+            </Box>
+            {issues > 0 && (
+                <Box
+                    sx={{
+                        px: 2,
+                        py: 1.25,
+                        borderTop: 1,
+                        borderColor: 'divider',
+                        display: 'flex',
+                        alignItems: 'center',
+                        gap: 0.75,
+                        flexWrap: 'wrap',
+                    }}
+                >
+                    {empty.count > 0 && (
+                        <Tooltip title={TIPS.dataset.selectClips}>
+                            <Chip
+                                size="small"
+                                variant="outlined"
+                                color="warning"
+                                label={`${empty.count} empty annotation${empty.count === 1 ? '' : 's'}`}
+                                onClick={() => onSelectFiles(empty.files)}
+                            />
+                        </Tooltip>
+                    )}
+                    {tooShort.count > 0 && (
+                        <Tooltip title={TIPS.dataset.tooShort(tooShort.threshold_sec)}>
+                            <Chip
+                                size="small"
+                                variant="outlined"
+                                color="error"
+                                label={`${tooShort.count} too short (< ${tooShort.threshold_sec}s)`}
+                                onClick={() => onSelectFiles(tooShort.files)}
+                            />
+                        </Tooltip>
+                    )}
+                    {dups.count > 0 && (
+                        <Tooltip title={TIPS.dataset.duplicates(dups.group_count)}>
+                            <Chip
+                                size="small"
+                                variant="outlined"
+                                color="warning"
+                                label={`${dups.count} duplicate annotation${dups.count === 1 ? '' : 's'}`}
+                                onClick={() => onSelectFiles(dups.files)}
+                            />
+                        </Tooltip>
+                    )}
+                    {unsupported.count > 0 && (
+                        <Tooltip title={TIPS.dataset.unsupported(unsupported.accepted)}>
+                            <Chip
+                                size="small"
+                                variant="outlined"
+                                color="error"
+                                label={`${unsupported.count} unsupported format${unsupported.count === 1 ? '' : 's'}`}
+                                onClick={() => onSelectFiles(unsupported.files)}
+                            />
+                        </Tooltip>
+                    )}
+                </Box>
+            )}
+        </Paper>
+    );
+}
+function Waveform({ projectName, fileName, isActive, progress }) {
+    const canvasRef = useRef(null);
+    const theme = useTheme();
+    const [peaks, setPeaks] = useState(null);
+    const [failed, setFailed] = useState(false);
+    useEffect(() => {
+        let cancelled = false;
+        setPeaks(null);
+        setFailed(false);
+        if (!projectName || !fileName) return;
+        const url = `/api/projects/${encodeURIComponent(projectName)}/clip/${encodeURIComponent(fileName)}/peaks?n=80`;
+        api.get(url)
+            .then(({ data }) => { if (!cancelled) setPeaks(data?.peaks || []); })
+            .catch(() => { if (!cancelled) setFailed(true); });
+        return () => { cancelled = true; };
+    }, [projectName, fileName]);
+    useEffect(() => {
+        const canvas = canvasRef.current;
+        if (!canvas) return;
+        const dpr = window.devicePixelRatio || 1;
+        const w = canvas.clientWidth;
+        const h = canvas.clientHeight;
+        if (canvas.width !== w * dpr) canvas.width = w * dpr;
+        if (canvas.height !== h * dpr) canvas.height = h * dpr;
+        const ctx = canvas.getContext('2d');
+        ctx.setTransform(dpr, 0, 0, dpr, 0, 0);
+        ctx.clearRect(0, 0, w, h);
+        if (!peaks || !peaks.length) {
+            ctx.fillStyle = 'rgba(0,0,0,0.08)';
+            const midY = h / 2;
+            ctx.fillRect(0, midY - 0.5, w, 1);
+            return;
+        }
+        const barCount = peaks.length;
+        const barWidth = Math.max(1, w / barCount - 1);
+        const playedIdx = isActive ? Math.floor(progress * barCount) : -1;
+        // Match the Generated-Fragments waveforms: teal accent for the played
+        // portion, dimmed (35% alpha) for the rest.
+        const playedColor = '#279FBB';
+        const restColor = '#279FBB59';
+        for (let i = 0; i < barCount; i++) {
+            const v = peaks[i];
+            const barH = Math.max(1, v * (h - 2));
+            const x = i * (w / barCount);
+            const y = (h - barH) / 2;
+            ctx.fillStyle = i <= playedIdx ? playedColor : restColor;
+            ctx.fillRect(x, y, barWidth, barH);
+        }
+    }, [peaks, isActive, progress, theme]);
+    return (
+        <Box sx={{ width: 120, height: 28, flexShrink: 0, opacity: failed ? 0.3 : 1 }}>
+            <canvas
+                ref={canvasRef}
+                style={{ width: '100%', height: '100%', display: 'block' }}
+            />
+        </Box>
+    );
+}
+function ClipTable({ projectName, clips, playingFile, playProgress, onPlayToggle, onPromptChange, onAnnotate, onDelete, onSlice, selectedFiles, onToggleSelected, onToggleSelectAll, disabled, toolbar }) {
+    const totalSelected = selectedFiles ? selectedFiles.size : 0;
+    const allSelected = clips && clips.length > 0 && totalSelected === clips.length;
+    const partiallySelected = totalSelected > 0 && !allSelected;
+    if (!clips || clips.length === 0) {
+        return (
+            <Paper variant="outlined" sx={{ borderRadius: 2.5, overflow: 'hidden' }}>
+                {toolbar && (
+                    <Box sx={{ px: 1.5, py: 1, borderBottom: 1, borderColor: 'divider' }}>
+                        {toolbar}
+                    </Box>
+                )}
+                <Box sx={{ py: 4, textAlign: 'center', color: 'text.secondary' }}>
+                    <Typography variant="body2">
+                        No clips yet. Use “Add audio” to bring in a folder.
+                    </Typography>
+                </Box>
+            </Paper>
+        );
+    }
+    return (
+        <Paper variant="outlined" sx={{ borderRadius: 2.5, overflow: 'hidden' }}>
+            {toolbar && (
+                <Box sx={{ px: 1.5, py: 1, borderBottom: 1, borderColor: 'divider' }}>
+                    {toolbar}
+                </Box>
+            )}
+            <TableContainer>
+                <Table size="small">
+                    <TableHead>
+                        <TableRow>
+                            <TableCell padding="checkbox">
+                                <Checkbox
+                                    size="small"
+                                    checked={allSelected}
+                                    indeterminate={partiallySelected}
+                                    onChange={onToggleSelectAll}
+                                    disabled={disabled || clips.length === 0}
+                                />
+                            </TableCell>
+                            <TableCell sx={{ width: '36%' }}>File</TableCell>
+                            <TableCell>Annotation</TableCell>
+                            <TableCell sx={{ width: 132, textAlign: 'right' }}>Actions</TableCell>
+                        </TableRow>
+                    </TableHead>
+                    <TableBody>
+                        {clips.map((c) => (
+                            <ClipRow
+                                key={c.file_name}
+                                projectName={projectName}
+                                clip={c}
+                                isPlaying={playingFile === c.file_name}
+                                playProgress={playingFile === c.file_name ? playProgress : 0}
+                                onPlayToggle={onPlayToggle}
+                                onPromptChange={onPromptChange}
+                                onAnnotate={onAnnotate}
+                                onDelete={onDelete}
+                                onSlice={onSlice}
+                                selected={selectedFiles ? selectedFiles.has(c.file_name) : false}
+                                onToggleSelected={onToggleSelected}
+                                disabled={disabled}
+                            />
+                        ))}
+                    </TableBody>
+                </Table>
+            </TableContainer>
+        </Paper>
+    );
+}
+// React.memo so the 60Hz audio-playhead ticks don't reconcile every row in
+// the table. Custom comparator: skip if visual props didn't change. Callback
+// identity intentionally ignored — they're stable in behavior, just inline
+// arrows from the parent, and re-creating a row only to re-bind a click
+// handler isn't worth the work. playProgress only matters on the active row.
+const ClipRow = React.memo(function ClipRow({ projectName, clip, isPlaying, playProgress, onPlayToggle, onPromptChange, onAnnotate, onDelete, onSlice, selected, onToggleSelected, disabled }) {
+    const [draft, setDraft] = useState(clip.prompt);
+    useEffect(() => { setDraft(clip.prompt); }, [clip.prompt]);
+    const dirty = draft !== clip.prompt;
+    return (
+        <TableRow hover selected={selected}>
+            <TableCell padding="checkbox">
+                <Checkbox
+                    size="small"
+                    checked={!!selected}
+                    onChange={() => onToggleSelected && onToggleSelected(clip.file_name)}
+                />
+            </TableCell>
+            <TableCell sx={{ wordBreak: 'break-all' }}>
+                <Stack direction="row" alignItems="center" spacing={1}>
+                    <IconButton
+                        size="small"
+                        onClick={() => onPlayToggle(clip.file_name)}
+                        sx={{ width: 28, height: 28 }}
+                    >
+                        {isPlaying ? <PauseIcon size={14} /> : <PlayIcon size={14} />}
+                    </IconButton>
+                    <Waveform
+                        projectName={projectName}
+                        fileName={clip.file_name}
+                        isActive={isPlaying}
+                        progress={playProgress}
+                    />
+                    <Typography variant="body2" sx={{ flex: 1, minWidth: 0, wordBreak: 'break-all' }}>
+                        {clip.file_name}
+                    </Typography>
+                </Stack>
+            </TableCell>
+            <TableCell>
+                <TextField
+                    fullWidth
+                    size="small"
+                    variant="standard"
+                    value={draft}
+                    onChange={(e) => setDraft(e.target.value)}
+                    onBlur={() => { if (dirty) onPromptChange(clip.file_name, draft); }}
+                    placeholder="(empty — write a prompt or auto-annotate)"
+                    disabled={disabled}
+                />
+            </TableCell>
+            <TableCell sx={{ textAlign: 'right', whiteSpace: 'nowrap' }}>
+                <Tooltip title={TIPS.dataset.autoAnnotateClip}>
+                    <span>
+                        <IconButton
+                            size="small"
+                            onClick={() => onAnnotate(clip.file_name)}
+                            disabled={disabled}
+                            sx={{ color: 'warm.main', '&:hover': { color: 'warm.light', bgcolor: 'action.hover' } }}
+                        >
+                            <WandSparkles size={16} />
+                        </IconButton>
+                    </span>
+                </Tooltip>
+                <Tooltip title={TIPS.dataset.sliceClip}>
+                    <span>
+                        <IconButton
+                            size="small"
+                            onClick={() => onSlice(clip.file_name)}
+                            disabled={disabled}
+                        >
+                            <ScissorsIcon size={16} />
+                        </IconButton>
+                    </span>
+                </Tooltip>
+                <Tooltip title={TIPS.dataset.removeClip}>
+                    <span>
+                        <IconButton
+                            size="small"
+                            onClick={() => onDelete(clip.file_name)}
+                            disabled={disabled}
+                        >
+                            <TrashIcon size={16} />
+                        </IconButton>
+                    </span>
+                </Tooltip>
+            </TableCell>
+        </TableRow>
+    );
+}, (prev, next) => {
+    if (prev.clip !== next.clip) return false;
+    if (prev.disabled !== next.disabled) return false;
+    if (prev.projectName !== next.projectName) return false;
+    if (prev.isPlaying !== next.isPlaying) return false;
+    if (prev.selected !== next.selected) return false;
+    // playProgress only matters when this row is the active one — inactive
+    // rows always receive playProgress=0 from the parent, so they're skipped.
+    if (next.isPlaying && prev.playProgress !== next.playProgress) return false;
+    return true;
+});
+function CreateProjectDialog({ open, existingNames, onClose, onCreated }) {
+    const [name, setName] = useState('');
+    const [busy, setBusy] = useState(false);
+    const [dialogError, setDialogError] = useState('');
+    useEffect(() => {
+        if (open) { setName(''); setDialogError(''); }
+    }, [open]);
+    const duplicate = existingNames.includes(name.trim());
+    async function submit() {
+        setDialogError('');
+        setBusy(true);
+        try {
+            const { data } = await api.post('/api/projects', { name: name.trim() });
+            await onCreated(data.name);
+        } catch (e) {
+            setDialogError(extractError(e, 'Failed to create project'));
+        } finally {
+            setBusy(false);
+        }
+    }
+    return (
+        <Dialog open={open} onClose={onClose} maxWidth="sm" fullWidth>
+            <DialogTitle>New project</DialogTitle>
+            <DialogContent>
+                <Stack spacing={2} sx={{ pt: 1 }}>
+                    <TextField
+                        autoFocus
+                        label="Project name"
+                        value={name}
+                        onChange={(e) => setName(e.target.value)}
+                        helperText="Letters, digits, spaces, dashes, underscores, dots. Becomes a folder name on disk."
+                        error={duplicate}
+                    />
+                    {duplicate && (
+                        <Typography variant="caption" color="error">
+                            A project with this name already exists.
+                        </Typography>
+                    )}
+                    {dialogError && <Alert severity="error">{dialogError}</Alert>}
+                </Stack>
+            </DialogContent>
+            <DialogActions>
+                <Button onClick={onClose} disabled={busy}>Cancel</Button>
+                <Button
+                    variant="contained"
+                    onClick={submit}
+                    disabled={busy || !name.trim() || duplicate}
+                >
+                    Create
+                </Button>
+            </DialogActions>
+        </Dialog>
+    );
+}
+function IngestDialog({ open, projectName, onClose, onIngested }) {
+    const [folder, setFolder] = useState('');
+    const [mode, setMode] = useState('copy');
+    const [busy, setBusy] = useState(false);
+    const [dialogError, setDialogError] = useState('');
+    useEffect(() => {
+        if (open) { setFolder(''); setMode('copy'); setDialogError(''); }
+    }, [open]);
+    async function pick() {
+        try {
+            const { data } = await api.post('/api/pick-folder', {});
+            if (data?.path) setFolder(data.path);
+        } catch (e) {
+            setDialogError(extractError(e, 'Folder picker failed'));
+        }
+    }
+    async function submit() {
+        if (!projectName) return;
+        setBusy(true);
+        setDialogError('');
+        try {
+            await api.post(
+                `/api/projects/${encodeURIComponent(projectName)}/ingest`,
+                { folder_path: folder, mode },
+            );
+            await onIngested();
+        } catch (e) {
+            setDialogError(extractError(e, 'Ingest failed'));
+        } finally {
+            setBusy(false);
+        }
+    }
+    return (
+        <Dialog open={open} onClose={onClose} maxWidth="sm" fullWidth>
+            <DialogTitle>Add audio to {projectName}</DialogTitle>
+            <DialogContent>
+                <Stack spacing={2} sx={{ pt: 1 }}>
+                    <Stack direction="row" spacing={1.5} alignItems="center">
+                        <Button variant="outlined" startIcon={<FolderOpenIcon size={18} />} onClick={pick}>
+                            Pick folder
+                        </Button>
+                        <Typography variant="body2" color="text.secondary" sx={{ wordBreak: 'break-all' }}>
+                            {folder || 'No folder selected'}
+                        </Typography>
+                    </Stack>
+                    <FormControl>
+                        <Typography variant="body2" gutterBottom>How to bring the audio in:</Typography>
+                        <RadioGroup value={mode} onChange={(e) => setMode(e.target.value)}>
+                            <FormControlLabel
+                                value="copy"
+                                control={<Radio size="small" />}
+                                label={<Typography variant="body2">Copy — duplicates audio into the project (safe, originals untouched)</Typography>}
+                            />
+                            <FormControlLabel
+                                value="symlink"
+                                control={<Radio size="small" />}
+                                label={<Typography variant="body2">Symlink — points at the originals (saves disk, breaks if you move them)</Typography>}
+                            />
+                        </RadioGroup>
+                    </FormControl>
+                    {dialogError && <Alert severity="error">{dialogError}</Alert>}
+                </Stack>
+            </DialogContent>
+            <DialogActions>
+                <Button onClick={onClose} disabled={busy}>Cancel</Button>
+                <Button variant="contained" onClick={submit} disabled={busy || !folder}>
+                    {busy ? 'Adding…' : 'Add'}
+                </Button>
+            </DialogActions>
+        </Dialog>
+    );
+}
+function SliceDialog({ open, projectName, fileName, onClose, onSliced }) {
+    const [target, setTarget] = useState(30);
+    const [overlap, setOverlap] = useState(0);
+    const [strategy, setStrategy] = useState('hard');
+    const [duration, setDuration] = useState(null);
+    const [busy, setBusy] = useState(false);
+    const [dialogError, setDialogError] = useState('');
+    useEffect(() => {
+        if (!open) return;
+        setTarget(30);
+        setOverlap(0);
+        setStrategy('hard');
+        setDialogError('');
+        setDuration(null);
+        if (!projectName || !fileName) return;
+        // Reuse the peaks endpoint to pull duration cheaply (cached server-side).
+        api.get(`/api/projects/${encodeURIComponent(projectName)}/clip/${encodeURIComponent(fileName)}/peaks?n=20`)
+            .then(({ data }) => setDuration(data?.duration || null))
+            .catch(() => setDuration(null));
+    }, [open, projectName, fileName]);
+    const stepSec = Math.max(0.5, target - overlap);
+    const estChildren = duration && target > 0 ? Math.max(1, Math.ceil(duration / stepSec)) : null;
+    const tooShort = duration !== null && duration <= target;
+    async function submit() {
+        setBusy(true);
+        setDialogError('');
+        try {
+            await api.post(
+                `/api/projects/${encodeURIComponent(projectName)}/clip/${encodeURIComponent(fileName)}/slice`,
+                { target_duration: target, overlap_sec: overlap, strategy },
+            );
+            await onSliced();
+            onClose();
+        } catch (e) {
+            setDialogError(extractError(e, 'Slice failed'));
+        } finally {
+            setBusy(false);
+        }
+    }
+    return (
+        <Dialog open={open} onClose={busy ? undefined : onClose} maxWidth="sm" fullWidth>
+            <DialogTitle>Slice {fileName || ''}</DialogTitle>
+            <DialogContent>
+                <Stack spacing={2.5} sx={{ pt: 1 }}>
+                    <Typography variant="body2" color="text.secondary">
+                        The original file will be replaced by the children on disk. Children inherit this clip's annotation. They stay in the project until you Create Dataset (Delete reverts them).
+                    </Typography>
+                    <Stack direction="row" spacing={2}>
+                        <TextField
+                            label="Target duration (sec)"
+                            type="number"
+                            size="small"
+                            value={target}
+                            onChange={(e) => setTarget(Math.max(0.5, parseFloat(e.target.value) || 0))}
+                            inputProps={{ step: 0.5, min: 0.5, max: 60 }}
+                            fullWidth
+                        />
+                        <TextField
+                            label="Overlap (sec)"
+                            type="number"
+                            size="small"
+                            value={overlap}
+                            onChange={(e) => setOverlap(Math.max(0, parseFloat(e.target.value) || 0))}
+                            inputProps={{ step: 0.1, min: 0, max: Math.max(0, target - 0.5) }}
+                            fullWidth
+                            helperText="Head-overlap on every child after the first"
+                        />
+                    </Stack>
+                    <FormControl>
+                        <Typography variant="body2" gutterBottom>Where each cut should land:</Typography>
+                        <RadioGroup value={strategy} onChange={(e) => setStrategy(e.target.value)}>
+                            <FormControlLabel
+                                value="hard"
+                                control={<Radio size="small" />}
+                                label={<Typography variant="body2">Hard cut — exact intervals; fastest, can split mid-note</Typography>}
+                            />
+                            <FormControlLabel
+                                value="transient"
+                                control={<Radio size="small" />}
+                                label={<Typography variant="body2">Transient-aware — snaps each cut to the nearest onset (good for drums / rhythmic)</Typography>}
+                            />
+                            <FormControlLabel
+                                value="silence"
+                                control={<Radio size="small" />}
+                                label={<Typography variant="body2">Silence-aware — snaps to the quietest moment in each window (good for melodic / phrased)</Typography>}
+                            />
+                        </RadioGroup>
+                    </FormControl>
+                    {duration !== null && (
+                        <Typography variant="caption" color="text.secondary">
+                            Source: {duration.toFixed(1)}s
+                            {estChildren !== null && !tooShort && ` · ~${estChildren} children at this setting`}
+                            {tooShort && ' · already shorter than the target — nothing to slice'}
+                        </Typography>
+                    )}
+                    {dialogError && <Alert severity="error">{dialogError}</Alert>}
+                </Stack>
+            </DialogContent>
+            <DialogActions>
+                <Button onClick={onClose} disabled={busy}>Cancel</Button>
+                <Button
+                    variant="contained"
+                    onClick={submit}
+                    disabled={busy || tooShort || target <= 0 || overlap >= target}
+                >
+                    {busy ? 'Slicing…' : 'Slice'}
+                </Button>
+            </DialogActions>
+        </Dialog>
+    );
+}
+// ---------- utils ----------------------------------------------------------
+function extractError(e, fallback) {
+    return e?.response?.data?.error || e?.message || fallback;
+}

app/frontend/src/components/EditPanel.js ADDED Viewed

	@@ -0,0 +1,597 @@

+import React, { useState, useRef, useEffect } from 'react';
+import {
+    Box,
+    Typography,
+    Button,
+    Stack,
+    TextField,
+    ToggleButton,
+    ToggleButtonGroup,
+    Slider,
+    Alert,
+    LinearProgress,
+    IconButton,
+    Switch,
+    FormControlLabel,
+} from '@mui/material';
+import { Upload as UploadIcon, X as ClearIcon, Play as PlayIcon, Square as StopIcon } from 'lucide-react';
+import api from '../api';
+import AudioWaveform from './AudioWaveform';
+import { getFragmentDragPayload } from '../utils/fragmentDrag';
+/**
+ * SA3 audio-to-audio + inpainting UI.
+ *
+ * Three modes:
+ *   - Style transfer: feed a source clip + new prompt, init_noise_level
+ *     controls how much character is preserved (0 = source-faithful,
+ *     1 = prompt-only).
+ *   - Inpaint: regenerate a region of the source clip, keeping the rest.
+ *   - Extend: append N seconds of new audio to the end of the source.
+ *
+ * All three send to /api/generate using SA3's init_audio / inpaint_audio
+ * params. The backend handles file resolution; this panel just uploads
+ * the source clip to /api/audio/upload and posts the returned path.
+ *
+ * Props:
+ *   model_id:        active SA3 model id
+ *   negativePrompt:  optional, passed through
+ *   loraStack:       [{path, strength, bypassed}] from the Generation panel —
+ *                    applied to the edit so style/inpaint/extend inherit the
+ *                    same LoRA character as plain generation.
+ *   steps:           sampler step count from the Generation panel.
+ *   cfgScale:        CFG from the Generation panel (only sent for *-base models;
+ *                    distilled models bake CFG at 1.0).
+ *   onGenerated(blob, filename, params): called with the resulting WAV
+ */
+export default function EditPanel({ model_id, negativePrompt, loraStack, steps, cfgScale, onGenerated }) {
+    const [mode, setMode] = useState('style');   // 'style' | 'inpaint' | 'extend'
+    const [sourcePath, setSourcePath] = useState('');
+    const [sourceName, setSourceName] = useState('');
+    const [sourceFile, setSourceFile] = useState(null);  // kept for in-browser decode (waveform)
+    const [sourceUploading, setSourceUploading] = useState(false);
+    const [dropActive, setDropActive] = useState(false);
+    const [prompt, setPrompt] = useState('');
+    const [duration, setDuration] = useState(8);
+    // Seed: random by default, mirroring the rest of the app. When off, the
+    // numeric field is honoured (0 included — a legitimate seed).
+    const [randomSeed, setRandomSeed] = useState(true);
+    const [seedValue, setSeedValue] = useState('');
+    // sa3-medium generates up to 380s; small models cap at 120s. Matches the
+    // generator's _MODEL_INFO so the slider can't request past the model max.
+    const maxDuration = (model_id || '').includes('medium') ? 380 : 120;
+    // Distilled (post-trained) models bake CFG at 1.0 and ignore cfg_scale; only
+    // *-base variants honour it. Same rule the Generation panel uses.
+    const isDistilledBase =
+        !!model_id && model_id.startsWith('sa3-') && !model_id.endsWith('-base');
+    // style transfer
+    const [initNoiseLevel, setInitNoiseLevel] = useState(0.7);
+    // inpaint
+    const [maskStart, setMaskStart] = useState(2.0);
+    const [maskEnd, setMaskEnd] = useState(4.0);
+    // extend
+    const [extendSeconds, setExtendSeconds] = useState(4.0);
+    const [sourceDurationSec, setSourceDurationSec] = useState(null);
+    const [generating, setGenerating] = useState(false);
+    const [error, setError] = useState(null);
+    const fileInputRef = useRef(null);
+    // Inpaint region audition — a hidden <audio> set to the source clip, played
+    // from maskStart and auto-stopped at maskEnd, so users can hear the segment
+    // they're about to regenerate before committing.
+    const regionAudioRef = useRef(null);
+    const regionStopRef = useRef(null);   // removes the active timeupdate guard
+    const [regionUrl, setRegionUrl] = useState(null);
+    const [regionPlaying, setRegionPlaying] = useState(false);
+    useEffect(() => {
+        if (!sourceFile) { setRegionUrl(null); return undefined; }
+        const url = URL.createObjectURL(sourceFile);
+        setRegionUrl(url);
+        return () => URL.revokeObjectURL(url);
+    }, [sourceFile]);
+    // Stop any in-flight preview when the source changes or the mode switches
+    // away from inpaint (don't auto-stop on every region drag — the end is
+    // captured per play, so dragging mid-play just runs to the old boundary).
+    useEffect(() => {
+        const a = regionAudioRef.current;
+        if (a) { try { a.pause(); } catch { /* ignore */ } }
+        regionStopRef.current?.();
+        regionStopRef.current = null;
+        setRegionPlaying(false);
+    }, [regionUrl, mode]);
+    const toggleRegionPreview = () => {
+        const a = regionAudioRef.current;
+        if (!a || !regionUrl) return;
+        if (regionPlaying) {
+            a.pause();
+            regionStopRef.current?.();
+            regionStopRef.current = null;
+            setRegionPlaying(false);
+            return;
+        }
+        const start = Math.max(0, Number(maskStart) || 0);
+        const end = Math.max(start + 0.05, Number(maskEnd) || 0);
+        const onTime = () => {
+            if (a.currentTime >= end) {
+                a.pause();
+                a.removeEventListener('timeupdate', onTime);
+                regionStopRef.current = null;
+                setRegionPlaying(false);
+            }
+        };
+        try { a.currentTime = start; } catch { /* ignore */ }
+        a.addEventListener('timeupdate', onTime);
+        regionStopRef.current = () => a.removeEventListener('timeupdate', onTime);
+        a.play()
+            .then(() => setRegionPlaying(true))
+            .catch(() => {
+                a.removeEventListener('timeupdate', onTime);
+                regionStopRef.current = null;
+                setRegionPlaying(false);
+            });
+    };
+    // --- source upload ---------------------------------------------------
+    const onPickFile = () => fileInputRef.current?.click();
+    const uploadFile = async (f) => {
+        if (!f) return;
+        setSourceUploading(true);
+        setError(null);
+        try {
+            const form = new FormData();
+            form.append('file', f);
+            const r = await api.post('/api/audio/upload', form);
+            setSourcePath(r.data.path);
+            setSourceName(r.data.name);
+            setSourceFile(f);  // keep for in-browser waveform decode
+            // Probe duration via a temp object URL → <audio>.
+            const url = URL.createObjectURL(f);
+            const a = new Audio(url);
+            a.addEventListener('loadedmetadata', () => {
+                if (Number.isFinite(a.duration)) {
+                    setSourceDurationSec(a.duration);
+                    // Default the output length to the source length (clamped to
+                    // the model max). For inpaint this is mandatory — the mask is
+                    // measured in source seconds, so the output must be the same
+                    // length or the masked region drifts off the audio you see.
+                    setDuration(Math.max(1, Math.min(maxDuration, Math.round(a.duration))));
+                    // Seed inpaint region to the middle quarter so the
+                    // waveform shows something sensible without a 4 s default
+                    // landing past the end of short clips.
+                    const q = a.duration / 4;
+                    setMaskStart(Math.max(0, q));
+                    setMaskEnd(Math.min(a.duration, q * 3));
+                }
+                URL.revokeObjectURL(url);
+            }, { once: true });
+        } catch (err) {
+            setError(err.response?.data?.error?.message || err.message || 'Upload failed');
+        } finally {
+            setSourceUploading(false);
+        }
+    };
+    const onFileChange = async (e) => {
+        const f = e.target.files?.[0];
+        e.target.value = '';
+        await uploadFile(f);
+    };
+    // Pull a fragment already on disk (dragged in from the Generated
+    // Fragments window) and run it through the same upload path so it gets a
+    // server path + waveform + duration probe, exactly like a picked file.
+    const loadFragmentByName = async (filename) => {
+        if (!filename) return;
+        setSourceUploading(true);
+        setError(null);
+        try {
+            const r = await api.get(`/api/fragments/${encodeURIComponent(filename)}`, { responseType: 'blob' });
+            const file = new File([r.data], filename, { type: r.data.type || 'audio/wav' });
+            await uploadFile(file);
+        } catch (err) {
+            setError(err.response?.data?.error?.message || err.message || 'Could not load fragment');
+            setSourceUploading(false);
+        }
+    };
+    const onDrop = async (e) => {
+        e.preventDefault();
+        setDropActive(false);
+        // In-app drag from the Generated Fragments window carries the
+        // fragment filename; OS file drags carry dataTransfer.files. Read the
+        // custom payload synchronously before any await.
+        const fragName = e.dataTransfer.getData('application/x-fragmenta-fragment');
+        if (fragName) {
+            // Prefer the in-memory blob handed off on dragStart — no disk
+            // round-trip, and immune to any in-memory vs on-disk name mismatch.
+            const payload = getFragmentDragPayload();
+            if (payload?.blob && payload.filename === fragName) {
+                const file = new File([payload.blob], fragName || 'fragment.wav', {
+                    type: payload.blob.type || 'audio/wav',
+                });
+                await uploadFile(file);
+            } else {
+                // Fallback: blob wasn't preloaded — fetch it from disk by name.
+                await loadFragmentByName(fragName);
+            }
+            return;
+        }
+        const f = e.dataTransfer.files?.[0];
+        await uploadFile(f);
+    };
+    const onDragOver = (e) => { e.preventDefault(); setDropActive(true); };
+    const onDragLeave = (e) => { e.preventDefault(); setDropActive(false); };
+    const clearSource = () => {
+        setSourcePath('');
+        setSourceName('');
+        setSourceFile(null);
+        setSourceDurationSec(null);
+    };
+    // --- generate --------------------------------------------------------
+    const generate = async () => {
+        if (!model_id) {
+            setError('Pick a model in the Generation tab first.');
+            return;
+        }
+        if (!sourcePath) {
+            setError('Upload a source clip first.');
+            return;
+        }
+        if (!prompt.trim() && mode !== 'extend') {
+            setError('Enter a prompt describing the change.');
+            return;
+        }
+        setGenerating(true);
+        setError(null);
+        try {
+            // Seed: -1 lets the backend pick (and record) a random one; an
+            // explicit value is parsed with parseInt so 0 stays 0 rather than
+            // collapsing to random via `|| -1`.
+            let seedToSend = -1;
+            if (!randomSeed) {
+                const parsed = parseInt(seedValue, 10);
+                if (Number.isNaN(parsed) || parsed < 0) {
+                    setError('Enter a non-negative integer seed, or switch Seed to Random.');
+                    setGenerating(false);
+                    return;
+                }
+                seedToSend = parsed;
+            }
+            const body = {
+                model_id,
+                prompt: prompt.trim() || 'continue',
+                duration,
+                seed: seedToSend,
+                steps,
+            };
+            if (negativePrompt) body.negative_prompt = negativePrompt;
+            // Only base models honour CFG; sending it on a distilled model is
+            // harmless (backend forces 1.0) but we keep the UI honest.
+            if (!isDistilledBase) body.cfg_scale = cfgScale;
+            // Inherit the Generation panel's LoRA stack. Bypassed slots stay in
+            // load order but contribute strength 0 (same as plain generation).
+            const activeLoras = (loraStack || [])
+                .filter((s) => s.path)
+                .map((s) => ({ path: s.path, strength: s.bypassed ? 0 : s.strength }));
+            if (activeLoras.length) body.loras = activeLoras;
+            if (mode === 'style') {
+                body.init_audio_path = sourcePath;
+                body.init_noise_level = initNoiseLevel;
+            } else if (mode === 'inpaint') {
+                // Pin output length to the source so the mask (measured in
+                // source seconds) maps onto the same timeline the user sees.
+                if (!Number.isFinite(sourceDurationSec)) {
+                    setError("Couldn't read source duration — re-upload the file.");
+                    setGenerating(false);
+                    return;
+                }
+                body.duration = sourceDurationSec;
+                body.inpaint_audio_path = sourcePath;
+                body.inpaint_starts = [Number(maskStart)];
+                body.inpaint_ends = [Number(maskEnd)];
+            } else if (mode === 'extend') {
+                // Extend = inpaint where the mask is the new tail. Total clip
+                // duration = source length + extendSeconds; mask covers
+                // [source_length, source_length + extendSeconds].
+                if (!Number.isFinite(sourceDurationSec)) {
+                    setError("Couldn't read source duration — re-upload the file.");
+                    setGenerating(false);
+                    return;
+                }
+                body.duration = sourceDurationSec + extendSeconds;
+                body.inpaint_audio_path = sourcePath;
+                body.inpaint_starts = [sourceDurationSec];
+                body.inpaint_ends = [sourceDurationSec + extendSeconds];
+            }
+            const resp = await api.post('/api/generate', body, { responseType: 'blob' });
+            // Use the backend's real on-disk name (header) so the fragment in
+            // the list resolves to an actual file for reveal/delete; only fall
+            // back to a synthetic name if the header is absent.
+            const fname = resp.headers?.['x-fragment-filename'] || `${mode}_${Date.now()}.wav`;
+            // Record the resolved seed (the backend picks a concrete one when we
+            // sent -1) so the fragment shows the real value, not "random".
+            const resolvedSeed = parseInt(resp.headers?.['x-fragment-seed'], 10);
+            const params = Number.isFinite(resolvedSeed) ? { ...body, seed: resolvedSeed } : body;
+            onGenerated?.(resp.data, fname, params);
+        } catch (err) {
+            setError(err.response?.data?.error?.message || err.message || 'Generation failed');
+        } finally {
+            setGenerating(false);
+        }
+    };
+    // --- render ----------------------------------------------------------
+    return (
+        <Box sx={{ p: 2 }}>
+            {/* Source picker (drag-and-drop or click) */}
+            <Box
+                sx={{ mb: 2 }}
+                onDragOver={onDragOver}
+                onDragLeave={onDragLeave}
+                onDrop={onDrop}
+            >
+                <Typography variant="caption" color="text.secondary" display="block" sx={{ mb: 0.5 }}>
+                    Source clip
+                </Typography>
+                {sourcePath ? (
+                    <Stack
+                        direction="row"
+                        alignItems="center"
+                        spacing={1}
+                        sx={{
+                            p: 1,
+                            border: '1px dashed',
+                            borderColor: dropActive ? 'primary.main' : 'divider',
+                            borderRadius: 1,
+                            transition: 'border-color 120ms',
+                        }}
+                    >
+                        <Typography variant="body2" sx={{ flex: 1, fontFamily: 'monospace', fontSize: 12, overflow: 'hidden', textOverflow: 'ellipsis' }}>
+                            {sourceName}
+                            {sourceDurationSec && ` · ${sourceDurationSec.toFixed(2)}s`}
+                        </Typography>
+                        <IconButton size="small" onClick={clearSource} aria-label="Remove source"><ClearIcon size={14} /></IconButton>
+                    </Stack>
+                ) : (
+                    <Button
+                        variant="outlined"
+                        startIcon={<UploadIcon size={14} />}
+                        onClick={onPickFile}
+                        disabled={sourceUploading}
+                        fullWidth
+                        sx={{
+                            borderStyle: 'dashed',
+                            borderColor: dropActive ? 'primary.main' : undefined,
+                            bgcolor: dropActive ? 'action.hover' : undefined,
+                            transition: 'border-color 120ms, background-color 120ms',
+                        }}
+                    >
+                        {sourceUploading ? 'Uploading…' : 'Drop a clip here, or click to pick a file'}
+                    </Button>
+                )}
+                <input
+                    ref={fileInputRef}
+                    type="file"
+                    accept=".wav,.mp3,.flac,.m4a,.ogg,.opus,audio/*"
+                    style={{ display: 'none' }}
+                    onChange={onFileChange}
+                />
+            </Box>
+            {/* Mode selector */}
+            <ToggleButtonGroup
+                value={mode}
+                exclusive
+                size="small"
+                onChange={(_, v) => v && setMode(v)}
+                sx={{ mb: 2 }}
+            >
+                <ToggleButton value="style">Style transfer</ToggleButton>
+                <ToggleButton value="inpaint">Inpaint region</ToggleButton>
+                <ToggleButton value="extend">Extend</ToggleButton>
+            </ToggleButtonGroup>
+            {/* Mode-specific controls */}
+            {mode === 'style' && (
+                <Box sx={{ mb: 2 }}>
+                    <Typography variant="caption" color="text.secondary">
+                        Preserve source character ←→ follow prompt
+                    </Typography>
+                    <Stack direction="row" alignItems="center" spacing={2}>
+                        <Slider
+                            value={initNoiseLevel}
+                            onChange={(_, v) => setInitNoiseLevel(v)}
+                            min={0}
+                            max={1}
+                            step={0.05}
+                            valueLabelDisplay="auto"
+                            marks={[
+                                { value: 0, label: '0' },
+                                { value: 0.5, label: '0.5' },
+                                { value: 1, label: '1' },
+                            ]}
+                            sx={{ flex: 1 }}
+                        />
+                        <Typography variant="body2" sx={{ width: 40, textAlign: 'right' }}>
+                            {initNoiseLevel.toFixed(2)}
+                        </Typography>
+                    </Stack>
+                </Box>
+            )}
+            {mode === 'inpaint' && (
+                <Box sx={{ mb: 2 }}>
+                    <Typography variant="caption" color="text.secondary" display="block" sx={{ mb: 0.5 }}>
+                        Drag the highlighted region to inpaint
+                    </Typography>
+                    <AudioWaveform
+                        file={sourceFile}
+                        duration={sourceDurationSec || 0}
+                        start={maskStart}
+                        end={maskEnd}
+                        onRegionChange={(s, e) => { setMaskStart(s); setMaskEnd(e); }}
+                    />
+                    <Stack direction="row" alignItems="center" spacing={2} sx={{ mt: 1 }}>
+                        <TextField
+                            label="Start (s)"
+                            type="number"
+                            size="small"
+                            value={maskStart.toFixed(2)}
+                            onChange={(e) => setMaskStart(parseFloat(e.target.value) || 0)}
+                            inputProps={{ min: 0, max: sourceDurationSec || 999, step: 0.05 }}
+                            sx={{ width: 96 }}
+                        />
+                        <TextField
+                            label="End (s)"
+                            type="number"
+                            size="small"
+                            value={maskEnd.toFixed(2)}
+                            onChange={(e) => setMaskEnd(parseFloat(e.target.value) || 0)}
+                            inputProps={{ min: 0, max: sourceDurationSec || 999, step: 0.05 }}
+                            sx={{ width: 96 }}
+                        />
+                        <Box sx={{ flex: 1, display: 'flex', alignItems: 'center', gap: 1 }}>
+                            <Button
+                                size="small"
+                                variant="outlined"
+                                startIcon={regionPlaying ? <StopIcon size={14} /> : <PlayIcon size={14} />}
+                                onClick={toggleRegionPreview}
+                                disabled={!regionUrl || (maskEnd - maskStart) < 0.05}
+                                // Fixed width so swapping "Preview" ↔ "Stop" doesn't
+                                // resize the button. Sized to fit "Preview" + icon.
+                                sx={{ width: 108, flexShrink: 0 }}
+                            >
+                                {regionPlaying ? 'Stop' : 'Preview'}
+                            </Button>
+                            <Typography variant="caption" color="text.secondary">
+                                {(maskEnd - maskStart).toFixed(2)} s
+                            </Typography>
+                        </Box>
+                    </Stack>
+                    <Typography variant="caption" color="text.secondary" display="block" sx={{ mt: 1 }}>
+                        Output is the same length as the source — only your selected region is replaced
+                    </Typography>
+                </Box>
+            )}
+            {mode === 'extend' && (
+                <Box sx={{ mb: 2 }}>
+                    <TextField
+                        label="Seconds to add at the end"
+                        type="number"
+                        size="small"
+                        value={extendSeconds}
+                        onChange={(e) => setExtendSeconds(parseFloat(e.target.value) || 0)}
+                        inputProps={{ min: 0.5, max: 60, step: 0.5 }}
+                        fullWidth
+                    />
+                    <Typography variant="caption" color="text.secondary">
+                        Source is {sourceDurationSec ? sourceDurationSec.toFixed(2) : '—'} s; final clip will be{' '}
+                        {sourceDurationSec ? (sourceDurationSec + Number(extendSeconds || 0)).toFixed(2) : '—'} s.
+                    </Typography>
+                </Box>
+            )}
+            {/* Shared inputs */}
+            <TextField
+                label={mode === 'inpaint' ? 'Prompt for the inpainting region' : 'Prompt for the edit'}
+                placeholder={
+                    mode === 'style' ? 'How the source should sound now…' :
+                    mode === 'inpaint' ? 'What goes in the gap…' :
+                    'What the continuation should sound like (optional)'
+                }
+                multiline
+                minRows={1}
+                maxRows={3}
+                value={prompt}
+                onChange={(e) => setPrompt(e.target.value)}
+                fullWidth
+                sx={{ mb: 2 }}
+            />
+            {mode === 'style' && (
+                <Stack direction="row" alignItems="center" spacing={2} sx={{ mb: 2 }}>
+                    <Typography variant="body2" color="text.secondary" sx={{ minWidth: 80 }}>
+                        Duration
+                    </Typography>
+                    <Slider
+                        value={Math.min(duration, maxDuration)}
+                        onChange={(_, v) => setDuration(v)}
+                        min={1}
+                        max={maxDuration}
+                        step={1}
+                        valueLabelDisplay="auto"
+                        sx={{ flex: 1 }}
+                    />
+                    <Typography variant="body2" sx={{ width: 40, textAlign: 'right' }}>
+                        {duration}s
+                    </Typography>
+                </Stack>
+            )}
+            {/* Seed — random by default, mirrors the Generation panel */}
+            <Stack direction="row" alignItems="center" spacing={2} sx={{ mb: 2 }}>
+                <Typography variant="body2" color="text.secondary" sx={{ minWidth: 80 }}>
+                    Seed
+                </Typography>
+                <FormControlLabel
+                    control={
+                        <Switch
+                            size="small"
+                            checked={randomSeed}
+                            onChange={(e) => setRandomSeed(e.target.checked)}
+                        />
+                    }
+                    label="Random"
+                    sx={{ mr: 0 }}
+                />
+                <TextField
+                    size="small"
+                    type="number"
+                    value={seedValue}
+                    disabled={randomSeed}
+                    onChange={(e) => setSeedValue(e.target.value)}
+                    placeholder={randomSeed ? 'Randomized each run (recorded)' : 'e.g. 42'}
+                    inputProps={{ min: 0, step: 1 }}
+                    sx={{ flex: 1 }}
+                />
+            </Stack>
+            {/* Hidden element backing the inpaint region preview */}
+            <audio
+                ref={regionAudioRef}
+                src={regionUrl || undefined}
+                preload="auto"
+                style={{ display: 'none' }}
+                onEnded={() => setRegionPlaying(false)}
+            />
+            {error && <Alert severity="error" sx={{ mb: 2 }}>{error}</Alert>}
+            {generating && <LinearProgress sx={{ mb: 2 }} />}
+            <Button
+                variant="contained"
+                fullWidth
+                onClick={generate}
+                disabled={generating || !sourcePath}
+            >
+                {generating
+                    ? 'Generating…'
+                    : mode === 'style' ? 'Apply style'
+                    : mode === 'inpaint' ? 'Inpaint region'
+                    : 'Extend clip'}
+            </Button>
+        </Box>
+    );
+}

app/frontend/src/components/GeneratedFragmentsWindow.js CHANGED Viewed

@@ -1,27 +1,242 @@
-import React, { useState, useRef, useCallback } from 'react';
-import { Paper, Box, Typography, Button, List, ListItem, IconButton } from '@mui/material';
-import { Square as StopIcon, Play as PlayIcon, Download as DownloadIcon } from 'lucide-react';
-import api from '../api';
 import { generatedFragmentsWindowStyles } from '../theme';
-export default function GeneratedFragmentsWindow({ fragments, onDownload }) {
     const [playingFragment, setPlayingFragment] = useState(null);
     const audioRefs = useRef({});
     const handlePlayPause = (fragment) => {
         const audio = audioRefs.current[fragment.id];
         if (!audio) return;
-        if (playingFragment === fragment.id) {
             audio.pause();
             setPlayingFragment(null);
-        } else {
-            if (playingFragment && audioRefs.current[playingFragment]) {
-                audioRefs.current[playingFragment].pause();
             }
-            audio.play();
-            setPlayingFragment(fragment.id);
         }
     };
     const setAudioRef = useCallback((fragmentId, audioElement) => {
@@ -31,90 +246,225 @@ export default function GeneratedFragmentsWindow({ fragments, onDownload }) {
     }, []);
     return (
-        <Paper
-            variant="outlined"
-            sx={generatedFragmentsWindowStyles.rootPaper}
-        >
             <Box sx={generatedFragmentsWindowStyles.headerRow}>
                 <Box sx={generatedFragmentsWindowStyles.titleRow}>
                     <Box component="span" sx={generatedFragmentsWindowStyles.titleIcon}>
-                        <DownloadIcon size={20} />
                     </Box>
                     <Typography variant="h6" sx={generatedFragmentsWindowStyles.titleText}>
                         Generated Fragments
                     </Typography>
                 </Box>
-                <Typography variant="caption" color="textSecondary" sx={generatedFragmentsWindowStyles.countText}>
-                    {fragments.length}
-                </Typography>
             </Box>
             {fragments.length === 0 ? (
-                <Box
-                    sx={generatedFragmentsWindowStyles.emptyState}
-                >
                     <Typography variant="body2">
-                        No fragments generated yet
                     </Typography>
                 </Box>
             ) : (
-                <List
-                    sx={generatedFragmentsWindowStyles.listRoot}
-                >
-                    {fragments.slice().reverse().map((fragment) => (
-                        <ListItem
-                            key={fragment.id}
-                            sx={generatedFragmentsWindowStyles.listItem}
-                        >
-                            <Box sx={generatedFragmentsWindowStyles.fragmentRow}>
-                                <Box sx={generatedFragmentsWindowStyles.fragmentMeta}>
                                     <Typography
-                                        variant="subtitle2"
                                         sx={generatedFragmentsWindowStyles.fragmentPrompt}
                                     >
                                         {fragment.batchTotal > 1 && (
-                                            <Box component="span" sx={{ fontWeight: 700, mr: 0.75 }}>
-                                                [{fragment.batchIndex}/{fragment.batchTotal}]
                                             </Box>
                                         )}
                                         {fragment.prompt}
                                     </Typography>
-                                    <Typography variant="caption" color="textSecondary">
-                                        {fragment.duration}s
-                                        {fragment.cfgScale !== undefined && ` • CFG ${fragment.cfgScale}`}
-                                        {fragment.seed !== undefined && ` • Seed ${fragment.seed}`}
-                                        {' • '}{fragment.timestamp}
-                                    </Typography>
                                 </Box>
-                                <Box sx={generatedFragmentsWindowStyles.fragmentActions}>
-                                    <IconButton
-                                        size="small"
-                                        onClick={() => handlePlayPause(fragment)}
-                                        color={playingFragment === fragment.id ? "primary" : "default"}
-                                        sx={generatedFragmentsWindowStyles.playPauseButton(playingFragment === fragment.id)}
-                                    >
-                                        {playingFragment === fragment.id ? <StopIcon /> : <PlayIcon />}
-                                    </IconButton>
-                                    <Button
-                                        size="small"
-                                        variant="outlined"
-                                        startIcon={<DownloadIcon />}
-                                        onClick={() => onDownload(fragment)}
                                     >
-                                        Download
-                                    </Button>
-                                </Box>
-                            </Box>
-                            <audio
-                                ref={el => setAudioRef(fragment.id, el)}
-                                src={fragment.audioUrl}
-                                onEnded={() => setPlayingFragment(null)}
-                                onPause={() => setPlayingFragment(null)}
-                                style={generatedFragmentsWindowStyles.hiddenAudio}
-                            />
-                        </ListItem>
-                    ))}
                 </List>
             )}
         </Paper>

+import React, { useState, useRef, useCallback, useEffect } from 'react';
+import {
+    Paper, Box, Typography, List, ListItem, IconButton,
+    Dialog, DialogTitle, DialogContent, DialogContentText, DialogActions, Button,
+    CircularProgress,
+} from '@mui/material';
+import { TIPS } from '../tooltips';
+import Tooltip from './Tooltip';
+import {
+    Square as StopIcon,
+    Play as PlayIcon,
+    AudioLines as TitleIcon,
+    Info as InfoIcon,
+    Trash2 as DeleteIcon,
+    Eraser as ClearAllIcon,
+    FolderOpen as RevealIcon,
+} from 'lucide-react';
 import { generatedFragmentsWindowStyles } from '../theme';
+import GenerationWaveform from './GenerationWaveform';
+import api from '../api';
+import { setFragmentDragPayload, clearFragmentDragPayload } from '../utils/fragmentDrag';
+// Compact human-readable "X ago" with absolute fallback for stale items.
+function relativeTime(createdAt) {
+    if (!createdAt) return '';
+    const sec = Math.max(0, (Date.now() - createdAt) / 1000);
+    if (sec < 10) return 'just now';
+    if (sec < 60) return `${Math.floor(sec)}s ago`;
+    const min = sec / 60;
+    if (min < 60) return `${Math.floor(min)}m ago`;
+    const hr = min / 60;
+    if (hr < 24) return `${Math.floor(hr)}h ago`;
+    const day = hr / 24;
+    if (day < 7) return `${Math.floor(day)}d ago`;
+    // Older than a week — show absolute date, no time
+    return new Date(createdAt).toLocaleDateString();
+}
+export default function GeneratedFragmentsWindow({ fragments, onDelete, onClearAll }) {
     const [playingFragment, setPlayingFragment] = useState(null);
+    const [playingTime, setPlayingTime] = useState(0);
+    const [clearConfirmOpen, setClearConfirmOpen] = useState(false);
     const audioRefs = useRef({});
+    // Tracks a play request that's between "user clicked Play" and "audio
+    // actually started". If the user clicks again during this window we
+    // need to either no-op (same fragment) or cleanly cancel (different
+    // fragment) — re-entering load() would abort the first play() and
+    // both attempts would fail with AbortError.
+    const playInFlightRef = useRef(null);
+    // Background-preload of disk-hydrated fragments. On app reload the parent
+    // gives us fragment metadata + the backend URL (/api/fragments/...) but
+    // no in-memory Blob. The first Play click on those would HTTP-fetch the
+    // file synchronously through the <audio> element and freeze briefly. We
+    // pre-fetch them in parallel on mount and gate the UI behind a single
+    // loading screen — once everything is ready, plays + waveform decodes
+    // are instant because they work off blob: URLs.
+    const fetchingIdsRef = useRef(new Set());
+    const loadedRef = useRef({});           // { [id]: { blob, blobUrl } }
+    const [loadedTick, setLoadedTick] = useState(0);
+    useEffect(() => {
+        let cancelled = false;
+        fragments.forEach((frag) => {
+            if (frag.audioBlob) return;             // already in memory
+            if (loadedRef.current[frag.id]) return; // already preloaded
+            if (fetchingIdsRef.current.has(frag.id)) return;
+            if (!frag.audioUrl) return;
+            fetchingIdsRef.current.add(frag.id);
+            fetch(frag.audioUrl)
+                .then((r) => {
+                    if (!r.ok) throw new Error(`HTTP ${r.status}`);
+                    return r.blob();
+                })
+                .then((blob) => {
+                    if (cancelled) return;
+                    const blobUrl = URL.createObjectURL(blob);
+                    loadedRef.current[frag.id] = { blob, blobUrl };
+                    setLoadedTick((t) => t + 1);
+                })
+                .catch((err) => {
+                    console.warn(`Fragment preload failed (${frag.filename || frag.id}):`, err);
+                })
+                .finally(() => {
+                    fetchingIdsRef.current.delete(frag.id);
+                });
+        });
+        return () => { cancelled = true; };
+    }, [fragments]);
+    // Revoke all preload blob URLs on unmount so we don't leak.
+    useEffect(() => () => {
+        Object.values(loadedRef.current).forEach(({ blobUrl }) => {
+            try { URL.revokeObjectURL(blobUrl); } catch { /* ignore */ }
+        });
+    }, []);
+    // Per-fragment helpers that prefer the in-memory blob (immediate) over
+    // the HTTP URL. Defined after loadedTick is read so React knows to
+    // re-render when a new fragment finishes preloading.
+    void loadedTick;
+    const effectiveBlob = (frag) => frag.audioBlob || loadedRef.current[frag.id]?.blob || null;
+    const effectiveUrl = (frag) => loadedRef.current[frag.id]?.blobUrl || frag.audioUrl;
+    const isFragmentReady = (frag) => !!frag.audioBlob || !!loadedRef.current[frag.id];
+    const readyCount = fragments.filter(isFragmentReady).length;
+    const allReady = fragments.length === 0 || readyCount === fragments.length;
+    // Safety buffer: once everything reports ready, keep the loading overlay
+    // up for an extra 5s before revealing the list. Audio decodes that are
+    // still settling in the background can't be poked (and can't crash the
+    // list) while the user is gated behind the spinner.
+    const GRACE_MS = 5000;
+    const [graceDone, setGraceDone] = useState(false);
+    useEffect(() => {
+        if (fragments.length === 0) { setGraceDone(true); return undefined; }
+        if (!allReady) { setGraceDone(false); return undefined; }
+        const t = setTimeout(() => setGraceDone(true), GRACE_MS);
+        return () => clearTimeout(t);
+    }, [allReady, fragments.length]);
+    const showLoading = fragments.length > 0 && (!allReady || !graceDone);
+    // Strict single-play with first-click readiness gate.
+    //
+    // Race fixes relative to the old version:
+    //   1. Iterate audioRefs.current and pause everything that isn't the
+    //      new target — avoids losing the race when two play clicks land
+    //      before React state settles.
+    //   2. For blob URLs, Chromium often doesn't actually pull bytes until
+    //      the first play() call, and play() rejects/hangs if readyState
+    //      is too low. If we're not ready, call load() and wait for
+    //      `canplay` (with a 5000 ms safety timeout) before play().
+    //   3. Guard against the user clicking Play twice during loading. A
+    //      second load() while the first play() is still pending aborts
+    //      the first with AbortError. playInFlightRef tracks the active
+    //      request: same-fragment second click is a no-op; different
+    //      fragment cleanly cancels the prior load timer/listener.
     const handlePlayPause = (fragment) => {
         const audio = audioRefs.current[fragment.id];
         if (!audio) return;
+        // Stop case: this fragment is currently playing → pause it.
+        if (!audio.paused) {
+            playInFlightRef.current?.cleanup?.();
+            playInFlightRef.current = null;
             audio.pause();
+            audio.currentTime = 0;
             setPlayingFragment(null);
+            setPlayingTime(0);
+            return;
+        }
+        // Click during loading of the SAME fragment → ignore.
+        if (playInFlightRef.current?.fragmentId === fragment.id) {
+            return;
+        }
+        // Click during loading of a DIFFERENT fragment → cancel that.
+        if (playInFlightRef.current) {
+            playInFlightRef.current.cleanup?.();
+            playInFlightRef.current = null;
+        }
+        Object.values(audioRefs.current).forEach((el) => {
+            if (el && el !== audio) {
+                el.pause();
+                el.currentTime = 0;
             }
+        });
+        const startedFor = fragment.id;
+        setPlayingFragment(startedFor);
+        setPlayingTime(0);
+        const startPlayback = () => {
+            audio.currentTime = 0;
+            Promise.resolve(audio.play())
+                .then(() => {
+                    // Successfully playing — clear the in-flight marker so
+                    // the next Play click can fire a fresh request.
+                    if (playInFlightRef.current?.fragmentId === startedFor) {
+                        playInFlightRef.current = null;
+                    }
+                })
+                .catch((err) => {
+                    // AbortError is expected when the user cancels (clicks
+                    // Stop or switches fragments) — don't noise the log.
+                    if (err && err.name !== 'AbortError') {
+                        console.warn(`Fragment play failed (${fragment.filename || fragment.id}):`, err);
+                    }
+                    setPlayingFragment((prev) => (prev === startedFor ? null : prev));
+                    setPlayingTime(0);
+                    if (playInFlightRef.current?.fragmentId === startedFor) {
+                        playInFlightRef.current = null;
+                    }
+                });
+        };
+        if (audio.readyState >= 2) {
+            playInFlightRef.current = { fragmentId: startedFor, cleanup: null };
+            startPlayback();
+            return;
         }
+        // Not ready yet — load and wait for canplay (or the 5 s timeout below).
+        try { audio.load(); } catch { /* ignore */ }
+        let cancelled = false;
+        const onReady = () => {
+            audio.removeEventListener('canplay', onReady);
+            clearTimeout(timer);
+            if (cancelled) return;
+            startPlayback();
+        };
+        audio.addEventListener('canplay', onReady, { once: true });
+        // 5 s — disk-hydrated fragments fetch from /api/fragments/...
+        // over HTTP, which can take a couple of seconds on first request.
+        // Blob-URL fragments (in-memory) hit canplay almost instantly.
+        const timer = setTimeout(() => {
+            audio.removeEventListener('canplay', onReady);
+            if (!cancelled) startPlayback();
+        }, 5000);
+        playInFlightRef.current = {
+            fragmentId: startedFor,
+            cleanup: () => {
+                cancelled = true;
+                audio.removeEventListener('canplay', onReady);
+                clearTimeout(timer);
+            },
+        };
+    };
+    // Reveal a fragment in the OS file manager (folder opens with the file
+    // highlighted where the platform supports it). Disk-hydrated fragments
+    // always have a filename; in-memory-only ones (not yet flushed) won't.
+    const revealInFolder = (fragment) => {
+        if (!fragment.filename) return;
+        api.post('/api/reveal-fragment', { filename: fragment.filename })
+            .catch((err) => {
+                console.warn(`Reveal failed (${fragment.filename}):`, err);
+            });
     };
     const setAudioRef = useCallback((fragmentId, audioElement) => {
     }, []);
     return (
+        <Paper variant="outlined" sx={generatedFragmentsWindowStyles.rootPaper}>
             <Box sx={generatedFragmentsWindowStyles.headerRow}>
                 <Box sx={generatedFragmentsWindowStyles.titleRow}>
                     <Box component="span" sx={generatedFragmentsWindowStyles.titleIcon}>
+                        <TitleIcon size={20} />
                     </Box>
                     <Typography variant="h6" sx={generatedFragmentsWindowStyles.titleText}>
                         Generated Fragments
                     </Typography>
                 </Box>
+                <Box sx={{ display: 'flex', alignItems: 'center', gap: 0.5 }}>
+                    <Typography variant="caption" color="textSecondary" sx={generatedFragmentsWindowStyles.countText}>
+                        {fragments.length}
+                    </Typography>
+                    {fragments.length > 0 && onClearAll && (
+                        <Tooltip title={TIPS.fragments.clearAll} placement="top" arrow>
+                            <IconButton
+                                size="small"
+                                onClick={() => setClearConfirmOpen(true)}
+                                sx={{ color: 'text.disabled', '&:hover': { color: 'error.main' } }}
+                            >
+                                <ClearAllIcon size={14} />
+                            </IconButton>
+                        </Tooltip>
+                    )}
+                </Box>
             </Box>
+            <Dialog open={clearConfirmOpen} onClose={() => setClearConfirmOpen(false)}>
+                <DialogTitle>Clear all generated fragments?</DialogTitle>
+                <DialogContent>
+                    <DialogContentText>
+                        Permanently delete all {fragments.length} fragment{fragments.length === 1 ? '' : 's'} from disk.
+                        Uploaded source clips (used by Edit mode) are not affected.
+                    </DialogContentText>
+                </DialogContent>
+                <DialogActions>
+                    <Button onClick={() => setClearConfirmOpen(false)}>Cancel</Button>
+                    <Button
+                        onClick={() => { setClearConfirmOpen(false); onClearAll?.(); }}
+                        color="error"
+                        variant="contained"
+                    >
+                        Delete all
+                    </Button>
+                </DialogActions>
+            </Dialog>
             {fragments.length === 0 ? (
+                <Box sx={generatedFragmentsWindowStyles.emptyState}>
+                    <Typography variant="body2">No fragments generated yet</Typography>
+                </Box>
+            ) : showLoading ? (
+                <Box sx={{
+                    ...generatedFragmentsWindowStyles.emptyState,
+                    display: 'flex',
+                    flexDirection: 'column',
+                    alignItems: 'center',
+                    gap: 1.5,
+                }}>
+                    <CircularProgress size={28} />
                     <Typography variant="body2">
+                        {allReady
+                            ? 'Finishing up…'
+                            : `Loading fragments… ${readyCount} / ${fragments.length}`}
                     </Typography>
                 </Box>
             ) : (
+                <List sx={generatedFragmentsWindowStyles.listRoot}>
+                    {fragments.slice().reverse().map((fragment) => {
+                        const isPlaying = playingFragment === fragment.id;
+                        const ago = relativeTime(fragment.createdAt);
+                        // CFG, seed, full timestamp, and model go in the info
+                        // tooltip — accessible but not pushing the row out.
+                        const tooltipLines = [
+                            // Pre-fix fragments stored -1 for a random seed;
+                            // show that as "random" rather than a bare -1.
+                            `Seed: ${(fragment.seed != null && fragment.seed >= 0) ? fragment.seed : 'random'}`,
+                            // Distilled SA3 models have CFG distilled away — it's
+                            // genuinely not applicable, not missing.
+                            `CFG: ${fragment.cfgScale ?? 'n/a'}`,
+                            fragment.steps != null ? `Steps: ${fragment.steps}` : null,
+                            fragment.modelId ? `Model: ${fragment.modelId}` : null,
+                            fragment.editMode ? `Mode: ${fragment.editMode}` : null,
+                            `Duration: ${fragment.duration}s`,
+                            ago ? `Generated: ${ago}` : null,
+                            fragment.timestamp ? fragment.timestamp : null,
+                        ].filter(Boolean).join('\n');
+                        return (
+                            <ListItem
+                                key={fragment.id}
+                                sx={generatedFragmentsWindowStyles.listItem}
+                            >
+                                <IconButton
+                                    size="small"
+                                    onClick={() => handlePlayPause(fragment)}
+                                    aria-label={isPlaying ? 'Stop' : 'Play'}
+                                    sx={generatedFragmentsWindowStyles.playPauseButton(isPlaying)}
+                                >
+                                    {isPlaying ? <StopIcon size={16} /> : <PlayIcon size={16} />}
+                                </IconButton>
+                                <Box
+                                    sx={{ ...generatedFragmentsWindowStyles.fragmentMeta, cursor: 'grab' }}
+                                    draggable
+                                    onDragStart={(e) => {
+                                        // In-app payload consumed by EditPanel's drop zone
+                                        // ("drag a clip into the Edit tab"). Keeps the
+                                        // waveform's separate OS drag-out untouched.
+                                        e.dataTransfer.setData(
+                                            'application/x-fragmenta-fragment',
+                                            fragment.filename || '',
+                                        );
+                                        e.dataTransfer.effectAllowed = 'copy';
+                                        // Hand off the in-memory blob too so the drop can
+                                        // use it directly — no disk fetch, immune to any
+                                        // name mismatch. Falls back to the filename when
+                                        // the blob isn't preloaded yet.
+                                        const blob = effectiveBlob(fragment);
+                                        if (blob) {
+                                            setFragmentDragPayload({
+                                                filename: fragment.filename || '',
+                                                blob,
+                                            });
+                                        }
+                                    }}
+                                    onDragEnd={() => clearFragmentDragPayload()}
+                                    title="Drag into the Edit tab to use as a source clip"
+                                >
                                     <Typography
+                                        variant="body2"
                                         sx={generatedFragmentsWindowStyles.fragmentPrompt}
+                                        title={fragment.prompt}
                                     >
                                         {fragment.batchTotal > 1 && (
+                                            <Box component="span" sx={generatedFragmentsWindowStyles.batchTag}>
+                                                {fragment.batchIndex}/{fragment.batchTotal}
                                             </Box>
                                         )}
                                         {fragment.prompt}
                                     </Typography>
                                 </Box>
+                                <GenerationWaveform
+                                    blob={effectiveBlob(fragment)}
+                                    audioUrl={effectiveUrl(fragment)}
+                                    filename={fragment.filename || 'fragment.wav'}
+                                    currentTime={isPlaying ? playingTime : 0}
+                                    duration={fragment.duration || 0}
+                                />
+                                <Tooltip
+                                    title={
+                                        <Box component="span" sx={{ whiteSpace: 'pre-line' }}>
+                                            {tooltipLines}
+                                        </Box>
+                                    }
+                                    arrow
+                                    placement="top"
+                                >
+                                    <Box
+                                        component="span"
+                                        sx={generatedFragmentsWindowStyles.fragmentInfoIcon}
                                     >
+                                        <InfoIcon size={14} />
+                                    </Box>
+                                </Tooltip>
+                                {fragment.filename && (
+                                    <Tooltip title={TIPS.fragments.revealInFolder} placement="top" arrow>
+                                        <IconButton
+                                            size="small"
+                                            onClick={() => revealInFolder(fragment)}
+                                            aria-label="Show in folder"
+                                            sx={{ color: 'text.disabled', '&:hover': { color: 'primary.main', bgcolor: 'action.hover' } }}
+                                        >
+                                            <RevealIcon size={16} />
+                                        </IconButton>
+                                    </Tooltip>
+                                )}
+                                {onDelete && (
+                                    <Tooltip title={TIPS.fragments.deleteFromDisk} placement="top" arrow>
+                                        <IconButton
+                                            size="small"
+                                            onClick={() => onDelete(fragment)}
+                                            sx={{ color: 'text.disabled', '&:hover': { color: 'error.main', bgcolor: 'action.hover' } }}
+                                        >
+                                            <DeleteIcon size={16} />
+                                        </IconButton>
+                                    </Tooltip>
+                                )}
+                                <audio
+                                    ref={el => setAudioRef(fragment.id, el)}
+                                    src={effectiveUrl(fragment)}
+                                    preload="auto"
+                                    onTimeUpdate={(e) => {
+                                        if (playingFragment === fragment.id) {
+                                            setPlayingTime(e.target.currentTime);
+                                        }
+                                    }}
+                                    onEnded={() => {
+                                        if (playingFragment === fragment.id) {
+                                            setPlayingFragment(null);
+                                            setPlayingTime(0);
+                                        }
+                                    }}
+                                    onPause={() => {
+                                        if (playingFragment === fragment.id) {
+                                            setPlayingFragment(null);
+                                        }
+                                    }}
+                                    style={generatedFragmentsWindowStyles.hiddenAudio}
+                                />
+                            </ListItem>
+                        );
+                    })}
                 </List>
             )}
         </Paper>

app/frontend/src/components/GenerationWaveform.js ADDED Viewed

	@@ -0,0 +1,217 @@

+import React, { useEffect, useLayoutEffect, useRef, useState, useCallback } from 'react';
+import { Box } from '@mui/material';
+const DEFAULT_COLOR = '#279FBB';
+// Fixed, low waveform resolution — matches the dataset-page waveforms
+// (/peaks?n=80). Decoding to a constant bucket count (instead of one pair per
+// pixel) means the decode runs once per clip rather than re-running on every
+// resize, so fragments render faster.
+const PEAK_COUNT = 80;
+/**
+ * Compact waveform indicator for a single generated fragment.
+ *
+ * Decodes the audio once per clip into PEAK_COUNT min/max peak pairs and
+ * renders them on a canvas.
+ * Played portion is rendered in `color`; unplayed in a dim version of it,
+ * with a thin playhead line at the current position. The whole element is
+ * draggable: dragstart sets a DownloadURL on the dataTransfer so the user
+ * can drag the fragment onto their desktop or into a DAW as a .wav file.
+ *
+ * Props:
+ *   blob:        Blob | null     — audio source (Blob is required for the
+ *                                   native drag-to-OS file write).
+ *   audioUrl:    string          — URL for the same audio (blob: or HTTP).
+ *                                   Fetched for decoding when `blob` is null;
+ *                                   also used in the dataTransfer; we fall
+ *                                   back to createObjectURL(blob) if missing.
+ *   filename:    string          — file name the OS sees when the drag
+ *                                   resolves.
+ *   currentTime: number          — playback head position in seconds.
+ *   duration:    number          — total length in seconds.
+ *   height:      number          — canvas height in px (default 28).
+ *   color:       string          — accent color (default DEFAULT_COLOR, '#279FBB').
+ */
+export default function GenerationWaveform({
+    blob,
+    audioUrl,
+    filename = 'fragment.wav',
+    currentTime = 0,
+    duration = 0,
+    height = 28,
+    color = DEFAULT_COLOR,
+}) {
+    const containerRef = useRef(null);
+    const canvasRef = useRef(null);
+    // Start at a sensible non-zero width so the decode useEffect (gated on
+    // width > 0) runs on first mount instead of waiting for the async
+    // ResizeObserver callback — which is what was leaving the canvas blank.
+    const [width, setWidth] = useState(200);
+    const [peaks, setPeaks] = useState(null);
+    // Measure synchronously on mount via useLayoutEffect so we never paint
+    // at the placeholder width; ResizeObserver then keeps it in sync with
+    // sidebar collapses / window resizes.
+    useLayoutEffect(() => {
+        const el = containerRef.current;
+        if (!el) return;
+        const rect = el.getBoundingClientRect();
+        if (rect.width > 0) {
+            setWidth(Math.max(1, Math.floor(rect.width)));
+        }
+        const ro = new ResizeObserver((entries) => {
+            const w = Math.max(1, Math.floor(entries[0].contentRect.width));
+            setWidth(w);
+        });
+        ro.observe(el);
+        return () => ro.disconnect();
+    }, []);
+    // Decode into PEAK_COUNT mono min/max pairs — a fixed low resolution,
+    // independent of pixel width, so the (expensive) decode runs once per clip
+    // and not again on every resize.
+    //
+    // Audio source can be either a Blob (in-memory, fresh generations) or
+    // an HTTP audioUrl (fragments hydrated from disk on app load have
+    // audioBlob=null and audioUrl=/api/fragments/...). The blob path is
+    // preferred when available; otherwise fetch the URL.
+    useEffect(() => {
+        if (!blob && !audioUrl) return;
+        let cancelled = false;
+        (async () => {
+            try {
+                let buf;
+                if (blob) {
+                    buf = await blob.arrayBuffer();
+                } else {
+                    const r = await fetch(audioUrl);
+                    if (!r.ok) {
+                        console.warn(`GenerationWaveform fetch failed (${r.status}): ${audioUrl}`);
+                        return;
+                    }
+                    buf = await r.arrayBuffer();
+                }
+                if (cancelled) return;
+                if (!buf || buf.byteLength === 0) {
+                    console.warn('GenerationWaveform: empty audio source');
+                    return;
+                }
+                const Ctx = window.OfflineAudioContext || window.webkitOfflineAudioContext;
+                const tmpCtx = Ctx
+                    ? new Ctx(1, 44100, 44100)
+                    : new (window.AudioContext || window.webkitAudioContext)();
+                const audio = await tmpCtx.decodeAudioData(buf.slice(0));
+                if (cancelled) return;
+                const ch0 = audio.getChannelData(0);
+                const ch1 = audio.numberOfChannels > 1 ? audio.getChannelData(1) : null;
+                const totalSamples = ch0.length;
+                const bucketSize = Math.max(1, Math.floor(totalSamples / PEAK_COUNT));
+                const out = new Float32Array(PEAK_COUNT * 2);
+                for (let i = 0; i < PEAK_COUNT; i++) {
+                    const s = i * bucketSize;
+                    const e = Math.min(totalSamples, s + bucketSize);
+                    let mn = 0, mx = 0;
+                    for (let j = s; j < e; j++) {
+                        const v = ch1 ? (ch0[j] + ch1[j]) * 0.5 : ch0[j];
+                        if (v < mn) mn = v;
+                        if (v > mx) mx = v;
+                    }
+                    out[i * 2] = mn;
+                    out[i * 2 + 1] = mx;
+                }
+                if (!cancelled) setPeaks(out);
+            } catch (err) {
+                console.warn('GenerationWaveform decode failed:', err);
+            }
+        })();
+        return () => { cancelled = true; };
+    }, [blob, audioUrl]);
+    // Draw — re-runs on every currentTime tick so the playhead moves.
+    const draw = useCallback(() => {
+        const canvas = canvasRef.current;
+        if (!canvas || !width || !height) return;
+        const dpr = window.devicePixelRatio || 1;
+        canvas.width = width * dpr;
+        canvas.height = height * dpr;
+        const ctx = canvas.getContext('2d');
+        ctx.setTransform(dpr, 0, 0, dpr, 0, 0);
+        ctx.clearRect(0, 0, width, height);
+        // Always draw a faint center line so the row has a visible "this is
+        // a waveform area" cue even while decode is in flight or has failed.
+        ctx.fillStyle = `${color}33`;
+        ctx.fillRect(0, height / 2 - 0.5, width, 1);
+        if (!peaks) return;
+        const mid = height / 2;
+        const scale = (height - 2) / 2;
+        // Stretch the fixed PEAK_COUNT buckets across the canvas width as bars
+        // (with a 1px gap), matching the dataset-page waveform look.
+        const n = peaks.length / 2;
+        const step = width / n;
+        const barW = Math.max(1, step - 1);
+        const progressFrac = duration > 0
+            ? Math.max(0, Math.min(1, currentTime / duration))
+            : 0;
+        const progressPx = progressFrac * width;
+        const splitIdx = Math.floor(progressFrac * n);
+        for (let i = 0; i < n; i++) {
+            const mn = peaks[i * 2];
+            const mx = peaks[i * 2 + 1];
+            const y0 = mid - mx * scale;
+            const y1 = mid - mn * scale;
+            // Played bars: full color; unplayed: dimmed (35% alpha of accent).
+            ctx.fillStyle = i < splitIdx ? color : `${color}59`;
+            ctx.fillRect(i * step, y0, barW, Math.max(1, y1 - y0));
+        }
+        // Thin playhead at the split.
+        if (progressPx > 0 && progressPx < width) {
+            ctx.fillStyle = color;
+            ctx.fillRect(progressPx - 0.5, 0, 1, height);
+        }
+    }, [width, height, peaks, color, currentTime, duration]);
+    useEffect(() => { draw(); }, [draw]);
+    // Native drag-to-OS as a file. The DownloadURL mime type is a Chromium
+    // extension the OS interprets as "this drag is a file the browser can
+    // serve from URL X with mime/name Y". Source is whichever URL we have:
+    // a blob: URL for in-memory fragments, or the backend /api/fragments/
+    // path for disk-hydrated ones. The OS needs an ABSOLUTE URL, so we
+    // resolve relative paths against window.location.origin.
+    const canDrag = !!(audioUrl || blob);
+    const handleDragStart = (e) => {
+        if (!canDrag) return;
+        const raw = audioUrl || URL.createObjectURL(blob);
+        const absolute = (raw.startsWith('http') || raw.startsWith('blob:'))
+            ? raw
+            : `${window.location.origin}${raw.startsWith('/') ? '' : '/'}${raw}`;
+        e.dataTransfer.setData('DownloadURL', `audio/wav:${filename}:${absolute}`);
+        e.dataTransfer.effectAllowed = 'copy';
+    };
+    return (
+        <Box
+            ref={containerRef}
+            draggable={canDrag}
+            onDragStart={handleDragStart}
+            title={canDrag ? 'Drag to save or drop into a DAW' : undefined}
+            sx={{
+                // Floor the width so the container is never zero — without
+                // this, a tight flex row could collapse it before
+                // ResizeObserver fires, leaving the canvas un-sized.
+                flex: 1,
+                minWidth: 120,
+                height,
+                cursor: canDrag ? 'grab' : 'default',
+                '&:active': { cursor: canDrag ? 'grabbing' : 'default' },
+            }}
+        >
+            <canvas
+                ref={canvasRef}
+                style={{ display: 'block', width: '100%', height }}
+            />
+        </Box>
+    );
+}

app/frontend/src/components/InfoView.js ADDED Viewed

	@@ -0,0 +1,91 @@

+import React, { createContext, useCallback, useContext, useMemo, useState } from 'react';
+import { Box, Typography } from '@mui/material';
+import { Info as InfoIcon } from 'lucide-react';
+/**
+ * Ableton-style "Info View".
+ *
+ * A toggleable strip pinned to the bottom of the window that shows the help
+ * text for whatever control the pointer (or keyboard focus) is over, instead
+ * of popping a tooltip on the control itself. The shared <Tooltip> feeds this
+ * panel when the view is enabled (see components/Tooltip.js).
+ *
+ * State design: `enabled` is owned by App (changes rarely, persisted). The
+ * *hint* — which changes on every hover — lives inside the provider and is
+ * read only by the bar, so updating it never re-renders the app tree (the app
+ * is passed as `children`, whose element identity is stable across the
+ * provider's internal state changes).
+ */
+// Context carrying { enabled, setHint }. The default value below applies only
+// to consumers rendered without an InfoViewProvider above them: the view is
+// reported as disabled and setHint is a no-op.
+export const InfoViewContext = createContext({ enabled: false, setHint: () => {} });
+// Convenience hook: returns the current InfoViewContext value.
+export const useInfoView = () => useContext(InfoViewContext);
+export function InfoViewProvider({ enabled, children }) {
+    const [hint, setHint] = useState(null);
+    // Stable setter so the context value only changes when `enabled` flips —
+    // hover-driven hint updates don't churn every tooltip consumer.
+    const update = useCallback((value) => setHint(value ?? null), []);
+    const value = useMemo(() => ({ enabled, setHint: update }), [enabled, update]);
+    return (
+        <InfoViewContext.Provider value={value}>
+            {children}
+            {enabled && <InfoViewBar hint={hint} />}
+        </InfoViewContext.Provider>
+    );
+}
+function InfoViewBar({ hint }) {
+    // Only present when there's something to say — no placeholder.
+    if (!hint) return null;
+    return (
+        // Full-width fixed row that centers the pill at the bottom of the page.
+        <Box
+            sx={{
+                position: 'fixed',
+                left: 0,
+                right: 0,
+                bottom: { xs: 16, md: 24 },
+                zIndex: 1340,           // under the bottom dock (1350)
+                px: 2,
+                display: 'flex',
+                justifyContent: 'center',
+                pointerEvents: 'none',  // pure overlay — never intercepts clicks
+            }}
+        >
+            <Box
+                role="status"
+                aria-live="polite"
+                sx={(theme) => ({
+                    display: 'inline-flex',
+                    alignItems: 'center',
+                    gap: 1,
+                    maxWidth: 'min(680px, 90vw)',
+                    px: 1.75,
+                    py: 0.9,
+                    borderRadius: 999,
+                    // Blurred translucent pill — just enough backing for the
+                    // text to stay readable over any content behind it.
+                    backgroundColor: theme.palette.mode === 'dark'
+                        ? 'rgba(20, 22, 24, 0.55)'
+                        : 'rgba(248, 243, 234, 0.62)',
+                    backdropFilter: 'blur(16px) saturate(160%)',
+                    WebkitBackdropFilter: 'blur(16px) saturate(160%)',
+                    border: `1px solid ${theme.palette.divider}`,
+                    boxShadow: theme.palette.mode === 'dark'
+                        ? '0 8px 28px rgba(0,0,0,0.5)'
+                        : '0 8px 28px rgba(43,31,18,0.16)',
+                    animation: 'fragmenta-fade-up 240ms cubic-bezier(0.16, 1, 0.3, 1)',
+                })}
+            >
+                <Box component="span" sx={{ flexShrink: 0, display: 'inline-flex', color: 'primary.main' }}>
+                    <InfoIcon size={15} />
+                </Box>
+                <Typography variant="body2" sx={{ color: 'text.primary', lineHeight: 1.3 }}>
+                    {hint}
+                </Typography>
+            </Box>
+        </Box>
+    );
+}

app/frontend/src/components/LoraStack.js ADDED Viewed

	@@ -0,0 +1,252 @@

+import React, { useEffect, useState } from 'react';
+import {
+    Box,
+    Accordion,
+    AccordionSummary,
+    AccordionDetails,
+    Button,
+    Typography,
+    Stack,
+    MenuItem,
+    Select,
+    Slider,
+    IconButton,
+    Chip,
+    Alert,
+} from '@mui/material';
+import { TIPS } from '../tooltips';
+import Tooltip from './Tooltip';
+import {
+    Plus as AddIcon,
+    Trash2 as RemoveIcon,
+    GripVertical as DragIcon,
+    Power as BypassIcon,
+    ChevronDown as ChevronDownIcon,
+} from 'lucide-react';
+import api from '../api';
+import { isLoraCompatible } from '../utils/loraMatch';
+const MAX_SLOTS = 4;
+/**
+ * Multi-LoRA stack for the Generation panel.
+ *
+ * Props:
+ *   selectedModel: the currently-selected base model id (e.g. "sa3-medium-base")
+ *   value:         array of { path, strength, bypassed } slots
+ *   onChange:      (newSlots) => void
+ *
+ * The picker filters available LoRAs by base-model compatibility (a `*-base`
+ * LoRA also runs on its distilled sibling — see utils/loraMatch). Slot order
+ * is the load order (slot 0 first); drag the handle to reorder. Bypass keeps
+ * a slot in the stack but sends strength 0.
+ */
+export default function LoraStack({ selectedModel, value, onChange }) {
+    const [available, setAvailable] = useState([]);
+    const [loading, setLoading] = useState(false);
+    const [error, setError] = useState(null);
+    const [dragIndex, setDragIndex] = useState(null);
+    useEffect(() => {
+        let cancelled = false;
+        setLoading(true);
+        api.get('/api/loras')
+            .then(r => { if (!cancelled) setAvailable(r.data.loras || []); })
+            .catch(e => { if (!cancelled) setError(e.response?.data?.error || e.message); })
+            .finally(() => { if (!cancelled) setLoading(false); });
+        return () => { cancelled = true; };
+    }, []);
+    // LoRAs compatible with the current generation model. A LoRA trained
+    // against `*-base` is compatible with both that base and its distilled
+    // sibling (same backbone, differ only in CFG state) — loraMatch strips
+    // the trailing `-base` before comparing.
+    const compatible = available.filter(l =>
+        isLoraCompatible(l.base_model, selectedModel)
+    );
+    // The single-LoRA case stays one click: when no slots are populated AND
+    // there's a compatible LoRA, surface one empty slot so the user sees a
+    // "Pick a LoRA" dropdown immediately.
+    const slots = (value && value.length > 0)
+        ? value
+        : (compatible.length ? [{ path: '', strength: 1.0, bypassed: false }] : []);
+    const addSlot = () => {
+        if (slots.length >= MAX_SLOTS) return;
+        onChange([...slots, { path: '', strength: 1.0, bypassed: false }]);
+    };
+    const removeSlot = (idx) => onChange(slots.filter((_, i) => i !== idx));
+    const setSlot = (idx, patch) => {
+        onChange(slots.map((s, i) => i === idx ? { ...s, ...patch } : s));
+    };
+    // --- drag-to-reorder (slot 0 is loaded first) ---------------------------
+    const onDrop = (target) => {
+        if (dragIndex === null || dragIndex === target) { setDragIndex(null); return; }
+        const next = [...slots];
+        const [moved] = next.splice(dragIndex, 1);
+        next.splice(target, 0, moved);
+        setDragIndex(null);
+        onChange(next);
+    };
+    const hint = (() => {
+        if (!selectedModel) return 'Pick a model first.';
+        if (!selectedModel.endsWith('-base')) {
+            return 'LoRAs need a Base model. Switch to a *-base checkpoint to use LoRAs.';
+        }
+        if (loading) return 'Loading LoRAs…';
+        if (!compatible.length) {
+            return `No LoRAs trained against ${selectedModel} yet. Train one in the Training tab.`;
+        }
+        return null;
+    })();
+    return (
+        <Accordion
+            disableGutters
+            defaultExpanded={Boolean(value && value.some((s) => s.path))}
+        >
+            <AccordionSummary expandIcon={<ChevronDownIcon size={18} />}>
+                {/* Hover the title to surface the help in the Info View pill
+                    (when it's on) — no inline "i", matching the rest of the app. */}
+                <Tooltip title={TIPS.lora.stackInfo(MAX_SLOTS)}>
+                    <Typography variant="subtitle1">LoRA Stack</Typography>
+                </Tooltip>
+            </AccordionSummary>
+            <AccordionDetails>
+            {error && <Alert severity="error" sx={{ mb: 1 }}>{error}</Alert>}
+            {hint && (
+                <Typography variant="caption" color="text.secondary" sx={{ display: 'block', mb: 1 }}>
+                    {hint}
+                </Typography>
+            )}
+            {slots.length > 0 && (
+                <Box sx={{ border: '1px solid', borderColor: 'divider', borderRadius: 1 }}>
+                    {slots.map((slot, idx) => {
+                        const choice = available.find(l => l.path === slot.path);
+                        const bypassed = !!slot.bypassed;
+                        return (
+                            <Box
+                                key={idx}
+                                onDragOver={(e) => { if (dragIndex !== null) e.preventDefault(); }}
+                                onDrop={() => onDrop(idx)}
+                                sx={{
+                                    p: 1.5,
+                                    borderBottom: '1px solid',
+                                    borderColor: 'divider',
+                                    '&:last-child': { borderBottom: 'none' },
+                                    bgcolor: dragIndex === idx ? 'action.hover' : 'transparent',
+                                    opacity: bypassed ? 0.5 : 1,
+                                }}
+                            >
+                                <Stack direction="row" alignItems="center" spacing={1}>
+                                    <Tooltip title={TIPS.lora.dragReorder}>
+                                        <Box
+                                            draggable={slots.length > 1}
+                                            onDragStart={() => setDragIndex(idx)}
+                                            onDragEnd={() => setDragIndex(null)}
+                                            sx={{
+                                                display: 'flex',
+                                                cursor: slots.length > 1 ? 'grab' : 'default',
+                                                color: 'text.disabled',
+                                            }}
+                                        >
+                                            <DragIcon size={16} />
+                                        </Box>
+                                    </Tooltip>
+                                    <Typography variant="caption" color="text.disabled" sx={{ width: 14 }}>
+                                        {idx}
+                                    </Typography>
+                                    <Select
+                                        size="small"
+                                        value={slot.path}
+                                        displayEmpty
+                                        onChange={(e) => setSlot(idx, { path: String(e.target.value) })}
+                                        sx={{ flex: 1, minWidth: 0 }}
+                                    >
+                                        <MenuItem value="" disabled>
+                                            <em>Pick a LoRA</em>
+                                        </MenuItem>
+                                        {compatible.map(l => (
+                                            <MenuItem key={l.id} value={l.path}>
+                                                <Box>
+                                                    <Typography variant="body2">
+                                                        {l.name} · {l.checkpoint}
+                                                    </Typography>
+                                                    <Stack direction="row" spacing={0.5} sx={{ mt: 0.25 }}>
+                                                        <Chip size="small" label={l.adapter_type || 'lora'} sx={{ height: 16, fontSize: 9 }} />
+                                                        {l.rank && <Chip size="small" label={`r=${l.rank}`} sx={{ height: 16, fontSize: 9 }} />}
+                                                    </Stack>
+                                                </Box>
+                                            </MenuItem>
+                                        ))}
+                                    </Select>
+                                    <Tooltip title={TIPS.lora.bypass(bypassed)}>
+                                        <IconButton
+                                            size="small"
+                                            color={bypassed ? 'default' : 'primary'}
+                                            onClick={() => setSlot(idx, { bypassed: !bypassed })}
+                                        >
+                                            <BypassIcon size={14} />
+                                        </IconButton>
+                                    </Tooltip>
+                                    <IconButton size="small" onClick={() => removeSlot(idx)} aria-label="Remove slot">
+                                        <RemoveIcon size={14} />
+                                    </IconButton>
+                                </Stack>
+                                <Stack direction="row" alignItems="center" spacing={1.5} sx={{ mt: 1, mb: 2 }}>
+                                    <Typography variant="caption" color="text.secondary" sx={{ width: 60 }}>
+                                        Strength
+                                    </Typography>
+                                    <Slider
+                                        size="small"
+                                        value={slot.strength}
+                                        disabled={bypassed}
+                                        onChange={(e, v) => setSlot(idx, { strength: v })}
+                                        min={-2}
+                                        max={2}
+                                        step={0.05}
+                                        valueLabelDisplay="auto"
+                                        marks={[
+                                            { value: 0, label: '0' },
+                                            { value: 1, label: '1' },
+                                        ]}
+                                        sx={{ flex: 1 }}
+                                    />
+                                    <Typography variant="body2" sx={{ width: 40, textAlign: 'right' }}>
+                                        {bypassed ? '—' : slot.strength.toFixed(2)}
+                                    </Typography>
+                                </Stack>
+                                {choice && choice.base_model && (
+                                    <Typography variant="caption" color="text.secondary" sx={{ display: 'block', mt: 0.25 }}>
+                                        Trained on {choice.base_model}
+                                    </Typography>
+                                )}
+                            </Box>
+                        );
+                    })}
+                </Box>
+            )}
+            <Stack direction="row" sx={{ mt: 1 }}>
+                <Button
+                    size="small"
+                    variant="outlined"
+                    startIcon={<AddIcon size={14} />}
+                    disabled={slots.length >= MAX_SLOTS || !compatible.length}
+                    onClick={addSlot}
+                >
+                    Add LoRA
+                </Button>
+            </Stack>
+            </AccordionDetails>
+        </Accordion>
+    );
+}

app/frontend/src/components/LossChart.js CHANGED Viewed

@@ -1,19 +1,35 @@
 import React, { useState } from 'react';
 import { lossChartStyles } from '../theme';
-// Exponential moving average. alpha controls smoothness:
-//   alpha → 1   = no smoothing (output equals input)
-//   alpha → 0   = heavy smoothing (output flat-ish line)
-// Diffusion loss is intrinsically noisy because each step samples a random
-// timestep with different difficulty, so a small alpha (heavy smoothing) is
-// what makes the underlying trend visible.
-const EMA_ALPHA = 0.06;
-function smoothEMA(values, alpha = EMA_ALPHA) {
     if (values.length === 0) return [];
-    const out = [values[0]];
-    for (let i = 1; i < values.length; i++) {
-        out.push(alpha * values[i] + (1 - alpha) * out[i - 1]);
     }
     return out;
 }

 import React, { useState } from 'react';
 import { lossChartStyles } from '../theme';
+// Bias-corrected exponential moving average — same math as the EMA used in
+// TensorBoard's loss curves and Adam's bias-corrected moments. Standard EMA
+// (out[0] = values[0]) makes the smoothed line lag the data for the first
+// 1/alpha steps; diffusion loss spikes high on step 0 (random init), so a
+// naive EMA spends roughly 1/alpha steps "catching down". The 1/(1-(1-α)^(i+1)) factor
+// cancels that startup bias: by construction out[0] equals values[0], and
+// out[i] converges to the plain EMA at steady state.
+//
+// alpha is *adaptive* to the run length: more data → more smoothing is OK
+// because the underlying trend has more support; short runs need a tighter
+// window so the smoothed line still resembles the data.
+function pickAlpha(n) {
+    if (n < 50) return 0.25;
+    if (n < 200) return 0.15;
+    if (n < 1000) return 0.08;
+    return 0.05;
+}
+/**
+ * Smooth a numeric series with a bias-corrected exponential moving average.
+ *
+ * @param {number[]} values  Raw series, oldest first.
+ * @param {number} [alpha]   Smoothing factor; when null/undefined it is
+ *                           chosen by pickAlpha(values.length).
+ * @returns {number[]} A new array with one smoothed value per input value
+ *                     (empty when `values` is empty).
+ */
+function smoothEMA(values, alpha) {
     if (values.length === 0) return [];
+    const a = alpha ?? pickAlpha(values.length);
+    // w is the weight kept from the previous running average.
+    const w = 1 - a;
+    const out = [];
+    // Zero-initialised accumulator; the resulting startup bias toward 0 is
+    // what `correction` divides back out below.
+    let ema = 0;
+    for (let i = 0; i < values.length; i++) {
+        ema = w * ema + a * values[i];
+        const correction = 1 - Math.pow(w, i + 1);
+        // correction → a at i=0 (so out[0] = values[0]) and → 1 at large i.
+        // The 1e-9 floor avoids dividing by zero when correction is 0
+        // (e.g. alpha = 0).
+        out.push(ema / Math.max(correction, 1e-9));
     }
     return out;
 }

app/frontend/src/components/MidiConfigMenu.js CHANGED Viewed

@@ -8,7 +8,6 @@ import {
     MenuItem,
     Button,
     IconButton,
-    Tooltip,
     Divider,
     ToggleButton,
     ToggleButtonGroup,
@@ -16,7 +15,7 @@ import {
 } from '@mui/material';
 import { Trash2 as DeleteIcon, X as CloseIcon } from 'lucide-react';
 import { useMidi, formatMidi } from './MidiContext';
-import { perfTokens } from '../theme';
 const CHANNEL_OPTIONS = [
     { value: 0, label: 'Any' },
@@ -46,39 +45,60 @@ export default function MidiConfigMenu({ anchorEl, open, onClose }) {
             anchorEl={anchorEl}
             open={open}
             onClose={onClose}
-            anchorOrigin={{ vertical: 'bottom', horizontal: 'right' }}
-            transformOrigin={{ vertical: 'top', horizontal: 'right' }}
             slotProps={{
                 paper: {
                     sx: {
-                        width: 380,
                         maxHeight: '70vh',
-                        p: 2,
                         borderRadius: 2,
                         border: '1px solid',
                         borderColor: 'divider',
                     },
                 },
             }}
         >
-            <Box sx={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between', mb: 1.5 }}>
-                <Typography variant="subtitle2" sx={{ letterSpacing: '0.08em', textTransform: 'uppercase', color: 'text.secondary' }}>
                     MIDI Settings
                 </Typography>
-                <IconButton size="small" onClick={onClose}>
-                    <CloseIcon size={14} />
                 </IconButton>
             </Box>
             {!supported && (
-                <Alert severity="warning" sx={{ mb: 1.5 }}>
-                    {permissionError || 'Web MIDI is not available in this browser. Try Chrome / Edge / Electron.'}
-                </Alert>
             )}
-            <Box sx={{ display: 'flex', flexDirection: 'column', gap: 1.5 }}>
                 <Box>
-                    <Typography variant="caption" sx={{ color: 'text.secondary', display: 'block', mb: 0.5 }}>
                         Input device
                     </Typography>
                     <FormControl size="small" fullWidth>
@@ -92,19 +112,26 @@ export default function MidiConfigMenu({ anchorEl, open, onClose }) {
                                 const found = inputs.find(i => i.id === value);
                                 return found ? found.name : 'Disconnected';
                             }}
                         >
-                            <MenuItem value="">
                                 <em>None</em>
                             </MenuItem>
                             {inputs.map((input) => (
-                                <MenuItem key={input.id} value={input.id}>
                                     {input.name}
                                 </MenuItem>
                             ))}
                         </Select>
                     </FormControl>
                     {config.deviceName && !inputs.some(i => i.name === config.deviceName) && (
-                        <Typography variant="caption" sx={{ color: 'warning.main', display: 'block', mt: 0.5 }}>
                             Saved device "{config.deviceName}" not connected
                         </Typography>
                     )}
@@ -112,7 +139,7 @@ export default function MidiConfigMenu({ anchorEl, open, onClose }) {
                 <Box sx={{ display: 'flex', gap: 1 }}>
                     <Box sx={{ flex: 1 }}>
-                        <Typography variant="caption" sx={{ color: 'text.secondary', display: 'block', mb: 0.5 }}>
                             Channel filter
                         </Typography>
                         <FormControl size="small" fullWidth>
@@ -120,16 +147,17 @@ export default function MidiConfigMenu({ anchorEl, open, onClose }) {
                                 value={config.channelFilter}
                                 onChange={(e) => setChannelFilter(Number(e.target.value))}
                                 disabled={!supported}
                             >
                                 {CHANNEL_OPTIONS.map(opt => (
-                                    <MenuItem key={opt.value} value={opt.value}>{opt.label}</MenuItem>
                                 ))}
                             </Select>
                         </FormControl>
                     </Box>
                     <Box sx={{ flex: 1 }}>
-                        <Typography variant="caption" sx={{ color: 'text.secondary', display: 'block', mb: 0.5 }}>
                             Takeover
                         </Typography>
                         <ToggleButtonGroup
@@ -138,25 +166,42 @@ export default function MidiConfigMenu({ anchorEl, open, onClose }) {
                             exclusive
                             onChange={(_, v) => { if (v) setTakeover(v); }}
                             fullWidth
-                            sx={{ height: 40 }}
                         >
-                            <ToggleButton value="jump" sx={{ fontSize: perfTokens.fontSize.body }}>Jump</ToggleButton>
-                            <ToggleButton value="pickup" sx={{ fontSize: perfTokens.fontSize.body }}>Pickup</ToggleButton>
                         </ToggleButtonGroup>
                     </Box>
                 </Box>
-                <Divider sx={{ my: 0.5 }} />
-                <Box sx={{ display: 'flex', alignItems: 'center', justifyContent: 'space-between' }}>
-                    <Typography variant="caption" sx={{ color: 'text.secondary', letterSpacing: '0.08em', textTransform: 'uppercase' }}>
                         Mappings ({config.mappings.length})
                     </Typography>
                     <Button
                         size="small"
                         onClick={clearAll}
                         disabled={config.mappings.length === 0}
-                        sx={{ fontSize: perfTokens.fontSize.small, textTransform: 'none' }}
                     >
                         Clear all
                     </Button>
@@ -167,15 +212,21 @@ export default function MidiConfigMenu({ anchorEl, open, onClose }) {
                         border: '1px solid',
                         borderColor: 'divider',
                         borderRadius: 1,
-                        maxHeight: 280,
                         overflowY: 'auto',
                         bgcolor: 'background.default',
                     }}
                 >
                     {sortedMappings.length === 0 ? (
-                        <Box sx={{ p: 2, textAlign: 'center' }}>
-                            <Typography variant="caption" sx={{ color: 'text.disabled', fontStyle: 'italic' }}>
-                                No mappings yet. Enable MIDI mode (the MIDI button), click a control, then move a hardware knob, fader, or button.
                             </Typography>
                         </Box>
                     ) : (
@@ -191,33 +242,54 @@ export default function MidiConfigMenu({ anchorEl, open, onClose }) {
                                     borderBottom: '1px solid',
                                     borderColor: 'divider',
                                     '&:last-child': { borderBottom: 'none' },
                                 }}
                             >
                                 <Box sx={{ flex: 1, minWidth: 0 }}>
-                                    <Typography variant="body2" sx={{ fontSize: perfTokens.fontSize.body, overflow: 'hidden', textOverflow: 'ellipsis', whiteSpace: 'nowrap' }}>
                                         {m.label}
                                     </Typography>
-                                    <Typography variant="caption" sx={{ color: 'text.secondary', fontSize: perfTokens.fontSize.small, fontFamily: 'ui-monospace, SFMono-Regular, Menlo, Consolas, monospace' }}>
                                         {formatMidi(m.midi)}
                                     </Typography>
                                 </Box>
-                                <Tooltip title="Remove mapping">
-                                    <IconButton
-                                        size="small"
-                                        onClick={() => clearMapping(m.controlId)}
-                                        sx={{ color: 'text.disabled', '&:hover': { color: 'error.main' } }}
-                                    >
-                                        <DeleteIcon size={13} />
-                                    </IconButton>
-                                </Tooltip>
                             </Box>
                         ))
                     )}
                 </Box>
-                <Typography variant="caption" sx={{ color: 'text.disabled', fontSize: perfTokens.fontSize.small, lineHeight: 1.4 }}>
-                    Pickup = ignore the hardware until its position matches the on-screen value (no jumps).
-                    Right-click a control while in MIDI mode to clear its mapping.
                 </Typography>
             </Box>
         </Popover>

     MenuItem,
     Button,
     IconButton,
     Divider,
     ToggleButton,
     ToggleButtonGroup,
 } from '@mui/material';
 import { Trash2 as DeleteIcon, X as CloseIcon } from 'lucide-react';
 import { useMidi, formatMidi } from './MidiContext';
+import { perfTokens, performancePanelStyles as panelStyles } from '../theme';
 const CHANNEL_OPTIONS = [
     { value: 0, label: 'Any' },
             anchorEl={anchorEl}
             open={open}
             onClose={onClose}
+            anchorOrigin={{ vertical: 'bottom', horizontal: 'left' }}
+            transformOrigin={{ vertical: 'top', horizontal: 'left' }}
             slotProps={{
                 paper: {
                     sx: {
+                        width: 360,
                         maxHeight: '70vh',
+                        p: 0,
                         borderRadius: 2,
                         border: '1px solid',
                         borderColor: 'divider',
+                        overflow: 'hidden',
                     },
                 },
             }}
         >
+            {/* Title bar — same pattern as Presets / Audio menus. */}
+            <Box sx={{
+                display: 'flex',
+                alignItems: 'center',
+                justifyContent: 'space-between',
+                px: 1.5,
+                pt: 1.25,
+                pb: 1,
+            }}>
+                <Typography sx={{ ...perfTokens.caps, color: 'text.secondary' }}>
                     MIDI Settings
                 </Typography>
+                <IconButton onClick={onClose} sx={panelStyles.compactIconBtn('md')}>
+                    <CloseIcon size={perfTokens.icon.sm} />
                 </IconButton>
             </Box>
+            <Divider />
             {!supported && (
+                <Box sx={{ px: 1.5, pt: 1.25 }}>
+                    <Alert severity="warning" sx={{ py: 0.5 }}>
+                        {permissionError || 'Web MIDI is not available in this browser. Try Chrome / Edge / Electron.'}
+                    </Alert>
+                </Box>
             )}
+            {/* SETTINGS — input device + channel filter + takeover. */}
+            <Box sx={{
+                px: 1.5,
+                pt: 1.25,
+                pb: 1.25,
+                display: 'flex',
+                flexDirection: 'column',
+                gap: 1.25,
+            }}>
                 <Box>
+                    <Typography sx={{ ...perfTokens.labelMuted, display: 'block', mb: 0.5 }}>
                         Input device
                     </Typography>
                     <FormControl size="small" fullWidth>
                                 const found = inputs.find(i => i.id === value);
                                 return found ? found.name : 'Disconnected';
                             }}
+                            sx={{ fontSize: perfTokens.fontSize.sm }}
                         >
+                            <MenuItem value="" sx={{ fontSize: perfTokens.fontSize.sm }}>
                                 <em>None</em>
                             </MenuItem>
                             {inputs.map((input) => (
+                                <MenuItem key={input.id} value={input.id} sx={{ fontSize: perfTokens.fontSize.sm }}>
                                     {input.name}
                                 </MenuItem>
                             ))}
                         </Select>
                     </FormControl>
                     {config.deviceName && !inputs.some(i => i.name === config.deviceName) && (
+                        <Typography sx={{
+                            fontSize: perfTokens.fontSize.xs,
+                            color: 'warning.main',
+                            fontStyle: 'italic',
+                            display: 'block',
+                            mt: 0.5,
+                        }}>
                             Saved device "{config.deviceName}" not connected
                         </Typography>
                     )}
                 <Box sx={{ display: 'flex', gap: 1 }}>
                     <Box sx={{ flex: 1 }}>
+                        <Typography sx={{ ...perfTokens.labelMuted, display: 'block', mb: 0.5 }}>
                             Channel filter
                         </Typography>
                         <FormControl size="small" fullWidth>
                                 value={config.channelFilter}
                                 onChange={(e) => setChannelFilter(Number(e.target.value))}
                                 disabled={!supported}
+                                sx={{ fontSize: perfTokens.fontSize.sm }}
                             >
                                 {CHANNEL_OPTIONS.map(opt => (
+                                    <MenuItem key={opt.value} value={opt.value} sx={{ fontSize: perfTokens.fontSize.sm }}>{opt.label}</MenuItem>
                                 ))}
                             </Select>
                         </FormControl>
                     </Box>
                     <Box sx={{ flex: 1 }}>
+                        <Typography sx={{ ...perfTokens.labelMuted, display: 'block', mb: 0.5 }}>
                             Takeover
                         </Typography>
                         <ToggleButtonGroup
                             exclusive
                             onChange={(_, v) => { if (v) setTakeover(v); }}
                             fullWidth
+                            sx={{ height: perfTokens.height.compact }}
                         >
+                            <ToggleButton value="jump" sx={{ fontSize: perfTokens.fontSize.sm, textTransform: 'none' }}>Jump</ToggleButton>
+                            <ToggleButton value="pickup" sx={{ fontSize: perfTokens.fontSize.sm, textTransform: 'none' }}>Pickup</ToggleButton>
                         </ToggleButtonGroup>
                     </Box>
                 </Box>
+            </Box>
+            <Divider />
+            {/* MAPPINGS — header row + bordered scrollable list. */}
+            <Box sx={{ px: 1.5, pt: 1.25, pb: 1.25 }}>
+                <Box sx={{
+                    display: 'flex',
+                    alignItems: 'center',
+                    justifyContent: 'space-between',
+                    mb: 0.75,
+                }}>
+                    <Typography sx={{ ...perfTokens.labelMuted, display: 'block' }}>
                         Mappings ({config.mappings.length})
                     </Typography>
                     <Button
                         size="small"
                         onClick={clearAll}
                         disabled={config.mappings.length === 0}
+                        sx={{
+                            fontSize: perfTokens.fontSize.xs,
+                            color: 'error.main',
+                            textTransform: 'none',
+                            py: 0,
+                            px: 0.75,
+                            minWidth: 0,
+                            '&:hover': { bgcolor: 'action.hover' },
+                            '&.Mui-disabled': { color: 'text.disabled' },
+                        }}
                     >
                         Clear all
                     </Button>
                         border: '1px solid',
                         borderColor: 'divider',
                         borderRadius: 1,
+                        maxHeight: 240,
                         overflowY: 'auto',
                         bgcolor: 'background.default',
                     }}
                 >
                     {sortedMappings.length === 0 ? (
+                        <Box sx={{ px: 1.5, py: 1.5, textAlign: 'center' }}>
+                            <Typography sx={{
+                                fontSize: perfTokens.fontSize.xs,
+                                color: 'text.disabled',
+                                fontStyle: 'italic',
+                                lineHeight: 1.4,
+                            }}>
+                                No mappings yet. Enable MIDI mode, click a control,
+                                then move a hardware knob, fader, or button.
                             </Typography>
                         </Box>
                     ) : (
                                     borderBottom: '1px solid',
                                     borderColor: 'divider',
                                     '&:last-child': { borderBottom: 'none' },
+                                    '&:hover': { bgcolor: 'action.hover' },
+                                    transition: 'background-color 120ms',
                                 }}
                             >
                                 <Box sx={{ flex: 1, minWidth: 0 }}>
+                                    <Typography sx={{
+                                        fontSize: perfTokens.fontSize.sm,
+                                        fontWeight: 500,
+                                        overflow: 'hidden',
+                                        textOverflow: 'ellipsis',
+                                        whiteSpace: 'nowrap',
+                                    }}>
                                         {m.label}
                                     </Typography>
+                                    <Typography sx={{
+                                        color: 'text.secondary',
+                                        fontSize: perfTokens.fontSize.xs,
+                                        fontFamily: 'ui-monospace, SFMono-Regular, Menlo, Consolas, monospace',
+                                    }}>
                                         {formatMidi(m.midi)}
                                     </Typography>
                                 </Box>
+                                <IconButton
+                                    size="small"
+                                    onClick={() => clearMapping(m.controlId)}
+                                    sx={panelStyles.compactIconBtn('sm', 'danger')}
+                                    aria-label="Remove mapping"
+                                >
+                                    <DeleteIcon size={perfTokens.icon.sm} />
+                                </IconButton>
                             </Box>
                         ))
                     )}
                 </Box>
+            </Box>
+            <Divider />
+            {/* Footer help text. */}
+            <Box sx={{ px: 1.5, pt: 1, pb: 1.25 }}>
+                <Typography sx={{
+                    color: 'text.disabled',
+                    fontSize: perfTokens.fontSize.xs,
+                    fontStyle: 'italic',
+                    lineHeight: 1.4,
+                }}>
+                    Pickup ignores the hardware until its position matches the on-screen
+                    value. Right-click a control while in MIDI mode to clear its mapping.
                 </Typography>
             </Box>
         </Popover>

app/frontend/src/components/MidiContext.js CHANGED Viewed

@@ -8,6 +8,7 @@ import React, {
     useState,
 } from 'react';
 import { Box } from '@mui/material';
 const STORAGE_KEY = 'fragmenta.midi.config.v1';
@@ -73,7 +74,6 @@ export function MidiProvider({ children }) {
     const [learnMode, setLearnMode] = useState(false);
     const [learnTarget, setLearnTarget] = useState(null);
-    const accessRef = useRef(null);
     const subscribersRef = useRef(new Map());
     const pickupArmedRef = useRef(new Map());
     const configRef = useRef(config);
@@ -86,39 +86,23 @@ export function MidiProvider({ children }) {
     useEffect(() => { learnTargetRef.current = learnTarget; }, [learnTarget]);
-    const refreshInputs = useCallback(() => {
-        const access = accessRef.current;
-        if (!access) return;
-        const list = [];
-        access.inputs.forEach((input) => {
-            list.push({
-                id: input.id,
-                name: input.name || 'Unknown device',
-                manufacturer: input.manufacturer || '',
-            });
-        });
-        setInputs(list);
-    }, []);
-    useEffect(() => {
-        if (typeof navigator === 'undefined' || !navigator.requestMIDIAccess) {
             setSupported(false);
-            return undefined;
         }
-        let cancelled = false;
-        navigator.requestMIDIAccess({ sysex: false })
-            .then((access) => {
-                if (cancelled) return;
-                accessRef.current = access;
-                refreshInputs();
-                access.onstatechange = refreshInputs;
-            })
-            .catch((err) => {
-                setPermissionError(err?.message || 'MIDI permission denied');
-                setSupported(false);
-            });
-        return () => { cancelled = true; };
-    }, [refreshInputs]);
     useEffect(() => {
         if (!inputs.length || !config.deviceName) return;
@@ -192,24 +176,30 @@ export function MidiProvider({ children }) {
         }
     }, [captureLearn]);
     useEffect(() => {
-        const access = accessRef.current;
-        if (!access) return undefined;
-        const bound = [];
-        access.inputs.forEach((input) => {
-            if (config.deviceId && input.id === config.deviceId) {
-                input.onmidimessage = dispatchMessage;
-                bound.push(input);
-            } else {
-                input.onmidimessage = null;
-            }
-        });
-        pickupArmedRef.current = new Map();
-        return () => {
-            bound.forEach((i) => { i.onmidimessage = null; });
         };
-    }, [config.deviceId, inputs, dispatchMessage]);
     function applyContinuous(sub, mapping, midiValue, takeover) {
         const norm = midiValue / 127;

     useState,
 } from 'react';
 import { Box } from '@mui/material';
+import api from '../api';
 const STORAGE_KEY = 'fragmenta.midi.config.v1';
     const [learnMode, setLearnMode] = useState(false);
     const [learnTarget, setLearnTarget] = useState(null);
     const subscribersRef = useRef(new Map());
     const pickupArmedRef = useRef(new Map());
     const configRef = useRef(config);
     useEffect(() => { learnTargetRef.current = learnTarget; }, [learnTarget]);
+    // Load the MIDI input list from the native backend (GET /api/midi/devices, python-rtmidi) instead of Web MIDI, so it
+    // works in every web engine. A request error marks MIDI unsupported and sets permissionError; inputs is left as-is.
+    const refreshInputs = useCallback(async () => {
+        try {
+            const { data } = await api.get('/api/midi/devices');
+            setSupported(!!data.available);
+            setInputs(Array.isArray(data.inputs) ? data.inputs : []);
+            setPermissionError(data.available
+                ? null
+                : 'Native MIDI is unavailable (python-rtmidi not installed).');
+        } catch (err) {
             setSupported(false);
+            setPermissionError(err?.message || 'Could not reach the MIDI backend.');
         }
+    }, []);
+    useEffect(() => { refreshInputs(); }, [refreshInputs]);
     useEffect(() => {
         if (!inputs.length || !config.deviceName) return;
         }
     }, [captureLearn]);
+    // Stream incoming MIDI from the backend over SSE (/api/midi/stream). Each event is the same
+    // {data:[status,d1,d2]} shape Web MIDI gave us, so dispatchMessage is unchanged. EventSource auto-reconnects
+    // on drop. Each run of this effect replaces pickupArmedRef with an empty Map; cleanup closes the stream.
     useEffect(() => {
+        if (typeof EventSource === 'undefined') {
+            setSupported(false);
+            return undefined;
+        }
+        const es = new EventSource('/api/midi/stream');
+        es.onmessage = (e) => {
+            try { dispatchMessage(JSON.parse(e.data)); }
+            catch { /* ignored: malformed JSON line; NOTE this also swallows errors thrown by dispatchMessage */ }
         };
+        pickupArmedRef.current = new Map();
+        return () => es.close();
+    }, [dispatchMessage]);
+    // Tell the backend which port to open (POST /api/midi/select; port_id is null when config.deviceId is falsy). The
+    // stream only carries the open port's events, so device selection happens server-side. No-op while !supported.
+    useEffect(() => {
+        if (!supported) return;
+        api.post('/api/midi/select', { port_id: config.deviceId || null })
+            .catch(() => { /* non-fatal: a failed select is silently ignored by this handler */ });
+    }, [config.deviceId, supported]);
     function applyContinuous(sub, mapping, midiValue, takeover) {
         const norm = midiValue / 127;

app/frontend/src/components/PerformanceChannel.js CHANGED Viewed

@@ -5,27 +5,37 @@ import {
     TextField,
     IconButton,
     Slider,
-    CircularProgress,
-    Tooltip,
     Select,
     MenuItem,
     ButtonBase,
 } from '@mui/material';
 import {
     Play as PlayIcon,
     Square as StopIcon,
-    Repeat as LoopIcon,
-    Sparkles as GenerateIcon,
     Volume2 as VolumeIcon,
     VolumeX as MuteIcon,
-    Headphones as CueIcon,
-    Check as CommitIcon,
 } from 'lucide-react';
-import { performanceChannelStyles as styles, perfTokens } from '../theme';
 import { MidiMappable } from './MidiContext';
 import { playBlob as playCueBlob, stopCue, isCueSupported } from '../utils/cueAudio';
 const CHANNEL_COLORS = [
     '#35C2D4', '#9F8AE6', '#53C18A', '#E3A34B',
     '#E36C61', '#F08AD2', '#5BA0F0', '#A8D86B',
 ];
@@ -40,19 +50,31 @@ const gainDbToLinear = (db) => (db <= GAIN_DB_MIN ? 0 : Math.pow(10, db / 20));
 const KNOB_DEFS = [
     { key: 'gain', label: 'GAIN', min: GAIN_DB_MIN, max: GAIN_DB_MAX, step: 0.5, default: GAIN_DB_DEFAULT },
-    // LPF range goes from 20 Hz (full kill) to 20 kHz (bypass). We render the
-    // slider on a log axis so each octave gets equal travel — without this
-    // the bottom 5% of the knob does all the audible work.
-    { key: 'filter', label: 'LPF', min: 20, max: 20000, step: 1, default: 20000, scale: 'log' },
     { key: 'delay', label: 'DLY', min: 0, max: 1.0, step: 0.01, default: 0.0 },
     { key: 'reverb', label: 'REV', min: 0, max: 1.0, step: 0.01, default: 0.0 },
 ];
 const PAN_CENTER_SNAP = 0.06;
 const BARS_OPTIONS = [1, 2, 4, 8, 16];
 const BEATS_PER_BAR = 4;
 const BATCH_OPTIONS = [1, 2, 3, 4];
 export default function PerformanceChannel({
     index,
@@ -65,13 +87,16 @@ export default function PerformanceChannel({
     onStateChange,
     onFormStateChange,
     initialFormState,
-    maxDuration = 47,
     bpm = 120,
 }) {
     const color = CHANNEL_COLORS[index % CHANNEL_COLORS.length];
     const canvasRef = useRef(null);
     const meterRef = useRef(null);
     const meterRafRef = useRef(null);
     const init = initialFormState || {};
     const initKnobs = init.knobs || {};
@@ -83,7 +108,7 @@ export default function PerformanceChannel({
     const [prompt, setPrompt] = useState(init.prompt ?? '');
     const [duration, setDuration] = useState(init.duration ?? 8);
-    const [durationMode, setDurationMode] = useState(init.durationMode ?? 'seconds');
     const [bars, setBars] = useState(init.bars ?? 4);
     const [generating, setGenerating] = useState(false);
     const [loaded, setLoaded] = useState(false);
@@ -91,31 +116,147 @@ export default function PerformanceChannel({
     const [muted, setMuted] = useState(init.muted ?? false);
     const [soloed, setSoloed] = useState(init.soloed ?? false);
     const [batchSize, setBatchSize] = useState(init.batchSize ?? 1);
-    const [knobs, setKnobs] = useState(() => ({ ...defaultKnobs, ...initKnobs }));
-    // Candidates from the latest batch generation. Held in component state
-    // because they don't survive a page reload — the blob URLs would be dead.
-    // `committedIndex` tracks which one is currently loaded into the strip.
-    const [candidates, setCandidates] = useState([]);
-    const [auditioningIndex, setAuditioningIndex] = useState(null);
-    const [committedIndex, setCommittedIndex] = useState(null);
     const cueSupported = useMemo(() => isCueSupported(), []);
     // Stop any active cue audition when the channel unmounts.
     useEffect(() => () => stopCue(), []);
     // Mirror form state up to the panel so it can persist the session. Skip the
     // first render so we don't re-write what we just loaded from localStorage.
     const initialReportSkippedRef = useRef(false);
     useEffect(() => {
         if (!initialReportSkippedRef.current) {
             initialReportSkippedRef.current = true;
             return;
         }
         onFormStateChange?.(index, {
             prompt, duration, durationMode, bars, looping, muted, soloed, batchSize, knobs,
         });
-    }, [prompt, duration, durationMode, bars, looping, muted, soloed, batchSize, knobs, index, onFormStateChange]);
     const secondsFromBars = useMemo(
         () => bars * (60 / Math.max(bpm, 1)) * BEATS_PER_BAR,
@@ -145,12 +286,31 @@ export default function PerformanceChannel({
     const drawWave = useCallback(() => {
         if (strip && canvasRef.current) {
             strip.drawWaveform(canvasRef.current, color);
         }
     }, [strip, color]);
     useEffect(() => { drawWave(); }, [drawWave, loaded]);
     // One-shot: push restored knob/loop values into the audio strip when it
     // first becomes available, so the persisted session matches what's heard.
     // Mute/solo applies through the parent's mix handler so the panel can
@@ -161,7 +321,11 @@ export default function PerformanceChannel({
         stripStateAppliedRef.current = true;
         strip.setUserGain(gainDbToLinear(knobs.gain));
         strip.setPan(knobs.pan);
-        strip.setFilter(knobs.filter);
         strip.setDelayMix(knobs.delay);
         strip.setReverbMix(knobs.reverb);
         strip.setLoop(looping);
@@ -183,34 +347,89 @@ export default function PerformanceChannel({
         }
     }, [availableBars, bars]);
     const handleGenerate = async () => {
         if (!prompt.trim() || generating) return;
         const inBarsMode = durationMode === 'bars';
         const effectiveDuration = inBarsMode ? secondsFromBars : duration;
         setGenerating(true);
-        // Stop any in-flight cue audition and clear stale candidate state so
-        // the audition strip doesn't keep playing the old generation.
         stopCue();
-        setAuditioningIndex(null);
         try {
-            const result = await onGenerate({
                 prompt,
                 duration: effectiveDuration,
                 batchSize,
                 // Only forward alignment params in bars mode — seconds mode
                 // generates raw audio with no post-processing.
                 ...(inBarsMode ? { alignBars: bars, alignBpm: bpm } : {}),
             });
-            const blobs = Array.isArray(result) ? result : [result];
-            const next = blobs.map((b, i) => ({ index: i, blob: b }));
-            setCandidates(next);
-            // First candidate auto-loads into the channel strip; the rest sit
-            // in the audition row until the user commits a different one.
-            await strip.loadBlob(blobs[0]);
-            setCommittedIndex(0);
-            setLoaded(true);
-            onStateChange?.(index, { loaded: true });
-            requestAnimationFrame(drawWave);
         } catch (err) {
             console.error(`Channel ${index + 1} generate failed:`, err);
         } finally {
@@ -218,37 +437,166 @@ export default function PerformanceChannel({
         }
     };
-    const handleAudition = async (i) => {
-        const candidate = candidates[i];
-        if (!candidate) return;
-        if (auditioningIndex === i) {
             stopCue();
-            setAuditioningIndex(null);
             return;
         }
-        setAuditioningIndex(i);
         try {
-            await playCueBlob(candidate.blob, {
-                onEnded: () => setAuditioningIndex(prev => (prev === i ? null : prev)),
             });
         } catch (err) {
             console.warn(`Channel ${index + 1} audition failed:`, err);
-            setAuditioningIndex(null);
         }
     };
-    const handleCommit = async (i) => {
-        const candidate = candidates[i];
-        if (!candidate || committedIndex === i) return;
-        // Stop the live channel before swapping the buffer so we don't get a
-        // glitch in the middle of a loop iteration.
-        try { strip.stop(); } catch { /* not playing */ }
-        onStateChange?.(index, { playing: false });
-        await strip.loadBlob(candidate.blob);
-        setCommittedIndex(i);
         requestAnimationFrame(drawWave);
     };
     const handlePlay = () => {
         if (!loaded) return;
         if (engine) engine.playChannel(index, looping);
@@ -285,7 +633,11 @@ export default function PerformanceChannel({
         setKnobs(prev => ({ ...prev, [key]: value }));
         if (key === 'gain') strip.setUserGain(gainDbToLinear(value));
         else if (key === 'pan') strip.setPan(value);
-        else if (key === 'filter') strip.setFilter(value);
         else if (key === 'delay') strip.setDelayMix(value);
         else if (key === 'reverb') strip.setReverbMix(value);
     };
@@ -307,15 +659,45 @@ export default function PerformanceChannel({
     return (
         <Box sx={styles.strip(color, playing)}>
             <Box sx={styles.stripHeader(color)}>
-                <Box sx={styles.channelBadge(color)}>{String(index + 1).padStart(2, '0')}</Box>
                 <Box sx={styles.muteSoloRow}>
                     <MidiMappable id={ctrlId('mute')} label={ctrlLabel('Mute')} kind="trigger" onChange={handleMuteToggle}>
-                        <Tooltip title="Mute">
                             <IconButton size="small" onClick={handleMuteToggle} sx={styles.muteBtn(muted)}>M</IconButton>
                         </Tooltip>
                     </MidiMappable>
                     <MidiMappable id={ctrlId('solo')} label={ctrlLabel('Solo')} kind="trigger" onChange={handleSoloToggle}>
-                        <Tooltip title="Solo">
                             <IconButton size="small" onClick={handleSoloToggle} sx={styles.soloBtn(soloed)}>S</IconButton>
                         </Tooltip>
                     </MidiMappable>
@@ -324,18 +706,50 @@ export default function PerformanceChannel({
             <Box sx={styles.promptBox}>
                 <TextField
-                    placeholder="prompt…"
                     value={prompt}
                     onChange={(e) => setPrompt(e.target.value)}
                     multiline
                     minRows={2}
-                    maxRows={3}
                     size="small"
                     fullWidth
                     sx={styles.promptField}
                     disabled={generating}
                 />
                 <Box sx={{ ...styles.durationRow, minHeight: 26, height: 26 }}>
                     <Box
                         sx={{
                             display: 'inline-flex',
@@ -344,9 +758,13 @@ export default function PerformanceChannel({
                             borderRadius: 0.75,
                             overflow: 'hidden',
                             height: '100%',
                         }}
                     >
-                        {['sec', 'bars'].map((mode) => {
                             const value = mode === 'sec' ? 'seconds' : 'bars';
                             const active = durationMode === value;
                             return (
@@ -354,215 +772,170 @@ export default function PerformanceChannel({
                                     key={mode}
                                     onClick={() => setDurationMode(value)}
                                     sx={{
-                                        fontSize: perfTokens.fontSize.small,
-                                        letterSpacing: perfTokens.letterSpacing.wide,
-                                        textTransform: 'uppercase',
-                                        fontFamily: 'inherit',
                                         px: 0.7,
-                                        minWidth: 30,
                                         bgcolor: active ? color : 'transparent',
                                         color: active ? 'rgba(0,0,0,0.88)' : 'text.disabled',
-                                        fontWeight: active ? 600 : 400,
-                                        transition: 'background-color 120ms, color 120ms',
                                         '&:hover': {
                                             bgcolor: active ? color : 'action.hover',
                                             color: active ? 'rgba(0,0,0,0.88)' : 'text.secondary',
                                         },
                                     }}
                                 >
-                                    {mode}
                                 </ButtonBase>
                             );
                         })}
                     </Box>
-                    {durationMode === 'seconds' ? (
-                        <>
-                            <Typography variant="caption" sx={styles.durationLabel}>{duration.toFixed(0)}s</Typography>
-                            <Slider
-                                value={duration}
-                                onChange={(_, v) => setDuration(v)}
-                                min={2}
-                                max={maxDuration}
-                                step={1}
-                                size="small"
-                                sx={styles.durationSlider(color)}
-                            />
-                        </>
-                    ) : (
-                        <Select
-                            value={availableBars.includes(bars) ? bars : availableBars[availableBars.length - 1]}
-                            onChange={(e) => setBars(Number(e.target.value))}
-                            size="small"
-                            sx={{
-                                flex: 1,
-                                fontSize: perfTokens.fontSize.body,
-                                height: '100%',
-                                '& .MuiOutlinedInput-input': {
-                                    py: 0,
-                                    pl: 1,
-                                    minHeight: 'unset',
-                                },
-                                '& .MuiSelect-select': {
-                                    py: 0,
-                                    pl: 1,
-                                    minHeight: 'unset',
-                                },
-                            }}
-                        >
-                            {availableBars.map(b => (
-                                <MenuItem key={b} value={b} sx={{ fontSize: perfTokens.fontSize.body }}>
-                                    {b} {b === 1 ? 'bar' : 'bars'}
-                                </MenuItem>
-                            ))}
-                        </Select>
-                    )}
                 </Box>
                 <Box sx={{
                     display: 'flex',
                     alignItems: 'center',
-                    justifyContent: 'center',
-                    gap: 1.5,
                     mt: 0.5,
                     width: '100%',
                 }}>
                     <Tooltip
-                        title="Batch generation: produce N candidates and audition them through the cue output before committing one to this channel."
                         placement="top"
-                        disableFocusListener
-                        disableTouchListener
                         enterDelay={500}
                     >
                         <Select
                             value={batchSize}
                             onChange={(e) => setBatchSize(Number(e.target.value))}
-                            size="small"
                             disabled={generating}
-                            sx={{
-                                fontSize: perfTokens.fontSize.body,
-                                height: 32,
-                                minWidth: 64,
-                                '& .MuiOutlinedInput-input': { py: 0, pl: 1.25, pr: '28px !important', minHeight: 'unset' },
-                                '& .MuiSelect-select': { py: 0, pl: 1.25, pr: '28px !important', minHeight: 'unset' },
-                            }}
                         >
-                            {BATCH_OPTIONS.map(n => (
-                                <MenuItem key={n} value={n} sx={{ fontSize: perfTokens.fontSize.body }}>
                                     ×{n}
                                 </MenuItem>
                             ))}
                         </Select>
                     </Tooltip>
-                    <MidiMappable id={ctrlId('generate')} label={ctrlLabel('Generate')} kind="trigger" onChange={handleGenerate}>
-                        <IconButton
-                            onClick={handleGenerate}
-                            disabled={!canGenerate || !prompt.trim() || generating}
-                            sx={styles.generateBtn(color)}
-                            size="small"
                         >
-                            {generating ? <CircularProgress size={16} sx={{ color }} /> : <GenerateIcon size={16} />}
-                        </IconButton>
                     </MidiMappable>
                 </Box>
             </Box>
-            <Box sx={styles.waveformWrap}>
                 <canvas
                     ref={canvasRef}
                     width={140}
                     height={42}
-                    style={{ width: '100%', height: 42, display: 'block' }}
                 />
                 {!loaded && (
-                    <Typography variant="caption" sx={styles.waveformPlaceholder}>
-                        empty
                     </Typography>
                 )}
             </Box>
-            {candidates.length > 1 && (
-                <Box
-                    sx={{
-                        display: 'flex',
-                        alignItems: 'center',
-                        gap: 0.5,
-                        px: 1,
-                        py: 0.5,
-                        flexWrap: 'wrap',
-                    }}
-                >
-                    {candidates.map((c, i) => {
-                        const isAuditioning = auditioningIndex === i;
-                        const isCommitted = committedIndex === i;
-                        return (
-                            <Box
-                                key={c.index}
-                                sx={{
-                                    display: 'inline-flex',
-                                    alignItems: 'center',
-                                    border: '1px solid',
-                                    borderColor: isCommitted ? color : 'divider',
-                                    borderRadius: 0.75,
-                                    overflow: 'hidden',
-                                    bgcolor: isCommitted ? `${color}1a` : 'transparent',
-                                }}
-                            >
-                                <Tooltip
-                                    title={
-                                        cueSupported
-                                            ? (isAuditioning ? 'Stop cue audition' : 'Audition this take through cue output')
-                                            : 'Cue audition requires Chrome/Edge. Plays through main output.'
-                                    }
-                                >
-                                    <IconButton
-                                        onClick={() => handleAudition(i)}
-                                        size="small"
-                                        sx={{
-                                            color: isAuditioning ? color : 'text.secondary',
-                                            px: 0.5,
-                                            borderRadius: 0,
-                                        }}
-                                    >
-                                        <CueIcon size={12} />
-                                        <Box
-                                            component="span"
-                                            sx={{
-                                                ml: 0.4,
-                                                fontSize: perfTokens.fontSize.small,
-                                                fontWeight: isAuditioning ? 700 : 500,
-                                            }}
-                                        >
-                                            {i + 1}
-                                        </Box>
-                                    </IconButton>
-                                </Tooltip>
-                                <Tooltip title={isCommitted ? 'Currently in channel' : 'Use this take in the channel'}>
-                                    <span>
-                                        <IconButton
-                                            onClick={() => handleCommit(i)}
-                                            size="small"
-                                            disabled={isCommitted}
-                                            sx={{
-                                                color: isCommitted ? color : 'text.disabled',
-                                                px: 0.4,
-                                                borderRadius: 0,
-                                                borderLeft: '1px solid',
-                                                borderColor: 'divider',
-                                            }}
-                                        >
-                                            <CommitIcon size={12} />
-                                        </IconButton>
-                                    </span>
-                                </Tooltip>
-                            </Box>
-                        );
-                    })}
-                </Box>
-            )}
             <Box sx={{ px: 1, py: 1 }}>
                 <Box sx={{ display: 'flex', alignItems: 'center', gap: 1, mb: 1 }}>
-                    <Box component="span" sx={{ fontSize: perfTokens.fontSize.knob, color: 'text.secondary', letterSpacing: perfTokens.letterSpacing.wide, minWidth: 28 }}>PAN</Box>
                     <MidiMappable
                         id={ctrlId('pan')}
                         label={ctrlLabel('Pan')}
@@ -584,6 +957,10 @@ export default function PerformanceChannel({
                             marks={[{ value: 0 }]}
                             sx={{
                                 flex: 1,
                                 '& .MuiSlider-mark': {
                                     width: 2,
                                     height: 10,
@@ -604,6 +981,7 @@ export default function PerformanceChannel({
             <Box sx={styles.knobsGrid}>
                 {KNOB_DEFS.map((k) => {
                     const isLog = k.scale === 'log';
                     // For log knobs, the slider drives a 0..1 position and we
                     // convert to/from the underlying value (Hz) on the audio
                     // boundary. The knob value stored in state stays in the
@@ -635,7 +1013,24 @@ export default function PerformanceChannel({
                                     max={isLog ? 1 : k.max}
                                     step={isLog ? 0.001 : k.step}
                                     size="small"
-                                    sx={styles.knobSlider(color, k.key === 'gain')}
                                 />
                             </MidiMappable>
                             <Box component="span" sx={styles.knobLabel}>{k.label}</Box>
@@ -644,26 +1039,10 @@ export default function PerformanceChannel({
                 })}
             </Box>
             <Box sx={styles.transportRow}>
-                <MidiMappable id={ctrlId('transport')} label={ctrlLabel('Play/Stop')} kind="trigger" onChange={handleTransportToggle}>
-                    <IconButton
-                        onClick={playing ? handleStop : handlePlay}
-                        disabled={!loaded}
-                        sx={styles.transportBtn(color, playing)}
-                        size="small"
-                    >
-                        {playing ? <StopIcon size={16} /> : <PlayIcon size={16} />}
-                    </IconButton>
-                </MidiMappable>
-                <MidiMappable id={ctrlId('loop')} label={ctrlLabel('Loop')} kind="trigger" onChange={handleLoopToggle}>
-                    <IconButton
-                        onClick={handleLoopToggle}
-                        sx={styles.loopBtn(color, looping)}
-                        size="small"
-                    >
-                        <LoopIcon size={14} />
-                    </IconButton>
-                </MidiMappable>
                 <Box sx={styles.meterTrack}>
                     <Box ref={meterRef} sx={styles.meterFill(color)} />
                 </Box>

     TextField,
     IconButton,
     Slider,
     Select,
     MenuItem,
     ButtonBase,
 } from '@mui/material';
+import { TIPS } from '../tooltips';
+import Tooltip from './Tooltip';
 import {
     Play as PlayIcon,
     Square as StopIcon,
+    ArrowRight as GenerateArrowIcon,
     Volume2 as VolumeIcon,
     VolumeX as MuteIcon,
+    Shuffle as VariationIcon,
 } from 'lucide-react';
+import { performanceChannelStyles as styles, performancePanelStyles as panelStyles, perfTokens, SHEEN_DARK, RAISE_DARK } from '../theme';
 import { MidiMappable } from './MidiContext';
 import { playBlob as playCueBlob, stopCue, isCueSupported } from '../utils/cueAudio';
+import {
+    channelScope,
+    putFragmentBlob,
+    getFragmentBlob,
+    deleteFragmentBlob,
+    clearScope as clearFragmentScope,
+} from '../utils/fragmentStorage';
+import api from '../api';
+import ChannelFragmentHistory from './ChannelFragmentHistory';
 const CHANNEL_COLORS = [
+    // Original introduction palette. Ch1 teal, ch2 violet, ch3 green, ch4 amber.
+    // Light enough that the black label text on active toggles stays legible.
+    // Slots 4–7 are spares (only 4 channels render).
     '#35C2D4', '#9F8AE6', '#53C18A', '#E3A34B',
     '#E36C61', '#F08AD2', '#5BA0F0', '#A8D86B',
 ];
 const KNOB_DEFS = [
     { key: 'gain', label: 'GAIN', min: GAIN_DB_MIN, max: GAIN_DB_MAX, step: 0.5, default: GAIN_DB_DEFAULT },
+    // Bipolar "DJ-filter" knob. -1..+1 with 0 = bypass. Negative side drives
+    // the LPF cutoff down from 20 kHz → 20 Hz (kills highs). Positive side
+    // drives the HPF cutoff up from 20 Hz → 20 kHz (kills lows). The two
+    // biquads sit in series in the engine; only one side ever cuts at a time.
+    { key: 'filter', label: 'FLT', min: -1, max: 1, step: 0.001, default: 0, scale: 'bipolar' },
     { key: 'delay', label: 'DLY', min: 0, max: 1.0, step: 0.01, default: 0.0 },
     { key: 'reverb', label: 'REV', min: 0, max: 1.0, step: 0.01, default: 0.0 },
 ];
+// Map a bipolar filter position (-1..+1) to the (LPF, HPF) frequencies that
+// the engine's two biquads need. 20 Hz / 20 kHz are the bypass anchors on
+// each side; log-scaled so each octave gets equal slider travel.
+function bipolarToFilterFreqs(pos) {
+    const lpf = pos <= 0 ? 20 * Math.pow(1000, 1 + pos) : 20000;
+    const hpf = pos >= 0 ? 20 * Math.pow(1000, pos) : 20;
+    return { lpf, hpf };
+}
 const PAN_CENTER_SNAP = 0.06;
 const BARS_OPTIONS = [1, 2, 4, 8, 16];
 const BEATS_PER_BAR = 4;
 const BATCH_OPTIONS = [1, 2, 3, 4];
+// Per-channel rolling fragment history cap. Starred fragments survive eviction.
+const FRAGMENT_CAP = 200;
 export default function PerformanceChannel({
     index,
     onStateChange,
     onFormStateChange,
     initialFormState,
+    maxDuration = 380,
     bpm = 120,
 }) {
     const color = CHANNEL_COLORS[index % CHANNEL_COLORS.length];
     const canvasRef = useRef(null);
     const meterRef = useRef(null);
     const meterRafRef = useRef(null);
+    // IDB scope key for this channel's fragment blobs. Stable across the
+    // component's lifetime since the channel index doesn't change.
+    const scope = channelScope(index);
     const init = initialFormState || {};
     const initKnobs = init.knobs || {};
     const [prompt, setPrompt] = useState(init.prompt ?? '');
     const [duration, setDuration] = useState(init.duration ?? 8);
+    const [durationMode, setDurationMode] = useState(init.durationMode ?? 'bars');
     const [bars, setBars] = useState(init.bars ?? 4);
     const [generating, setGenerating] = useState(false);
     const [loaded, setLoaded] = useState(false);
     const [muted, setMuted] = useState(init.muted ?? false);
     const [soloed, setSoloed] = useState(init.soloed ?? false);
     const [batchSize, setBatchSize] = useState(init.batchSize ?? 1);
+    // Live progress for the Generate pill while a generation is in flight.
+    // 0–100; polled from /api/generation-progress. Resets on each new run.
+    const [progress, setProgress] = useState(0);
+    const [knobs, setKnobs] = useState(() => {
+        const merged = { ...defaultKnobs, ...initKnobs };
+        // Migration: pre-bipolar `filter` was a raw Hz value (20..20000).
+        // Anything outside the new -1..+1 range is a legacy save — reset
+        // to bypass (0) rather than feeding nonsense into the engine.
+        if (merged.filter < -1 || merged.filter > 1) merged.filter = 0;
+        return merged;
+    });
+    // Per-channel rolling fragment history. Each fragment:
+    //   { id, blob, audioUrl, prompt, duration, createdAt, starred, number }
+    // Oldest-first. Capped at FRAGMENT_CAP via FIFO eviction with star
+    // priority (starred fragments survive until everything is starred, then
+    // oldest go first regardless). `nextFragmentNumberRef` provides a stable
+    // F# even after deletes — so F1 stays F1.
+    const [fragments, setFragments] = useState([]);
+    const [auditioningFragmentId, setAuditioningFragmentId] = useState(null);
+    const [committedFragmentId, setCommittedFragmentId] = useState(null);
+    const nextFragmentNumberRef = useRef(1);
     const cueSupported = useMemo(() => isCueSupported(), []);
     // Stop any active cue audition when the channel unmounts.
     useEffect(() => () => stopCue(), []);
+    // Poll /api/generation-progress while a generation is in flight so the
+    // Generate pill renders a real fill bar instead of a vague spinner. The
+    // backend exposes a single in-flight state; performance generations are
+    // sequential (the backend serves one at a time), so this naturally
+    // reflects whichever channel is currently busy.
+    useEffect(() => {
+        if (!generating) {
+            setProgress(0);
+            return;
+        }
+        let cancelled = false;
+        const tick = async () => {
+            if (cancelled) return;
+            try {
+                const r = await api.get('/api/generation-progress');
+                const pct = Number(r.data?.progress) || 0;
+                if (!cancelled) {
+                    // Cap at 95 until handleGenerate resolves so the bar
+                    // doesn't sit at 100 while waiting for the WAV blob.
+                    setProgress((prev) => Math.max(prev, Math.min(95, pct)));
+                }
+            } catch { /* non-fatal — bar just freezes briefly */ }
+        };
+        tick();
+        const id = window.setInterval(tick, 250);
+        return () => { cancelled = true; window.clearInterval(id); };
+    }, [generating]);
     // Mirror form state up to the panel so it can persist the session. Skip the
     // first render so we don't re-write what we just loaded from localStorage.
+    // Fragments mirror as metadata only — the Blob bodies live in IndexedDB
+    // and get rehydrated on mount by the effect below.
     const initialReportSkippedRef = useRef(false);
     useEffect(() => {
         if (!initialReportSkippedRef.current) {
             initialReportSkippedRef.current = true;
             return;
         }
+        const fragmentsMeta = fragments.map(({ blob, audioUrl, ...rest }) => rest);
         onFormStateChange?.(index, {
             prompt, duration, durationMode, bars, looping, muted, soloed, batchSize, knobs,
+            fragments: fragmentsMeta,
+            committedFragmentId,
         });
+    }, [prompt, duration, durationMode, bars, looping, muted, soloed, batchSize, knobs,
+        fragments, committedFragmentId, index, onFormStateChange]);
+    // Hydrate fragments on mount from the session metadata + IDB blobs. Runs
+    // once, tolerates missing blobs (skips the entry), and advances the
+    // fragment numbering counter past the highest restored number so newly
+    // generated fragments don't collide with the restored ones.
+    const hydrationRef = useRef(false);
+    useEffect(() => {
+        if (hydrationRef.current) return;
+        hydrationRef.current = true;
+        // Backward compat: pre-rename saves used `takes`/`committedTakeId`.
+        // The session loader migrates them into `fragments`/`committedFragmentId`,
+        // but we also fall back here defensively in case `initialFormState`
+        // came from somewhere unmigrated.
+        const meta = initialFormState?.fragments
+            ?? initialFormState?.takes
+            ?? [];
+        const persistedCommittedId = initialFormState?.committedFragmentId
+            ?? initialFormState?.committedTakeId
+            ?? null;
+        if (meta.length === 0) {
+            if (persistedCommittedId) setCommittedFragmentId(null);
+            return;
+        }
+        let cancelled = false;
+        (async () => {
+            const hydrated = [];
+            for (const m of meta) {
+                try {
+                    const blob = await getFragmentBlob(scope, m.id);
+                    if (cancelled) {
+                        hydrated.forEach(t => URL.revokeObjectURL(t.audioUrl));
+                        return;
+                    }
+                    if (!blob) continue;
+                    hydrated.push({
+                        ...m,
+                        blob,
+                        audioUrl: URL.createObjectURL(blob),
+                    });
+                } catch {
+                    /* one bad fetch — keep going */
+                }
+            }
+            if (cancelled) {
+                hydrated.forEach(t => URL.revokeObjectURL(t.audioUrl));
+                return;
+            }
+            const maxNumber = hydrated.reduce((a, t) => Math.max(a, t.number || 0), 0);
+            nextFragmentNumberRef.current = maxNumber + 1;
+            setFragments(hydrated);
+            if (persistedCommittedId && hydrated.some(t => t.id === persistedCommittedId)) {
+                setCommittedFragmentId(persistedCommittedId);
+                setLoaded(true);
+                onStateChange?.(index, { loaded: true });
+            }
+        })();
+        return () => { cancelled = true; };
+        // eslint-disable-next-line react-hooks/exhaustive-deps
+    }, []);
+    // When the audio strip becomes available AND we have a hydrated committed
+    // fragment, load that fragment's blob into the strip so the channel comes back
+    // ready to play after reload. Declared here as a ref so the effect that
+    // actually does the work (after drawWave is defined below) can guard
+    // against multiple loads.
+    const autoLoadDoneRef = useRef(false);
     const secondsFromBars = useMemo(
         () => bars * (60 / Math.max(bpm, 1)) * BEATS_PER_BAR,
     const drawWave = useCallback(() => {
         if (strip && canvasRef.current) {
+            // Each channel's waveform is drawn in that channel's own color.
             strip.drawWaveform(canvasRef.current, color);
         }
     }, [strip, color]);
     useEffect(() => { drawWave(); }, [drawWave, loaded]);
+    // Auto-load the persisted committed fragment into the strip once Tone.js
+    // is ready. Runs at most once per mount; the ref guards against re-trigger
+    // when the user later commits a different fragment (handled by
+    // handleCommitFragment).
+    useEffect(() => {
+        if (autoLoadDoneRef.current) return;
+        if (!strip || !committedFragmentId) return;
+        const fragment = fragments.find(f => f.id === committedFragmentId);
+        if (!fragment) return;
+        autoLoadDoneRef.current = true;
+        strip.loadBlob(fragment.blob).then(() => {
+            requestAnimationFrame(drawWave);
+        }).catch(err => {
+            console.warn(`Channel ${index + 1} auto-load failed:`, err);
+            autoLoadDoneRef.current = false;
+        });
+    }, [strip, committedFragmentId, fragments, drawWave, index]);
     // One-shot: push restored knob/loop values into the audio strip when it
     // first becomes available, so the persisted session matches what's heard.
     // Mute/solo applies through the parent's mix handler so the panel can
         stripStateAppliedRef.current = true;
         strip.setUserGain(gainDbToLinear(knobs.gain));
         strip.setPan(knobs.pan);
+        {
+            const { lpf, hpf } = bipolarToFilterFreqs(knobs.filter);
+            strip.setFilter(lpf);
+            strip.setHighpass(hpf);
+        }
         strip.setDelayMix(knobs.delay);
         strip.setReverbMix(knobs.reverb);
         strip.setLoop(looping);
         }
     }, [availableBars, bars]);
+    // Per-fragment handler factory — fires as each blob returns. Fragment #0
+    // auto-loads into the strip so the user can audition while #1..N render.
+    // Shared by Generate and Variation so both feed channel history identically.
+    const makeOnBlob = (promptSnap, effectiveDuration) => async (blob, i) => {
+        const fragmentNumber = nextFragmentNumberRef.current;
+        nextFragmentNumberRef.current = fragmentNumber + 1;
+        const fragment = {
+            id: `${Date.now()}_${i}`,
+            blob,
+            audioUrl: URL.createObjectURL(blob),
+            prompt: promptSnap,
+            duration: effectiveDuration,
+            createdAt: Date.now(),
+            starred: false,
+            number: fragmentNumber,
+        };
+        // Persist the blob to IndexedDB so it survives reload. Fire-and-forget.
+        putFragmentBlob(scope, fragment.id, blob).catch((err) => {
+            console.warn(`Channel ${index + 1} fragment persist failed:`, err);
+        });
+        // Append to history with FRAGMENT_CAP eviction (oldest unstarred first).
+        setFragments((prev) => {
+            const combined = [...prev, fragment];
+            if (combined.length <= FRAGMENT_CAP) return combined;
+            const trimmed = combined.slice();
+            while (trimmed.length > FRAGMENT_CAP) {
+                let idx = -1;
+                for (let j = 0; j < trimmed.length; j++) {
+                    if (!trimmed[j].starred) { idx = j; break; }
+                }
+                if (idx < 0) idx = 0;  // all starred → drop oldest
+                const dying = trimmed[idx];
+                if (dying.audioUrl?.startsWith('blob:')) {
+                    try { URL.revokeObjectURL(dying.audioUrl); } catch { /* ignore */ }
+                }
+                deleteFragmentBlob(scope, dying.id).catch(() => { /* ignore */ });
+                trimmed.splice(idx, 1);
+            }
+            return trimmed;
+        });
+        // Generating must never disturb playback: a playing channel keeps
+        // looping its current clip while new fragments just pile into the
+        // history list. Only auto-load when the channel has nothing loaded yet
+        // (first-ever fragment) — harmless since nothing is playing — so the
+        // user still gets a ready-to-play clip on a fresh channel. To start a
+        // newly generated fragment, pick it from the list (handleCommitFragment).
+        if (i === 0 && !loaded) {
+            await strip.loadBlob(blob);
+            setCommittedFragmentId(fragment.id);
+            setLoaded(true);
+            onStateChange?.(index, { loaded: true });
+            requestAnimationFrame(drawWave);
+        }
+    };
     const handleGenerate = async () => {
         if (!prompt.trim() || generating) return; // ignore empty prompts and re-entrant clicks
         const inBarsMode = durationMode === 'bars';
         const effectiveDuration = inBarsMode ? secondsFromBars : duration; // bars mode derives seconds from bars/bpm
         setGenerating(true);
+        // Stop any in-flight cue audition so the old preview doesn't keep
+        // playing while we generate the new fragment.
         stopCue();
+        setAuditioningFragmentId(null);
+        const promptSnap = prompt.trim(); // trimmed copy stored on each fragment's history entry
         try {
+            await onGenerate({
                 prompt, // note: the untrimmed prompt is what gets sent to onGenerate
                 duration: effectiveDuration,
                 batchSize,
                 // Only forward alignment params in bars mode — seconds mode
                 // generates raw audio with no post-processing.
                 ...(inBarsMode ? { alignBars: bars, alignBpm: bpm } : {}),
+                // Phase 7: bars-mode + channel-looping ⇒ ask the backend
+                // to wrap-inpaint the seam so the clip loops seamlessly.
+                ...(inBarsMode && looping ? { loopStitch: 'inpaint' } : {}),
+                onBlob: makeOnBlob(promptSnap, effectiveDuration),
             });
         } catch (err) {
             console.error(`Channel ${index + 1} generate failed:`, err); // failures are logged only
         } finally { // NOTE(review): no setGenerating(false) is visible here (handleVariation has one) — confirm the reset was not lost
         }
     };
+    // Phase 8 "Variation": re-roll the channel using its current fragment as
+    // init_audio at a high noise level (0.9) — gives a related-but-different
+    // take (A/B/A/C/A live sets). Uploads the source blob to get a server
+    // path, then routes through the same onGenerate flow with batchSize 1.
+    const handleVariation = async () => {
+        if (generating) return; // never start a second run while one is in flight
+        const src = fragments.find((f) => f.id === committedFragmentId)
+            || fragments[fragments.length - 1]; // no committed fragment → fall back to the last (newest) history entry
+        if (!src?.blob) return; // nothing to vary from
+        const inBarsMode = durationMode === 'bars';
+        const effectiveDuration = inBarsMode ? secondsFromBars : duration;
+        const promptSnap = (prompt || '').trim() || src.prompt || 'variation'; // typed prompt, else the source's prompt, else a literal fallback
+        setGenerating(true);
+        stopCue();
+        setAuditioningFragmentId(null);
+        try {
+            const form = new FormData();
+            form.append('file', new File([src.blob], `${scope}_variation_src.wav`, { type: 'audio/wav' }));
+            const up = await api.post('/api/audio/upload', form); // response is expected to carry the server-side path in data.path
+            await onGenerate({ // NOTE(review): unlike handleGenerate, no alignBars/alignBpm/loopStitch are forwarded in bars mode — confirm intentional
+                prompt: promptSnap,
+                duration: effectiveDuration,
+                batchSize: 1,
+                initAudioPath: up.data.path,
+                initNoiseLevel: 0.9,
+                onBlob: makeOnBlob(promptSnap, effectiveDuration),
+            });
+        } catch (err) {
+            console.error(`Channel ${index + 1} variation failed:`, err); // upload and generate failures are both logged here
+        } finally {
+            setGenerating(false); // always release the busy flag, success or failure
+        }
+    };
+    // Fragment history actions — toggle audition through cue, commit a
+    // fragment to the channel buffer, star/unstar, delete one, or clear
+    // the whole list.
+    const handleAuditionFragment = async (fragmentId) => { // toggles cue playback of one history fragment
+        const fragment = fragments.find((f) => f.id === fragmentId);
+        if (!fragment) return; // unknown id → no-op
+        if (auditioningFragmentId === fragmentId) { // second click on the same fragment stops the audition
             stopCue();
+            setAuditioningFragmentId(null);
             return;
         }
+        setAuditioningFragmentId(fragmentId); // mark as auditioning before playback starts
         try {
+            await playCueBlob(fragment.blob, {
+                onEnded: () => setAuditioningFragmentId((prev) => (prev === fragmentId ? null : prev)), // clear only if this fragment is still the one marked as auditioning
             });
         } catch (err) {
             console.warn(`Channel ${index + 1} audition failed:`, err);
+            setAuditioningFragmentId(null); // playback failed → drop the auditioning marker
         }
     };
+    // Choosing a fragment launches it from the beginning. The currently
+    // playing clip (if any) keeps sounding until the launch point: immediately
+    // in seconds mode or when launch quantization is None, otherwise at the
+    // next launch-quantization bar. The buffer is decoded WITHOUT stopping the
+    // live source, so the swap is gapless (the engine schedules the handoff).
+    const handleCommitFragment = async (fragmentId) => {
+        const fragment = fragments.find((f) => f.id === fragmentId);
+        if (!fragment) return; // Unknown id → no-op.
+        const sameFragment = committedFragmentId === fragmentId;
+        // Already looping this exact clip → nothing to (re)launch.
+        if (sameFragment && playing) return;
+        // Decode the new clip without cutting the live source; skip the decode
+        // when this fragment's buffer is already loaded.
+        if (!sameFragment) {
+            await strip.loadBufferFromBlob(fragment.blob);
+            setCommittedFragmentId(fragmentId);
+        }
+        // Mark loaded so the play button enables (covers preset/hydrated flows
+        // where the first commit happens here rather than via generate).
+        if (!loaded) {
+            setLoaded(true);
+            onStateChange?.(index, { loaded: true });
+        }
+        // Launch from the top. Seconds mode is always immediate; bars mode
+        // defers to the engine's launch-quantization (ASAP when quantum=None).
+        const immediate = durationMode === 'seconds';
+        if (engine) engine.relaunchChannel(index, looping, immediate);
+        else strip.playAt(looping, 0); // No engine → drive the strip directly (presumably from offset 0 — confirm against the strip API).
+        onStateChange?.(index, { playing: true }); // Reported via the optional callback. NOTE(review): no local setPlaying here — presumably fed back from outside; confirm.
         requestAnimationFrame(drawWave); // Schedule drawWave for the next animation frame.
     };
+    // Drag-and-drop: a fragment row from this channel's history can be
+    // dropped onto the waveform monitor to load it (same effect as the row's
+    // commit ✓ button). The MIME type is channel-scoped, so a row from
+    // channel 1 won't even highlight channel 2's waveform — the browser
+    // filters at dragOver level via dataTransfer.types matching.
+    const dragMime = `application/x-fragmenta-fragment-ch${index}`;
+    const [dropActive, setDropActive] = useState(false);
+    // Counter pattern — dragenter/leave also fire when the cursor crosses
+    // into child elements (canvas, overlay). Without the counter, dropActive
+    // would flicker false whenever the cursor moved over a child.
+    const dragCounterRef = useRef(0);
+    const handleWaveDragEnter = (e) => {
+        if (!e.dataTransfer.types.includes(dragMime)) return;
+        e.preventDefault();
+        dragCounterRef.current += 1;
+        if (dragCounterRef.current === 1) setDropActive(true);
+    };
+    const handleWaveDragOver = (e) => {
+        if (!e.dataTransfer.types.includes(dragMime)) return;
+        e.preventDefault();
+        e.dataTransfer.dropEffect = 'copy';
+    };
+    const handleWaveDragLeave = () => {
+        dragCounterRef.current = Math.max(0, dragCounterRef.current - 1);
+        if (dragCounterRef.current === 0) setDropActive(false);
+    };
+    const handleWaveDrop = (e) => {
+        e.preventDefault();
+        dragCounterRef.current = 0;
+        setDropActive(false);
+        const fragmentId = e.dataTransfer.getData(dragMime);
+        if (fragmentId) handleCommitFragment(fragmentId);
+    };
+    const handleToggleStar = (fragmentId) => {
+        setFragments((prev) => prev.map((f) =>
+            f.id === fragmentId ? { ...f, starred: !f.starred } : f,
+        ));
+    };
+    const handleDeleteFragment = (fragmentId) => {
+        const target = fragments.find((f) => f.id === fragmentId);
+        if (target?.audioUrl?.startsWith('blob:')) {
+            try { URL.revokeObjectURL(target.audioUrl); } catch { /* ignore */ }
+        }
+        deleteFragmentBlob(scope, fragmentId).catch(() => { /* ignore */ });
+        setFragments((prev) => prev.filter((f) => f.id !== fragmentId));
+        if (committedFragmentId === fragmentId) setCommittedFragmentId(null);
+        if (auditioningFragmentId === fragmentId) {
+            stopCue();
+            setAuditioningFragmentId(null);
+        }
+    };
+    const handleClearFragments = () => {
+        // Stop any in-flight audition and revoke every blob URL before
+        // dropping references — otherwise the URLs leak until reload.
+        stopCue();
+        setAuditioningFragmentId(null);
+        fragments.forEach((f) => {
+            if (f.audioUrl?.startsWith('blob:')) {
+                try { URL.revokeObjectURL(f.audioUrl); } catch { /* ignore */ }
+            }
+        });
+        clearFragmentScope(scope).catch(() => { /* ignore */ });
+        setFragments([]);
+        setCommittedFragmentId(null);
+    };
     const handlePlay = () => {
         if (!loaded) return;
         if (engine) engine.playChannel(index, looping);
         setKnobs(prev => ({ ...prev, [key]: value }));
         if (key === 'gain') strip.setUserGain(gainDbToLinear(value));
         else if (key === 'pan') strip.setPan(value);
+        else if (key === 'filter') {
+            const { lpf, hpf } = bipolarToFilterFreqs(value);
+            strip.setFilter(lpf);
+            strip.setHighpass(hpf);
+        }
         else if (key === 'delay') strip.setDelayMix(value);
         else if (key === 'reverb') strip.setReverbMix(value);
     };
     return (
         <Box sx={styles.strip(color, playing)}>
             <Box sx={styles.stripHeader(color)}>
+                {/* Transport (Play / Loop) on the left, Mute / Solo on the
+                    right — replaces the old "01" channel badge so the channel
+                    number isn't using up that slot. */}
+                <Box sx={styles.muteSoloRow}>
+                    <MidiMappable id={ctrlId('transport')} label={ctrlLabel('Play/Stop')} kind="trigger" onChange={handleTransportToggle}>
+                        <IconButton
+                            onClick={playing ? handleStop : handlePlay}
+                            disabled={!loaded}
+                            sx={styles.transportBtn(color, playing)}
+                            size="small"
+                        >
+                            {playing ? <StopIcon size={16} /> : <PlayIcon size={16} />}
+                        </IconButton>
+                    </MidiMappable>
+                    <MidiMappable id={ctrlId('loop')} label={ctrlLabel('Loop')} kind="trigger" onChange={handleLoopToggle}>
+                        <Tooltip
+                            title={TIPS.channel.loop(looping, durationMode)}
+                            placement="top"
+                            enterDelay={400}
+                        >
+                            <IconButton
+                                onClick={handleLoopToggle}
+                                sx={styles.loopBtn(color, looping)}
+                                size="small"
+                                aria-label={looping ? 'Loop on' : 'Loop off'}
+                            >
+                                L
+                            </IconButton>
+                        </Tooltip>
+                    </MidiMappable>
+                </Box>
                 <Box sx={styles.muteSoloRow}>
                     <MidiMappable id={ctrlId('mute')} label={ctrlLabel('Mute')} kind="trigger" onChange={handleMuteToggle}>
+                        <Tooltip title={TIPS.channel.mute}>
                             <IconButton size="small" onClick={handleMuteToggle} sx={styles.muteBtn(muted)}>M</IconButton>
                         </Tooltip>
                     </MidiMappable>
                     <MidiMappable id={ctrlId('solo')} label={ctrlLabel('Solo')} kind="trigger" onChange={handleSoloToggle}>
+                        <Tooltip title={TIPS.channel.solo}>
                             <IconButton size="small" onClick={handleSoloToggle} sx={styles.soloBtn(soloed)}>S</IconButton>
                         </Tooltip>
                     </MidiMappable>
             <Box sx={styles.promptBox}>
                 <TextField
+                    placeholder="Prompt…"
                     value={prompt}
                     onChange={(e) => setPrompt(e.target.value)}
                     multiline
                     minRows={2}
+                    maxRows={2}
                     size="small"
                     fullWidth
                     sx={styles.promptField}
                     disabled={generating}
                 />
                 <Box sx={{ ...styles.durationRow, minHeight: 26, height: 26 }}>
+                    {durationMode === 'seconds' ? (
+                        <>
+                            <Typography sx={styles.durationLabel}>{duration.toFixed(0)}s</Typography>
+                            <Slider
+                                value={duration}
+                                onChange={(_, v) => setDuration(v)}
+                                min={2}
+                                max={maxDuration}
+                                step={1}
+                                size="small"
+                                sx={styles.durationSlider(color)}
+                            />
+                        </>
+                    ) : (
+                        <Select
+                            value={availableBars.includes(bars) ? bars : availableBars[availableBars.length - 1]}
+                            onChange={(e) => setBars(Number(e.target.value))}
+                            size="small"
+                            sx={{ ...panelStyles.pillControl, flex: 1 }}
+                        >
+                            {availableBars.map(b => (
+                                <MenuItem key={b} value={b} sx={{ fontSize: perfTokens.fontSize.sm }}>
+                                    {b} {b === 1 ? 'bar' : 'bars'}
+                                </MenuItem>
+                            ))}
+                        </Select>
+                    )}
+                    {/* Sec/Bars mode toggle — moved to the right of the row so
+                        it mirrors the Generate row layout (content fills left,
+                        modifier sits right). Width matches the ×N selector so
+                        the right column reads as a uniform stack. */}
                     <Box
                         sx={{
                             display: 'inline-flex',
                             borderRadius: 0.75,
                             overflow: 'hidden',
                             height: '100%',
+                            flexShrink: 0,
                         }}
                     >
+                        {[
+                            { mode: 'sec',  label: 'Sec'  },
+                            { mode: 'bars', label: 'Bars' },
+                        ].map(({ mode, label }) => {
                             const value = mode === 'sec' ? 'seconds' : 'bars';
                             const active = durationMode === value;
                             return (
                                     key={mode}
                                     onClick={() => setDurationMode(value)}
                                     sx={{
+                                        fontSize: perfTokens.fontSize.sm,
                                         px: 0.7,
+                                        minWidth: 36,
                                         bgcolor: active ? color : 'transparent',
+                                        backgroundImage: active ? SHEEN_DARK : 'none',
+                                        boxShadow: active ? RAISE_DARK : 'none',
                                         color: active ? 'rgba(0,0,0,0.88)' : 'text.disabled',
+                                        fontWeight: active ? perfTokens.weight.bold : perfTokens.weight.regular,
+                                        transition: 'background-color 120ms, color 120ms, box-shadow 120ms',
                                         '&:hover': {
                                             bgcolor: active ? color : 'action.hover',
                                             color: active ? 'rgba(0,0,0,0.88)' : 'text.secondary',
                                         },
                                     }}
                                 >
+                                    {label}
                                 </ButtonBase>
                             );
                         })}
                     </Box>
                 </Box>
                 <Box sx={{
                     display: 'flex',
                     alignItems: 'center',
+                    gap: 1,
                     mt: 0.5,
                     width: '100%',
                 }}>
+                    {/* Generate pill — wide CTA on the left so the eye lands
+                        on the primary action first. Fills left-to-right with
+                        live progress while generating; resets when complete. */}
+                    <MidiMappable id={ctrlId('generate')} label={ctrlLabel('Generate')} kind="trigger" onChange={handleGenerate}>
+                        <Tooltip
+                            title={TIPS.channel.generateDisabled(generating, canGenerate, prompt.trim())}
+                            placement="top"
+                        >
+                            <span style={{ display: 'inline-flex', flex: 1, minWidth: 0 }}>
+                                <ButtonBase
+                                    onClick={handleGenerate}
+                                    disabled={!canGenerate || !prompt.trim() || generating}
+                                    sx={styles.generatePill(color, {
+                                        generating,
+                                        disabled: !canGenerate || !prompt.trim(),
+                                    })}
+                                >
+                                    {generating && (
+                                        <Box sx={styles.generatePillFill(color, progress)} />
+                                    )}
+                                    <Box component="span" sx={styles.generatePillLabel}>
+                                        {generating
+                                            ? `Generating · ${Math.round(progress)}%`
+                                            : 'Generate'}
+                                        {!generating && <GenerateArrowIcon size={14} strokeWidth={2.25} />}
+                                    </Box>
+                                </ButtonBase>
+                            </span>
+                        </Tooltip>
+                    </MidiMappable>
+                    {/* Batch selector — sits right of Generate so the row
+                        reads "Generate × 4" (action then modifier). Sized to
+                        its content (×1–×8 + dropdown arrow); no need to match
+                        the wider Sec/Bars toggle above. */}
                     <Tooltip
+                        title={TIPS.channel.batch}
                         placement="top"
                         enterDelay={500}
                     >
                         <Select
                             value={batchSize}
                             onChange={(e) => setBatchSize(Number(e.target.value))}
                             disabled={generating}
+                            size="small"
+                            sx={{ ...styles.channelPillControl, width: 54, flexShrink: 0 }}
+                            renderValue={(v) => `×${v}`}
                         >
+                            {BATCH_OPTIONS.map((n) => (
+                                <MenuItem
+                                    key={n}
+                                    value={n}
+                                    sx={{ fontSize: perfTokens.fontSize.sm, fontVariantNumeric: 'tabular-nums' }}
+                                >
                                     ×{n}
                                 </MenuItem>
                             ))}
                         </Select>
                     </Tooltip>
+                    {/* Variation — re-roll from the current fragment as
+                        init_audio (Phase 8). Disabled until a fragment exists. */}
+                    <MidiMappable id={ctrlId('variation')} label={ctrlLabel('Variation')} kind="trigger" onChange={handleVariation}>
+                        <Tooltip
+                            title={TIPS.channel.variation(loaded)}
+                            placement="top"
                         >
+                            <span style={{ display: 'inline-flex', flexShrink: 0 }}>
+                                <ButtonBase
+                                    onClick={handleVariation}
+                                    disabled={!loaded || generating}
+                                    sx={{
+                                        ...styles.channelPillControl,
+                                        width: 40,
+                                        justifyContent: 'center',
+                                        '&.Mui-disabled': { opacity: 0.4, color: 'text.disabled' },
+                                    }}
+                                    aria-label="Variation"
+                                >
+                                    <VariationIcon size={15} strokeWidth={2.25} />
+                                </ButtonBase>
+                            </span>
+                        </Tooltip>
                     </MidiMappable>
                 </Box>
             </Box>
+            <Box
+                onDragEnter={handleWaveDragEnter}
+                onDragOver={handleWaveDragOver}
+                onDragLeave={handleWaveDragLeave}
+                onDrop={handleWaveDrop}
+                sx={[
+                    styles.waveformWrap,
+                    dropActive && {
+                        borderColor: color,
+                        boxShadow: `inset 0 0 0 2px ${color}`,
+                        backgroundColor: `${color}1F`,
+                        transition: 'border-color 120ms, box-shadow 120ms, background-color 120ms',
+                    },
+                ]}
+            >
                 <canvas
                     ref={canvasRef}
                     width={140}
                     height={42}
+                    style={{ width: '100%', height: 42, display: 'block', pointerEvents: 'none' }}
                 />
                 {!loaded && (
+                    <Typography sx={styles.waveformPlaceholder}>
+                        {dropActive ? 'Drop to load' : 'Waveform'}
                     </Typography>
                 )}
             </Box>
+            {/* Per-channel rolling fragment history. Always rendered (empty
+                state included). Star/keep, delete, audition, load — all
+                inline per row. Capped at FRAGMENT_CAP via FIFO with star
+                priority. */}
+            <ChannelFragmentHistory
+                fragments={fragments}
+                color={color}
+                channelIndex={index}
+                auditioningId={auditioningFragmentId}
+                committedId={committedFragmentId}
+                maxFragments={FRAGMENT_CAP}
+                onAudition={handleAuditionFragment}
+                onCommit={handleCommitFragment}
+                onToggleStar={handleToggleStar}
+                onDelete={handleDeleteFragment}
+                onClearAll={handleClearFragments}
+            />
             <Box sx={{ px: 1, py: 1 }}>
                 <Box sx={{ display: 'flex', alignItems: 'center', gap: 1, mb: 1 }}>
+                    <Box component="span" sx={{ ...perfTokens.caps, color: 'text.secondary', minWidth: 28 }}>PAN</Box>
                     <MidiMappable
                         id={ctrlId('pan')}
                         label={ctrlLabel('Pan')}
                             marks={[{ value: 0 }]}
                             sx={{
                                 flex: 1,
+                                // Match the channel main color (the global
+                                // MuiSlider override is amber; the vertical
+                                // knobs already pass `color` via knobSlider).
+                                color,
                                 '& .MuiSlider-mark': {
                                     width: 2,
                                     height: 10,
             <Box sx={styles.knobsGrid}>
                 {KNOB_DEFS.map((k) => {
                     const isLog = k.scale === 'log';
+                    const isBipolar = k.scale === 'bipolar';
                     // For log knobs, the slider drives a 0..1 position and we
                     // convert to/from the underlying value (Hz) on the audio
                     // boundary. The knob value stored in state stays in the
                                     max={isLog ? 1 : k.max}
                                     step={isLog ? 0.001 : k.step}
                                     size="small"
+                                    track={isBipolar ? false : undefined}
+                                    marks={isBipolar ? [{ value: 0 }] : undefined}
+                                    sx={{
+                                        ...styles.knobSlider(color, k.key === 'gain'),
+                                        ...(isBipolar && {
+                                            '& .MuiSlider-mark': {
+                                                width: 10,
+                                                height: 2,
+                                                borderRadius: 1,
+                                                backgroundColor: 'text.secondary',
+                                                opacity: 0.7,
+                                            },
+                                            '& .MuiSlider-markActive': {
+                                                backgroundColor: 'text.secondary',
+                                                opacity: 0.7,
+                                            },
+                                        }),
+                                    }}
                                 />
                             </MidiMappable>
                             <Box component="span" sx={styles.knobLabel}>{k.label}</Box>
                 })}
             </Box>
+            {/* Bottom row is now just the channel level meter — Play and Loop
+                moved to the top header so the channel reads "controls on top,
+                signal flow below". */}
             <Box sx={styles.transportRow}>
                 <Box sx={styles.meterTrack}>
                     <Box ref={meterRef} sx={styles.meterFill(color)} />
                 </Box>

app/frontend/src/components/PerformancePanel.js CHANGED Viewed

The diff for this file is too large to render. See raw diff

app/frontend/src/components/StorageDrilldown.js ADDED Viewed

	@@ -0,0 +1,84 @@

+import React from 'react';
+import {
+    Dialog,
+    DialogTitle,
+    DialogContent,
+    DialogActions,
+    Button,
+    Typography,
+    Box,
+    LinearProgress,
+    Table,
+    TableBody,
+    TableCell,
+    TableHead,
+    TableRow,
+} from '@mui/material';
+const fmtBytes = (n) => {
+    if (!n) return '—';
+    // Decimal (SI) units — matches what HuggingFace shows next to safetensors files.
+    const units = ['B', 'KB', 'MB', 'GB', 'TB'];
+    let v = n;
+    let u = 0;
+    while (v >= 1000 && u < units.length - 1) { v /= 1000; u += 1; }
+    return `${v.toFixed(v < 10 ? 2 : 1)} ${units[u]}`;
+};
+export default function StorageDrilldown({ open, onClose, storage, catalog }) {
+    if (!storage) return null;
+    const usedPct = storage.total_used_bytes && (storage.total_used_bytes + storage.total_free_bytes)
+        ? (storage.total_used_bytes / (storage.total_used_bytes + storage.total_free_bytes)) * 100
+        : 0;
+    const nameFor = (id) => catalog?.find(c => c.id === id)?.name || id;
+    const rows = (storage.per_model || [])
+        .filter(m => m.downloaded)
+        .sort((a, b) => b.bytes - a.bytes);
+    return (
+        <Dialog open={open} onClose={onClose} maxWidth="sm" fullWidth>
+            <DialogTitle>Storage</DialogTitle>
+            <DialogContent dividers>
+                <Box sx={{ mb: 2 }}>
+                    <Typography variant="body2" color="text.secondary">
+                        {fmtBytes(storage.total_used_bytes)} used · {fmtBytes(storage.total_free_bytes)} free
+                    </Typography>
+                    <LinearProgress
+                        variant="determinate"
+                        value={Math.min(100, usedPct)}
+                        sx={{ mt: 1, height: 6, borderRadius: 3 }}
+                    />
+                </Box>
+                {rows.length === 0 ? (
+                    <Typography variant="body2" color="text.secondary" sx={{ py: 2 }}>
+                        Nothing downloaded yet.
+                    </Typography>
+                ) : (
+                    <Table size="small">
+                        <TableHead>
+                            <TableRow>
+                                <TableCell>Checkpoint</TableCell>
+                                <TableCell align="right">Size</TableCell>
+                            </TableRow>
+                        </TableHead>
+                        <TableBody>
+                            {rows.map(m => (
+                                <TableRow key={m.id}>
+                                    <TableCell>{nameFor(m.id)}</TableCell>
+                                    <TableCell align="right">{fmtBytes(m.bytes)}</TableCell>
+                                </TableRow>
+                            ))}
+                        </TableBody>
+                    </Table>
+                )}
+            </DialogContent>
+            <DialogActions>
+                <Button onClick={onClose}>Close</Button>
+            </DialogActions>
+        </Dialog>
+    );
+}

app/frontend/src/components/Tooltip.js ADDED Viewed

	@@ -0,0 +1,35 @@

+import React, { cloneElement } from 'react';
+import { useInfoView } from './InfoView';
+/**
+ * App-wide Tooltip.
+ *
+ * Help text is shown exclusively through the Info View (see
+ * components/InfoView.js) — there are no popup tooltips on the controls:
+ *
+ *  • Info View ON  — on hover/focus the `title` is reported to the bottom
+ *    Info View pill, so help shows in one fixed place rather than over the
+ *    control itself.
+ *  • Info View OFF — no hover help at all; the child renders untouched.
+ *
+ * The API matches MUI's Tooltip (drop-in for the existing call sites): pass a
+ * `title` plus a single child element. Placement/arrow/delay props are
+ * accepted but ignored, since there's no popup.
+ */
+export default function Tooltip({ children, title }) {
+    const { enabled, setHint } = useInfoView();
+    const child = React.Children.only(children);
+    if (!enabled) {
+        // No popup tooltips — the Info View is the only help surface.
+        return child;
+    }
+    // Info View mode: route the tip to the bottom pill on hover/focus.
+    return cloneElement(child, {
+        onMouseEnter: (e) => { setHint(title); child.props?.onMouseEnter?.(e); },
+        onMouseLeave: (e) => { setHint(null); child.props?.onMouseLeave?.(e); },
+        onFocus: (e) => { setHint(title); child.props?.onFocus?.(e); },
+        onBlur: (e) => { setHint(null); child.props?.onBlur?.(e); },
+    });
+}

app/frontend/src/components/TrainingMonitor.js CHANGED Viewed

@@ -1,14 +1,26 @@
-import React from 'react';
 import { Paper, Box, Typography, LinearProgress, Grid, Alert } from '@mui/material';
 import { Activity as ActivityIcon } from 'lucide-react';
 import LossChart from './LossChart';
 import { trainingMonitorStyles } from '../theme';
 export default function TrainingMonitor({
     trainingProgress,
     trainingStatus,
     trainingError,
-    trainingConfig,
     indicatorState,
 }) {
     const getProgressColor = () => {
@@ -21,6 +33,39 @@ export default function TrainingMonitor({
     const label = indicatorState?.label || 'Idle';
     const animate = indicatorState?.animate || false;
     return (
         <Paper sx={trainingMonitorStyles.rootPaper}>
             <Box sx={trainingMonitorStyles.headerRow}>
@@ -53,60 +98,56 @@ export default function TrainingMonitor({
                 />
             </Box>
-            {trainingStatus?.device_info && (
-                <Box sx={trainingMonitorStyles.deviceSection}>
-                    <Typography variant="body2" color="textSecondary">
-                        <strong>{
-                            trainingStatus.device_info.type === 'cuda' ? 'CUDA' :
-                                trainingStatus.device_info.type === 'mps' ? 'MPS' : 'CPU'
-                        }</strong>
-                        {' · '}{trainingStatus.device_info.device}
-                        {trainingStatus.device_info.memory_gb
-                            ? ` · ${trainingStatus.device_info.memory_gb.toFixed(1)} GB`
-                            : ''}
-                    </Typography>
-                </Box>
-            )}
             <Grid container spacing={2} sx={trainingMonitorStyles.metricsGrid}>
                 <Grid item xs={12} sm={6}>
-                    <Typography variant="body2" color="textSecondary">Current Epoch</Typography>
-                    <Typography variant="body1">
-                        {trainingStatus?.current_epoch !== undefined ?
-                            `${trainingStatus.current_epoch + 1} / ${trainingConfig.epochs}` :
-                            '0 / ' + trainingConfig.epochs}
                     </Typography>
                 </Grid>
                 <Grid item xs={12} sm={6}>
-                    <Typography variant="body2" color="textSecondary">Global Step / Total Steps</Typography>
-                    <Typography variant="body1" color="primary">
-                        {trainingStatus?.global_step !== undefined && trainingStatus?.total_steps !== undefined ?
-                            `${trainingStatus.global_step} / ${trainingStatus.total_steps}` :
-                            'N/A'}
                     </Typography>
                 </Grid>
                 <Grid item xs={12} sm={6}>
                     <Typography variant="body2" color="textSecondary">Checkpoints Saved</Typography>
-                    <Typography variant="body1">
-                        {trainingStatus?.checkpoints_saved || 0}
-                    </Typography>
                 </Grid>
                 <Grid item xs={12} sm={6}>
-                    <Typography variant="body2" color="textSecondary">Current Loss</Typography>
-                    <Typography variant="body1">
-                        {trainingStatus?.loss ? parseFloat(trainingStatus.loss).toFixed(4) : 'N/A'}
                     </Typography>
                 </Grid>
             </Grid>
-            {trainingStatus?.loss_history && trainingStatus.loss_history.length > 0 && (
                 <Box sx={trainingMonitorStyles.lossSection}>
                     <Typography variant="body2" color="textSecondary" gutterBottom>
                         <strong>Loss History</strong>
                     </Typography>
                     <Box sx={trainingMonitorStyles.lossChartBox}>
-                        <LossChart data={trainingStatus.loss_history} />
                     </Box>
                 </Box>
             )}

+import React, { useMemo } from 'react';
 import { Paper, Box, Typography, LinearProgress, Grid, Alert } from '@mui/material';
 import { Activity as ActivityIcon } from 'lucide-react';
 import LossChart from './LossChart';
 import { trainingMonitorStyles } from '../theme';
+/**
+ * TrainingMonitor — right-pane status card for the Training tab.
+ *
+ * Reads SA3-shaped status from the backend:
+ *   { is_training, status, step, total_steps, current_step, progress,
+ *     loss, checkpoints, checkpoints_saved, error, ... }
+ *
+ * SA3 trains by step count, not epochs — the panel surfaces step / total
+ * directly. Loss curve is built frontend-side from successive poll snapshots
+ * (trainingHistory) so we don't depend on the backend emitting a history
+ * array.
+ */
 export default function TrainingMonitor({
     trainingProgress,
     trainingStatus,
+    trainingHistory,
     trainingError,
     indicatorState,
 }) {
     const getProgressColor = () => {
     const label = indicatorState?.label || 'Idle';
     const animate = indicatorState?.animate || false;
+    // Loss points for the chart. We prefer the backend's loss_history
+    // (built from Lightning's metrics.csv, which records per-step loss
+    // from step 0) so the chart shows the full curve even before PL's
+    // tqdm postfix surfaces train/loss (which only appears after the
+    // first metrics flush, typically end of epoch 0). Falls back to the
+    // frontend-built trainingHistory if the backend hasn't populated
+    // loss_history yet (very early in the run, before PL writes CSV).
+    const lossPoints = useMemo(() => {
+        const fromBackend = trainingStatus?.loss_history;
+        if (Array.isArray(fromBackend) && fromBackend.length > 0) {
+            return fromBackend
+                .filter(p => Number.isFinite(p?.step) && Number.isFinite(p?.loss))
+                .sort((a, b) => a.step - b.step);
+        }
+        if (!trainingHistory || trainingHistory.length === 0) return [];
+        const byStep = new Map();
+        for (const h of trainingHistory) {
+            const step = h.current_step ?? h.step;
+            const loss = typeof h.loss === 'number' ? h.loss : parseFloat(h.loss);
+            if (Number.isFinite(step) && Number.isFinite(loss)) {
+                byStep.set(step, { step, loss });
+            }
+        }
+        return Array.from(byStep.values()).sort((a, b) => a.step - b.step);
+    }, [trainingHistory, trainingStatus?.loss_history]);
+    const step = trainingStatus?.current_step ?? trainingStatus?.step ?? 0;
+    const totalSteps = trainingStatus?.total_steps ?? 0;
+    const checkpointsSaved = trainingStatus?.checkpoints_saved
+        ?? trainingStatus?.checkpoints?.length
+        ?? 0;
+    const currentLoss = trainingStatus?.loss;
     return (
         <Paper sx={trainingMonitorStyles.rootPaper}>
             <Box sx={trainingMonitorStyles.headerRow}>
                 />
             </Box>
             <Grid container spacing={2} sx={trainingMonitorStyles.metricsGrid}>
                 <Grid item xs={12} sm={6}>
+                    <Typography variant="body2" color="textSecondary">Step</Typography>
+                    <Typography variant="body1" color="primary">
+                        {totalSteps > 0 ? `${step} / ${totalSteps}` : `${step}`}
                     </Typography>
                 </Grid>
                 <Grid item xs={12} sm={6}>
+                    <Typography variant="body2" color="textSecondary">Current Loss</Typography>
+                    <Typography variant="body1">
+                        {Number.isFinite(currentLoss) ? parseFloat(currentLoss).toFixed(4) : 'N/A'}
                     </Typography>
                 </Grid>
                 <Grid item xs={12} sm={6}>
                     <Typography variant="body2" color="textSecondary">Checkpoints Saved</Typography>
+                    <Typography variant="body1">{checkpointsSaved}</Typography>
                 </Grid>
                 <Grid item xs={12} sm={6}>
+                    <Typography variant="body2" color="textSecondary">Phase</Typography>
+                    <Typography variant="body1" sx={{ textTransform: 'capitalize' }}>
+                        {trainingStatus?.status || 'idle'}
                     </Typography>
                 </Grid>
+                {Number.isFinite(trainingStatus?.seed) && (
+                    <Grid item xs={12} sm={6}>
+                        <Typography variant="body2" color="textSecondary">Seed</Typography>
+                        <Typography variant="body1" sx={{ fontVariantNumeric: 'tabular-nums' }}>
+                            {trainingStatus.seed}
+                        </Typography>
+                    </Grid>
+                )}
             </Grid>
+            {lossPoints.length > 1 && (
                 <Box sx={trainingMonitorStyles.lossSection}>
                     <Typography variant="body2" color="textSecondary" gutterBottom>
                         <strong>Loss History</strong>
                     </Typography>
                     <Box sx={trainingMonitorStyles.lossChartBox}>
+                        <LossChart data={lossPoints} />
                     </Box>
+                    <Typography
+                        variant="caption"
+                        color="textSecondary"
+                        sx={trainingMonitorStyles.lossDisclaimer}
+                    >
+                        LoRA diffusion loss is noisy by design — each step samples
+                        a random noise level. Judge the result with your ears, not
+                        only with this chart.
+                    </Typography>
                 </Box>
             )}

app/frontend/src/components/WelcomePage.js CHANGED Viewed

@@ -1,6 +1,7 @@
 import React, { useState, useEffect } from 'react';
-import { Backdrop, Box, Fade, Typography, Button, Checkbox, FormControlLabel } from '@mui/material';
 import { welcomePageStyles } from '../theme';
 export default function WelcomePage({ open, onClose }) {
     const [titleVisible, setTitleVisible] = useState(false);
@@ -48,53 +49,41 @@ export default function WelcomePage({ open, onClose }) {
                 </Fade>
                 <Fade in={textVisible} timeout={1000}>
-                    <Box>
-                        <Typography
-                            variant="overline"
-                            sx={welcomePageStyles.overline}
-                        >
-                            An End-to-End Pipeline to Fine-Tune and Use Text-to-Audio Models.
-                        </Typography>
-                        <Typography
-                            variant="body2"
-                            sx={welcomePageStyles.footer}
-                        >
-                            @2025-2026 Misagh Azimi
-                        </Typography>
-                        <Typography
-                            variant="body2"
-                            sx={welcomePageStyles.version}
-                        >
-                            Version 0.1.1
-                        </Typography>
-                        <Button
-                            variant="contained"
-                            onClick={() => onClose(dontShowAgain)}
-                            sx={welcomePageStyles.ctaButton}
-                        >
-                            Get Started
-                        </Button>
-                        <Box sx={{ mt: 1.5 }}>
                             <FormControlLabel
                                 control={
                                     <Checkbox
                                         checked={dontShowAgain}
                                         onChange={(e) => setDontShowAgain(e.target.checked)}
                                         size="small"
-                                        sx={{ color: 'text.secondary' }}
                                     />
                                 }
                                 label={
-                                    <Typography variant="caption" sx={{ color: 'text.secondary' }}>
                                         Don't show this again
                                     </Typography>
                                 }
                             />
                         </Box>
-                    </Box>
                 </Fade>
             </Box>
         </Backdrop>

 import React, { useState, useEffect } from 'react';
+import { Backdrop, Box, Fade, Typography, Button, Checkbox, FormControlLabel, Stack } from '@mui/material';
 import { welcomePageStyles } from '../theme';
+import { APP_VERSION } from '../version';
 export default function WelcomePage({ open, onClose }) {
     const [titleVisible, setTitleVisible] = useState(false);
                 </Fade>
                 <Fade in={textVisible} timeout={1000}>
+                    <Stack alignItems="center">
+                        <Stack alignItems="center">
+                            <Typography variant="body3" color="text.secondary">
+                                ©2025-2026 Misagh Azimi
+                            </Typography>
+                            <Typography variant="body3" color="text.secondary">
+                                Version {APP_VERSION}
+                            </Typography>
+                        </Stack>
+                        <Box mt={5}>
+                            <Button
+                                variant="contained"
+                                onClick={() => onClose(dontShowAgain)}
+                            >
+                                Get Started
+                            </Button>
+                        </Box>
+                        <Box mt={6}>
                             <FormControlLabel
                                 control={
                                     <Checkbox
                                         checked={dontShowAgain}
                                         onChange={(e) => setDontShowAgain(e.target.checked)}
                                         size="small"
                                     />
                                 }
                                 label={
+                                    <Typography variant="caption" color="text.secondary">
                                         Don't show this again
                                     </Typography>
                                 }
                             />
                         </Box>
+                    </Stack>
                 </Fade>
             </Box>
         </Backdrop>

app/frontend/src/components/usePerformanceSession.js CHANGED Viewed

@@ -66,13 +66,21 @@ export function loadPresetIntoSession(name) {
 const CHANNEL_DEFAULT = {
     prompt: '',
     duration: 8,
-    durationMode: 'seconds',
     bars: 4,
     looping: true,
     muted: false,
     soloed: false,
     batchSize: 1,
-    knobs: { gain: -6, pan: 0, filter: 18000, delay: 0, reverb: 0 },
 };
 function defaultSession(channelCount) {
@@ -88,6 +96,18 @@ function defaultSession(channelCount) {
         randomSeed: true,
         seedValue: '',
         cueDeviceId: '',
         channels: Array.from({ length: channelCount }, () => ({
             ...CHANNEL_DEFAULT,
             knobs: { ...CHANNEL_DEFAULT.knobs },
@@ -104,11 +124,21 @@ function loadSession(channelCount) {
         // Merge against defaults so older saves don't crash on missing fields.
         // Length shifts (channel count change between releases) are absorbed
         // by always producing exactly `channelCount` channels.
-        const channels = Array.from({ length: channelCount }, (_, i) => ({
-            ...CHANNEL_DEFAULT,
-            ...(parsed.channels?.[i] || {}),
-            knobs: { ...CHANNEL_DEFAULT.knobs, ...(parsed.channels?.[i]?.knobs || {}) },
-        }));
         return { ...fallback, ...parsed, channels };
     } catch {
         return fallback;

+// Per-channel defaults. Spread into each channel by defaultSession() and used
+// as the merge base in loadSession(), so saves that predate a field still get
+// a value for it.
 const CHANNEL_DEFAULT = {
     prompt: '',
     duration: 8,
+    durationMode: 'bars',
     bars: 4,
     looping: true,
     muted: false,
     soloed: false,
     batchSize: 1,
+    knobs: { gain: -6, pan: 0, filter: 0, delay: 0, reverb: 0 },
+    // Fragment history metadata (id, prompt, duration, createdAt, starred,
+    // number). The Blob audio bodies live in IndexedDB under the
+    // `session-ch{N}` scope — see utils/fragmentStorage.js. Cleared on
+    // Fresh Start and overwritten on preset load.
+    // NOTE(review): `...CHANNEL_DEFAULT` copies this array by reference, and
+    // defaultSession() appears to re-clone only `knobs` — confirm nothing
+    // mutates `fragments` in place, otherwise channels would share one list.
+    fragments: [],
+    // Which fragment was loaded into the channel strip last; restored on
+    // reload so the channel comes back ready to play instead of empty.
+    committedFragmentId: null,
 };
 function defaultSession(channelCount) {
         randomSeed: true,
         seedValue: '',
         cueDeviceId: '',
+        // Master FX defaults — the FX are always-on; the wet level on the
+        // master bus is determined entirely by per-channel DLY/REV send
+        // levels. We only persist the IR choice and the delay division.
+        masterReverbIR: 'hall',
+        masterDelayDivision: '1/4',
+        // Prompt auto-inject fields. Each is appended (comma-separated) to
+        // every generated prompt when set. Key and Time accept any text;
+        // empty = no injection. BPM is a toggle that, when on, grabs the
+        // live master BPM (top-bar value) at generation time.
+        promptKey: '',
+        promptInjectBpm: false,
+        promptTimeSig: '',
         channels: Array.from({ length: channelCount }, () => ({
             ...CHANNEL_DEFAULT,
             knobs: { ...CHANNEL_DEFAULT.knobs },
         // Merge against defaults so older saves don't crash on missing fields.
         // Length shifts (channel count change between releases) are absorbed
         // by always producing exactly `channelCount` channels.
+        //
+        // Migration: pre-rename saves used `takes` / `committedTakeId`. Copy
+        // those into the new `fragments` / `committedFragmentId` slots when
+        // present, so users' existing generations carry over after the
+        // "Takes → Fragments" rename. Old fields are left in place but unused.
+        const channels = Array.from({ length: channelCount }, (_, i) => {
+            const ch = parsed.channels?.[i] || {};
+            return {
+                ...CHANNEL_DEFAULT,
+                ...ch,
+                fragments: ch.fragments ?? ch.takes ?? [],
+                committedFragmentId: ch.committedFragmentId ?? ch.committedTakeId ?? null,
+                knobs: { ...CHANNEL_DEFAULT.knobs, ...(ch.knobs || {}) },
+            };
+        });
         return { ...fallback, ...parsed, channels };
     } catch {
         return fallback;

app/frontend/src/theme.js CHANGED Viewed

The diff for this file is too large to render. See raw diff

app/frontend/src/tooltips.js ADDED Viewed

	@@ -0,0 +1,134 @@

+export const TIPS = {
+    // App.js — LoRA training hyperparameters + model row actions.
+    training: {
+        downloadModel: 'Download this model',
+        deleteFineTuned: 'Delete fine-tuned model',
+        steps: "SA3's documented quick-start is 1,000 steps.",
+        adapter: "DoRA-rows is SA3's upstream default and works best for most stylistic LoRAs. The -xs variants freeze SVD bases and only train a tiny core matrix — far fewer parameters, useful when VRAM is tight. BoRA scales both rows and columns independently (more expressive, more parameters).",
+        checkpointEvery: 'How often a LoRA .safetensors snapshot gets written. Auto picks ~10 checkpoints per run (capped 250–1 000 steps). Lower = more granular but more disk; higher = fewer files to compare.',
+        batchSize: 'SA3 examples use 1. Each extra sample adds ~1–2 GB of activations. Raise only on roomy GPUs (≥24 GB); medium-base activations are heavy. Lower if you hit CUDA OOM.',
+        precision: 'Cast applied to the frozen base weights only; LoRA parameters stay in fp32 for the optimizer. bf16 halves the VRAM used by the base with negligible quality cost on Ampere and newer cards.',
+        rank: "Capacity of the LoRA update — rank-k matrices A (k×in) and B (out×k) are trained. Higher rank = more expressive but larger file and more VRAM. r=16 fits comfortably on 16 GB and is SA3's default.",
+        alpha: 'Scaling factor for the LoRA update. Effective scaling is alpha / rank — setting alpha = rank gives a scaling of 1.0. Conventional choice: alpha = rank.',
+        dropout: 'Regularization probability applied to LoRA inputs during training. 0 is fine for most cases — raise to ~0.05 if you see overfitting on small datasets.',
+        seed: 'Random seed for reproducibility — same dataset + same hyperparameters + same seed produces the same LoRA. Change it to re-roll with different sampling behaviour.',
+        learningRate: "AdamW step size for the LoRA weights (base stays frozen). SA3's default is 1e-4, which works for most runs. Too high destabilizes training (loss spikes, artifacts); too low barely moves the adapter. Halve it if loss is erratic.",
+        sampleLength: 'Audio fed to the model per training step. Long clips get random-cropped to this length each step; short clips get silence-padded. Capped at the base model\'s native length (~120s small, ~380s medium) — longer windows cost markedly more VRAM and step time, so raise it only for long-form material (pre-encoding helps).',
+        includeLayers: 'Space-separated substrings — only layers whose fully-qualified name contains one of these get LoRA. Empty = all matching Linear/Conv1d layers. Example: transformer.layers.',
+        excludeLayers: 'Space-separated substrings — matching layers are skipped, even if they also match Include. SA3-docs default (seconds_total to_local_embed) prevents conditioner-hijacking on small datasets.',
+    },
+    // PerformancePanel.js — top transport bar + bottom controls.
+    perf: {
+        notDownloaded: 'Not downloaded — open Checkpoint Manager',
+        midiSettings: 'MIDI settings & mappings',
+        presets: 'Save / load presets',
+        deletePreset: 'Delete preset',
+        launchQuant: "Launch quantization — match Ableton's",
+        deleteFineTuned: 'Delete fine-tuned model',
+        deleteLora: 'Delete LoRA',
+        promptKey: 'Auto-inject Key. Leave empty to skip.',
+        timeSig: 'Auto-inject Time signature. Leave empty to skip.',
+        link: ({ installing, available, enabled, peers }) =>
+            installing
+                ? 'Installing LinkPython-extern…'
+                : !available
+                    ? 'Click to install Ableton Link script'
+                    : enabled
+                        ? `Link on — ${peers} peer${peers === 1 ? '' : 's'} (click to disable)`
+                        : 'Click to sync BPM with Ableton Link',
+        midiMode: ({ supported, permissionError, learnMode }) =>
+            !supported
+                ? (permissionError || 'Web MIDI is not available')
+                : learnMode
+                    ? 'Exit MIDI mode (Esc)'
+                    : 'Enter MIDI mode — click a control then move a hardware knob/button to bind',
+        audioSetup: (cueSupported) =>
+            cueSupported
+                ? 'Audio setup — choose output device'
+                : 'Audio device selection requires Chrome/Edge (AudioContext.setSinkId). Output falls back to system default.',
+        restoreDefaults: (armed) =>
+            armed
+                ? 'Click again within 3s to confirm — clears session, fragments, and MIDI mappings'
+                : 'Reset all panel settings, clear fragments, and clear MIDI mappings',
+        steps: (isDistilled) =>
+            isDistilled
+                ? 'Locked at 8 steps for distilled SA3 models — pick a *-base checkpoint to override'
+                : 'Diffusion steps per generation (more = higher quality, slower)',
+        bpmInject: (on, bpm) =>
+            on
+                ? `Injecting master BPM (${Math.round(bpm)}) into prompts — click to disable`
+                : 'Click to auto-inject the master BPM (top bar) into every prompt',
+    },
+    // PerformanceChannel.js — per-channel strip.
+    channel: {
+        mute: 'Mute',
+        solo: 'Solo',
+        batch: "Batch generate Fragments and cue below.",
+        loop: (looping, durationMode) =>
+            looping
+                ? (durationMode === 'bars'
+                    ? 'Loop'
+                    : 'Playback loop on')
+                : 'Loop off',
+        generateDisabled: (generating, canGenerate, hasPrompt) =>
+            generating
+                ? ''
+                : !canGenerate
+                    ? 'Pick a model in the Generation tab first'
+                    : !hasPrompt
+                        ? 'Enter a prompt to generate'
+                        : '',
+        variation: (loaded) =>
+            loaded
+                ? 'Variation from the current fragment'
+                : 'Generate a fragment first, then create variations of it',
+    },
+    // DatasetPrep.js — dataset workbench.
+    dataset: {
+        richAnnotate: 'Adds genre / mood / instrument tags using LAION-CLAP. Requires the CLAP weights — downloadable from the Checkpoint Manager.',
+        skipAnnotated: 'When on, Auto-annotate skips clips that already have an annotation. Off means every run overwrites existing prompts.',
+        deleteProject: 'Delete this project (folder, audio, sidecars, drafts) — irreversible',
+        discardChanges: 'Delete unsaved changes — reverts to the last created dataset (removes any audio added since)',
+        saveDraft: "Save a draft — persists across app restarts but isn't the SA3 sidecar form",
+        createDataset: 'Create Dataset — writes the .txt sidecars (overwrites the previous dataset)',
+        selectClips: 'Click to select these clips — then Auto-annotate them.',
+        autoAnnotateClip: 'Auto-annotate this clip (overwrites any current prompt)',
+        sliceClip: 'Slice this clip into shorter children (immediate)',
+        removeClip: 'Remove this clip from the project (immediate)',
+        tooShort: (thresholdSec) =>
+            `Shorter than ${thresholdSec}s — gets silence-padded into each batch. Consider deleting. Click to select.`,
+        duplicates: (count) =>
+            `${count} group${count === 1 ? '' : 's'} of clips share the same annotation. Bad for training diversity — click to select all of them.`,
+        unsupported: (accepted) =>
+            `SA3 only trains on ${(accepted || []).join(', ')}. These clips will be silently skipped at train time — re-export them as .wav (or another accepted format) before committing. Click to select.`,
+    },
+    // LoraStack.js — LoRA slot stack.
+    lora: {
+        stackInfo: (max) => `Blend up to ${max} LoRAs at any strength`,
+        dragReorder: 'Drag to reorder (slot 0 loads first)',
+        bypass: (bypassed) =>
+            bypassed ? 'Bypassed (strength 0) — click to enable' : 'Bypass this slot',
+    },
+    // Fragment lists — ChannelFragmentHistory.js + GeneratedFragmentsWindow.js.
+    fragments: {
+        clearAll: 'Clear all (delete every fragment from disk)',
+        deleteFromDisk: 'Delete from disk',
+        revealInFolder: 'Show in folder (reveal this file on disk)',
+        audition: (isAuditioning) =>
+            isAuditioning ? 'Stop cue' : 'Audition through cue output',
+        star: (starred) =>
+            starred ? 'Unstar' : 'Star (keep through eviction)',
+        commit: (committed) =>
+            committed ? 'Currently loaded' : 'Load into channel',
+    },
+    // CheckpointRow.js — checkpoint catalog rows.
+    checkpoints: {
+        gatedAccess: "Open on HuggingFace to accept the model's gated-access terms",
+    },
+};

app/frontend/src/utils/cueAudio.js CHANGED Viewed

@@ -11,6 +11,7 @@
 let ctx = null;
 let currentSource = null;
 let currentEndedHandler = null;
 let currentSinkId = '';
@@ -132,19 +133,26 @@ export async function playBlob(blob, { onEnded } = {}) {
     const src = c.createBufferSource();
     src.buffer = buf;
-    // Connect into the splitter, NOT directly to destination — that's how
-    // the channel-pair routing applies.
-    src.connect(cueSplitter);
     const handler = () => {
         if (currentSource === src) {
             currentSource = null;
             currentEndedHandler = null;
         }
         onEnded?.();
     };
     src.addEventListener('ended', handler);
     currentSource = src;
     currentEndedHandler = handler;
     src.start();
@@ -157,12 +165,27 @@ export async function playBlob(blob, { onEnded } = {}) {
 export function stopCue() {
     if (currentSource) {
         if (currentEndedHandler) {
-            currentSource.removeEventListener('ended', currentEndedHandler);
         }
-        try { currentSource.stop(); } catch { /* already stopped */ }
-        try { currentSource.disconnect(); } catch { /* already disconnected */ }
         currentSource = null;
         currentEndedHandler = null;
     }
 }

 let ctx = null;
 let currentSource = null;
+let currentSourceFade = null;  // per-source gain used by stopCue() to ramp out
 let currentEndedHandler = null;
 let currentSinkId = '';
     const src = c.createBufferSource();
     src.buffer = buf;
+    // Per-source fade gain so stopCue() can ramp out instead of hard-cut
+    // (a hard cut at non-zero samples is what produces the click /
+    // crackle when switching fragments rapidly). The fade graph is:
+    //   source → fadeGain → cueSplitter → cueMerger → destination
+    const fadeGain = c.createGain();
+    fadeGain.gain.value = 1;
+    src.connect(fadeGain);
+    fadeGain.connect(cueSplitter);
     const handler = () => {
         if (currentSource === src) {
             currentSource = null;
+            currentSourceFade = null;
             currentEndedHandler = null;
         }
         onEnded?.();
     };
     src.addEventListener('ended', handler);
     currentSource = src;
+    currentSourceFade = fadeGain;
     currentEndedHandler = handler;
     src.start();
 export function stopCue() {
     if (currentSource) {
+        const src = currentSource;
+        const fade = currentSourceFade;
         if (currentEndedHandler) {
+            src.removeEventListener('ended', currentEndedHandler);
         }
+        const now = ctx ? ctx.currentTime : 0;
+        const FADE = 0.012;
+        try {
+            if (fade) {
+                fade.gain.cancelScheduledValues(now);
+                fade.gain.setValueAtTime(fade.gain.value, now);
+                fade.gain.linearRampToValueAtTime(0, now + FADE);
+            }
+            src.stop(now + FADE + 0.005);
+        } catch { /* already stopped */ }
+        window.setTimeout(() => {
+            try { src.disconnect(); } catch { /* ok */ }
+            try { fade && fade.disconnect(); } catch { /* ok */ }
+        }, Math.ceil((FADE + 0.02) * 1000));
         currentSource = null;
+        currentSourceFade = null;
         currentEndedHandler = null;
     }
 }

app/frontend/src/utils/fragmentDrag.js ADDED Viewed

	@@ -0,0 +1,25 @@

+// In-app drag handoff for generated fragments.
+//
+// The HTML drag-and-drop dataTransfer can only carry strings, so when a
+// fragment is dragged from the Generated Fragments window into the Edit tab's
+// source dropzone we stash its in-memory audio Blob here on dragStart and read
+// it back on drop. This lets EditPanel use the blob directly instead of
+// re-fetching by filename — which is immune to any divergence between the
+// fragment's in-memory name and what actually exists on disk.
+//
+// Falls back gracefully: if no blob was stashed (e.g. a not-yet-preloaded
+// disk fragment), the consumer drops back to the filename-based fetch.
+let _payload = null; // { filename: string, blob: Blob } | null
+export function setFragmentDragPayload(payload) {
+    _payload = payload;
+}
+export function getFragmentDragPayload() {
+    return _payload;
+}
+export function clearFragmentDragPayload() {
+    _payload = null;
+}