Spaces:

WeReCooking
/

ACE-Step-CPU

Running

App Files Community

Nekochu committed 22 days ago

Commit

32de701

1 Parent(s): 4d9a556

cancel, captioning, preprocessing, sidecar upload, elapsed time, GeneratorExit fix

Browse files

Files changed (2) hide show

app.py +162 -126
train_engine.py +13 -8

app.py CHANGED Viewed

@@ -19,10 +19,12 @@ from train_engine import (
     preprocess_audio,
     train_lora_generator,
     cancel_training,
     get_trained_loras as _get_trained_loras_engine,
     MAX_TRAINING_TIME,
 )
 logger = logging.getLogger(__name__)
 # ---------------------------------------------------------------------------
@@ -93,12 +95,14 @@ def _get_props():
     return {}
-def _poll_job(job_id, timeout=600, progress_cb=None):
-    """Poll a job until done/error/timeout. Returns (status, elapsed)."""
     t0 = time.time()
     while time.time() - t0 < timeout:
         try:
-            r = requests.get(f"{ACE_SERVER}/job", params={"id": job_id}, timeout=10)
             data = r.json()
             status = data.get("status", "unknown")
             if progress_cb:
@@ -107,7 +111,7 @@ def _poll_job(job_id, timeout=600, progress_cb=None):
                 return status, time.time() - t0
         except Exception:
             pass
-        time.sleep(2)
     return "timeout", time.time() - t0
@@ -121,58 +125,41 @@ def _fetch_result(job_id, timeout=60):
     return r
-def _caption_via_understand(audio_path, timeout=120):
-    """Call ace-server /understand to get a rich caption for an audio file.
-    Returns a dict with caption, bpm, key, signature, lyrics on success,
-    or None on failure (caller should fall back to librosa).
-    """
     fname = os.path.basename(audio_path)
     try:
         with open(audio_path, "rb") as f:
-            audio_b64 = base64.b64encode(f.read()).decode("ascii")
-    except Exception as exc:
-        logger.warning("[Caption] %s: failed to read file: %s", fname, exc)
-        return None
-    # Submit
-    try:
-        r = requests.post(
-            f"{ACE_SERVER}/understand",
-            json={"audio": audio_b64},
-            timeout=30,
-        )
         if r.status_code != 200:
-            logger.warning("[Caption] %s: /understand returned %d: %s", fname, r.status_code, r.text[:200])
             return None
         job_id = r.json().get("id")
         if not job_id:
-            logger.warning("[Caption] %s: /understand returned no job id", fname)
             return None
     except Exception as exc:
         logger.warning("[Caption] %s: /understand submit failed: %s", fname, exc)
         return None
-    # Poll until done
-    status, _ = _poll_job(job_id, timeout=timeout)
     if status != "done":
-        logger.warning("[Caption] %s: /understand job %s -> %s", fname, job_id, status)
         return None
-    # Fetch result
     try:
         r = _fetch_result(job_id, timeout=30)
         if r.status_code != 200:
-            logger.warning("[Caption] %s: /understand result fetch failed: %d", fname, r.status_code)
             return None
         data = r.json()
-        # The result should contain caption, bpm, key, signature, lyrics
         if isinstance(data, dict) and data.get("caption"):
             return data
-        logger.warning("[Caption] %s: /understand returned no caption field", fname)
         return None
-    except Exception as exc:
-        logger.warning("[Caption] %s: /understand result parse failed: %s", fname, exc)
         return None
@@ -559,7 +546,13 @@ def gradio_main():
         train_start = time.time()
         def _log(msg):
-            _train_log_lines.append(msg)
             if len(_train_log_lines) > 2000:
                 _train_log_lines[:] = _train_log_lines[-1000:]
@@ -587,7 +580,9 @@ def gradio_main():
         work_dir = os.path.join(OUTPUT_DIR, "train_workspace", lora_name)
         os.makedirs(work_dir, exist_ok=True)
         audio_dir = os.path.join(work_dir, "audio_input")
-        os.makedirs(audio_dir, exist_ok=True)
         adapter_out = os.path.join(ADAPTER_DIR, lora_name)
         os.makedirs(adapter_out, exist_ok=True)
@@ -603,6 +598,10 @@ def gradio_main():
         for f in audio_files:
             src = f.name if hasattr(f, "name") else str(f)
             fname = os.path.basename(src)
             try:
                 dur = _lr.get_duration(path=src)
             except Exception:
@@ -643,37 +642,61 @@ def gradio_main():
              f"Epochs: {epochs} | LR: {lr} | Rank: {rank}")
         yield _log_text(), gr.Button(visible=False), gr.Button(visible=True), gr.File()
-        # Caption each audio file via ace-server /understand BEFORE stopping it
-        if _server_ok():
-            _log("[INFO] Captioning audio via ace-server /understand...")
-            yield _log_text(), gr.Button(visible=False), gr.Button(visible=True), gr.File()
-            for audio_fname in sorted(os.listdir(audio_dir)):
-                full_path = os.path.join(audio_dir, audio_fname)
-                if not os.path.isfile(full_path) or audio_fname.endswith(".json"):
-                    continue
-                caption_json_path = full_path + ".json"
-                caption_data = _caption_via_understand(full_path, timeout=120)
-                if caption_data:
-                    _log(f"[Caption] {audio_fname}: using ace-server /understand")
-                    with open(caption_json_path, "w") as cj:
-                        json.dump(caption_data, cj)
-                else:
-                    # Fallback to librosa for basic metadata
-                    _log(f"[Caption] {audio_fname}: fallback to librosa")
-                    try:
-                        y_cap, sr_cap = _lr.load(full_path, sr=None, mono=True)
-                        tempo, _ = _lr.beat.beat_track(y=y_cap, sr=sr_cap)
-                        bpm_val = float(tempo) if hasattr(tempo, '__float__') else float(tempo[0])
-                        fallback = {"caption": "", "bpm": round(bpm_val), "key": "", "signature": "", "lyrics": ""}
-                        with open(caption_json_path, "w") as cj:
-                            json.dump(fallback, cj)
-                    except Exception as cap_exc:
-                        _log(f"[Caption] {audio_fname}: librosa fallback also failed: {cap_exc}")
                 yield _log_text(), gr.Button(visible=False), gr.Button(visible=True), gr.File()
-        else:
-            _log("[INFO] ace-server not running, skipping /understand captioning")
             yield _log_text(), gr.Button(visible=False), gr.Button(visible=True), gr.File()
         # Stop ace-server before training (frees memory)
         _training_lock.acquire()
         _log("[INFO] Stopping ace-server for training...")
@@ -681,28 +704,54 @@ def gradio_main():
         _stop_ace_server()
         _gc.collect()
         try:
-            # -- Phase 1: Preprocessing --
-            _log("[Step 1/2] Preprocessing audio...")
-            yield _log_text(), gr.Button(visible=False), gr.Button(visible=True), gr.File()
             preprocessed_dir = os.path.join(work_dir, "preprocessed_tensors")
             def preprocess_progress(current, total, desc):
                 _log(f"  {desc} ({current}/{total})")
-            result = preprocess_audio(
-                audio_dir=audio_dir,
-                output_dir=preprocessed_dir,
-                checkpoint_dir=ACE_CHECKPOINT_DIR,
-                device="cpu",
-                variant="turbo",
-                max_duration=float(MAX_TOTAL_AUDIO),
-                progress_callback=preprocess_progress,
-                cancel_check=lambda: False,
-            )
             yield _log_text(), gr.Button(visible=False), gr.Button(visible=True), gr.File()
             processed = result.get("processed", 0)
             failed = result.get("failed", 0)
             total = result.get("total", 0)
@@ -740,7 +789,6 @@ def gradio_main():
                 device="cpu",
                 log_every=5,
             ):
-                # Timeout check
                 elapsed = time.time() - train_start
                 if elapsed > MAX_TRAINING_TIME:
                     _log(f"[WARN] Training timed out after {int(elapsed)}s")
@@ -756,6 +804,16 @@ def gradio_main():
             _log(f"[INFO] Total time: {time.time() - train_start:.0f}s")
             yield _log_text(), gr.Button(visible=False), gr.Button(visible=True), gr.File()
         except Exception as exc:
             _log(f"[FAIL] Training error: {exc}")
             import traceback
@@ -763,50 +821,36 @@ def gradio_main():
             yield _log_text(), gr.Button(visible=True), gr.Button(visible=False), gr.File()
         finally:
-            _training_lock.release()
-            # Always restart ace-server
-            _log("[INFO] Restarting ace-server...")
-            yield _log_text(), gr.Button(visible=False), gr.Button(visible=True), gr.File()
-            _gc.collect()
-            ok = _start_ace_server()
-            if ok:
-                _log("[OK] ace-server restarted successfully")
-            else:
-                _log("[WARN] ace-server may not have restarted -- check logs")
-            if os.path.isdir(adapter_out):
-                logger.info("Adapter dir %s: %s", adapter_out, os.listdir(adapter_out))
-            else:
-                logger.warning("Adapter dir %s does not exist", adapter_out)
-            adapter_safetensors = os.path.join(adapter_out, "adapter_model.safetensors")
-            if os.path.isfile(adapter_safetensors):
-                # Copy to a temp file so Gradio doesn't try to validate /app paths
-                # (avoids InvalidPathError: "Cannot move /app to the gradio cache dir
-                # because it was not uploaded by a user")
-                tmp_out = tempfile.NamedTemporaryFile(
-                    suffix=".safetensors",
-                    prefix=f"{lora_name}_",
-                    delete=False,
-                )
-                tmp_out.close()
-                shutil.copy2(adapter_safetensors, tmp_out.name)
-                _log(f"[OK] LoRA saved: {lora_name}")
-                yield _log_text(), gr.Button(visible=True), gr.Button(visible=False), gr.File(value=tmp_out.name, visible=True)
-            else:
-                yield _log_text(), gr.Button(visible=True), gr.Button(visible=False), gr.File()
-            # Clean up training workspace (preprocessed tensors, temp audio, etc.)
-            shutil.rmtree(work_dir, ignore_errors=True)
     # -- Cancel handler --
     def _on_cancel():
         cancel_training()
         logger.info("Cancel requested by user")
-        return "Cancelling after current epoch... please wait"
-    # -- Check log handler --
-    def _check_log():
-        if _train_log_lines:
-            return "\n".join(_train_log_lines)
-        return "No training log available."
     # -- Build LM model choices --
     def _lm_model_choices():
@@ -909,9 +953,9 @@ def gradio_main():
                 with gr.Row(elem_classes="compact-row"):
                     with gr.Column(scale=2):
                         train_audio = gr.File(
-                            label="Training Audio Files",
                             file_count="multiple",
-                            file_types=["audio"],
                         )
                     with gr.Column(scale=1):
                         lora_name = gr.Textbox(label="LoRA Name", value="my-lora")
@@ -928,7 +972,6 @@ def gradio_main():
                 with gr.Row(elem_classes="compact-row"):
                     train_btn = gr.Button("Train", variant="primary", scale=2)
                     cancel_btn = gr.Button("Cancel Training", variant="stop", visible=False, scale=1)
-                    log_btn = gr.Button("Check Log", scale=1)
                 train_output_file = gr.File(label="Trained LoRA (download)", visible=False)
                 train_log = gr.Textbox(
@@ -975,13 +1018,6 @@ def gradio_main():
                     outputs=[train_log],
                 )
-                # Check log: show last training output
-                log_btn.click(
-                    _check_log,
-                    outputs=[train_log],
-                    api_name="check_log",
-                )
         demo.launch(
             server_name="0.0.0.0",
             server_port=7860,

     preprocess_audio,
     train_lora_generator,
     cancel_training,
+    _training_cancel,
     get_trained_loras as _get_trained_loras_engine,
     MAX_TRAINING_TIME,
 )
+logging.basicConfig(level=logging.INFO, format="%(message)s", stream=sys.stdout)
 logger = logging.getLogger(__name__)
 # ---------------------------------------------------------------------------
     return {}
+def _poll_job(job_id, timeout=600, progress_cb=None, cancel_check=None):
+    """Poll a job until done/error/timeout/cancelled. Returns (status, elapsed)."""
     t0 = time.time()
     while time.time() - t0 < timeout:
+        if cancel_check and cancel_check():
+            return "cancelled", time.time() - t0
         try:
+            r = requests.get(f"{ACE_SERVER}/job", params={"id": job_id}, timeout=5)
             data = r.json()
             status = data.get("status", "unknown")
             if progress_cb:
                 return status, time.time() - t0
         except Exception:
             pass
+        time.sleep(1)
     return "timeout", time.time() - t0
     return r
+def _caption_via_understand(audio_path, timeout=600, cancel_check=None):
+    """Call ace-server /understand for a rich caption. Returns dict or None."""
     fname = os.path.basename(audio_path)
     try:
         with open(audio_path, "rb") as f:
+            r = requests.post(
+                f"{ACE_SERVER}/understand",
+                files={"audio": (fname, f, "audio/mpeg")},
+                timeout=30,
+            )
         if r.status_code != 200:
+            logger.warning("[Caption] %s: /understand %d: %s", fname, r.status_code, r.text[:200])
             return None
         job_id = r.json().get("id")
         if not job_id:
             return None
     except Exception as exc:
         logger.warning("[Caption] %s: /understand submit failed: %s", fname, exc)
         return None
+    status, elapsed = _poll_job(job_id, timeout=timeout, cancel_check=cancel_check)
     if status != "done":
+        logger.warning("[Caption] %s: /understand -> %s (%.0fs)", fname, status, elapsed)
         return None
     try:
         r = _fetch_result(job_id, timeout=30)
         if r.status_code != 200:
             return None
         data = r.json()
         if isinstance(data, dict) and data.get("caption"):
             return data
         return None
+    except Exception:
         return None
         train_start = time.time()
         def _log(msg):
+            elapsed = int(time.time() - train_start)
+            m, s = divmod(elapsed, 60)
+            h, m = divmod(m, 60)
+            ts = f"+{h}:{m:02d}:{s:02d}" if h else f"+{m:02d}:{s:02d}"
+            line = f"[{ts}] {msg}"
+            _train_log_lines.append(line)
+            logger.info(msg)
             if len(_train_log_lines) > 2000:
                 _train_log_lines[:] = _train_log_lines[-1000:]
         work_dir = os.path.join(OUTPUT_DIR, "train_workspace", lora_name)
         os.makedirs(work_dir, exist_ok=True)
         audio_dir = os.path.join(work_dir, "audio_input")
+        if os.path.exists(audio_dir):
+            shutil.rmtree(audio_dir)
+        os.makedirs(audio_dir)
         adapter_out = os.path.join(ADAPTER_DIR, lora_name)
         os.makedirs(adapter_out, exist_ok=True)
         for f in audio_files:
             src = f.name if hasattr(f, "name") else str(f)
             fname = os.path.basename(src)
+            # .txt/.json sidecars: copy as caption files, skip duration check
+            if fname.lower().endswith((".txt", ".json")):
+                shutil.copy2(src, os.path.join(audio_dir, fname))
+                continue
             try:
                 dur = _lr.get_duration(path=src)
             except Exception:
              f"Epochs: {epochs} | LR: {lr} | Rank: {rank}")
         yield _log_text(), gr.Button(visible=False), gr.Button(visible=True), gr.File()
+        # Caption audio files: GGUF LM if ace-server running, else librosa
+        use_understand = _server_ok()
+        method = "GGUF LM (BPM, key, mood, lyrics)" if use_understand else "librosa (BPM only)"
+        _log(f"[INFO] Auto-captioning via {method}...")
+        yield _log_text(), gr.Button(visible=False), gr.Button(visible=True), gr.File()
+        for audio_fname in sorted(os.listdir(audio_dir)):
+            if _training_cancel.is_set():
+                break
+            full_path = os.path.join(audio_dir, audio_fname)
+            if not os.path.isfile(full_path):
+                continue
+            ext = audio_fname.lower().rsplit(".", 1)[-1] if "." in audio_fname else ""
+            if ext in ("json", "txt"):
+                continue
+            stem = audio_fname.rsplit(".", 1)[0] if "." in audio_fname else audio_fname
+            sidecar_json = os.path.join(audio_dir, stem + ".json")
+            sidecar_txt = os.path.join(audio_dir, stem + ".txt")
+            if os.path.isfile(sidecar_json) or os.path.isfile(sidecar_txt):
+                _log(f"  {audio_fname}: using caption file")
                 yield _log_text(), gr.Button(visible=False), gr.Button(visible=True), gr.File()
+                continue
+            caption_data = None
+            if use_understand:
+                _log(f"  {audio_fname}: GGUF LM captioning...")
+                yield _log_text(), gr.Button(visible=False), gr.Button(visible=True), gr.File()
+                caption_data = _caption_via_understand(
+                    full_path, timeout=600,
+                    cancel_check=lambda: _training_cancel.is_set(),
+                )
+            if caption_data:
+                bpm_s = caption_data.get("bpm", "?")
+                key_s = caption_data.get("keyscale", caption_data.get("key", "?"))
+                _log(f"  {audio_fname}: OK (BPM={bpm_s}, key={key_s})")
+                with open(sidecar_json, "w") as cj:
+                    json.dump(caption_data, cj)
+            else:
+                try:
+                    y_cap, sr_cap = _lr.load(full_path, sr=None, mono=True)
+                    tempo_arr, _ = _lr.beat.beat_track(y=y_cap, sr=sr_cap)
+                    bpm_val = int(round(float(tempo_arr.item() if hasattr(tempo_arr, 'item') else tempo_arr)))
+                    fallback = {"caption": "", "bpm": bpm_val, "key": "", "signature": "", "lyrics": ""}
+                    with open(sidecar_json, "w") as cj:
+                        json.dump(fallback, cj)
+                    _log(f"  {audio_fname}: librosa fallback BPM={bpm_val}")
+                except Exception as cap_exc:
+                    _log(f"  {audio_fname}: caption failed: {cap_exc}")
             yield _log_text(), gr.Button(visible=False), gr.Button(visible=True), gr.File()
+        if _training_cancel.is_set():
+            _training_cancel.clear()
+            _log("[CANCELLED] Stopped")
+            yield _log_text(), gr.Button(visible=True), gr.Button(visible=False), gr.File()
+            shutil.rmtree(work_dir, ignore_errors=True)
+            return
         # Stop ace-server before training (frees memory)
         _training_lock.acquire()
         _log("[INFO] Stopping ace-server for training...")
         _stop_ace_server()
         _gc.collect()
+        _cleanup_done = False
         try:
+            # -- Phase 1: Preprocessing (runs in thread for live progress) --
             preprocessed_dir = os.path.join(work_dir, "preprocessed_tensors")
+            _preprocess_log_len = len(_train_log_lines)
             def preprocess_progress(current, total, desc):
                 _log(f"  {desc} ({current}/{total})")
+            _preprocess_result = [None]
+            _preprocess_error = [None]
+            def _run_preprocess():
+                try:
+                    _preprocess_result[0] = preprocess_audio(
+                        audio_dir=audio_dir,
+                        output_dir=preprocessed_dir,
+                        checkpoint_dir=ACE_CHECKPOINT_DIR,
+                        device="cpu",
+                        variant="turbo",
+                        max_duration=float(MAX_TOTAL_AUDIO),
+                        progress_callback=preprocess_progress,
+                        cancel_check=lambda: _training_cancel.is_set(),
+                    )
+                except Exception as exc:
+                    _preprocess_error[0] = exc
+            _log("[Step 1/2] Encoding audio → training data (VAE + text encoder)...")
             yield _log_text(), gr.Button(visible=False), gr.Button(visible=True), gr.File()
+            t = threading.Thread(target=_run_preprocess, daemon=True)
+            t.start()
+            while t.is_alive():
+                t.join(timeout=3)
+                if len(_train_log_lines) > _preprocess_log_len:
+                    _preprocess_log_len = len(_train_log_lines)
+                    yield _log_text(), gr.Button(visible=False), gr.Button(visible=True), gr.File()
+            if _preprocess_error[0]:
+                raise _preprocess_error[0]
+            result = _preprocess_result[0]
+            if _training_cancel.is_set():
+                _training_cancel.clear()
+                _log("[CANCELLED] Stopped during preprocessing")
+                yield _log_text(), gr.Button(visible=True), gr.Button(visible=False), gr.File()
+                return
             processed = result.get("processed", 0)
             failed = result.get("failed", 0)
             total = result.get("total", 0)
                 device="cpu",
                 log_every=5,
             ):
                 elapsed = time.time() - train_start
                 if elapsed > MAX_TRAINING_TIME:
                     _log(f"[WARN] Training timed out after {int(elapsed)}s")
             _log(f"[INFO] Total time: {time.time() - train_start:.0f}s")
             yield _log_text(), gr.Button(visible=False), gr.Button(visible=True), gr.File()
+        except GeneratorExit:
+            _training_cancel.set()
+            logger.info("Generator closed by Gradio, cleaning up")
+            _cleanup_done = True
+            _training_lock.release()
+            _gc.collect()
+            _start_ace_server()
+            shutil.rmtree(work_dir, ignore_errors=True)
+            return
         except Exception as exc:
             _log(f"[FAIL] Training error: {exc}")
             import traceback
             yield _log_text(), gr.Button(visible=True), gr.Button(visible=False), gr.File()
         finally:
+            if not _cleanup_done:
+                _training_lock.release()
+                _log("[INFO] Restarting ace-server...")
+                yield _log_text(), gr.Button(visible=False), gr.Button(visible=True), gr.File()
+                _gc.collect()
+                ok = _start_ace_server()
+                if ok:
+                    _log("[OK] ace-server restarted successfully")
+                else:
+                    _log("[WARN] ace-server may not have restarted -- check logs")
+                adapter_safetensors = os.path.join(adapter_out, "adapter_model.safetensors")
+                if os.path.isfile(adapter_safetensors):
+                    tmp_out = tempfile.NamedTemporaryFile(
+                        suffix=".safetensors",
+                        prefix=f"{lora_name}_",
+                        delete=False,
+                    )
+                    tmp_out.close()
+                    shutil.copy2(adapter_safetensors, tmp_out.name)
+                    _log(f"[OK] LoRA saved: {lora_name}")
+                    yield _log_text(), gr.Button(visible=True), gr.Button(visible=False), gr.File(value=tmp_out.name, visible=True)
+                else:
+                    yield _log_text(), gr.Button(visible=True), gr.Button(visible=False), gr.File()
+                shutil.rmtree(work_dir, ignore_errors=True)
     # -- Cancel handler --
     def _on_cancel():
         cancel_training()
         logger.info("Cancel requested by user")
+        return "Cancelling..."
     # -- Build LM model choices --
     def _lm_model_choices():
                 with gr.Row(elem_classes="compact-row"):
                     with gr.Column(scale=2):
                         train_audio = gr.File(
+                            label="Training Audio + Caption Files",
                             file_count="multiple",
+                            file_types=["audio", ".txt", ".json"],
                         )
                     with gr.Column(scale=1):
                         lora_name = gr.Textbox(label="LoRA Name", value="my-lora")
                 with gr.Row(elem_classes="compact-row"):
                     train_btn = gr.Button("Train", variant="primary", scale=2)
                     cancel_btn = gr.Button("Cancel Training", variant="stop", visible=False, scale=1)
                 train_output_file = gr.File(label="Trained LoRA (download)", visible=False)
                 train_log = gr.Textbox(
                     outputs=[train_log],
                 )
         demo.launch(
             server_name="0.0.0.0",
             server_port=7860,

train_engine.py CHANGED Viewed

@@ -2153,13 +2153,16 @@ def preprocess_audio(
                 # Auto-caption: read existing sidecar or analyze
                 sidecar = _read_caption_sidecar(af)
-                if sidecar and sidecar.get("caption"):
-                    caption = sidecar["caption"]
                     lyrics = sidecar.get("lyrics", "[Instrumental]")
                     logger.info("[Caption] %s: using existing sidecar", af.name)
                 else:
                     # Auto-select analysis mode based on dataset size
-                    if total <= 20:
                         analysis_mode = "sas"
                     elif total <= 100:
                         analysis_mode = "mid"
@@ -2535,11 +2538,13 @@ def train_lora_generator(
         # Cancel check
         if _training_cancel.is_set():
             _training_cancel.clear()
-            early_path = str(out_path / "early_exit")
-            model.decoder.eval()
-            save_lora_adapter(model, early_path)
-            model.decoder.train()
-            yield f"[OK] Cancelled at epoch {epoch + 1}, saved to {early_path}"
             yield "[DONE]"
             _cuda_sync(device)
             unload_models(model)

                 # Auto-caption: read existing sidecar or analyze
                 sidecar = _read_caption_sidecar(af)
+                if sidecar is not None:
+                    caption = sidecar.get("caption", "") or af.stem
                     lyrics = sidecar.get("lyrics", "[Instrumental]")
                     logger.info("[Caption] %s: using existing sidecar", af.name)
                 else:
                     # Auto-select analysis mode based on dataset size
+                    # mid/sas use Demucs stem separation — GPU only
+                    if device == "cpu":
+                        analysis_mode = "faf"
+                    elif total <= 20:
                         analysis_mode = "sas"
                     elif total <= 100:
                         analysis_mode = "mid"
         # Cancel check
         if _training_cancel.is_set():
             _training_cancel.clear()
+            if epoch > start_epoch:
+                early_path = str(out_path / "early_exit")
+                model.decoder.eval()
+                save_lora_adapter(model, early_path)
+                yield f"[OK] Cancelled at epoch {epoch + 1}, saved to {early_path}"
+            else:
+                yield f"[CANCELLED] Stopped before any epoch completed"
             yield "[DONE]"
             _cuda_sync(device)
             unload_models(model)