Improve LOSS_BOUNCE detection with pre-computed bounce metrics

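Pre-compute the minimum-loss index and the bounce amount before the
classification chain, and tighten the bounce conditions to reduce false
positives: the old check (second_half_avg > first_half_avg) is replaced by
requiring that the minimum occurs before 85% of training and that the
bounce (last loss minus minimum loss) exceeds 0.2.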

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (1) hide show

llm_lab/training/debugger.py +13 -5

llm_lab/training/debugger.py CHANGED Viewed

@@ -175,6 +175,16 @@ class LossDebugger:
             else:
                 val_trend = "flat"
         # ── Classify ──
         status = STATUS_NORMAL
         severity = "green"
@@ -226,15 +236,13 @@ class LossDebugger:
             recommended_levels = [3, 2]
         # Check 5: Loss bounce (decreased then increased again)
-        elif loss_change > 0.1 and second_half_avg > first_half_avg:
-            min_loss = min(train_losses)
-            bounce_amount = last_loss - min_loss
             status = STATUS_LOSS_BOUNCE
             severity = "yellow"
             details = (
                 f"Loss decreased then bounced back up: "
-                f"{first_loss:.4f} -> min {min_loss:.4f} -> {last_loss:.4f} "
-                f"(bounce={bounce_amount:.4f}). "
                 f"Possible LR too high, data issue, or overfitting."
             )
             recommended_levels = [3, 4]

             else:
                 val_trend = "flat"
+        # Pre-compute bounce detection
+        _min_loss = min(train_losses)
+        _min_idx = train_losses.index(_min_loss)
+        _bounce_amount = last_loss - _min_loss
+        _has_bounce = (
+            loss_change > 0.1
+            and _min_idx < len(train_losses) * 0.85
+            and _bounce_amount > 0.2
+        )
         # ── Classify ──
         status = STATUS_NORMAL
         severity = "green"
             recommended_levels = [3, 2]
         # Check 5: Loss bounce (decreased then increased again)
+        elif _has_bounce:
             status = STATUS_LOSS_BOUNCE
             severity = "yellow"
             details = (
                 f"Loss decreased then bounced back up: "
+                f"{first_loss:.4f} -> min {_min_loss:.4f} -> {last_loss:.4f} "
+                f"(bounce={_bounce_amount:.4f}). "
                 f"Possible LR too high, data issue, or overfitting."
             )
             recommended_levels = [3, 4]