Improve LOSS_BOUNCE detection with pre-computed bounce metrics
Browse files
Pre-compute the minimum-loss index and the bounce amount before the
classification chain, and tighten the bounce conditions (minimum loss reached
before 85% of training, bounce > 0.2) to reduce false positives.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- llm_lab/training/debugger.py +13 -5
llm_lab/training/debugger.py
CHANGED
|
@@ -175,6 +175,16 @@ class LossDebugger:
|
|
| 175 |
else:
|
| 176 |
val_trend = "flat"
|
| 177 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
# ── Classify ──
|
| 179 |
status = STATUS_NORMAL
|
| 180 |
severity = "green"
|
|
@@ -226,15 +236,13 @@ class LossDebugger:
|
|
| 226 |
recommended_levels = [3, 2]
|
| 227 |
|
| 228 |
# Check 5: Loss bounce (decreased then increased again)
|
| 229 |
-
elif
|
| 230 |
-
min_loss = min(train_losses)
|
| 231 |
-
bounce_amount = last_loss - min_loss
|
| 232 |
status = STATUS_LOSS_BOUNCE
|
| 233 |
severity = "yellow"
|
| 234 |
details = (
|
| 235 |
f"Loss decreased then bounced back up: "
|
| 236 |
-
f"{first_loss:.4f} -> min {
|
| 237 |
-
f"(bounce={
|
| 238 |
f"Possible LR too high, data issue, or overfitting."
|
| 239 |
)
|
| 240 |
recommended_levels = [3, 4]
|
|
|
|
| 175 |
else:
|
| 176 |
val_trend = "flat"
|
| 177 |
|
| 178 |
+
# Pre-compute bounce detection
|
| 179 |
+
_min_loss = min(train_losses)
|
| 180 |
+
_min_idx = train_losses.index(_min_loss)
|
| 181 |
+
_bounce_amount = last_loss - _min_loss
|
| 182 |
+
_has_bounce = (
|
| 183 |
+
loss_change > 0.1
|
| 184 |
+
and _min_idx < len(train_losses) * 0.85
|
| 185 |
+
and _bounce_amount > 0.2
|
| 186 |
+
)
|
| 187 |
+
|
| 188 |
# ── Classify ──
|
| 189 |
status = STATUS_NORMAL
|
| 190 |
severity = "green"
|
|
|
|
| 236 |
recommended_levels = [3, 2]
|
| 237 |
|
| 238 |
# Check 5: Loss bounce (decreased then increased again)
|
| 239 |
+
elif _has_bounce:
|
|
|
|
|
|
|
| 240 |
status = STATUS_LOSS_BOUNCE
|
| 241 |
severity = "yellow"
|
| 242 |
details = (
|
| 243 |
f"Loss decreased then bounced back up: "
|
| 244 |
+
f"{first_loss:.4f} -> min {_min_loss:.4f} -> {last_loss:.4f} "
|
| 245 |
+
f"(bounce={_bounce_amount:.4f}). "
|
| 246 |
f"Possible LR too high, data issue, or overfitting."
|
| 247 |
)
|
| 248 |
recommended_levels = [3, 4]
|