Fix gradient clipping thresholds in dynamics and checklist modules
Browse files
- Use exact grad_clip (1.0) instead of 0.99 approximation
- Raise clip rate warning threshold from 30% to 50% (Google Tuning Playbook)
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
llm_lab/evaluation/checklist.py
CHANGED
|
@@ -60,8 +60,8 @@ class InsightChecklist:
|
|
| 60 |
# 5. Gradient clipping rate
|
| 61 |
if metrics_history and metrics_history.get("grad_norm"):
|
| 62 |
gnorms = metrics_history["grad_norm"]
|
| 63 |
-
clip_rate = sum(1 for g in gnorms if g >= 0.99) / max(len(gnorms), 1)
|
| 64 |
-
if clip_rate < 0.3:
|
| 65 |
checks["passed"].append(f"Gradient clipping rate {clip_rate:.1%} (healthy)")
|
| 66 |
else:
|
| 67 |
checks["failed"].append(f"Gradient clipping rate {clip_rate:.1%} (too frequent)")
|
|
|
|
| 60 |
# 5. Gradient clipping rate
|
| 61 |
if metrics_history and metrics_history.get("grad_norm"):
|
| 62 |
gnorms = metrics_history["grad_norm"]
|
| 63 |
+
clip_rate = sum(1 for g in gnorms if g >= 1.0) / max(len(gnorms), 1)
|
| 64 |
+
if clip_rate < 0.5:
|
| 65 |
checks["passed"].append(f"Gradient clipping rate {clip_rate:.1%} (healthy)")
|
| 66 |
else:
|
| 67 |
checks["failed"].append(f"Gradient clipping rate {clip_rate:.1%} (too frequent)")
|
llm_lab/evaluation/dynamics.py
CHANGED
|
@@ -78,14 +78,14 @@ class TrainingDynamicsAnalyzer:
|
|
| 78 |
"mean": round(sum(gnorms) / len(gnorms), 4),
|
| 79 |
"max": round(max(gnorms), 4),
|
| 80 |
"min": round(min(gnorms), 4),
|
| 81 |
-
"clipped_pct": round(sum(1 for g in gnorms if g >= 0.99) / len(gnorms) * 100, 1),
|
| 82 |
}
|
| 83 |
|
| 84 |
print(f"\n π Gradient Norm Analysis:")
|
| 85 |
print(f" Mean: {analysis['grad_norm']['mean']:.4f}")
|
| 86 |
print(f" Max: {analysis['grad_norm']['max']:.4f}")
|
| 87 |
print(f" Clipping rate: {analysis['grad_norm']['clipped_pct']:.1f}%")
|
| 88 |
-
if analysis["grad_norm"]["clipped_pct"] > 30:
|
| 89 |
print(f" ⚠️ Clipping is frequent — consider lowering LR or extending warmup")
|
| 90 |
|
| 91 |
# ── Throughput analysis ──
|
|
|
|
| 78 |
"mean": round(sum(gnorms) / len(gnorms), 4),
|
| 79 |
"max": round(max(gnorms), 4),
|
| 80 |
"min": round(min(gnorms), 4),
|
| 81 |
+
"clipped_pct": round(sum(1 for g in gnorms if g >= 1.0) / len(gnorms) * 100, 1),
|
| 82 |
}
|
| 83 |
|
| 84 |
print(f"\n π Gradient Norm Analysis:")
|
| 85 |
print(f" Mean: {analysis['grad_norm']['mean']:.4f}")
|
| 86 |
print(f" Max: {analysis['grad_norm']['max']:.4f}")
|
| 87 |
print(f" Clipping rate: {analysis['grad_norm']['clipped_pct']:.1f}%")
|
| 88 |
+
if analysis["grad_norm"]["clipped_pct"] > 50:
|
| 89 |
print(f" ⚠️ Clipping is frequent — consider lowering LR or extending warmup")
|
| 90 |
|
| 91 |
# ── Throughput analysis ──
|