Fix gradient clipping thresholds in dynamics and checklist modules

- Count a step as clipped when its gradient norm reaches the exact grad_clip value (1.0) instead of the 0.99 approximation
- Raise the clip-rate warning threshold from 30% to 50% (the Google Deep Learning Tuning Playbook treats clipping more than 50% of updates as extremely aggressive)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (2) hide show

llm_lab/evaluation/checklist.py +2 -2
llm_lab/evaluation/dynamics.py +2 -2

llm_lab/evaluation/checklist.py CHANGED Viewed

@@ -60,8 +60,8 @@ class InsightChecklist:
         # 5. Gradient clipping rate
         if metrics_history and metrics_history.get("grad_norm"):
             gnorms = metrics_history["grad_norm"]
-            clip_rate = sum(1 for g in gnorms if g >= 0.99) / max(len(gnorms), 1)
-            if clip_rate < 0.3:
                 checks["passed"].append(f"Gradient clipping rate {clip_rate:.1%} (healthy)")
             else:
                 checks["failed"].append(f"Gradient clipping rate {clip_rate:.1%} (too frequent)")

         # 5. Gradient clipping rate
         if metrics_history and metrics_history.get("grad_norm"):
             gnorms = metrics_history["grad_norm"]
+            clip_rate = sum(1 for g in gnorms if g >= 1.0) / max(len(gnorms), 1)
+            if clip_rate < 0.5:
                 checks["passed"].append(f"Gradient clipping rate {clip_rate:.1%} (healthy)")
             else:
                 checks["failed"].append(f"Gradient clipping rate {clip_rate:.1%} (too frequent)")

llm_lab/evaluation/dynamics.py CHANGED Viewed

@@ -78,14 +78,14 @@ class TrainingDynamicsAnalyzer:
                 "mean": round(sum(gnorms) / len(gnorms), 4),
                 "max": round(max(gnorms), 4),
                 "min": round(min(gnorms), 4),
-                "clipped_pct": round(sum(1 for g in gnorms if g >= 0.99) / len(gnorms) * 100, 1),
             }
             print(f"\n  📐 Gradient Norm Analysis:")
             print(f"    Mean:          {analysis['grad_norm']['mean']:.4f}")
             print(f"    Max:           {analysis['grad_norm']['max']:.4f}")
             print(f"    Clipping rate: {analysis['grad_norm']['clipped_pct']:.1f}%")
-            if analysis["grad_norm"]["clipped_pct"] > 30:
                 print(f"    ⚠️ Clipping is frequent → consider lowering LR or extending warmup")
         # ── Throughput analysis ──

                 "mean": round(sum(gnorms) / len(gnorms), 4),
                 "max": round(max(gnorms), 4),
                 "min": round(min(gnorms), 4),
+                "clipped_pct": round(sum(1 for g in gnorms if g >= 1.0) / len(gnorms) * 100, 1),
             }
             print(f"\n  📐 Gradient Norm Analysis:")
             print(f"    Mean:          {analysis['grad_norm']['mean']:.4f}")
             print(f"    Max:           {analysis['grad_norm']['max']:.4f}")
             print(f"    Clipping rate: {analysis['grad_norm']['clipped_pct']:.1f}%")
+            if analysis["grad_norm"]["clipped_pct"] > 50:
                 print(f"    ⚠️ Clipping is frequent → consider lowering LR or extending warmup")
         # ── Throughput analysis ──