Vjeong Claude Opus 4.6 committed on
Commit
a671953
·
1 Parent(s): 362e9ea

Fix gradient clipping thresholds in dynamics and checklist modules

Browse files

- Use exact grad_clip (1.0) instead of 0.99 approximation
- Raise clip rate warning threshold from 30% to 50% (Google Tuning Playbook)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

llm_lab/evaluation/checklist.py CHANGED
@@ -60,8 +60,8 @@ class InsightChecklist:
60
  # 5. Gradient clipping rate
61
  if metrics_history and metrics_history.get("grad_norm"):
62
  gnorms = metrics_history["grad_norm"]
63
- clip_rate = sum(1 for g in gnorms if g >= 0.99) / max(len(gnorms), 1)
64
- if clip_rate < 0.3:
65
  checks["passed"].append(f"Gradient clipping rate {clip_rate:.1%} (healthy)")
66
  else:
67
  checks["failed"].append(f"Gradient clipping rate {clip_rate:.1%} (too frequent)")
 
60
  # 5. Gradient clipping rate
61
  if metrics_history and metrics_history.get("grad_norm"):
62
  gnorms = metrics_history["grad_norm"]
63
+ clip_rate = sum(1 for g in gnorms if g >= 1.0) / max(len(gnorms), 1)
64
+ if clip_rate < 0.5:
65
  checks["passed"].append(f"Gradient clipping rate {clip_rate:.1%} (healthy)")
66
  else:
67
  checks["failed"].append(f"Gradient clipping rate {clip_rate:.1%} (too frequent)")
llm_lab/evaluation/dynamics.py CHANGED
@@ -78,14 +78,14 @@ class TrainingDynamicsAnalyzer:
78
  "mean": round(sum(gnorms) / len(gnorms), 4),
79
  "max": round(max(gnorms), 4),
80
  "min": round(min(gnorms), 4),
81
- "clipped_pct": round(sum(1 for g in gnorms if g >= 0.99) / len(gnorms) * 100, 1),
82
  }
83
 
84
  print(f"\n πŸ“ Gradient Norm Analysis:")
85
  print(f" Mean: {analysis['grad_norm']['mean']:.4f}")
86
  print(f" Max: {analysis['grad_norm']['max']:.4f}")
87
  print(f" Clipping rate: {analysis['grad_norm']['clipped_pct']:.1f}%")
88
- if analysis["grad_norm"]["clipped_pct"] > 30:
89
  print(f" ⚠️ Clipping is frequent → consider lowering LR or extending warmup")
90
 
91
  # ── Throughput analysis ──
 
78
  "mean": round(sum(gnorms) / len(gnorms), 4),
79
  "max": round(max(gnorms), 4),
80
  "min": round(min(gnorms), 4),
81
+ "clipped_pct": round(sum(1 for g in gnorms if g >= 1.0) / len(gnorms) * 100, 1),
82
  }
83
 
84
  print(f"\n πŸ“ Gradient Norm Analysis:")
85
  print(f" Mean: {analysis['grad_norm']['mean']:.4f}")
86
  print(f" Max: {analysis['grad_norm']['max']:.4f}")
87
  print(f" Clipping rate: {analysis['grad_norm']['clipped_pct']:.1f}%")
88
+ if analysis["grad_norm"]["clipped_pct"] > 50:
89
  print(f" ⚠️ Clipping is frequent → consider lowering LR or extending warmup")
90
 
91
  # ── Throughput analysis ──