theapemachine committed on
Commit
1f4765b
·
verified ·
1 Parent(s): aafd87e

Fix: compute_relaxer_diagnostics called backward inside no_grad context

Browse files
Files changed (1) hide show
  1. _patch_diagnostics.py +38 -0
_patch_diagnostics.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def compute_relaxer_diagnostics(model, sched, relaxer_deltas, x, y, corpus, bs, cs):
    """
    Compare relaxer deltas on inactive chunks to what the dense gradient would have been.

    A dense forward/backward pass is run with every layer returned by
    ``gsl(model)`` switched to ``se=False``. For each module in
    ``relaxer_deltas`` the gradient rows belonging to inactive chunks are
    compared to the relaxer delta by cosine similarity and by norm ratio.

    Args:
        model: Callable as ``model(x, y)`` returning a 2-tuple whose second
            element is the loss that ``backward()`` is called on.
        sched: Scheduler exposing ``m2i`` (module -> chunk ids) and ``act``
            (indexed by those ids, then inverted with ``~`` to select the
            inactive chunks -- presumably a boolean tensor; confirm).
        relaxer_deltas: Mapping of module -> delta tensor. Each delta is
            flattened and compared against the flattened inactive-chunk
            gradient, so it is expected to hold the same number of elements.
        x, y: Batch passed straight to ``model``.
        corpus: Unused; kept so existing callers do not break.
        bs: Unused; kept so existing callers do not break.
        cs: Chunk size; the weight gradient is viewed as ``(n_chunks, cs, d_in)``.

    Returns:
        ``(grad_cos, mag_ratio)``: the mean cosine similarity and the mean
        ``||delta|| / ||dense_grad||`` over all modules that could be
        compared, or ``(None, None)`` if ``relaxer_deltas`` is empty or no
        module qualified.

    Side effects:
        Every parameter's ``.grad`` is set to ``None`` before and after the
        dense pass, so gradients accumulated by the caller are discarded.
        On exit every layer from ``gsl(model)`` has ``se=True``, also when
        the dense pass or the comparison raises.
    """
    if not relaxer_deltas:
        return None, None

    # Materialise once so exactly the layers switched to dense mode are
    # switched back afterwards.
    layers = list(gsl(model))

    cos_sims = []
    mag_ratios = []
    try:
        # Dense gradients: disable the sparse path and start from clean grads.
        for layer in layers:
            layer.se = False
        for p in model.parameters():
            p.grad = None

        # The caller may be inside torch.no_grad(); backward() needs autograd
        # enabled, so re-enable it explicitly for the dense pass.
        with torch.enable_grad():
            _, loss = model(x, y)
            loss.backward()

        with torch.no_grad():
            for m, delta in relaxer_deltas.items():
                if m not in sched.m2i:
                    continue
                ids = sched.m2i[m]
                n_chunks = len(ids)
                d_in = m.weight.shape[1]
                inactive = ~sched.act[ids]
                if inactive.sum() == 0 or m.weight.grad is None:
                    continue

                # Dense gradient rows of the inactive chunks: (n_inact, cs, d_in).
                dense_g = m.weight.grad.view(n_chunks, cs, d_in)[inactive]

                # Flatten both sides for the cosine / magnitude comparison.
                d_flat = delta.reshape(-1)
                g_flat = dense_g.reshape(-1)
                d_norm = d_flat.norm()
                g_norm = g_flat.norm()

                # Skip (near-)zero vectors: the cosine is meaningless there
                # and the ratio would divide by ~0.
                if d_norm > 1e-12 and g_norm > 1e-12:
                    cos_sims.append(
                        F.cosine_similarity(
                            d_flat.unsqueeze(0), g_flat.unsqueeze(0)
                        ).item()
                    )
                    mag_ratios.append((d_norm / g_norm).item())
    finally:
        # Restore sparse mode and drop the diagnostic gradients even if the
        # dense pass failed, so a training loop never continues in dense mode
        # or steps on these gradients.
        for layer in layers:
            layer.se = True
        for p in model.parameters():
            p.grad = None

    if not cos_sims:
        return None, None
    return sum(cos_sims) / len(cos_sims), sum(mag_ratios) / len(mag_ratios)