theapemachine
/

sparse-transformer-experiments

Model card Files Files and versions

xet

Community

theapemachine committed on 29 days ago

Commit

1f4765b

verified ·

1 Parent(s): aafd87e

Fix: compute_relaxer_diagnostics called backward inside no_grad context

Browse files

Files changed (1) hide show

_patch_diagnostics.py +38 -0

_patch_diagnostics.py ADDED Viewed

	@@ -0,0 +1,38 @@

+def compute_relaxer_diagnostics(model, sched, relaxer_deltas, x, y, corpus, bs, cs):
+    """
+    Compare relaxer delta on inactive chunks to what dense gradient would have been.
+    Returns (grad_cos, mag_ratio) or (None, None) if not applicable.
+    """
+    if not relaxer_deltas: return None, None
+    # Compute dense gradients — needs grad enabled
+    for m in gsl(model): m.se=False
+    for p in model.parameters(): p.grad=None
+    with torch.enable_grad():
+        _,lo=model(x,y)
+        lo.backward()
+    cos_sims=[]; mag_ratios=[]
+    with torch.no_grad():
+        for m,delta in relaxer_deltas.items():
+            if m not in sched.m2i: continue
+            ids=sched.m2i[m]; nc=len(ids); di=m.weight.shape[1]
+            la=sched.act[ids]; li=~la
+            if li.sum()==0 or m.weight.grad is None: continue
+            # Dense gradient for inactive chunks, reshaped
+            dense_g=m.weight.grad.view(nc,cs,di)[li]  # (n_inact, cs, di)
+            # Flatten for cosine/magnitude
+            d_flat=delta.reshape(-1); g_flat=dense_g.reshape(-1)
+            dn=d_flat.norm(); gn=g_flat.norm()
+            if dn>1e-12 and gn>1e-12:
+                cos_sims.append(F.cosine_similarity(d_flat.unsqueeze(0),g_flat.unsqueeze(0)).item())
+                mag_ratios.append((dn/gn).item())
+    # Restore sparse mode
+    for m in gsl(model): m.se=True
+    for p in model.parameters(): p.grad=None
+    if not cos_sims: return None, None
+    return sum(cos_sims)/len(cos_sims), sum(mag_ratios)/len(mag_ratios)