CausalGrok / code /experiments /causalgrok_baseline.py

Upload code/experiments

50fa85c verified 12 days ago

19 kB

	"""
	CausalGrok — Main Training Loop
	Nilesh

	Core experiment: does the IRM invariance penalty drop at the SAME epoch
	as validation accuracy jumps (the grokking transition)?
	If yes → the paper's central claim is confirmed.

	Run via the launchers (always nohup-detached so SSH disconnects don't kill it):
	bash scripts/launch.sh grokking 500 42

	All artifacts (config, logs, history, checkpoints, figures) for every
	invocation land in experiments/runs/<run_id>/ and are kept forever.
	"""

	from __future__ import annotations

	import argparse
	import json
	import os
	import time
	from datetime import datetime, timezone

	import numpy as np
	import torch
	import torch.nn as nn
	import torchvision.transforms as transforms
	from torch.utils.data import DataLoader, Dataset, Subset
	from torchvision.models import resnet18
	from medmnist import PneumoniaMNIST
	import wandb

	from utils.metrics import (
	accuracy, weight_norm, feature_rank, irm_penalty, shortcut_ratio,
	)
	from utils.grokfast import gradfilter_ema
	from utils.pseudo_envs import make_brightness_envs
	from utils.run_dir import make_run_dir, ensure_run_dir, save_config


	# ──────────────────────────────────────────────
	# CONFIG
	# ──────────────────────────────────────────────

	def get_config(condition):
	    """Build the hyper-parameter dict for one experimental condition.

	    Args:
	        condition: Name of the preset, either ``"standard"`` or
	            ``"grokking"``.

	    Returns:
	        A new ``dict`` containing the shared defaults (seed, data sizes,
	        logging cadence, device) followed by ``condition`` and the
	        preset-specific optimiser / schedule settings.

	    Raises:
	        ValueError: If ``condition`` is not a known preset. Without this
	            check an unknown name would yield a config that lacks
	            ``condition``, ``lr``, ``n_epochs`` etc. and only fail much
	            later with a ``KeyError``.
	    """
	    presets = {
	        "standard": dict(lr=1e-3, weight_decay=1e-4,
	                         n_epochs=300, init_scale=1.0, use_grokfast=False),
	        "grokking": dict(lr=1e-3, weight_decay=1e-3,
	                         n_epochs=3000, init_scale=4.0, use_grokfast=True,
	                         grokfast_alpha=0.98, grokfast_lamb=2.0),
	    }
	    if condition not in presets:
	        raise ValueError(
	            f"unknown condition {condition!r}; expected one of {sorted(presets)}")

	    # Shared defaults; the device is resolved once, at config-build time.
	    cfg = dict(
	        seed=42, n_train=500, batch_size=32, img_size=28,
	        n_classes=2, log_every=50, n_pseudo_envs=3,
	        device="cuda" if torch.cuda.is_available() else "cpu",
	    )
	    # `condition` is inserted before the preset keys so the saved config
	    # keeps the same key order as before.
	    cfg.update(condition=condition, **presets[condition])
	    return cfg


	# ──────────────────────────────────────────────
	# DATA
	# ──────────────────────────────────────────────

	class SpuriousColorPatchDataset(Dataset):
	    """
	    Wraps a (image-tensor, label) dataset and stamps a colored corner
	    patch correlated with the label at probability `rho`.

	    Encoding (the image is assumed to be a 3-channel tensor, e.g. a
	    grayscale image repeated across channels and normalised to [-1, 1]
	    — confirm against the caller's transform):
	        encoded label 0 → channel-0 high, channels 1,2 low (red corner)
	        encoded label 1 → channel-2 high, channels 0,1 low (blue corner)

	    With prob rho the encoded label matches the true label — a usable
	    shortcut. With prob (1-rho) it's flipped (1 - label, so labels are
	    expected to be binary) — pure noise on the patch.

	    The per-sample correlation decisions are drawn once in `__init__`
	    from a generator seeded with `seed`, so a given seed always stamps
	    the same samples the same way.

	    Args:
	        base: Indexable dataset yielding ``(image_tensor, label)``.
	        rho: Probability that the patch encodes the true label.
	        patch_size: Side length, in pixels, of the top-left patch.
	        seed: Seed for the per-sample correlation decisions.
	        hi: Value written to the "on" channel of the patch.
	        lo: Value written to the "off" channels of the patch.
	    """

	    def __init__(self, base, rho=0.8, patch_size=4, seed=0,
	                 hi=1.0, lo=-1.0):
	        self.base = base
	        self.rho = float(rho)
	        self.patch_size = int(patch_size)
	        self.hi = hi
	        self.lo = lo
	        rng = torch.Generator().manual_seed(int(seed))
	        # One boolean per sample: True → patch agrees with the true label.
	        self.is_correlated = (torch.rand(len(base), generator=rng) < self.rho)

	    def __len__(self):
	        """Return the number of samples in the wrapped dataset."""
	        return len(self.base)

	    def __getitem__(self, idx):
	        """Return ``(patched_image, label)`` for sample ``idx``.

	        The label is passed through unchanged; only the image gets the
	        corner patch.
	        """
	        img, label = self.base[idx]
	        # label may be a 1-element tensor or a python scalar
	        try:
	            label_int = int(label.squeeze().item())
	        except AttributeError:
	            label_int = int(label)
	        encoded = label_int if bool(self.is_correlated[idx]) else (1 - label_int)

	        # Stamp a copy: writing into the tensor returned by `base` would
	        # permanently alter any base dataset that caches its tensors.
	        img = img.clone()
	        ps = self.patch_size
	        red, blue = (self.hi, self.lo) if encoded == 0 else (self.lo, self.hi)
	        img[0, :ps, :ps] = red
	        img[1, :ps, :ps] = self.lo
	        img[2, :ps, :ps] = blue
	        return img, label


	def _gray_to_rgb(x):
	    """Repeat a single-channel image tensor across three channels.

	    Defined at module level (rather than as a lambda) so the transform
	    pipeline can be pickled when DataLoader workers are started with the
	    ``spawn`` method.
	    """
	    return x.repeat(3, 1, 1)


	def get_dataloaders(cfg, data_root):
	    """Build the PneumoniaMNIST train/val/test loaders for one run.

	    Args:
	        cfg: Run configuration. Reads ``img_size``, ``seed``, ``n_train``
	            and ``batch_size``; optionally ``spurious_rho``,
	            ``spurious_patch_size``, ``spurious_seed``, ``device`` and
	            ``num_workers`` (defaults to 4).
	        data_root: Directory used as the MedMNIST download/cache root;
	            created if missing.

	    Returns:
	        ``(train_loader, val_loader, test_loader, train_subset)`` where
	        ``train_subset`` is the random ``n_train``-sample subset of the
	        training split that ``train_loader`` iterates over.

	    Side effects:
	        Re-seeds the global torch RNG with ``cfg["seed"]`` before drawing
	        the training subset.
	    """
	    # medmnist 3.x raises if root doesn't exist; create it ourselves
	    # rather than relying on its default-root fallback.
	    os.makedirs(data_root, exist_ok=True)
	    transform = transforms.Compose([
	        transforms.Resize((cfg["img_size"], cfg["img_size"])),
	        transforms.ToTensor(),
	        transforms.Normalize(mean=[.5], std=[.5]),
	        transforms.Lambda(_gray_to_rgb),
	    ])
	    train_ds = PneumoniaMNIST(split="train", transform=transform, download=True, root=data_root)
	    val_ds = PneumoniaMNIST(split="val", transform=transform, download=True, root=data_root)
	    test_ds = PneumoniaMNIST(split="test", transform=transform, download=True, root=data_root)

	    # Spurious-feature injection: colored corner patch at correlation rho.
	    # Same rho on all splits so the shortcut model plateaus at val≈rho;
	    # grokking transition is the model breaking through that ceiling.
	    # A rho of None or 0 disables injection.
	    rho = cfg.get("spurious_rho")
	    if rho:
	        ps = cfg.get("spurious_patch_size", 4)
	        sd = cfg.get("spurious_seed", cfg["seed"])
	        # Distinct seed offsets give each split its own correlation mask.
	        train_ds = SpuriousColorPatchDataset(train_ds, rho=rho, patch_size=ps, seed=sd + 1)
	        val_ds = SpuriousColorPatchDataset(val_ds, rho=rho, patch_size=ps, seed=sd + 2)
	        test_ds = SpuriousColorPatchDataset(test_ds, rho=rho, patch_size=ps, seed=sd + 3)

	    # Seeded permutation → the same n_train samples for a given seed.
	    torch.manual_seed(cfg["seed"])
	    indices = torch.randperm(len(train_ds))[:cfg["n_train"]]
	    train_subset = Subset(train_ds, indices)

	    # Pinned host memory only helps host→GPU copies; requesting it on a
	    # CPU-only run just triggers a warning. If the device is not recorded
	    # in cfg, keep the previous always-pin behaviour.
	    loader_kwargs = dict(
	        num_workers=cfg.get("num_workers", 4),
	        pin_memory=str(cfg.get("device", "cuda")).startswith("cuda"),
	    )
	    train_loader = DataLoader(train_subset, batch_size=cfg["batch_size"], shuffle=True, **loader_kwargs)
	    val_loader = DataLoader(val_ds, batch_size=256, shuffle=False, **loader_kwargs)
	    test_loader = DataLoader(test_ds, batch_size=256, shuffle=False, **loader_kwargs)
	    return train_loader, val_loader, test_loader, train_subset


	# ──────────────────────────────────────────────
	# MODEL
	# ──────────────────────────────────────────────

	def build_model(cfg):
	    """Create a randomly initialised ResNet-18 and move it to the run device.

	    When ``cfg["init_scale"]`` differs from 1.0, every parameter whose
	    name contains ``"weight"`` and which has more than one dimension is
	    multiplied by that factor; all other parameters keep their default
	    initialisation.

	    Args:
	        cfg: Run configuration; reads ``n_classes``, ``init_scale`` and
	            ``device``.

	    Returns:
	        The model, already placed on ``cfg["device"]``.
	    """
	    net = resnet18(weights=None, num_classes=cfg["n_classes"])
	    scale = cfg["init_scale"]
	    if scale != 1.0:
	        with torch.no_grad():
	            for pname, param in net.named_parameters():
	                # Multi-dimensional weights only; 1-D parameters are skipped.
	                if param.dim() > 1 and "weight" in pname:
	                    param.data *= scale
	    return net.to(cfg["device"])


	# ──────────────────────────────────────────────
	# TRAIN
	# ──────────────────────────────────────────────

	def train(cfg, model, train_loader, val_loader, test_loader,
	          pseudo_envs, optimizer, run_dir):
	    """Run the training loop and record grokking / invariance diagnostics.

	    Every ``cfg["log_every"]`` epochs (and at epoch 1) the model is
	    evaluated, a metrics row is appended to the history, logged to wandb
	    and the full history is rewritten to ``<run_dir>/results/history.json``.
	    After the last epoch the test accuracy and the go/no-go summary are
	    written to ``<run_dir>/results/summary.json`` and the weights to
	    ``<run_dir>/checkpoints/final.pt``.

	    Args:
	        cfg: Run configuration. Reads ``condition``, ``n_epochs``,
	            ``weight_decay``, ``init_scale``, ``device``, ``log_every``,
	            ``run_id``, ``n_train``, ``seed`` and optionally
	            ``grad_clip``, ``use_grokfast``, ``grokfast_alpha``,
	            ``grokfast_lamb``.
	        model: Network to train (modified in place).
	        train_loader: Loader yielding ``(images, labels)`` batches.
	        val_loader: Loader used for validation metrics.
	        test_loader: Loader used once, after training, for test accuracy.
	        pseudo_envs: Environments passed straight to ``irm_penalty``.
	        optimizer: Optimiser already bound to ``model``'s parameters.
	        run_dir: Run directory; its ``results/`` and ``checkpoints/``
	            sub-directories are written to and must already exist.

	    Returns:
	        The history: a list with one metrics ``dict`` per logged epoch.
	    """
	    criterion = nn.CrossEntropyLoss()
	    grads_ema = None
	    history = []
	    best_val = 0.0
	    grok_epoch = None
	    irm_base = None

	    print(f"\n{'='*55}")
	    print(f" {cfg['condition'].upper()} | {cfg['n_epochs']} epochs | "
	          f"WD={cfg['weight_decay']} | α={cfg['init_scale']}")
	    print(f" run_dir: {run_dir}")
	    print(f"{'='*55}", flush=True)

	    history_path = os.path.join(run_dir, "results", "history.json")

	    grad_clip = cfg.get("grad_clip", 1.0)
	    plateau_window = 10
	    plateau_eps = 0.01  # |Δval_acc| within this counts as flat

	    for epoch in range(1, cfg["n_epochs"] + 1):
	        model.train()
	        loss_sum = 0.0
	        n_b = 0
	        for imgs, labels in train_loader:
	            imgs = imgs.to(cfg["device"])
	            # Flatten to (batch,) explicitly. A bare squeeze() would also
	            # drop the batch axis when a batch holds a single sample and
	            # hand CrossEntropyLoss a 0-d target.
	            labels = labels.reshape(-1).long().to(cfg["device"])
	            optimizer.zero_grad()
	            loss = criterion(model(imgs), labels)
	            loss.backward()
	            # Order matters: Grokfast amplifies, THEN we clip the
	            # amplified result. Clipping before Grokfast would let the
	            # amplification re-blow up the gradient and partially
	            # undo the safety bound.
	            if cfg.get("use_grokfast"):
	                grads_ema = gradfilter_ema(
	                    model, grads_ema,
	                    alpha=cfg.get("grokfast_alpha", 0.98),
	                    lamb=cfg.get("grokfast_lamb", 2.0))
	            if grad_clip and grad_clip > 0:
	                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=grad_clip)
	            optimizer.step()
	            loss_sum += loss.item()
	            n_b += 1

	        if epoch % cfg["log_every"] == 0 or epoch == 1:
	            # Guard against an empty loader so logging never divides by 0.
	            train_loss = loss_sum / max(n_b, 1)
	            tr_acc = accuracy(model, train_loader, cfg["device"])
	            vl_acc = accuracy(model, val_loader, cfg["device"])
	            wn = weight_norm(model)
	            fr = feature_rank(model, val_loader, cfg["device"])
	            irm_m, irm_v = irm_penalty(model, pseudo_envs, cfg["device"])
	            cconf, bconf = shortcut_ratio(model, val_loader, cfg["device"])

	            # The first logged IRM value is the baseline for "IRM drop".
	            if irm_base is None:
	                irm_base = irm_m

	            # Robust grokking detection — require a sustained plateau in
	            # val_acc (≥ plateau_window-2 of the last `plateau_window`
	            # checkpoints flat within `plateau_eps`) BEFORE the jump.
	            # Otherwise early-training noise (0.50 → 0.56) can trigger.
	            if grok_epoch is None and len(history) >= plateau_window:
	                last = history[-plateau_window:]
	                ref = last[-1]["val_acc"]
	                flat = sum(1 for r in last if abs(r["val_acc"] - ref) < plateau_eps)
	                if flat >= plateau_window - 2 and vl_acc > best_val + 0.05:
	                    grok_epoch = epoch
	                    irm_drop = (irm_base - irm_m) / (irm_base + 1e-8) * 100
	                    print(f"\n * GROKKING at epoch {epoch} *")
	                    print(f" Val: {best_val:.3f}→{vl_acc:.3f} | IRM drop: {irm_drop:.1f}%",
	                          flush=True)

	            if vl_acc > best_val:
	                best_val = vl_acc

	            # Cap the shortcut ratio — early training can give cconf≈bconf≈0
	            # which makes the raw ratio explode.
	            sc_ratio = min(bconf / (cconf + 1e-8), 10.0)

	            row = dict(epoch=epoch, train_loss=train_loss,
	                       train_acc=tr_acc, val_acc=vl_acc,
	                       weight_norm=wn, feature_rank=fr,
	                       irm_mean=irm_m, irm_var=irm_v,
	                       center_conf=cconf, border_conf=bconf,
	                       shortcut_ratio=sc_ratio,
	                       grokking_detected=grok_epoch is not None)
	            history.append(row)
	            wandb.log(row)

	            # Rewrite the whole history at every checkpoint so an
	            # interrupted run still leaves its metrics on disk.
	            with open(history_path, "w") as f:
	                json.dump(history, f, indent=2)

	            print(f" ep {epoch:5d} | loss {train_loss:.4f} | "
	                  f"tr {tr_acc:.3f} | vl {vl_acc:.3f} | "
	                  f"‖W‖ {wn:.1f} | rank {fr:.1f} | "
	                  f"IRM {irm_m:.4f} | sc {sc_ratio:.2f}x",
	                  flush=True)

	    test_acc = accuracy(model, test_loader, cfg["device"])
	    wandb.log({"test_acc": test_acc, "grokking_epoch": grok_epoch or -1})

	    # Compute the four decision numbers right here so summary.json is
	    # the single source of truth for go/no-go.
	    irm_drop_pct = float("nan")
	    irm_drop_ep = -1
	    epoch_gap = -1
	    if history:
	        irm0 = history[0]["irm_mean"]
	        irm_min = min(r["irm_mean"] for r in history)
	        if irm0:
	            irm_drop_pct = (irm0 - irm_min) / (irm0 + 1e-8) * 100.0
	        # Epoch of biggest IRM step-change (proxy for "the IRM drop").
	        # NOTE(review): abs() means a large upward step is also eligible —
	        # confirm that is the intended proxy.
	        if len(history) > 1:
	            biggest = 0.0
	            for prev, cur in zip(history[:-1], history[1:]):
	                d = abs(cur["irm_mean"] - prev["irm_mean"])
	                if d > biggest:
	                    biggest = d
	                    irm_drop_ep = cur["epoch"]
	        if grok_epoch and irm_drop_ep > 0:
	            epoch_gap = abs(grok_epoch - irm_drop_ep)

	    last_row = history[-1] if history else None
	    summary = dict(
	        run_id=cfg["run_id"],
	        condition=cfg["condition"],
	        n_train=cfg["n_train"],
	        seed=cfg["seed"],
	        test_acc=test_acc,
	        best_val=best_val,
	        grokking_epoch=grok_epoch if grok_epoch else -1,
	        irm_drop_pct=irm_drop_pct,
	        irm_drop_epoch=irm_drop_ep,
	        epoch_gap=epoch_gap,
	        final_weight_norm=last_row["weight_norm"] if last_row else None,
	        final_feature_rank=last_row["feature_rank"] if last_row else None,
	        final_irm=last_row["irm_mean"] if last_row else None,
	        final_shortcut_ratio=last_row["shortcut_ratio"] if last_row else None,
	    )
	    with open(os.path.join(run_dir, "results", "summary.json"), "w") as f:
	        json.dump(summary, f, indent=2)

	    ckpt_path = os.path.join(run_dir, "checkpoints", "final.pt")
	    torch.save(model.state_dict(), ckpt_path)

	    print(f"\n Test acc: {test_acc:.4f} | Grokking at: {grok_epoch}")
	    print(f" History → {history_path}")
	    print(f" Checkpoint → {ckpt_path}", flush=True)
	    return history


	# ──────────────────────────────────────────────
	# MAIN
	# ──────────────────────────────────────────────

	def main():
	    """Parse CLI arguments, set up the run directory and wandb, then train.

	    The ``--condition`` preset supplies the base configuration; the
	    per-knob flags override individual preset values when given. Invalid
	    ``--log_every`` / ``--spurious_rho`` values are rejected at parse
	    time instead of failing (or silently misbehaving) mid-run.
	    """
	    p = argparse.ArgumentParser()
	    p.add_argument("--condition", default="grokking", choices=["standard", "grokking"])
	    p.add_argument("--n_train", type=int, default=500)
	    p.add_argument("--seed", type=int, default=42)
	    p.add_argument("--log_every", type=int, default=50)
	    p.add_argument("--wandb_project", default="causalgrok")
	    p.add_argument("--wandb_mode", default="online",
	                   choices=["online", "offline", "disabled"])
	    p.add_argument("--run_dir", default=None,
	                   help="Override the auto-generated experiments/runs/<run_id>/ path")
	    p.add_argument("--data_root", default="data",
	                   help="Where MedMNIST cache lives")

	    # Per-knob overrides for the ablation grid. When set, they override
	    # the preset chosen by --condition. When omitted, the preset wins.
	    p.add_argument("--weight_decay", type=float, default=None)
	    p.add_argument("--init_scale", type=float, default=None)
	    p.add_argument("--n_epochs", type=int, default=None)
	    p.add_argument("--lr", type=float, default=None)
	    p.add_argument("--grokfast", choices=["on", "off"], default=None,
	                   help="Force Grokfast on/off, overriding the preset")
	    p.add_argument("--grad_clip", type=float, default=1.0,
	                   help="Max ℓ2 gradient norm; 0 disables clipping")

	    # Spurious-feature injection (Outcome-C variant).
	    p.add_argument("--spurious_rho", type=float, default=None,
	                   help="Probability that the colored corner patch is correctly correlated with the label. None/0 disables injection.")
	    p.add_argument("--spurious_patch_size", type=int, default=4)
	    p.add_argument("--spurious_seed", type=int, default=None,
	                   help="Defaults to --seed; controls per-sample correlation decisions")

	    args = p.parse_args()

	    # Fail fast on values that would otherwise break the run later:
	    # log_every is used as a modulus in the training loop, and rho is a
	    # probability.
	    if args.log_every < 1:
	        p.error("--log_every must be a positive integer")
	    if args.spurious_rho is not None and not 0.0 <= args.spurious_rho <= 1.0:
	        p.error("--spurious_rho must lie in [0, 1]")

	    cfg = get_config(args.condition)
	    cfg.update(n_train=args.n_train, seed=args.seed, log_every=args.log_every)

	    # CLI overrides take precedence over preset
	    if args.weight_decay is not None:
	        cfg["weight_decay"] = args.weight_decay
	    if args.init_scale is not None:
	        cfg["init_scale"] = args.init_scale
	    if args.n_epochs is not None:
	        cfg["n_epochs"] = args.n_epochs
	    if args.lr is not None:
	        cfg["lr"] = args.lr
	    if args.grokfast is not None:
	        cfg["use_grokfast"] = (args.grokfast == "on")
	    cfg["grad_clip"] = args.grad_clip

	    cfg["spurious_rho"] = args.spurious_rho
	    cfg["spurious_patch_size"] = args.spurious_patch_size
	    cfg["spurious_seed"] = args.spurious_seed if args.spurious_seed is not None else args.seed

	    # ── Use the remaining compute on a shared GPU more aggressively ──
	    # TF32 matmuls are A100-native and ~2× faster than fp32 with no
	    # measurable effect on grokking dynamics for our scale of model.
	    # cudnn.benchmark autotunes conv algorithms for our fixed shape.
	    if cfg["device"] == "cuda":
	        torch.set_float32_matmul_precision("high")
	        torch.backends.cudnn.benchmark = True
	        torch.backends.cuda.matmul.allow_tf32 = True
	        torch.backends.cudnn.allow_tf32 = True

	    torch.manual_seed(cfg["seed"])
	    np.random.seed(cfg["seed"])

	    if args.run_dir is None:
	        # Tag spurious runs in the run_id so the dirs are
	        # distinguishable on disk and globs that match on "spurious"
	        # work without ambiguity.
	        parts = [cfg["condition"]]
	        if cfg.get("spurious_rho"):
	            parts.append(f"spurious{cfg['spurious_rho']}")
	        parts += [f"n{cfg['n_train']}", f"s{cfg['seed']}"]
	        run_dir, run_id = make_run_dir(parts)
	    else:
	        run_dir = args.run_dir
	        ensure_run_dir(run_dir)
	        # With an explicit directory, its last path component is the run id.
	        run_id = os.path.basename(os.path.normpath(run_dir))

	    cfg["run_id"] = run_id
	    cfg["run_dir"] = run_dir
	    save_config(cfg, run_dir)

	    wandb.init(project=args.wandb_project, config=cfg, name=run_id,
	               mode=args.wandb_mode, dir=run_dir)

	    print(f"\nDevice: {cfg['device']}")
	    print(f"Run ID: {run_id}")
	    print(f"Started (UTC): {datetime.now(timezone.utc).isoformat()}", flush=True)

	    train_loader, val_loader, test_loader, train_subset = get_dataloaders(cfg, args.data_root)
	    pseudo_envs = make_brightness_envs(train_subset, cfg["n_pseudo_envs"], cfg["device"])
	    model = build_model(cfg)
	    optimizer = torch.optim.AdamW(model.parameters(),
	                                  lr=cfg["lr"], weight_decay=cfg["weight_decay"])

	    print(f"Train: {len(train_subset)} | Val: {len(val_loader.dataset)} | "
	          f"Test: {len(test_loader.dataset)}")
	    print(f"Params: {sum(p.numel() for p in model.parameters()):,}", flush=True)

	    t0 = time.time()
	    train(cfg, model, train_loader, val_loader, test_loader,
	          pseudo_envs, optimizer, run_dir)
	    print(f"\nWall time: {(time.time() - t0) / 60:.1f} min", flush=True)
	    wandb.finish()


	# Script entry point: run the experiment only when this file is executed
	# directly, not when it is imported as a module.
	if __name__ == "__main__":
	main()