Create trainer_anchor_bank_attempt_1.py

Browse files

Files changed (1) hide show

trainers/trainer_anchor_bank_attempt_1.py +730 -0

trainers/trainer_anchor_bank_attempt_1.py ADDED Viewed

	@@ -0,0 +1,730 @@

+# ============================================================================
+# ALIGNMENT BANK: Production 5-Expert CaptionBERT-8192
+#
+# Trains the geometric interface layer for the deployed model.
+# Uses cached expert embeddings — no re-extraction needed.
+# GPA alignment — no reference expert bias.
+# Full whitened Procrustes chain per expert.
+# Disagreement preservation calibrated from GPA center.
+#
+# Inputs:  consensus_500k/{bert,modern,roberta,albert,distil}.pt
+#          AbstractPhil/geolip-captionbert-8192 (frozen student)
+# Outputs: alignment_bank.pt (upload alongside model)
+# ============================================================================
+import gc
+import math
+import os
+import time
+import json
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from tqdm import tqdm
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+EXPERTS = [
+    ("google-bert/bert-base-uncased",      "bert",    512),
+    ("answerdotai/ModernBERT-base",        "modern",  8192),
+    ("FacebookAI/roberta-base",            "roberta", 512),
+    ("albert/albert-base-v2",              "albert",  512),
+    ("distilbert/distilbert-base-uncased", "distil",  512),
+]
+CACHE_DIR = "/home/claude/consensus_500k"
+REPO_ID = "AbstractPhil/geolip-captionbert-8192"
+# Bank config
+N_EXTRACT = 500000
+N_ANCHORS = 512
+D_BANK = 128
+BANK_EPOCHS = 30
+BANK_BATCH = 256
+BANK_LR = 1e-3
+N_VAL = 5000
+print("=" * 65)
+print("ALIGNMENT BANK: 5-Expert CaptionBERT-8192")
+print("=" * 65)
+print(f"  Device: {DEVICE}")
+print(f"  Experts: {len(EXPERTS)}")
+print(f"  Anchors: {N_ANCHORS}")
+print(f"  Bank dim: {D_BANK}")
+# ══════════════════════════════════════════════════════════════════
+# ALIGNMENT BANK MODULE
+# ══════════════════════════════════════════════════════════════════
+class AlignmentBank(nn.Module):
+    def __init__(self, d_embed=768, n_experts=5, n_anchors=512, d_bank=128):
+        super().__init__()
+        self.d_embed = d_embed
+        self.n_experts = n_experts
+        self.n_anchors = n_anchors
+        self.d_bank = d_bank
+        self.expert_rotations = nn.ParameterList([
+            nn.Parameter(torch.eye(d_embed)) for _ in range(n_experts)])
+        self.expert_whiteners = nn.ParameterList([
+            nn.Parameter(torch.eye(d_embed)) for _ in range(n_experts)])
+        self.expert_means = nn.ParameterList([
+            nn.Parameter(torch.zeros(d_embed)) for _ in range(n_experts)])
+        self.anchors = nn.Parameter(
+            F.normalize(torch.randn(n_anchors, d_embed), dim=-1))
+        n_cross = n_experts * (n_experts - 1) // 2
+        geo_dim = n_experts + n_experts + n_cross + 1 + n_experts + n_anchors
+        self.geo_proj = nn.Sequential(
+            nn.Linear(geo_dim, d_bank * 2), nn.GELU(), nn.LayerNorm(d_bank * 2),
+            nn.Linear(d_bank * 2, d_bank), nn.LayerNorm(d_bank))
+        self.register_buffer("target_cv", torch.tensor(0.084))
+        self.register_buffer("target_mean_cos", torch.tensor(0.0))
+        self.register_buffer("target_spectral", torch.zeros(50))
+        self.register_buffer("target_cross_cos_mean", torch.tensor(0.0))
+        self.register_buffer("target_cross_cos_std", torch.tensor(0.0))
+        self.register_buffer("target_disagreement_ratio", torch.tensor(0.0))
+    def init_from_procrustes(self, procrustes_results, expert_names,
+                              consensus_embeddings=None, consensus_stats=None):
+        device = self.anchors.device
+        for i, name in enumerate(expert_names[:self.n_experts]):
+            info = procrustes_results[name]
+            self.expert_rotations[i].data = info["rotation"].float().to(device)
+            if "source_whitener" in info:
+                self.expert_whiteners[i].data = info["source_whitener"].float().to(device)
+            if "source_mean" in info:
+                self.expert_means[i].data = info["source_mean"].float().to(device)
+            print(f"    Expert {i} ({name}): loaded, cos_after={info['cos_after']:.4f}")
+        if consensus_embeddings is not None:
+            n = min(self.n_anchors, consensus_embeddings.shape[0])
+            indices = torch.linspace(0, consensus_embeddings.shape[0] - 1, n).long()
+            self.anchors.data[:n] = F.normalize(
+                consensus_embeddings[indices].float(), dim=-1).to(device)
+            print(f"    Anchors: {n} from consensus")
+        if consensus_stats is not None:
+            self.target_cv.fill_(consensus_stats["cv"])
+            self.target_mean_cos.fill_(consensus_stats["mean_cos"])
+            if "spectral" in consensus_stats:
+                s = torch.tensor(consensus_stats["spectral"][:50], dtype=torch.float32)
+                self.target_spectral[:len(s)] = s.to(device)
+            print(f"    Targets: CV={consensus_stats['cv']:.4f}")
+    def forward(self, embedding):
+        B = embedding.shape[0]
+        emb = embedding.float()
+        # Full whitened Procrustes: center → whiten → normalize → rotate
+        expert_consistency = []
+        expert_recon = []
+        expert_projected = []
+        for i in range(self.n_experts):
+            R = self.expert_rotations[i]
+            W = self.expert_whiteners[i]
+            mu = self.expert_means[i]
+            centered = emb - mu
+            whitened = centered @ W
+            whitened_n = F.normalize(whitened, dim=-1)
+            in_expert = whitened_n @ R.T
+            back = in_expert @ R
+            cos = F.cosine_similarity(whitened_n, back, dim=-1)
+            recon = (whitened_n - back).pow(2).mean(dim=-1)
+            expert_consistency.append(cos)
+            expert_recon.append(recon)
+            expert_projected.append(in_expert)
+        expert_cos = torch.stack(expert_consistency, dim=-1)
+        expert_mse = torch.stack(expert_recon, dim=-1)
+        # Cross-expert differentiation (10 pairs for 5 experts)
+        cross_cos = []
+        for i in range(self.n_experts):
+            for j in range(i + 1, self.n_experts):
+                cc = F.cosine_similarity(expert_projected[i], expert_projected[j], dim=-1)
+                cross_cos.append(cc)
+        cross_features = torch.stack(cross_cos, dim=-1)
+        # Per-sample disagreement
+        per_sample_agreement = expert_cos.mean(dim=-1)
+        per_sample_disagreement = expert_cos.std(dim=-1)
+        disagreement_ratio = per_sample_disagreement / (per_sample_agreement + 1e-8)
+        # Expert norms
+        expert_norms = []
+        for i in range(self.n_experts):
+            W = self.expert_whiteners[i]; mu = self.expert_means[i]
+            whitened = (emb - mu) @ W
+            expert_norms.append(whitened.norm(dim=-1))
+        norm_ratio = torch.stack(expert_norms, dim=-1)
+        norm_ratio = norm_ratio / (norm_ratio.mean(dim=-1, keepdim=True) + 1e-8)
+        # Anchor distances
+        anchors_n = F.normalize(self.anchors, dim=-1)
+        anchor_cos = emb @ anchors_n.T
+        # Geometric context
+        geo_input = torch.cat([
+            expert_cos, expert_mse, cross_features,
+            disagreement_ratio.unsqueeze(-1), norm_ratio, anchor_cos
+        ], dim=-1)
+        geo_context = self.geo_proj(geo_input)
+        enriched = torch.cat([embedding, geo_context], dim=-1)
+        # Losses + diagnostics
+        aux = {}
+        expert_mean = expert_cos.mean(dim=-1, keepdim=True)
+        aux["expert_agreement"] = (expert_cos - expert_mean).pow(2).mean()
+        ortho_loss = 0.0
+        for i in range(self.n_experts):
+            R = self.expert_rotations[i]
+            ortho_loss += (R @ R.T - torch.eye(self.d_embed, device=R.device)).pow(2).mean()
+        aux["rotation_ortho"] = ortho_loss / self.n_experts
+        anchor_sim = anchors_n @ anchors_n.T
+        anchor_sim.fill_diagonal_(0)
+        aux["anchor_spread"] = anchor_sim.pow(2).mean()
+        anchor_probs = F.softmax(anchor_cos * 10, dim=-1)
+        aux["anchor_entropy"] = -(anchor_probs * (anchor_probs + 1e-12).log()).sum(-1).mean()
+        aux["cross_expert_var"] = cross_features.var(dim=0).mean()
+        batch_cross_mean = cross_features.mean()
+        batch_cross_std = cross_features.std()
+        batch_disagree_ratio = disagreement_ratio.mean()
+        aux["disagree_preserve"] = (
+            (batch_cross_mean - self.target_cross_cos_mean).pow(2) +
+            (batch_cross_std - self.target_cross_cos_std).pow(2) +
+            (batch_disagree_ratio - self.target_disagreement_ratio).pow(2))
+        # CV measurements
+        for label, data in [("bank_cv", F.normalize(geo_context, dim=-1)),
+                            ("emb_cv", F.normalize(emb, dim=-1))]:
+            if B >= 10:
+                vols = []
+                for _ in range(32):
+                    idx = torch.randperm(B, device=emb.device)[:5]
+                    pts = data[idx].unsqueeze(0)
+                    diff = pts.unsqueeze(-2) - pts.unsqueeze(-3)
+                    d2 = (diff * diff).sum(-1)
+                    Bv, V, _ = d2.shape
+                    cm = torch.zeros(Bv, V+1, V+1, device=d2.device, dtype=torch.float32)
+                    cm[:, 0, 1:] = 1; cm[:, 1:, 0] = 1; cm[:, 1:, 1:] = d2
+                    s = (-1.0)**V; f = math.factorial(V-1)
+                    v2 = s / ((2.0**(V-1)) * f*f) * torch.linalg.det(cm)
+                    vols.append(torch.sqrt(F.relu(v2[0]) + 1e-12))
+                stacked = torch.stack(vols)
+                aux[label] = stacked.std() / (stacked.mean() + 1e-8)
+            else:
+                aux[label] = torch.tensor(0.0, device=emb.device)
+        # Diagnostics
+        aux["expert_cos_mean"] = expert_cos.mean().item()
+        aux["expert_cos_std"] = expert_cos.std().item()
+        aux["anchor_max_cos"] = anchor_cos.max(dim=-1).values.mean().item()
+        aux["cross_expert_cos"] = cross_features.mean().item()
+        aux["cross_expert_cos_std"] = cross_features.std().item()
+        aux["disagreement_ratio"] = disagreement_ratio.mean().item()
+        aux["norm_ratio_spread"] = norm_ratio.std(dim=-1).mean().item()
+        return enriched, aux
+    def bank_loss(self, aux):
+        return (
+            1.0 * aux["expert_agreement"] +
+            1.0 * aux["rotation_ortho"] +
+            0.5 * aux["anchor_spread"] +
+            0.1 * aux["anchor_entropy"] +
+            0.3 * aux["cross_expert_var"] +
+            0.3 * (aux["bank_cv"] - self.target_cv).abs() +
+            0.3 * (aux["emb_cv"] - self.target_cv).abs() +
+            0.5 * aux["disagree_preserve"])
+    @torch.no_grad()
+    def calibrate_disagreement(self, embeddings):
+        B = embeddings.shape[0]
+        emb = embeddings.float()
+        per_sample_expert_cos = []
+        expert_projected = []
+        for i in range(self.n_experts):
+            R = self.expert_rotations[i]; W = self.expert_whiteners[i]; mu = self.expert_means[i]
+            centered = emb - mu; whitened = centered @ W
+            whitened_n = F.normalize(whitened, dim=-1)
+            in_expert = whitened_n @ R.T
+            back = in_expert @ R
+            per_sample_expert_cos.append(F.cosine_similarity(whitened_n, back, dim=-1))
+            expert_projected.append(in_expert)
+        expert_cos = torch.stack(per_sample_expert_cos, dim=-1)
+        per_sample_ratio = expert_cos.std(dim=-1) / (expert_cos.mean(dim=-1) + 1e-8)
+        cross_vals = []
+        for i in range(self.n_experts):
+            for j in range(i + 1, self.n_experts):
+                cross_vals.append(F.cosine_similarity(expert_projected[i], expert_projected[j], dim=-1))
+        cross_all = torch.stack(cross_vals, dim=-1)
+        self.target_cross_cos_mean.fill_(cross_all.mean().item())
+        self.target_cross_cos_std.fill_(cross_all.std().item())
+        self.target_disagreement_ratio.fill_(per_sample_ratio.median().item())
+        print(f"    Calibrated (n={B}):")
+        print(f"      cross_cos: {self.target_cross_cos_mean.item():.4f} ± {self.target_cross_cos_std.item():.4f}")
+        print(f"      disagree_ratio: median={self.target_disagreement_ratio.item():.6f}")
+        print(f"      expert_cos: {expert_cos.mean().item():.4f} ± {expert_cos.std().item():.4f}")
+        print(f"      cross pairs: {len(cross_vals)}")
+# ══════════════════════════════════════════════════════════════════
+# ALIGNMENT UTILITIES
+# ══════════════════════════════════════════════════════════════════
+def symmetric_inv_sqrt(cov, eps=1e-6):
+    evals, evecs = torch.linalg.eigh(cov)
+    evals = torch.clamp(evals, min=eps)
+    return evecs @ torch.diag(evals.rsqrt()) @ evecs.T
+def procrustes_align(source, target, n_align=10000):
+    N = min(n_align, source.shape[0], target.shape[0])
+    S = source[:N].float(); T = target[:N].float()
+    s_mean = S.mean(0, keepdim=True); t_mean = T.mean(0, keepdim=True)
+    Sc = S - s_mean; Tc = T - t_mean; N_s = Sc.shape[0]
+    cos_before = F.cosine_similarity(Sc, Tc, dim=-1).mean().item()
+    s_cov = (Sc.T @ Sc) / max(N_s - 1, 1)
+    t_cov = (Tc.T @ Tc) / max(N_s - 1, 1)
+    s_whiten = symmetric_inv_sqrt(s_cov)
+    t_whiten = symmetric_inv_sqrt(t_cov)
+    Sc_w = F.normalize(Sc @ s_whiten, dim=-1)
+    Tc_w = F.normalize(Tc @ t_whiten, dim=-1)
+    U, _, Vt = torch.linalg.svd(Tc_w.T @ Sc_w, full_matrices=False)
+    R = U @ Vt
+    cos_after = F.cosine_similarity(Sc_w @ R.T, Tc_w, dim=-1).mean().item()
+    return {
+        "rotation": R, "source_mean": s_mean.squeeze(0),
+        "source_whitener": s_whiten,
+        "target_unwhitener": torch.linalg.pinv(t_whiten),
+        "cos_before": cos_before, "cos_after": cos_after,
+    }
+def apply_align(emb, a):
+    x = emb.float() - a["source_mean"]
+    x = x @ a["source_whitener"]; x = x @ a["rotation"].T
+    x = x @ a["target_unwhitener"]; return x
+def cv_metric(emb, n=200):
+    B = emb.shape[0]
+    if B < 5: return 0.0
+    vols = []
+    for _ in range(n):
+        idx = torch.randperm(B, device=emb.device)[:5]
+        pts = emb[idx].unsqueeze(0).float()
+        diff = pts.unsqueeze(-2) - pts.unsqueeze(-3)
+        d2 = (diff * diff).sum(-1)
+        Bv, V, _ = d2.shape
+        cm = torch.zeros(Bv, V+1, V+1, device=d2.device, dtype=torch.float32)
+        cm[:, 0, 1:] = 1; cm[:, 1:, 0] = 1; cm[:, 1:, 1:] = d2
+        s = (-1.0)**V; f = math.factorial(V-1)
+        v2 = s / ((2.0**(V-1)) * f*f) * torch.linalg.det(cm)
+        v = torch.sqrt(F.relu(v2[0]) + 1e-12).item()
+        if v > 0: vols.append(v)
+    if len(vols) < 10: return 0.0
+    a = np.array(vols)
+    return float(a.std() / (a.mean() + 1e-8))
+# ══════════════════════════════════════════════════════════════════
+# MAIN
+# ══════════════════════════════════════════════════════════════════
+def run():
+    torch.manual_seed(42)
+    np.random.seed(42)
+    names = [s for _, s, _ in EXPERTS]
+    # ── Phase 0: Extract or Load Embeddings ──
+    print(f"\n{'='*65}")
+    print("PHASE 0: EXPERT EMBEDDINGS")
+    print(f"{'='*65}")
+    os.makedirs(CACHE_DIR, exist_ok=True)
+    caps_path = os.path.join(CACHE_DIR, "captions.json")
+    # Check what's cached
+    all_cached = all(
+        os.path.exists(os.path.join(CACHE_DIR, f"{s}.pt"))
+        for _, s, _ in EXPERTS)
+    if all_cached:
+        print("  Loading cached embeddings...")
+        embeds = {}
+        for _, short, _ in EXPERTS:
+            embeds[short] = torch.load(
+                os.path.join(CACHE_DIR, f"{short}.pt"), weights_only=True)
+            print(f"    {short}: {embeds[short].shape}")
+        if os.path.exists(caps_path):
+            with open(caps_path) as f:
+                captions = json.load(f)
+            print(f"  Captions: {len(captions):,}")
+        else:
+            print("  captions.json missing, reloading...")
+            from datasets import load_dataset
+            ds = load_dataset("CaptionEmporium/conceptual-captions-cc12m-llavanext",
+                              split="train", streaming=True)
+            captions = []
+            for row in ds:
+                cap = row.get("caption_llava", "")
+                if isinstance(cap, str) and len(cap) > 50:
+                    captions.append(cap)
+                if len(captions) >= N_EXTRACT:
+                    break
+            with open(caps_path, "w") as f:
+                json.dump(captions, f)
+    else:
+        # Extract from scratch
+        from datasets import load_dataset
+        from transformers import AutoModel, AutoTokenizer
+        print(f"  Loading {N_EXTRACT:,} captions...")
+        ds = load_dataset("CaptionEmporium/conceptual-captions-cc12m-llavanext",
+                          split="train", streaming=True)
+        captions = []
+        for row in ds:
+            cap = row.get("caption_llava", "")
+            if isinstance(cap, str) and len(cap) > 50:
+                captions.append(cap)
+            if len(captions) >= N_EXTRACT:
+                break
+        print(f"  Got {len(captions):,} captions")
+        with open(caps_path, "w") as f:
+            json.dump(captions, f)
+        embeds = {}
+        for model_name, short, max_len in EXPERTS:
+            out_path = os.path.join(CACHE_DIR, f"{short}.pt")
+            if os.path.exists(out_path):
+                embeds[short] = torch.load(out_path, weights_only=True)
+                print(f"  {short}: cached {embeds[short].shape}")
+                continue
+            print(f"\n  Extracting: {short} ({model_name}, max_len={max_len})...")
+            ext_model = AutoModel.from_pretrained(model_name).to(DEVICE).eval()
+            ext_tok = AutoTokenizer.from_pretrained(model_name)
+            n_p = sum(p.numel() for p in ext_model.parameters())
+            print(f"    {n_p:,} params")
+            all_emb = []
+            with torch.no_grad():
+                for i in tqdm(range(0, len(captions), 128), desc=f"    {short}"):
+                    batch = captions[i:i+128]
+                    inputs = ext_tok(batch, max_length=max_len, padding=True,
+                                    truncation=True, return_tensors="pt").to(DEVICE)
+                    out = ext_model(**inputs)
+                    m = inputs.attention_mask.unsqueeze(-1).float()
+                    pooled = (out.last_hidden_state * m).sum(1) / m.sum(1).clamp(min=1)
+                    all_emb.append(pooled.cpu())
+            emb = torch.cat(all_emb)
+            if emb.shape[1] != 768:
+                emb = emb[:, :768] if emb.shape[1] > 768 else F.pad(emb, (0, 768 - emb.shape[1]))
+            embeds[short] = emb
+            torch.save(emb, out_path)
+            print(f"    Saved: {emb.shape}")
+            del ext_model, ext_tok; gc.collect(); torch.cuda.empty_cache()
+    N = min(len(captions), min(e.shape[0] for e in embeds.values()))
+    print(f"  Using {N:,} samples")
+    # ── Phase 1: GPA Alignment ──
+    print(f"\n{'='*65}")
+    print("PHASE 1: GENERALIZED PROCRUSTES ALIGNMENT")
+    print(f"{'='*65}")
+    current = {name: embeds[name][:N].float() for name in names}
+    for gpa_iter in range(15):
+        mean_shape = sum(current[n] for n in names) / len(names)
+        total_delta = 0.0
+        new_current = {}
+        for name in names:
+            info = procrustes_align(current[name], mean_shape)
+            new_current[name] = apply_align(current[name], info)
+            total_delta += (new_current[name] - current[name]).pow(2).mean().item()
+        current = new_current
+        if gpa_iter == 0 or (gpa_iter + 1) % 3 == 0:
+            print(f"  GPA iter {gpa_iter+1}: delta={total_delta:.8f}")
+        if total_delta < 1e-8:
+            print(f"  Converged at iteration {gpa_iter+1}")
+            break
+    # Final alignment to converged mean
+    mean_shape = sum(current[n] for n in names) / len(names)
+    procrustes_results = {}
+    aligned = {}
+    for name in names:
+        info = procrustes_align(embeds[name][:N], mean_shape)
+        procrustes_results[name] = info
+        aligned[name] = apply_align(embeds[name][:N], info)
+        cos_to_mean = F.cosine_similarity(
+            aligned[name][:5000], mean_shape[:5000], dim=-1).mean().item()
+        print(f"  {name:10s}: cos_after={info['cos_after']:.4f}  cos_to_mean={cos_to_mean:.4f}")
+    consensus = F.normalize(sum(aligned[n] for n in names) / len(names), dim=-1)
+    expert_cos_to_consensus = []
+    for name in names:
+        c = F.cosine_similarity(consensus[:5000], aligned[name][:5000], dim=-1).mean().item()
+        expert_cos_to_consensus.append(c)
+        print(f"  cos(consensus, {name}): {c:.4f}")
+    equidist = max(expert_cos_to_consensus) - min(expert_cos_to_consensus)
+    print(f"  Equidistance range: {equidist:.4f}")
+    # Measure consensus statistics
+    print(f"\n  Measuring consensus statistics...")
+    c_sub = consensus[:5000].to(DEVICE)
+    consensus_cv = cv_metric(c_sub)
+    sim = c_sub @ c_sub.T
+    mask = ~torch.eye(5000, dtype=torch.bool, device=DEVICE)
+    mean_cos = sim[mask].mean().item()
+    centered = c_sub.float() - c_sub.float().mean(0, keepdim=True)
+    S = torch.linalg.svdvals(centered)
+    spectral = (S / (S.sum() + 1e-8)).cpu().tolist()[:50]
+    eff_dim = float((S.sum() ** 2) / (S.pow(2).sum() + 1e-12))
+    consensus_stats = {"cv": consensus_cv, "mean_cos": mean_cos,
+                        "spectral": spectral, "eff_dim": eff_dim}
+    print(f"    CV:       {consensus_cv:.4f}")
+    print(f"    Mean cos: {mean_cos:.4f}")
+    print(f"    Eff dim:  {eff_dim:.1f}")
+    del c_sub, sim; torch.cuda.empty_cache()
+    del embeds, aligned, current, mean_shape
+    gc.collect(); torch.cuda.empty_cache()
+    # ── Phase 2: Load + Encode Frozen Student ──
+    print(f"\n{'='*65}")
+    print("PHASE 2: ENCODE FROZEN STUDENT")
+    print(f"{'='*65}")
+    from transformers import AutoModel, AutoTokenizer
+    model = AutoModel.from_pretrained(REPO_ID, trust_remote_code=True).to(DEVICE).eval()
+    tokenizer = AutoTokenizer.from_pretrained(REPO_ID, trust_remote_code=True)
+    for p in model.parameters():
+        p.requires_grad = False
+    print(f"  Student: {sum(p.numel() for p in model.parameters()):,} params (frozen)")
+    # captions already loaded from Phase 0
+    captions = captions[:N]
+    print(f"  Encoding {N:,} captions...")
+    all_student_embs = []
+    with torch.no_grad():
+        for i in tqdm(range(0, N, 256), desc="  Encoding"):
+            j = min(i + 256, N)
+            inputs = tokenizer(captions[i:j], max_length=512, padding="max_length",
+                              truncation=True, return_tensors="pt").to(DEVICE)
+            out = model(**inputs)
+            all_student_embs.append(out.last_hidden_state.cpu())
+    student_embs = torch.cat(all_student_embs).to(DEVICE)
+    print(f"  Student embeddings: {student_embs.shape}")
+    del model
+    gc.collect(); torch.cuda.empty_cache()
+    # Split
+    n_train = N - N_VAL
+    train_embs = student_embs[:n_train]
+    val_embs = student_embs[n_train:n_train + N_VAL]
+    print(f"  Train: {n_train:,}  Val: {N_VAL:,}")
+    # ── Phase 3: Train Alignment Bank ──
+    print(f"\n{'='*65}")
+    print("PHASE 3: TRAIN ALIGNMENT BANK")
+    print(f"{'='*65}")
+    bank = AlignmentBank(
+        d_embed=768, n_experts=len(EXPERTS),
+        n_anchors=N_ANCHORS, d_bank=D_BANK
+    ).to(DEVICE)
+    bank.init_from_procrustes(procrustes_results, names,
+                               consensus[:n_train], consensus_stats)
+    bank.calibrate_disagreement(train_embs[:5000])
+    bank_params = sum(p.numel() for p in bank.parameters())
+    print(f"  Bank: {bank_params:,} params")
+    bank_opt = torch.optim.AdamW(bank.parameters(), lr=BANK_LR, weight_decay=0.01)
+    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
+        bank_opt, T_max=(n_train // BANK_BATCH) * BANK_EPOCHS, eta_min=1e-5)
+    best_v_loss = float("inf")
+    for epoch in range(BANK_EPOCHS):
+        bank.train()
+        perm = torch.randperm(n_train, device=DEVICE)
+        total_loss = 0
+        stats = {"expert_agreement": 0, "rotation_ortho": 0, "anchor_spread": 0,
+                 "bank_cv": 0, "emb_cv": 0, "cross_expert_var": 0, "disagree_preserve": 0}
+        n = 0
+        t0 = time.time()
+        for i in range(0, n_train, BANK_BATCH):
+            idx = perm[i:i+BANK_BATCH]
+            if len(idx) < 16: continue
+            _, aux = bank(train_embs[idx])
+            loss = bank.bank_loss(aux)
+            loss.backward()
+            torch.nn.utils.clip_grad_norm_(bank.parameters(), 1.0)
+            bank_opt.step(); bank_opt.zero_grad(set_to_none=True)
+            scheduler.step()
+            total_loss += loss.item()
+            for k in stats:
+                if k in aux:
+                    v = aux[k]
+                    stats[k] += v.item() if torch.is_tensor(v) else v
+            n += 1
+        elapsed = time.time() - t0; d = max(n, 1)
+        bank.eval()
+        with torch.no_grad():
+            _, v_aux = bank(val_embs)
+            v_loss = bank.bank_loss(v_aux).item()
+        if v_loss < best_v_loss:
+            best_v_loss = v_loss
+            torch.save(bank.state_dict(), "alignment_bank_best.pt")
+        if (epoch + 1) % 5 == 0 or epoch == 0:
+            print(f"\n  E{epoch+1:2d}: {elapsed:.0f}s  loss={total_loss/d:.4f}  v_loss={v_loss:.4f}")
+            print(f"    Geometry:  b_cv={stats['bank_cv']/d:.4f}  e_cv={stats['emb_cv']/d:.4f}  "
+                  f"spread={stats['anchor_spread']/d:.5f}  a_max={v_aux['anchor_max_cos']:.3f}")
+            print(f"    Experts:   cos={v_aux['expert_cos_mean']:.3f}±{v_aux['expert_cos_std']:.3f}  "
+                  f"agr={stats['expert_agreement']/d:.6f}  ortho={stats['rotation_ortho']/d:.6f}")
+            print(f"    Disagree:  x_cos={v_aux['cross_expert_cos']:.4f}±{v_aux['cross_expert_cos_std']:.4f}  "
+                  f"ratio={v_aux['disagreement_ratio']:.6f}  "
+                  f"preserve={stats['disagree_preserve']/d:.6f}")
+        else:
+            print(f"  E{epoch+1:2d}: {elapsed:.0f}s  loss={total_loss/d:.4f}  v_loss={v_loss:.4f}  "
+                  f"exp={v_aux['expert_cos_mean']:.3f}  "
+                  f"b_cv={stats['bank_cv']/d:.4f}  "
+                  f"x_cos={v_aux['cross_expert_cos']:.4f}")
+    torch.save(bank.state_dict(), "alignment_bank_final.pt")
+    # ── Phase 4: Geometric Verification ──
+    print(f"\n{'='*65}")
+    print("PHASE 4: GEOMETRIC VERIFICATION")
+    print(f"{'='*65}")
+    bank.load_state_dict(torch.load("alignment_bank_best.pt", weights_only=True))
+    bank.eval()
+    with torch.no_grad():
+        enriched_val, v_aux = bank(val_embs)
+        original_768 = enriched_val[:, :768]
+        geo_context = enriched_val[:, 768:]
+        passthrough = F.cosine_similarity(
+            original_768[:100], val_embs[:100], dim=-1).mean().item()
+        geo_cv = cv_metric(F.normalize(geo_context[:2000], dim=-1))
+        S = torch.linalg.svdvals(
+            geo_context[:2000].float() - geo_context[:2000].float().mean(0))
+        geo_eff_dim = float((S.sum() ** 2) / (S.pow(2).sum() + 1e-12))
+        emb_cv = cv_metric(val_embs[:2000])
+    print(f"  Passthrough:     {passthrough:.6f}")
+    print(f"  Emb CV:          {emb_cv:.4f} (consensus: {consensus_stats['cv']:.4f})")
+    print(f"  Geo context CV:  {geo_cv:.4f}")
+    print(f"  Geo eff_dim:     {geo_eff_dim:.1f} / {D_BANK}")
+    print(f"  Expert cos:      {v_aux['expert_cos_mean']:.3f} ± {v_aux['expert_cos_std']:.3f}")
+    print(f"  Anchor max cos:  {v_aux['anchor_max_cos']:.3f}")
+    print(f"  Cross-expert:    {v_aux['cross_expert_cos']:.4f} ± {v_aux['cross_expert_cos_std']:.4f}")
+    print(f"  Disagree ratio:  {v_aux['disagreement_ratio']:.6f}")
+    # ── Phase 5: Classifier Test ──
+    print(f"\n{'='*65}")
+    print("PHASE 5: CLASSIFIER STABILITY TEST")
+    print(f"{'='*65}")
+    with torch.no_grad():
+        embs = val_embs[:2000]
+        sim = embs @ embs.T; sim.fill_diagonal_(-1)
+        n_pairs = 5000
+        idx_a = torch.randint(0, 2000, (n_pairs,))
+        idx_b = torch.randint(0, 2000, (n_pairs,))
+        pair_cos = sim[idx_a, idx_b]
+        sorted_cos, _ = pair_cos.sort()
+        t1 = sorted_cos[n_pairs // 3].item()
+        t2 = sorted_cos[2 * n_pairs // 3].item()
+        labels = torch.zeros(n_pairs, dtype=torch.long, device=DEVICE)
+        labels[pair_cos > t2] = 0
+        labels[(pair_cos <= t2) & (pair_cos > t1)] = 1
+        labels[pair_cos <= t1] = 2
+        enriched_a, _ = bank(embs[idx_a])
+        enriched_b, _ = bank(embs[idx_b])
+        a_emb = embs[idx_a]; b_emb = embs[idx_b]
+        a_geo = enriched_a[:, 768:]; b_geo = enriched_b[:, 768:]
+        geo_explicit = torch.cat([
+            F.cosine_similarity(a_emb, b_emb, dim=-1).unsqueeze(-1),
+            (a_emb - b_emb).pow(2).mean(dim=-1).unsqueeze(-1),
+            F.cosine_similarity(a_geo, b_geo, dim=-1).unsqueeze(-1),
+            (a_geo - b_geo).pow(2).mean(dim=-1).unsqueeze(-1),
+            torch.abs(a_emb - b_emb).mean(dim=-1).unsqueeze(-1),
+            (a_emb * b_emb).sum(dim=-1).unsqueeze(-1),
+        ], dim=-1)
+    modes = {
+        "raw_768": torch.cat([a_emb, b_emb], dim=-1),
+        "raw+diff": torch.cat([a_emb, b_emb, torch.abs(a_emb - b_emb), a_emb * b_emb], dim=-1),
+        "bank_enriched": torch.cat([enriched_a, enriched_b], dim=-1),
+        "bank+diff": torch.cat([enriched_a, enriched_b,
+                                torch.abs(enriched_a - enriched_b),
+                                enriched_a * enriched_b], dim=-1),
+        "geo_explicit": geo_explicit,
+    }
+    print(f"\n  {'Mode':<20} {'Dim':>6} {'Train':>7} {'Val':>7} {'Gap':>7}")
+    print(f"  {'-'*50}")
+    n_clf_train = 4000
+    for mode_name, features in modes.items():
+        feat_dim = features.shape[1]
+        clf = nn.Sequential(
+            nn.Linear(feat_dim, min(256, feat_dim)), nn.GELU(),
+            nn.LayerNorm(min(256, feat_dim)), nn.Dropout(0.1),
+            nn.Linear(min(256, feat_dim), 3)).to(DEVICE)
+        clf_opt = torch.optim.Adam(clf.parameters(), lr=1e-3)
+        train_f = features[:n_clf_train].detach()
+        train_l = labels[:n_clf_train]
+        val_f = features[n_clf_train:].detach()
+        val_l = labels[n_clf_train:]
+        for e in range(30):
+            clf.train()
+            loss = F.cross_entropy(clf(train_f), train_l)
+            loss.backward(); clf_opt.step(); clf_opt.zero_grad()
+        clf.eval()
+        with torch.no_grad():
+            v_acc = (clf(val_f).argmax(-1) == val_l).float().mean().item()
+            t_acc = (clf(train_f).argmax(-1) == train_l).float().mean().item()
+        print(f"  {mode_name:<20} {feat_dim:>6} {t_acc:>7.3f} {v_acc:>7.3f} {t_acc-v_acc:>7.3f}")
+    print(f"\n{'='*65}")
+    print("SUMMARY")
+    print(f"{'='*65}")
+    print(f"  Consensus CV:      {consensus_stats['cv']:.4f}")
+    print(f"  Consensus eff_dim: {consensus_stats['eff_dim']:.1f}")
+    print(f"  Equidistance:      {equidist:.4f}")
+    print(f"  Bank params:       {bank_params:,}")
+    print(f"  Bank geo eff_dim:  {geo_eff_dim:.1f}")
+    print(f"  Bank geo CV:       {geo_cv:.4f}")
+    print(f"  Best val loss:     {best_v_loss:.4f}")
+    print(f"\n  Files: alignment_bank_best.pt, alignment_bank_final.pt")
+    print(f"\n{'='*65}")
+    print("DONE")
+    print(f"{'='*65}")
+if __name__ == "__main__":
+    run()