AbstractPhil commited on
Commit
7cc3d76
·
verified ·
1 Parent(s): 04ba8f5

Create prototype_55_geodesic_bank_multitest.py

Browse files
prototype_55_geodesic_bank_multitest.py ADDED
@@ -0,0 +1,924 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ============================================================================
2
+ # RAPID PROTOTYPE v2: Differentiation-Centered Alignment Bank
3
+ #
4
+ # The bank aligns to the DIFFERENTIATION between experts, not to any
5
+ # arbitrary target. The consensus CV, spectral profile, and pairwise
6
+ # statistics measured during alignment become the exact targets.
7
+ #
8
+ # The bank embodies the centerpoint of expert disagreement.
9
+ # ============================================================================
10
+
11
+ import gc
12
+ import math
13
+ import os
14
+ import time
15
+ import json
16
+
17
+ import numpy as np
18
+ import torch
19
+ import torch.nn as nn
20
+ import torch.nn.functional as F
21
+ from tqdm import tqdm
22
+
23
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
24
+
25
+ EXPERTS = [
26
+ ("google-bert/bert-base-uncased", "bert", 512),
27
+ ("answerdotai/ModernBERT-base", "modern", 512),
28
+ ]
29
+
30
+ print("=" * 65)
31
+ print("RAPID PROTOTYPE v2: Differentiation-Centered Bank")
32
+ print("=" * 65)
33
+ print(f" Device: {DEVICE}")
34
+
35
+
36
+ # ══════════════════════════════════════════════════════════════════
37
+ # STUDENT MODEL
38
+ # ══════════════════════════════════════════════════════════════════
39
+
40
class MiniStudent(nn.Module):
    """Small Transformer encoder that distills expert consensus embeddings.

    Token + learned positional embeddings feed a pre-norm Transformer
    encoder; masked mean pooling and a small MLP head produce a unit-norm
    vector of size ``output_dim`` (the experts' embedding width).
    """

    def __init__(self, vocab_size=30522, max_len=512, d_model=256,
                 n_heads=4, n_layers=4, d_ff=1024, output_dim=768,
                 dropout=0.1, pad_token_id=0):
        super().__init__()
        self.pad_token_id = pad_token_id
        # Embedding stack: tokens (pad row zeroed) + absolute positions.
        self.token_emb = nn.Embedding(vocab_size, d_model, padding_idx=pad_token_id)
        self.pos_emb = nn.Embedding(max_len, d_model)
        self.emb_norm = nn.LayerNorm(d_model)
        self.emb_drop = nn.Dropout(dropout)
        layer = nn.TransformerEncoderLayer(
            d_model=d_model, nhead=n_heads, dim_feedforward=d_ff,
            dropout=dropout, activation="gelu", batch_first=True,
            norm_first=True)
        self.encoder = nn.TransformerEncoder(
            layer, num_layers=n_layers, enable_nested_tensor=False)
        # Projection head up to the experts' embedding dimensionality.
        self.output_proj = nn.Sequential(
            nn.Linear(d_model, d_model), nn.GELU(),
            nn.LayerNorm(d_model), nn.Linear(d_model, output_dim))

    def forward(self, input_ids, attention_mask=None):
        """Encode ``input_ids`` -> L2-normalized (B, output_dim) embeddings.

        When ``attention_mask`` is omitted, padding is inferred from
        ``pad_token_id`` positions instead.
        """
        seq_len = input_ids.shape[1]
        pos_ids = torch.arange(seq_len, device=input_ids.device).unsqueeze(0)
        hidden = self.token_emb(input_ids) + self.pos_emb(pos_ids)
        hidden = self.emb_drop(self.emb_norm(hidden))
        # Key-padding mask is True at positions attention must ignore.
        if attention_mask is not None:
            pad_mask = ~attention_mask.bool()
        else:
            pad_mask = input_ids == self.pad_token_id
        hidden = self.encoder(hidden, src_key_padding_mask=pad_mask)
        # Mean-pool over real (non-pad) tokens only.
        if attention_mask is not None:
            pool_w = attention_mask.unsqueeze(-1).float()
        else:
            pool_w = (~pad_mask).unsqueeze(-1).float()
        pooled = (hidden * pool_w).sum(1) / pool_w.sum(1).clamp(min=1)
        return F.normalize(self.output_proj(pooled), dim=-1)
+
71
+
72
+ # ══════════════════════════════════════════════════════════════════
73
+ # ALIGNMENT BANK
74
+ # ══════════════════════════════════════════════════════════════════
75
+
76
class AlignmentBank(nn.Module):
    """
    Differentiation-centered geometric interface.

    Aligns to the CENTERPOINT between experts — the consensus itself.
    Stores per-expert rotation matrices (the differentiation structure)
    and learned anchor landmarks (the consensus manifold topology).

    The bank doesn't invent geometry. It mirrors the measured consensus.
    Every loss term pulls toward measured consensus statistics.

    Args:
        d_embed: dimensionality of the consensus embedding space.
        n_experts: number of teacher experts whose Procrustes artifacts
            (rotation / whitener / mean) are stored.
        n_anchors: number of learned landmark directions on the hypersphere.
        d_bank: width of the geometric context vector appended to each
            input embedding by ``forward``.
    """
    def __init__(self, d_embed=768, n_experts=2, n_anchors=512, d_bank=128):
        super().__init__()
        self.d_embed = d_embed
        self.n_experts = n_experts
        self.n_anchors = n_anchors
        self.d_bank = d_bank

        # Per-expert rotation matrices (differentiation structure).
        # Initialized to identity; overwritten by init_from_procrustes.
        self.expert_rotations = nn.ParameterList([
            nn.Parameter(torch.eye(d_embed)) for _ in range(n_experts)
        ])

        # Per-expert whiteners (captures variance structure per expert)
        self.expert_whiteners = nn.ParameterList([
            nn.Parameter(torch.eye(d_embed)) for _ in range(n_experts)
        ])

        # Per-expert means (centering offset per expert)
        self.expert_means = nn.ParameterList([
            nn.Parameter(torch.zeros(d_embed)) for _ in range(n_experts)
        ])

        # Anchor bank: consensus landmarks on the hypersphere
        self.anchors = nn.Parameter(
            F.normalize(torch.randn(n_anchors, d_embed), dim=-1))

        # Project: expert_cos (n) + expert_mse (n) + cross (n*(n-1)/2) +
        # disagreement_ratio (1) + norm_ratio (n) + anchor_cos (n_anchors)
        n_cross = n_experts * (n_experts - 1) // 2
        geo_dim = n_experts + n_experts + n_cross + 1 + n_experts + n_anchors
        self.geo_proj = nn.Sequential(
            nn.Linear(geo_dim, d_bank * 2),
            nn.GELU(),
            nn.LayerNorm(d_bank * 2),
            nn.Linear(d_bank * 2, d_bank),
            nn.LayerNorm(d_bank),
        )

        # Consensus statistics (set during init, used as exact targets).
        # Buffers (not Parameters): saved with state_dict but never trained.
        self.register_buffer("target_cv", torch.tensor(0.12))
        self.register_buffer("target_mean_cos", torch.tensor(0.0))
        self.register_buffer("target_spectral", torch.zeros(50))
        # Disagreement structure (measured once, preserved forever)
        self.register_buffer("target_cross_cos_mean", torch.tensor(0.0))
        self.register_buffer("target_cross_cos_std", torch.tensor(0.0))
        self.register_buffer("target_disagreement_ratio", torch.tensor(0.0))

    def init_from_procrustes(self, procrustes_results, expert_names,
                             consensus_embeddings=None,
                             consensus_stats=None):
        """Initialize from consensus training artifacts.

        Args:
            procrustes_results: dict name -> procrustes_align() result with
                "rotation" and optionally "source_whitener"/"source_mean".
            expert_names: ordering of experts; only the first n_experts used.
            consensus_embeddings: optional (N, d_embed) tensor; evenly-spaced
                rows seed the anchor bank.
            consensus_stats: optional dict from measure_consensus_stats()
                supplying "cv", "mean_cos" and optionally "spectral".
        """
        device = self.anchors.device
        for i, name in enumerate(expert_names[:self.n_experts]):
            info = procrustes_results[name]
            # .data assignment: overwrite in place without touching autograd.
            self.expert_rotations[i].data = info["rotation"].float().to(device)
            if "source_whitener" in info:
                self.expert_whiteners[i].data = info["source_whitener"].float().to(device)
            if "source_mean" in info:
                self.expert_means[i].data = info["source_mean"].float().to(device)
            print(f" Expert {i} ({name}): rotation + whitener + mean loaded, "
                  f"cos_after={info['cos_after']:.4f}")

        if consensus_embeddings is not None:
            # Evenly-spaced subsample of consensus rows seeds the anchors.
            n = min(self.n_anchors, consensus_embeddings.shape[0])
            indices = torch.linspace(0, consensus_embeddings.shape[0] - 1, n).long()
            self.anchors.data[:n] = F.normalize(
                consensus_embeddings[indices].float(), dim=-1).to(device)
            print(f" Anchors: {n} initialized from consensus embeddings")

        if consensus_stats is not None:
            self.target_cv.fill_(consensus_stats["cv"])
            self.target_mean_cos.fill_(consensus_stats["mean_cos"])
            if "spectral" in consensus_stats:
                s = torch.tensor(consensus_stats["spectral"][:50], dtype=torch.float32)
                self.target_spectral[:len(s)] = s.to(device)
            print(f" Targets: CV={consensus_stats['cv']:.4f}, "
                  f"mean_cos={consensus_stats['mean_cos']:.4f}")

    def forward(self, embedding):
        """Enrich embeddings with geometric context.

        Args:
            embedding: (B, d_embed) tensor; assumed roughly unit-norm
                student outputs — TODO confirm against caller.

        Returns:
            (enriched, aux): enriched is (B, d_embed + d_bank) — the input
            concatenated with the geo-context vector; aux holds loss terms
            (tensors) and diagnostics (Python floats via .item()).
        """
        B = embedding.shape[0]
        emb = embedding.float()

        # ── Per-expert projections (full whitened Procrustes) ──
        # Chain: center → whiten → normalize → rotate
        # This is EXACTLY what was computed during alignment.
        # The rotation only makes geometric sense in whitened-normalized space.
        expert_consistency = []
        expert_recon = []
        expert_projected = []
        for i in range(self.n_experts):
            R = self.expert_rotations[i]
            W = self.expert_whiteners[i]
            mu = self.expert_means[i]

            # Forward: center → whiten → normalize → rotate
            centered = emb - mu
            whitened = centered @ W
            whitened_n = F.normalize(whitened, dim=-1)
            in_expert = whitened_n @ R.T  # now in expert's whitened-normalized space

            # Round-trip: rotate back (orthogonal, so R.T inverse = R)
            back = in_expert @ R

            # Consistency: round-trip should recover whitened_n exactly
            # (it only fails to if R drifts from orthogonality during training).
            cos = F.cosine_similarity(whitened_n, back, dim=-1)
            recon = (whitened_n - back).pow(2).mean(dim=-1)

            expert_consistency.append(cos)
            expert_recon.append(recon)
            expert_projected.append(in_expert)

        expert_cos = torch.stack(expert_consistency, dim=-1)  # (B, n_experts)
        expert_mse = torch.stack(expert_recon, dim=-1)  # (B, n_experts)

        # ── Cross-expert differentiation ──
        # How each expert's projection relates to every other expert's projection
        # This IS the disagreement structure. Preserve it exactly.
        cross_cos = []
        for i in range(self.n_experts):
            for j in range(i + 1, self.n_experts):
                cc = F.cosine_similarity(
                    expert_projected[i], expert_projected[j], dim=-1)
                cross_cos.append(cc)
        cross_features = torch.stack(cross_cos, dim=-1) if cross_cos else torch.zeros(B, 0, device=emb.device)

        # Per-sample disagreement: how much do experts disagree on THIS embedding?
        # High disagreement = embedding is in contested territory
        # Low disagreement = all experts agree (well-anchored)
        per_sample_agreement = expert_cos.mean(dim=-1)  # (B,) mean round-trip cos
        per_sample_disagreement = expert_cos.std(dim=-1)  # (B,) std across experts
        # Ratio: how much agreement relative to disagreement
        disagreement_ratio = per_sample_disagreement / (per_sample_agreement + 1e-8)  # (B,)

        # Expert projection norms before normalization (captures magnitude structure)
        # NOTE(review): recomputes centered/whitened per expert — duplicates
        # the first loop's work; candidate for hoisting later.
        expert_norms = []
        for i in range(self.n_experts):
            R = self.expert_rotations[i]
            W = self.expert_whiteners[i]
            mu = self.expert_means[i]
            centered = emb - mu
            whitened = centered @ W
            expert_norms.append(whitened.norm(dim=-1))  # (B,)
        expert_norm_features = torch.stack(expert_norms, dim=-1)  # (B, n_experts)
        norm_ratio = expert_norm_features / (expert_norm_features.mean(dim=-1, keepdim=True) + 1e-8)

        # ── Anchor distances ──
        anchor_cos = emb @ F.normalize(self.anchors, dim=-1).T if False else None
        anchors_n = F.normalize(self.anchors, dim=-1)
        anchor_cos = emb @ anchors_n.T  # (B, n_anchors)

        # ── Geometric context ──
        # Full feature set: expert consistency + reconstruction + cross-expert +
        # disagreement ratio + norm ratios + anchor distances
        geo_input = torch.cat([
            expert_cos,  # (B, n_experts)
            expert_mse,  # (B, n_experts)
            cross_features,  # (B, n_cross)
            disagreement_ratio.unsqueeze(-1),  # (B, 1)
            norm_ratio,  # (B, n_experts)
            anchor_cos,  # (B, n_anchors)
        ], dim=-1)
        geo_context = self.geo_proj(geo_input)

        enriched = torch.cat([embedding, geo_context], dim=-1)

        # ── Losses + Diagnostics ──
        aux = {}

        # 1. Expert agreement: all experts should see embedding equally
        expert_mean = expert_cos.mean(dim=-1, keepdim=True)
        aux["expert_agreement"] = (expert_cos - expert_mean).pow(2).mean()

        # 2. Rotation orthogonality (keeps the round-trip identity valid)
        ortho_loss = 0.0
        for i in range(self.n_experts):
            R = self.expert_rotations[i]
            RRT = R @ R.T
            ortho_loss += (RRT - torch.eye(self.d_embed, device=R.device)).pow(2).mean()
        aux["rotation_ortho"] = ortho_loss / self.n_experts

        # 3. Anchor spread (penalize anchors collapsing onto each other)
        anchor_sim = anchors_n @ anchors_n.T
        anchor_sim.fill_diagonal_(0)
        aux["anchor_spread"] = anchor_sim.pow(2).mean()

        # 4. Anchor sharpness (low softmax entropy = each sample near few anchors)
        anchor_probs = F.softmax(anchor_cos * 10, dim=-1)
        entropy = -(anchor_probs * (anchor_probs + 1e-12).log()).sum(-1).mean()
        aux["anchor_entropy"] = entropy

        # 5. Cross-expert differentiation consistency
        if cross_features.shape[1] > 0:
            aux["cross_expert_var"] = cross_features.var(dim=0).mean()
        else:
            aux["cross_expert_var"] = torch.tensor(0.0, device=emb.device)

        # 6. Disagreement preservation
        # The distribution of disagreement should stay at the measured target
        batch_cross_mean = cross_features.mean() if cross_features.shape[1] > 0 else torch.tensor(0.0, device=emb.device)
        batch_cross_std = cross_features.std() if cross_features.shape[1] > 0 else torch.tensor(0.0, device=emb.device)
        batch_disagree_ratio = disagreement_ratio.mean()
        aux["disagree_preserve"] = (
            (batch_cross_mean - self.target_cross_cos_mean).pow(2) +
            (batch_cross_std - self.target_cross_cos_std).pow(2) +
            (batch_disagree_ratio - self.target_disagreement_ratio).pow(2)
        )

        # 7. Bank CV: coefficient of variation of random 5-point simplex
        # volumes (Cayley–Menger) in geo-context space; 32 Monte-Carlo draws.
        if B >= 10:
            ctx_n = F.normalize(geo_context, dim=-1)
            vols = []
            for _ in range(32):
                idx = torch.randperm(B, device=embedding.device)[:5]
                pts = ctx_n[idx].unsqueeze(0)
                diff = pts.unsqueeze(-2) - pts.unsqueeze(-3)
                d2 = (diff * diff).sum(-1)
                Bv, V, _ = d2.shape
                cm = torch.zeros(Bv, V+1, V+1, device=d2.device, dtype=torch.float32)
                cm[:, 0, 1:] = 1; cm[:, 1:, 0] = 1; cm[:, 1:, 1:] = d2
                s = (-1.0)**V; f = math.factorial(V-1)
                v2 = s / ((2.0**(V-1)) * f*f) * torch.linalg.det(cm)
                vols.append(torch.sqrt(F.relu(v2[0]) + 1e-12))
            stacked = torch.stack(vols)
            bank_cv = stacked.std() / (stacked.mean() + 1e-8)
            aux["bank_cv"] = bank_cv
        else:
            aux["bank_cv"] = torch.tensor(0.0, device=embedding.device)

        # 8. Emb CV: same statistic on the (normalized) input embeddings.
        if B >= 10:
            emb_n = F.normalize(emb, dim=-1)
            vols = []
            for _ in range(32):
                idx = torch.randperm(B, device=embedding.device)[:5]
                pts = emb_n[idx].unsqueeze(0)
                diff = pts.unsqueeze(-2) - pts.unsqueeze(-3)
                d2 = (diff * diff).sum(-1)
                Bv, V, _ = d2.shape
                cm = torch.zeros(Bv, V+1, V+1, device=d2.device, dtype=torch.float32)
                cm[:, 0, 1:] = 1; cm[:, 1:, 0] = 1; cm[:, 1:, 1:] = d2
                s = (-1.0)**V; f = math.factorial(V-1)
                v2 = s / ((2.0**(V-1)) * f*f) * torch.linalg.det(cm)
                vols.append(torch.sqrt(F.relu(v2[0]) + 1e-12))
            stacked = torch.stack(vols)
            emb_cv = stacked.std() / (stacked.mean() + 1e-8)
            aux["emb_cv"] = emb_cv
        else:
            aux["emb_cv"] = torch.tensor(0.0, device=embedding.device)

        # Diagnostics (plain floats — not part of the loss graph)
        aux["expert_cos_mean"] = expert_cos.mean().item()
        aux["expert_cos_std"] = expert_cos.std().item()
        aux["anchor_max_cos"] = anchor_cos.max(dim=-1).values.mean().item()
        aux["anchor_mean_cos"] = anchor_cos.mean().item()
        if cross_features.shape[1] > 0:
            aux["cross_expert_cos"] = cross_features.mean().item()
            aux["cross_expert_cos_std"] = cross_features.std().item()
        aux["disagreement_ratio"] = disagreement_ratio.mean().item()
        aux["norm_ratio_spread"] = norm_ratio.std(dim=-1).mean().item()

        return enriched, aux

    def bank_loss(self, aux):
        """All targets from measured consensus. Preserves disagreement structure.

        Args:
            aux: the dict returned by ``forward`` (uses the tensor-valued
                entries only).

        Returns:
            Scalar tensor: fixed-weight sum of the alignment losses.
        """
        loss = (
            1.0 * aux["expert_agreement"] +
            1.0 * aux["rotation_ortho"] +
            0.5 * aux["anchor_spread"] +
            0.1 * aux["anchor_entropy"] +
            0.3 * aux["cross_expert_var"] +
            0.3 * (aux["bank_cv"] - self.target_cv).abs() +
            0.3 * (aux["emb_cv"] - self.target_cv).abs() +
            0.5 * aux["disagree_preserve"]  # preserve the disagreement distribution
        )
        return loss

    @torch.no_grad()
    def calibrate_disagreement(self, embeddings):
        """
        Measure the initial disagreement structure from per-sample distribution.
        Uses the full batch to capture the spread, not just the mean.

        Fills the target_cross_cos_mean/std and target_disagreement_ratio
        buffers in place; call once before training the bank.
        """
        B = embeddings.shape[0]
        emb = embeddings.float()

        # Compute per-sample disagreement directly
        # (same center → whiten → normalize → rotate chain as forward()).
        per_sample_expert_cos = []
        for i in range(self.n_experts):
            R = self.expert_rotations[i]
            W = self.expert_whiteners[i]
            mu = self.expert_means[i]
            centered = emb - mu
            whitened = centered @ W
            whitened_n = F.normalize(whitened, dim=-1)
            in_expert = whitened_n @ R.T
            back = in_expert @ R
            cos = F.cosine_similarity(whitened_n, back, dim=-1)
            per_sample_expert_cos.append(cos)

        expert_cos = torch.stack(per_sample_expert_cos, dim=-1)  # (B, n_experts)
        per_sample_agreement = expert_cos.mean(dim=-1)
        per_sample_disagreement = expert_cos.std(dim=-1)
        per_sample_ratio = per_sample_disagreement / (per_sample_agreement + 1e-8)

        # Cross-expert cosines
        cross_vals = []
        expert_projected = []
        for i in range(self.n_experts):
            R = self.expert_rotations[i]
            W = self.expert_whiteners[i]
            mu = self.expert_means[i]
            centered = emb - mu
            whitened = centered @ W
            whitened_n = F.normalize(whitened, dim=-1)
            expert_projected.append(whitened_n @ R.T)

        for i in range(self.n_experts):
            for j in range(i + 1, self.n_experts):
                cc = F.cosine_similarity(expert_projected[i], expert_projected[j], dim=-1)
                cross_vals.append(cc)

        if cross_vals:
            cross_all = torch.stack(cross_vals, dim=-1)
            self.target_cross_cos_mean.fill_(cross_all.mean().item())
            self.target_cross_cos_std.fill_(cross_all.std().item())

        # Use MEDIAN of per-sample ratio (robust to outliers)
        self.target_disagreement_ratio.fill_(per_sample_ratio.median().item())

        print(f" Calibrated disagreement (n={B}):")
        print(f" cross_cos: {self.target_cross_cos_mean.item():.4f} ± {self.target_cross_cos_std.item():.4f}")
        print(f" disagree_ratio: median={self.target_disagreement_ratio.item():.6f} "
              f"mean={per_sample_ratio.mean().item():.6f} "
              f"std={per_sample_ratio.std().item():.6f}")
        print(f" expert_cos: {expert_cos.mean().item():.4f} ± {expert_cos.std().item():.4f}")
+
422
+
423
+ # ══════════════════════════════════════════════════════════════════
424
+ # GEOMETRY
425
+ # ══════════════════════════════════════════════════════════════════
426
+
427
def infonce(a, b, temperature=0.07):
    """Symmetric InfoNCE over matched rows of `a` and `b`.

    Rows are L2-normalized; row i of `a` is the positive for row i of `b`
    and vice versa. Returns (loss, accuracy) where accuracy is the fraction
    of rows whose best match is their own pair (computed without grad).
    """
    a_n = F.normalize(a, dim=-1)
    b_n = F.normalize(b, dim=-1)
    sim = (a_n @ b_n.T) / temperature
    targets = torch.arange(sim.shape[0], device=sim.device)
    # Average the a→b and b→a cross-entropies for a symmetric objective.
    loss = (F.cross_entropy(sim, targets) + F.cross_entropy(sim.T, targets)) / 2
    with torch.no_grad():
        acc = (sim.argmax(-1) == targets).float().mean().item()
    return loss, acc
+
437
def cayley_menger_vol2(pts):
    """Squared simplex volume via the Cayley–Menger determinant.

    Args:
        pts: (B, V, D) batch of V points each.

    Returns:
        (B,) tensor of squared volumes of the (V-1)-simplex spanned by
        each point set (may be slightly negative from numerical error).
    """
    pts = pts.float()
    # Squared pairwise distances, shape (B, V, V).
    pairwise = pts.unsqueeze(-2) - pts.unsqueeze(-3)
    sq_dist = (pairwise * pairwise).sum(-1)
    batch, n_pts, _ = sq_dist.shape
    # Bordered (V+1)x(V+1) Cayley–Menger matrix: first row/col of ones,
    # zero corner, distance matrix in the lower-right block.
    bordered = torch.zeros(batch, n_pts + 1, n_pts + 1,
                           device=sq_dist.device, dtype=torch.float32)
    bordered[:, 0, 1:] = 1
    bordered[:, 1:, 0] = 1
    bordered[:, 1:, 1:] = sq_dist
    sign = (-1.0) ** n_pts
    fact = math.factorial(n_pts - 1)
    scale = sign / ((2.0 ** (n_pts - 1)) * fact * fact)
    return scale * torch.linalg.det(bordered)
+
447
def cv_loss(emb, target=0.12, n_samples=16):
    """Differentiable |CV - target| over random 5-point simplex volumes.

    Draws `n_samples` random 5-subsets of the batch, computes each subset's
    simplex volume, and penalizes deviation of the volumes' coefficient of
    variation from `target`. Returns 0 when the batch is too small.
    """
    batch = emb.shape[0]
    if batch < 5:
        return torch.tensor(0.0, device=emb.device)
    volumes = []
    for _ in range(n_samples):
        pick = torch.randperm(batch, device=emb.device)[:5]
        sq_vol = cayley_menger_vol2(emb[pick].unsqueeze(0))
        # relu clamps tiny negative determinants; epsilon keeps sqrt smooth.
        volumes.append(torch.sqrt(F.relu(sq_vol[0]) + 1e-12))
    vol_t = torch.stack(volumes)
    cv = vol_t.std() / (vol_t.mean() + 1e-8)
    return (cv - target).abs()
+
459
def cv_metric(emb, n=200):
    """Monte-Carlo CV of random 5-point simplex volumes (diagnostic only).

    Like cv_loss but returns a plain float, keeps only strictly positive
    volumes, and returns 0.0 when fewer than 10 usable samples remain.
    """
    batch = emb.shape[0]
    if batch < 5:
        return 0.0
    volumes = []
    for _ in range(n):
        pick = torch.randperm(batch, device=emb.device)[:5]
        sq_vol = cayley_menger_vol2(emb[pick].unsqueeze(0))
        vol = torch.sqrt(F.relu(sq_vol[0]) + 1e-12).item()
        if vol > 0:
            volumes.append(vol)
    if len(volumes) < 10:
        return 0.0
    arr = np.array(volumes)
    return float(arr.std() / (arr.mean() + 1e-8))
+
472
def measure_consensus_stats(consensus_embs, n_check=2000):
    """Measure exact geometric statistics of the consensus manifold.

    Uses at most the first `n_check` embeddings. Returns a dict with the
    simplex-volume CV, mean off-diagonal pairwise cosine, the first 50
    normalized singular values, and the effective dimensionality.
    NOTE(review): assumes `consensus_embs` rows are unit-norm so that the
    Gram matrix equals cosine similarity — confirm against caller.
    """
    embs = consensus_embs[:n_check].float()
    # Coefficient of variation (computed on DEVICE for speed).
    cv = cv_metric(embs.to(DEVICE))
    # Mean pairwise cosine, excluding the diagonal self-similarities.
    gram = embs @ embs.T
    off_diag = ~torch.eye(embs.shape[0], dtype=torch.bool)
    mean_cos = gram[off_diag].mean().item()
    # Normalized spectral profile of the centered embeddings.
    centered = embs - embs.mean(0, keepdim=True)
    svals = torch.linalg.svdvals(centered)
    spectral = (svals / (svals.sum() + 1e-8)).tolist()[:50]
    # Participation-ratio effective dimensionality.
    eff_dim = float((svals.sum() ** 2) / (svals.pow(2).sum() + 1e-12))

    return {
        "cv": cv,
        "mean_cos": mean_cos,
        "spectral": spectral,
        "eff_dim": eff_dim,
    }
+
496
+
497
+ # ══════════════════════════════════════════════════════════════════
498
+ # EXTRACTION + ALIGNMENT
499
+ # ══════════════════════════════════════════════════════════════════
500
+
501
def symmetric_inv_sqrt(cov, eps=1e-6):
    """Inverse matrix square root of a symmetric PSD matrix via eigh.

    Eigenvalues are clamped to `eps` so near-singular covariances stay
    invertible. Returns U diag(lambda^-1/2) U^T.
    """
    eigvals, eigvecs = torch.linalg.eigh(cov)
    clipped = eigvals.clamp(min=eps)
    return eigvecs @ torch.diag(clipped.rsqrt()) @ eigvecs.T
+
506
def procrustes_align(source, target, n_align=5000):
    """Whitened orthogonal Procrustes alignment from source to target space.

    Both point sets are centered, whitened (via the inverse covariance
    square root), row-normalized, and the optimal rotation R mapping the
    whitened source onto the whitened target is found by SVD. Returns the
    rotation plus the transforms needed to replay the alignment, with
    mean row-cosines before and after as quality diagnostics.
    """
    count = min(n_align, source.shape[0], target.shape[0])
    src = source[:count].float()
    tgt = target[:count].float()
    src_mean = src.mean(0, keepdim=True)
    tgt_mean = tgt.mean(0, keepdim=True)
    src_c = src - src_mean
    tgt_c = tgt - tgt_mean
    n_rows = src_c.shape[0]
    cos_before = F.cosine_similarity(src_c, tgt_c, dim=-1).mean().item()
    # Sample covariances (unbiased; max() guards the single-row case).
    src_cov = (src_c.T @ src_c) / max(n_rows - 1, 1)
    tgt_cov = (tgt_c.T @ tgt_c) / max(n_rows - 1, 1)
    src_whiten = symmetric_inv_sqrt(src_cov)
    tgt_whiten = symmetric_inv_sqrt(tgt_cov)
    src_w = F.normalize(src_c @ src_whiten, dim=-1)
    tgt_w = F.normalize(tgt_c @ tgt_whiten, dim=-1)
    # Orthogonal Procrustes: R maps whitened source onto whitened target.
    U, _, Vt = torch.linalg.svd(tgt_w.T @ src_w, full_matrices=False)
    R = U @ Vt
    cos_after = F.cosine_similarity(src_w @ R.T, tgt_w, dim=-1).mean().item()
    return {
        "rotation": R,
        "source_mean": src_mean.squeeze(0),
        "source_whitener": src_whiten,
        "target_unwhitener": torch.linalg.pinv(tgt_whiten),
        "cos_before": cos_before,
        "cos_after": cos_after,
    }
+
528
def apply_align(emb, a):
    """Replay a stored Procrustes alignment: center, whiten, rotate, unwhiten.

    `a` is a dict from procrustes_align(); returns the embeddings mapped
    into the target expert's (unwhitened) space.
    """
    out = emb.float() - a["source_mean"]
    out = out @ a["source_whitener"]
    out = out @ a["rotation"].T
    return out @ a["target_unwhitener"]
+
533
+
534
+ # ══════════════════════════════════════════════════════════════════
535
+ # MAIN
536
+ # ══════════════════════════════════════════════════════════════════
537
+
538
+ def run():
539
+ torch.manual_seed(42)
540
+ np.random.seed(42)
541
+ N_SAMPLES = 20000
542
+ MAX_LEN = 128
543
+ BATCH = 256
544
+
545
+ # ── Phase 0: Extract ──
546
+ print(f"\n{'='*65}")
547
+ print("PHASE 0: EXTRACTION")
548
+ print(f"{'='*65}")
549
+
550
+ from datasets import load_dataset
551
+ from transformers import AutoModel, AutoTokenizer
552
+
553
+ ds = load_dataset("CaptionEmporium/conceptual-captions-cc12m-llavanext",
554
+ split="train", streaming=True)
555
+ captions = []
556
+ for row in ds:
557
+ cap = row.get("caption_llava", "")
558
+ if isinstance(cap, str) and len(cap) > 50:
559
+ captions.append(cap)
560
+ if len(captions) >= N_SAMPLES:
561
+ break
562
+ print(f" Captions: {len(captions):,}")
563
+
564
+ embeds = {}
565
+ for model_name, short, max_len in EXPERTS:
566
+ print(f"\n Extracting: {short}...")
567
+ model = AutoModel.from_pretrained(model_name).to(DEVICE).eval()
568
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
569
+ all_emb = []
570
+ with torch.no_grad():
571
+ for i in tqdm(range(0, len(captions), 128), desc=f" {short}"):
572
+ batch = captions[i:i+128]
573
+ inputs = tokenizer(batch, max_length=max_len, padding=True,
574
+ truncation=True, return_tensors="pt").to(DEVICE)
575
+ out = model(**inputs)
576
+ m = inputs.attention_mask.unsqueeze(-1).float()
577
+ pooled = (out.last_hidden_state * m).sum(1) / m.sum(1).clamp(min=1)
578
+ all_emb.append(pooled.cpu())
579
+ embeds[short] = torch.cat(all_emb)
580
+ print(f" Shape: {embeds[short].shape}")
581
+ del model; gc.collect(); torch.cuda.empty_cache()
582
+
583
+ # ── Phase 0b: Align + Consensus + Measure ──
584
+ print(f"\n{'='*65}")
585
+ print("PHASE 0b: GENERALIZED PROCRUSTES ALIGNMENT (no reference bias)")
586
+ print(f"{'='*65}")
587
+
588
+ names = [s for _, s, _ in EXPERTS]
589
+
590
+ # Generalized Procrustes: iteratively align all to their mean
591
+ # No expert is the reference. The centerpoint emerges.
592
+ GPA_ITERS = 10
593
+ current = {name: embeds[name].float() for name in names}
594
+
595
+ for gpa_iter in range(GPA_ITERS):
596
+ # Compute mean shape
597
+ mean_shape = sum(current[n] for n in names) / len(names)
598
+
599
+ # Align each to mean
600
+ new_current = {}
601
+ total_delta = 0.0
602
+ for name in names:
603
+ info = procrustes_align(current[name], mean_shape)
604
+ new_current[name] = apply_align(current[name], info)
605
+ # Measure how much this iteration changed things
606
+ delta = (new_current[name] - current[name]).pow(2).mean().item()
607
+ total_delta += delta
608
+
609
+ current = new_current
610
+ if gpa_iter == 0 or (gpa_iter + 1) % 3 == 0 or total_delta < 1e-8:
611
+ print(f" GPA iter {gpa_iter+1}: delta={total_delta:.8f}")
612
+ if total_delta < 1e-8:
613
+ print(f" Converged at iteration {gpa_iter+1}")
614
+ break
615
+
616
+ # Final alignment: align each expert to the converged mean
617
+ mean_shape = sum(current[n] for n in names) / len(names)
618
+ procrustes_results = {}
619
+ aligned = {}
620
+ for name in names:
621
+ info = procrustes_align(embeds[name], mean_shape)
622
+ procrustes_results[name] = info
623
+ aligned[name] = apply_align(embeds[name], info)
624
+ cos = F.cosine_similarity(
625
+ aligned[name][:2000], mean_shape[:2000], dim=-1).mean().item()
626
+ print(f" {name:10s}: cos_after={info['cos_after']:.4f} cos_to_mean={cos:.4f}")
627
+
628
+ # Consensus = normalized centroid (now equidistant from all experts)
629
+ consensus = F.normalize(sum(aligned[n] for n in names) / len(names), dim=-1)
630
+ for name in names:
631
+ cos = F.cosine_similarity(consensus[:2000], aligned[name][:2000], dim=-1).mean().item()
632
+ print(f" cos(consensus, {name}): {cos:.4f}")
633
+
634
+ # Verify equidistance
635
+ expert_cos_to_consensus = []
636
+ for name in names:
637
+ c = F.cosine_similarity(consensus[:2000], aligned[name][:2000], dim=-1).mean().item()
638
+ expert_cos_to_consensus.append(c)
639
+ equidist_range = max(expert_cos_to_consensus) - min(expert_cos_to_consensus)
640
+ print(f" Equidistance range: {equidist_range:.4f} (should be near 0)")
641
+
642
+ # Measure EXACT consensus statistics
643
+ print(f"\n Measuring consensus statistics...")
644
+ consensus_stats = measure_consensus_stats(consensus)
645
+ print(f" CV: {consensus_stats['cv']:.4f}")
646
+ print(f" Mean cos: {consensus_stats['mean_cos']:.4f}")
647
+ print(f" Eff dim: {consensus_stats['eff_dim']:.1f}")
648
+ print(f" Spectral: [{', '.join(f'{s:.4f}' for s in consensus_stats['spectral'][:5])}...]")
649
+
650
+ del embeds, aligned
651
+ gc.collect(); torch.cuda.empty_cache()
652
+
653
+ # ── Phase 1: Train Student ──
654
+ print(f"\n{'='*65}")
655
+ print("PHASE 1: TRAIN STUDENT")
656
+ print(f"{'='*65}")
657
+
658
+ tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")
659
+ tokens = tokenizer(captions, max_length=MAX_LEN, padding="max_length",
660
+ truncation=True, return_tensors="pt")
661
+ input_ids = tokens["input_ids"]
662
+ attention_mask = tokens["attention_mask"]
663
+
664
+ n_train = N_SAMPLES - 2000
665
+ train_ids = input_ids[:n_train].to(DEVICE)
666
+ train_mask = attention_mask[:n_train].to(DEVICE)
667
+ train_targets = consensus[:n_train].to(DEVICE)
668
+ val_ids = input_ids[n_train:].to(DEVICE)
669
+ val_mask = attention_mask[n_train:].to(DEVICE)
670
+ val_targets = consensus[n_train:].to(DEVICE)
671
+
672
+ student = MiniStudent(
673
+ vocab_size=tokenizer.vocab_size, max_len=MAX_LEN,
674
+ d_model=256, n_heads=4, n_layers=4, d_ff=1024,
675
+ output_dim=768, dropout=0.1, pad_token_id=tokenizer.pad_token_id
676
+ ).to(DEVICE)
677
+ n_params = sum(p.numel() for p in student.parameters())
678
+ print(f" Student: {n_params:,} params")
679
+ print(f" CV target: {consensus_stats['cv']:.4f}")
680
+
681
+ optimizer = torch.optim.AdamW(student.parameters(), lr=3e-4, weight_decay=0.01)
682
+
683
+ for epoch in range(5):
684
+ student.train()
685
+ perm = torch.randperm(n_train, device=DEVICE)
686
+ t_loss, t_acc, t_cos, n = 0, 0, 0, 0
687
+ t0 = time.time()
688
+ for i in range(0, n_train, BATCH):
689
+ idx = perm[i:i+BATCH]
690
+ if len(idx) < 8: continue
691
+ emb = student(train_ids[idx], train_mask[idx])
692
+ tgt = train_targets[idx]
693
+ l_nce, acc = infonce(emb, tgt)
694
+ l_mse = F.mse_loss(emb, tgt)
695
+ l_cv = cv_loss(emb, target=consensus_stats["cv"])
696
+ loss = l_nce + l_mse + 0.1 * l_cv
697
+ loss.backward()
698
+ torch.nn.utils.clip_grad_norm_(student.parameters(), 1.0)
699
+ optimizer.step(); optimizer.zero_grad(set_to_none=True)
700
+ with torch.no_grad():
701
+ cos = F.cosine_similarity(emb, tgt, dim=-1).mean().item()
702
+ t_loss += loss.item(); t_acc += acc; t_cos += cos; n += 1
703
+ elapsed = time.time() - t0; d = max(n, 1)
704
+ student.eval()
705
+ with torch.no_grad():
706
+ v_emb = student(val_ids, val_mask)
707
+ _, v_acc = infonce(v_emb[:1000], val_targets[:1000])
708
+ v_cos = F.cosine_similarity(v_emb, val_targets, dim=-1).mean().item()
709
+ v_cv = cv_metric(v_emb[:1000])
710
+ print(f" E{epoch+1}: {elapsed:.0f}s loss={t_loss/d:.4f} "
711
+ f"t_acc={t_acc/d:.3f} t_cos={t_cos/d:.3f} "
712
+ f"v_acc={v_acc:.3f} v_cos={v_cos:.3f} v_cv={v_cv:.3f}")
713
+
714
+ torch.save(student.state_dict(), "mini_student.pt")
715
+ print(f"\n Student saved. v_cos={v_cos:.3f}, v_cv={v_cv:.3f}")
716
+
717
+ # ── Phase 2: Train Alignment Bank ──
718
+ print(f"\n{'='*65}")
719
+ print("PHASE 2: TRAIN ALIGNMENT BANK (student frozen)")
720
+ print(f"{'='*65}")
721
+
722
+ student.eval()
723
+ for p in student.parameters():
724
+ p.requires_grad = False
725
+
726
+ print(" Pre-encoding through frozen student...")
727
+ with torch.no_grad():
728
+ all_embs = []
729
+ for i in range(0, n_train, 512):
730
+ j = min(i + 512, n_train)
731
+ emb = student(train_ids[i:j], train_mask[i:j])
732
+ all_embs.append(emb)
733
+ student_embs = torch.cat(all_embs)
734
+ val_student_embs = student(val_ids, val_mask)
735
+ print(f" Student embeddings: {student_embs.shape}")
736
+
737
+ bank = AlignmentBank(
738
+ d_embed=768, n_experts=len(EXPERTS),
739
+ n_anchors=512, d_bank=128
740
+ ).to(DEVICE)
741
+
742
+ bank.init_from_procrustes(procrustes_results, names,
743
+ consensus[:n_train], consensus_stats)
744
+ bank_params = sum(p.numel() for p in bank.parameters())
745
+ print(f" Bank: {bank_params:,} params")
746
+ print(f" Bank targets: CV={bank.target_cv.item():.4f}, "
747
+ f"mean_cos={bank.target_mean_cos.item():.4f}")
748
+
749
+ # Calibrate disagreement from initial state (before any training)
750
+ bank.calibrate_disagreement(student_embs[:2000])
751
+
752
+ bank_opt = torch.optim.AdamW(bank.parameters(), lr=1e-3, weight_decay=0.01)
753
+ BANK_EPOCHS = 20
754
+ BANK_BATCH = 256
755
+
756
+ for epoch in range(BANK_EPOCHS):
757
+ bank.train()
758
+ perm = torch.randperm(n_train, device=DEVICE)
759
+ total_loss = 0
760
+ stats = {"expert_agreement": 0, "rotation_ortho": 0,
761
+ "anchor_spread": 0, "bank_cv": 0, "emb_cv": 0,
762
+ "cross_expert_var": 0, "disagree_preserve": 0}
763
+ n = 0
764
+ t0 = time.time()
765
+ for i in range(0, n_train, BANK_BATCH):
766
+ idx = perm[i:i+BANK_BATCH]
767
+ if len(idx) < 16: continue
768
+ emb = student_embs[idx]
769
+ enriched, aux = bank(emb)
770
+ loss = bank.bank_loss(aux)
771
+ loss.backward()
772
+ torch.nn.utils.clip_grad_norm_(bank.parameters(), 1.0)
773
+ bank_opt.step(); bank_opt.zero_grad(set_to_none=True)
774
+ total_loss += loss.item()
775
+ for k in stats:
776
+ if k in aux:
777
+ v = aux[k]
778
+ stats[k] += v.item() if torch.is_tensor(v) else v
779
+ n += 1
780
+ elapsed = time.time() - t0; d = max(n, 1)
781
+
782
+ bank.eval()
783
+ with torch.no_grad():
784
+ v_enriched, v_aux = bank(val_student_embs)
785
+ v_loss = bank.bank_loss(v_aux).item()
786
+
787
+ print(f"\n E{epoch+1:2d}: {elapsed:.0f}s loss={total_loss/d:.4f} v_loss={v_loss:.4f}")
788
+ print(f" Geometry: b_cv={stats['bank_cv']/d:.4f} e_cv={stats['emb_cv']/d:.4f} "
789
+ f"spread={stats['anchor_spread']/d:.5f} a_max={v_aux['anchor_max_cos']:.3f}")
790
+ print(f" Experts: cos={v_aux['expert_cos_mean']:.3f}±{v_aux['expert_cos_std']:.3f} "
791
+ f"agr={stats['expert_agreement']/d:.6f} ortho={stats['rotation_ortho']/d:.6f}")
792
+ print(f" Disagree: x_cos={v_aux.get('cross_expert_cos', 0):.4f}±{v_aux.get('cross_expert_cos_std', 0):.4f} "
793
+ f"ratio={v_aux['disagreement_ratio']:.6f} "
794
+ f"preserve={stats['disagree_preserve']/d:.6f} "
795
+ f"norms={v_aux['norm_ratio_spread']:.4f}")
796
+
797
+ torch.save(bank.state_dict(), "alignment_bank.pt")
798
+
799
+ # ── Phase 3: Geometric Verification ──
800
+ print(f"\n{'='*65}")
801
+ print("PHASE 3: GEOMETRIC VERIFICATION")
802
+ print(f"{'='*65}")
803
+
804
+ bank.eval()
805
+ with torch.no_grad():
806
+ enriched_val, v_aux = bank(val_student_embs)
807
+ original_768 = enriched_val[:, :768]
808
+ geo_context = enriched_val[:, 768:]
809
+
810
+ passthrough_cos = F.cosine_similarity(
811
+ original_768[:100], val_student_embs[:100], dim=-1).mean().item()
812
+ geo_cv = cv_metric(F.normalize(geo_context[:1000], dim=-1))
813
+ S = torch.linalg.svdvals(
814
+ geo_context[:1000].float() - geo_context[:1000].float().mean(0))
815
+ geo_eff_dim = float((S.sum() ** 2) / (S.pow(2).sum() + 1e-12))
816
+
817
+ # Verify consensus stats are preserved
818
+ emb_cv = cv_metric(val_student_embs[:1000])
819
+
820
+ print(f" Passthrough: {passthrough_cos:.6f} (target: 1.000)")
821
+ print(f" Emb CV: {emb_cv:.4f} (consensus: {consensus_stats['cv']:.4f})")
822
+ print(f" Geo context CV: {geo_cv:.4f}")
823
+ print(f" Geo eff_dim: {geo_eff_dim:.1f} / {bank.d_bank}")
824
+ print(f" Expert cos: {v_aux['expert_cos_mean']:.3f} ± {v_aux['expert_cos_std']:.3f}")
825
+ print(f" Anchor max cos: {v_aux['anchor_max_cos']:.3f}")
826
+ print(f" Disagreement:")
827
+ print(f" Cross-expert: {v_aux.get('cross_expert_cos', 0):.4f} ± {v_aux.get('cross_expert_cos_std', 0):.4f}")
828
+ print(f" Ratio: {v_aux['disagreement_ratio']:.6f} (target: {bank.target_disagreement_ratio.item():.6f})")
829
+ print(f" Norm spread: {v_aux['norm_ratio_spread']:.4f}")
830
+
831
+ # ── Phase 4: Classifier Stability Test ──
832
+ print(f"\n{'='*65}")
833
+ print("PHASE 4: CLASSIFIER STABILITY TEST")
834
+ print(f"{'='*65}")
835
+
836
+ with torch.no_grad():
837
+ embs = val_student_embs[:1000]
838
+ sim = embs @ embs.T
839
+ sim.fill_diagonal_(-1)
840
+ n_pairs = 3000
841
+ idx_a = torch.randint(0, 1000, (n_pairs,))
842
+ idx_b = torch.randint(0, 1000, (n_pairs,))
843
+ pair_cos = sim[idx_a, idx_b]
844
+ sorted_cos, _ = pair_cos.sort()
845
+ t1 = sorted_cos[n_pairs // 3].item()
846
+ t2 = sorted_cos[2 * n_pairs // 3].item()
847
+ labels = torch.zeros(n_pairs, dtype=torch.long, device=DEVICE)
848
+ labels[pair_cos > t2] = 0
849
+ labels[(pair_cos <= t2) & (pair_cos > t1)] = 1
850
+ labels[pair_cos <= t1] = 2
851
+
852
+ enriched_a, aux_a = bank(embs[idx_a])
853
+ enriched_b, aux_b = bank(embs[idx_b])
854
+
855
+ # Build explicit geometric features per pair
856
+ # These are interpretable and hard to overfit
857
+ a_emb = embs[idx_a]; b_emb = embs[idx_b]
858
+ a_geo = enriched_a[:, 768:]; b_geo = enriched_b[:, 768:]
859
+
860
+ geo_explicit = torch.cat([
861
+ # Pair-level
862
+ F.cosine_similarity(a_emb, b_emb, dim=-1).unsqueeze(-1), # raw cosine
863
+ (a_emb - b_emb).pow(2).mean(dim=-1).unsqueeze(-1), # MSE
864
+ F.cosine_similarity(a_geo, b_geo, dim=-1).unsqueeze(-1), # geo context cosine
865
+ (a_geo - b_geo).pow(2).mean(dim=-1).unsqueeze(-1), # geo context MSE
866
+ # Per-sample bank diagnostics (already computed in forward)
867
+ torch.abs(a_emb - b_emb).mean(dim=-1).unsqueeze(-1), # L1 distance
868
+ (a_emb * b_emb).sum(dim=-1).unsqueeze(-1), # dot product
869
+ ], dim=-1) # (n_pairs, 6)
870
+
871
+ modes = {
872
+ "raw_768": torch.cat([a_emb, b_emb], dim=-1),
873
+ "raw+diff": torch.cat([a_emb, b_emb, torch.abs(a_emb - b_emb), a_emb * b_emb], dim=-1),
874
+ "bank_enriched": torch.cat([enriched_a, enriched_b], dim=-1),
875
+ "bank+diff": torch.cat([enriched_a, enriched_b,
876
+ torch.abs(enriched_a - enriched_b),
877
+ enriched_a * enriched_b], dim=-1),
878
+ "geo_explicit": geo_explicit,
879
+ }
880
+
881
+ print(f"\n {'Mode':<20} {'Dim':>6} {'Train':>7} {'Val':>7} {'Gap':>7}")
882
+ print(f" {'-'*50}")
883
+
884
+ for mode_name, features in modes.items():
885
+ feat_dim = features.shape[1]
886
+ clf = nn.Sequential(
887
+ nn.Linear(feat_dim, min(256, feat_dim)), nn.GELU(), nn.LayerNorm(min(256, feat_dim)),
888
+ nn.Dropout(0.1),
889
+ nn.Linear(min(256, feat_dim), 3)
890
+ ).to(DEVICE)
891
+ clf_opt = torch.optim.Adam(clf.parameters(), lr=1e-3)
892
+ n_clf_train = 2400
893
+ train_f = features[:n_clf_train].detach()
894
+ train_l = labels[:n_clf_train]
895
+ val_f = features[n_clf_train:].detach()
896
+ val_l = labels[n_clf_train:]
897
+ for e in range(30):
898
+ clf.train()
899
+ logits = clf(train_f)
900
+ loss = F.cross_entropy(logits, train_l)
901
+ loss.backward(); clf_opt.step(); clf_opt.zero_grad()
902
+ clf.eval()
903
+ with torch.no_grad():
904
+ v_acc = (clf(val_f).argmax(-1) == val_l).float().mean().item()
905
+ t_acc = (clf(train_f).argmax(-1) == train_l).float().mean().item()
906
+ print(f" {mode_name:<20} {feat_dim:>6} {t_acc:>7.3f} {v_acc:>7.3f} {t_acc-v_acc:>7.3f}")
907
+
908
+ print(f"\n{'='*65}")
909
+ print("SUMMARY")
910
+ print(f"{'='*65}")
911
+ print(f" Consensus CV: {consensus_stats['cv']:.4f}")
912
+ print(f" Consensus eff_dim:{consensus_stats['eff_dim']:.1f}")
913
+ print(f" Student v_cos: {v_cos:.3f}")
914
+ print(f" Student v_cv: {v_cv:.3f}")
915
+ print(f" Bank params: {bank_params:,}")
916
+ print(f" Bank geo_eff_dim: {geo_eff_dim:.1f}")
917
+ print(f" Bank geo_cv: {geo_cv:.4f}")
918
+ print(f"\n{'='*65}")
919
+ print("DONE")
920
+ print(f"{'='*65}")
921
+
922
+
923
# Script entry point: execute the full multi-phase pipeline
# (student distillation, alignment-bank training, geometric
# verification, and the classifier stability test) defined in run().
if __name__ == "__main__":
    run()