"""
implicit_solver/A1_projective_reprobe_h2a.py
============================================
Same projective probe as A0, applied to H2a (Q-rank02, V=32, D=4).

Tests whether the projective interpretation generalizes:
  - A0 found G-Cand (D=3) has uniform distribution on ℝP² when collapsed.
  - A1 tests whether H2a (D=4) shows the same on ℝP³.

Predicted outcomes
------------------
A. UNIFORM ℝP³ ALSO:
   H2a's rows collapse to N axes uniformly distributed on ℝP³
   (deviation from baseline < 0.05). Projective reading is GENERAL —
   works at any D. Polygonal omega derivation via sphere training is
   validated as a method, not a D=3 quirk.

B. STILL SPHERICAL:
   H2a shows few antipodal pairs (at most 4), and what few axes get
   collapsed don't show uniform ℝP³ distribution. Projective reading is
   D=3-SPECIFIC — sphere-starvation symptom. D=4 genuinely lives on S³
   as designed.

C. INTERMEDIATE:
   Some collapse but not full uniform. Mixed regime.

Cost: ~10 seconds (same checkpoint we already have).

Output
------
  /content/implicit_solver_reports/A1_projective_reprobe_h2a.json
  /content/implicit_solver_reports/A1_projective_reprobe_h2a.png
"""

import json
import math
from pathlib import Path

import numpy as np
import torch
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

# NOTE(review): get_phaseQ_configs, build_run_config, PatchSVAE_F_Ablation and
# OmegaNoiseDataset are used below but never imported in this file —
# presumably they are already defined in the notebook namespace; confirm.

CKPT_DIR = Path("/content/phaseQ_reports")
RANK02_CKPT = CKPT_DIR / "Q_rank02_h64_V32_D4_dp0_nx0_adam" / "epoch_1_checkpoint.pt"
OUTPUT_DIR = Path("/content/implicit_solver_reports")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
OUTPUT_PLOT = OUTPUT_DIR / "A1_projective_reprobe_h2a.png"
OUTPUT_JSON = OUTPUT_DIR / "A1_projective_reprobe_h2a.json"

# Decision thresholds shared by the printed analysis, the figure and the
# headline conclusion, so that all three always agree.
UNIFORM_DEVIATION_TOL = 0.05    # |mean angle - baseline| below this => uniform
STRONG_SILHOUETTE = 0.5         # silhouette above this => real clusters
WEAK_SILHOUETTE = 0.3           # silhouette above this => some structure
MIN_SECONDARY_PAIRS = 3         # this many leftover antipodal pairs => "has secondary"
FULL_RANK_UTILIZATION = 0.95    # effective-rank / D above this => full rank
MAX_PAIRS_FOR_SPHERICAL = 4     # at most this many pairs => "still spherical"


def _as_float(value, default=0.0):
    """Return *value* as a float, or *default* for None / unparseable input.

    Results loaded back from JSON may hold ``None`` (JSON null) or, because
    the writer uses ``default=str``, a stringified number.
    """
    if value is None:
        return default
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


# ════════════════════════════════════════════════════════════════════
# Loading
# ════════════════════════════════════════════════════════════════════

def load_h2a():
    """Build the rank02 model from the phase-Q configs and load RANK02_CKPT.

    Returns:
        (model, cfg): the model in eval mode (on CPU) and its run config.

    Raises:
        LookupError: if no config has 'rank02' in its 'variant'.
    """
    cfg_dict = next(
        (c for c in get_phaseQ_configs() if 'rank02' in c['variant']), None)
    if cfg_dict is None:
        raise LookupError("no phase-Q config with 'rank02' in its variant")
    cfg = build_run_config(cfg_dict)
    overrides = cfg_dict['overrides']

    model = PatchSVAE_F_Ablation(
        matrix_v=cfg.matrix_v,
        D=cfg.D,
        patch_size=cfg.patch_size,
        hidden=cfg.hidden,
        depth=cfg.depth,
        n_cross_layers=cfg.n_cross_layers,
        n_heads=cfg.n_heads,
        max_alpha=overrides.get('max_alpha', cfg.max_alpha),
        alpha_init=cfg.alpha_init,
        activation=overrides.get('activation', 'gelu'),
        row_norm=overrides.get('row_norm', 'sphere'),
        svd_mode=overrides.get('svd', 'fp64'),
        linear_readout=overrides.get('linear_readout', False),
        match_params=overrides.get('match_params', True),
        init_scheme=overrides.get('init', 'orthogonal'),
    )

    # NOTE(security): weights_only=False unpickles arbitrary objects; only
    # load checkpoints from a trusted source.
    ckpt = torch.load(RANK02_CKPT, map_location='cpu', weights_only=False)
    # Accept the common wrapper keys, else treat the checkpoint itself as
    # the state dict.
    state_dict = (
        ckpt.get('model_state')
        or ckpt.get('model_state_dict')
        or ckpt.get('state_dict')
        or ckpt
    )
    model.load_state_dict(state_dict)
    model.eval()
    return model, cfg


def collect_per_sample_M(model, cfg, n_batches=8, batch_size=64):
    """Run the model on noise images and gather the first patch's M matrix.

    Args:
        model: module whose forward output exposes ``out['svd']['M']``.
        cfg: run config; only ``cfg.img_size`` is read here.
        n_batches: number of batches to draw.
        batch_size: samples per batch.

    Returns:
        numpy array stacking ``out['svd']['M'][:, 0]`` over all samples.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    ds = OmegaNoiseDataset(
        size=n_batches * batch_size, img_size=cfg.img_size, allowed_types=[0])
    loader = torch.utils.data.DataLoader(ds, batch_size=batch_size, shuffle=False)

    all_M = []
    with torch.no_grad():
        for imgs, _ in loader:
            out = model(imgs.to(device))
            # assumes M is indexed (batch, patch, ...) — TODO confirm
            all_M.append(out['svd']['M'][:, 0].cpu())
    return torch.cat(all_M, dim=0).numpy()


# ════════════════════════════════════════════════════════════════════
# Antipodal pair identification + projective collapse (carry from A0)
# ════════════════════════════════════════════════════════════════════

def identify_antipodal_pairs(M_avg, threshold=-0.9):
    """Greedily pair rows whose cosine similarity is below *threshold*.

    Each row proposes its most-negative-cosine partner; proposals are
    processed from most to least antipodal and a row joins at most one pair.

    Args:
        M_avg: (V, D) array of row vectors.
        threshold: a pair needs cosine strictly below this value.

    Returns:
        (pairs, unpaired): list of (i, j) tuples with i < j, and the list of
        row indices that ended up in no pair.
    """
    norms = np.linalg.norm(M_avg, axis=1, keepdims=True)
    unit = M_avg / np.clip(norms, 1e-12, None)
    cosines = unit @ unit.T
    # Keep a row from selecting itself as its own "most antipodal" partner.
    np.fill_diagonal(cosines, 1.0)

    V = M_avg.shape[0]
    candidates = []
    for i in range(V):
        best_j = int(cosines[i].argmin())
        best_cos = float(cosines[i, best_j])
        if best_cos < threshold:
            candidates.append((best_cos, i, best_j))
    candidates.sort()  # most antipodal proposals first

    claimed = [False] * V
    pairs = []
    for _cos_val, i, j in candidates:
        if claimed[i] or claimed[j]:
            continue
        # NOTE(review): cosines[j, i] mirrors cosines[i, j], which is already
        # below threshold for every candidate, so the second clause makes
        # this test effectively always true and the "mutual-strongest"
        # requirement is not enforced. Left unchanged to stay comparable
        # with the A0 probe — confirm whether that is intended.
        if cosines[j].argmin() == i or cosines[j, i] < threshold:
            pairs.append((min(i, j), max(i, j)))
            claimed[i] = True
            claimed[j] = True

    unpaired = [i for i in range(V) if not claimed[i]]
    return pairs, unpaired


def _canonicalize_sign(vec, eps=1e-6):
    """Return *vec*, negated if its first entry with |x| > eps is negative."""
    for value in vec:
        if abs(value) > eps:
            return -vec if value < 0 else vec
    return vec


def collapse_to_axes(M_avg, pairs, unpaired):
    """Collapse rows to one representative axis per pair / unpaired row.

    For each pair, take (row_i - row_j)/2 normalized — symmetric merge.
    For unpaired, take the unit row as-is. The sign of every representative
    is canonicalized so its first nonzero coordinate is positive.

    Returns:
        Array of representatives, pairs first and then unpaired rows.
    """
    norms = np.linalg.norm(M_avg, axis=1, keepdims=True)
    unit = M_avg / np.clip(norms, 1e-12, None)

    representatives = []
    for i, j in pairs:
        merged = unit[i] - unit[j]
        merged = merged / max(np.linalg.norm(merged), 1e-12)
        representatives.append(_canonicalize_sign(merged))
    for i in unpaired:
        representatives.append(_canonicalize_sign(unit[i].copy()))
    return np.array(representatives)


# ════════════════════════════════════════════════════════════════════
# Projective metrics
# ════════════════════════════════════════════════════════════════════

def projective_pairwise_angles(axes):
    """Angles on ℝP^(D-1): wrap [0, π] → [0, π/2] via min(θ, π-θ).

    Args:
        axes: (n, D) array of unit vectors.

    Returns:
        1-D array with the n*(n-1)/2 angles of the strict upper triangle.
    """
    n = axes.shape[0]
    cosines = np.clip(axes @ axes.T, -1, 1)  # clip guards arccos from round-off
    raw_angles = np.arccos(cosines)
    proj_angles = np.minimum(raw_angles, np.pi - raw_angles)
    return proj_angles[np.triu_indices(n, k=1)]


def uniform_rp_pairwise_angle_baseline(D, n_axes, n_trials=10):
    """Empirical baseline: sample n_axes uniformly on ℝP^(D-1), compute the
    mean projective pairwise angle, averaged over *n_trials* draws.

    Uses a fixed seed (RandomState(0)), so the result is deterministic for
    given arguments.
    """
    rng = np.random.RandomState(0)
    means = []
    for _ in range(n_trials):
        x = rng.randn(n_axes, D)
        x = x / np.linalg.norm(x, axis=1, keepdims=True)
        # Canonicalize to upper hemisphere: flip each row so that its first
        # nonzero coordinate is positive.
        for k in range(D):
            # np.all over zero leading columns (k == 0) is True for every row.
            mask = (x[:, k] != 0) & np.all(x[:, :k] == 0, axis=1)
            if not np.any(mask):
                break
            x[mask] = x[mask] * np.sign(x[mask, k:k + 1])
        means.append(projective_pairwise_angles(x).mean())
    return float(np.mean(means))


def _silhouette_sweep(axes):
    """Return [(k, silhouette)] for KMeans with k = 2 .. min(8, n) - 1.

    A k whose fit fails, or which yields fewer than two distinct labels, is
    skipped; failures are reported rather than silently swallowed.
    """
    scores = []
    for k in range(2, min(8, axes.shape[0])):
        try:
            km = KMeans(n_clusters=k, n_init=10, random_state=42)
            labels = km.fit_predict(axes)
            if len(set(labels)) >= 2:
                scores.append((k, float(silhouette_score(axes, labels))))
        except Exception as err:  # best-effort diagnostic, never fatal
            print(f" (clustering skipped for k={k}: {err})")
    return scores


def classify_generalization(results, n_pairs):
    """Map the metrics to one outcome key, shared by the figure and main().

    Args:
        results: dict returned by test_axis_distribution.
        n_pairs: number of antipodal pairs found.

    Returns:
        'uniform', 'spherical', 'mostly_uniform' or 'mixed'.
    """
    is_uniform = abs(results['deviation_from_uniform']) < UNIFORM_DEVIATION_TOL
    is_clustered = _as_float(results['best_silhouette']) > STRONG_SILHOUETTE
    has_secondary = results['secondary_antipodal_pairs'] >= MIN_SECONDARY_PAIRS
    full_rank = results['utilization'] > FULL_RANK_UTILIZATION

    if is_uniform and not is_clustered and not has_secondary and full_rank:
        return 'uniform'
    if n_pairs <= MAX_PAIRS_FOR_SPHERICAL and full_rank:
        return 'spherical'
    if is_uniform:
        return 'mostly_uniform'
    return 'mixed'


def test_axis_distribution(axes, label):
    """Print and return distribution metrics for a set of projective axes.

    Args:
        axes: (n, D) array of unit axis representatives, n >= 2.
        label: heading printed above the report.

    Returns:
        dict of JSON-friendly metrics (angles, baseline deviation, cluster
        verdict, effective rank, secondary antipodal pairs).

    Raises:
        ValueError: if fewer than two axes are given (no pairwise angles).
    """
    n, D = axes.shape[0], axes.shape[1]
    if n < 2:
        raise ValueError(
            f"need at least 2 axes to compute pairwise angles, got {n}")

    print(f"\n[{label}]")
    print(f" Axes shape: {axes.shape}")

    proj_angles = projective_pairwise_angles(axes)
    print(f" Projective pairwise angles (radians, max π/2={math.pi/2:.3f}):")
    print(f" mean: {proj_angles.mean():.3f}")
    print(f" median: {np.median(proj_angles):.3f}")
    print(f" min: {proj_angles.min():.3f}")
    print(f" max: {proj_angles.max():.3f}")

    uniform_baseline = uniform_rp_pairwise_angle_baseline(D, n)
    deviation = proj_angles.mean() - uniform_baseline
    print(f" Uniform ℝP^{D-1} baseline (n={n}): {uniform_baseline:.3f}")
    print(f" Deviation: {deviation:+.3f} "
          f"({'CLOSE TO UNIFORM' if abs(deviation) < UNIFORM_DEVIATION_TOL else 'NON-UNIFORM'})")

    fraction_clustered = (proj_angles < 0.3).mean()
    print(f" Fraction near-zero (axes parallel): {fraction_clustered:.3f}")

    sils = _silhouette_sweep(axes)
    if sils:
        best_k, best_sil = max(sils, key=lambda x: x[1])
        print(f" Best cluster k={best_k}, silhouette={best_sil:.3f}")
        cluster_verdict = (
            'STRONG (real clusters)' if best_sil > STRONG_SILHOUETTE else
            'WEAK (some structure)' if best_sil > WEAK_SILHOUETTE else
            'NONE (continuous distribution)'
        )
        print(f" Cluster verdict: {cluster_verdict}")
    else:
        best_k, best_sil = None, None
        cluster_verdict = 'N/A'

    # Effective rank = exp(entropy of the normalized singular values).
    sv = np.linalg.svd(axes, compute_uv=False)
    sv_norm = sv / sv.sum()
    erank = math.exp(-(sv_norm * np.log(sv_norm + 1e-12)).sum())
    print(f" Effective rank: {erank:.2f} of {D} possible "
          f"({erank/D*100:.0f}% utilization)")

    # Antipodal pairs still present among the collapsed axes; each pair is
    # counted once from both of its rows, hence the // 2.
    cos_axes = axes @ axes.T
    np.fill_diagonal(cos_axes, 1.0)
    most_anti = cos_axes.min(axis=1)
    secondary_anti = (most_anti < -0.9).sum() // 2
    print(f" Secondary antipodal pairs: {secondary_anti}/{n//2}")

    return {
        'n_axes': int(n),
        'D': int(D),
        'proj_angle_mean': float(proj_angles.mean()),
        'proj_angle_median': float(np.median(proj_angles)),
        'proj_angle_min': float(proj_angles.min()),
        'proj_angle_max': float(proj_angles.max()),
        'uniform_baseline': uniform_baseline,
        'deviation_from_uniform': float(deviation),
        'fraction_clustered': float(fraction_clustered),
        'best_cluster_k': best_k,
        # Already a native float (or None) so json never has to stringify it.
        'best_silhouette': best_sil,
        'cluster_verdict': cluster_verdict,
        'effective_rank': float(erank),
        'utilization': float(erank / D),
        'secondary_antipodal_pairs': int(secondary_anti),
        'proj_angles_subset': proj_angles[:200].tolist(),
    }


# ════════════════════════════════════════════════════════════════════
# Plotting
# ════════════════════════════════════════════════════════════════════

def _verdict_panel_text(results, n_pairs):
    """Return (verdict, explanation, box colour) for the verdict panel."""
    outcome = classify_generalization(results, n_pairs)
    if outcome == 'uniform':
        return (
            "✓ ALSO ℝP³ UNIFORM",
            "H2a's collapsed axes are uniformly distributed on ℝP³.\n"
            "Projective interpretation GENERALIZES beyond D=3.\n\n"
            "Sphere-solvers in general are projective at the level of\n"
            "their geometric output. Polygonal omega derivation via\n"
            "sphere-trained anchors is validated as a method.",
            'lightgreen',
        )
    if outcome == 'spherical':
        # Few pairs collapsed, full rank → still strongly spherical
        return (
            "✗ STILL ESSENTIALLY SPHERICAL",
            f"H2a has {results['n_axes']} axes (vs G-Cand's smaller count),\n"
            f"few antipodal pairs were identified, full rank utilization.\n\n"
            f"Projective collapse barely changes the picture at D=4.\n"
            f"D=3 was a special case — sphere-starvation symptom.\n"
            f"D=4 lives on S³ as designed.",
            'lightyellow',
        )
    if outcome == 'mostly_uniform':
        return (
            "✓ MOSTLY ℝP³, full rank",
            "H2a collapses to axes that are roughly uniform on ℝP³.\n"
            "Projective reading IS valid at D=4 too, with caveats.",
            'palegreen',
        )
    return (
        "? MIXED RESULT",
        "H2a doesn't cleanly fit either ℝP³ uniform or pure spherical.\n"
        "Geometry is more complex than the simple projective hypothesis.",
        'lightgray',
    )


def plot_projective(M_avg, axes, pairs, unpaired, results, output_path,
                    g_cand_results=None):
    """Same 6-panel layout as A0, but for D=4 we project to first 3 dims
    for the 3D scatter panels. Adds optional comparison lines from A0.

    Args:
        M_avg: (V, D) averaged rows; only the first 3 coordinates are drawn.
        axes: (n, D) collapsed axis representatives.
        pairs: antipodal (i, j) index pairs into M_avg.
        unpaired: indices of rows left unpaired.
        results: dict from test_axis_distribution for these axes.
        output_path: where the figure is saved.
        g_cand_results: optional A0 metrics dict for overlay / comparison.
    """
    fig = plt.figure(figsize=(18, 12))

    # Panel 1: Original M_avg projected to first 3 dims
    ax1 = fig.add_subplot(2, 3, 1, projection='3d')
    norms = np.linalg.norm(M_avg, axis=1, keepdims=True)
    unit = M_avg / np.clip(norms, 1e-12, None)
    u = np.linspace(0, 2*np.pi, 20)
    v = np.linspace(0, np.pi, 20)
    x_s = np.outer(np.cos(u), np.sin(v))
    y_s = np.outer(np.sin(u), np.sin(v))
    z_s = np.outer(np.ones_like(u), np.cos(v))
    ax1.plot_wireframe(x_s, y_s, z_s, alpha=0.1, color='gray')

    pair_colors = plt.cm.tab20(np.linspace(0, 1, max(len(pairs), 1)))
    for k, (i, j) in enumerate(pairs):
        color = pair_colors[k]
        for row in (i, j):
            ax1.scatter(unit[row, 0], unit[row, 1], unit[row, 2],
                        c=[color], s=80, edgecolors='black', linewidths=0.5)
        ax1.plot([unit[i, 0], unit[j, 0]],
                 [unit[i, 1], unit[j, 1]],
                 [unit[i, 2], unit[j, 2]],
                 color=color, alpha=0.3, linewidth=0.8)
    for i in unpaired:
        ax1.scatter(unit[i, 0], unit[i, 1], unit[i, 2],
                    c='blue', marker='o', s=80,
                    edgecolors='black', linewidths=0.5, alpha=0.7)
    ax1.set_title(f'H2a M_avg projected to first 3 dims\n'
                  f'{len(pairs)} antipodal pairs (colored), '
                  f'{len(unpaired)} unpaired (blue)')

    # Panel 2: Collapsed axes (first 3 dims)
    ax2 = fig.add_subplot(2, 3, 2, projection='3d')
    ax2.plot_wireframe(x_s, y_s, z_s, alpha=0.1, color='gray')
    for k, axis_vec in enumerate(axes):
        color = plt.cm.tab20(k % 20)
        ax2.scatter(axis_vec[0], axis_vec[1], axis_vec[2],
                    c=[color], s=120, edgecolors='black', linewidths=0.5)
        # Draw the full line through the origin: an axis has no direction.
        ax2.plot([-axis_vec[0], axis_vec[0]],
                 [-axis_vec[1], axis_vec[1]],
                 [-axis_vec[2], axis_vec[2]],
                 color=color, alpha=0.4, linewidth=1.0)
    ax2.set_title(f'Collapsed axes (n={axes.shape[0]})\n'
                  f'D={axes.shape[1]} → projected to first 3 dims')

    # Panel 3: Projective angle distribution + uniform baseline + G-Cand overlay
    # NOTE(review): only the first 200 stored angles are histogrammed, the
    # same subset A0 saved, so the two overlays are like-for-like.
    ax3 = fig.add_subplot(2, 3, 3)
    proj_angles = results['proj_angles_subset']
    ax3.hist(proj_angles, bins=30, density=True, alpha=0.7,
             color='steelblue',
             label=f'H2a projective (D={results["D"]})')
    if g_cand_results is not None:
        ax3.hist(g_cand_results['proj_angles_subset'], bins=30, density=True,
                 alpha=0.4, color='red', label='G-Cand projective (D=3)')
    ax3.axvline(results['uniform_baseline'], color='blue', linestyle='--',
                label=f"H2a uniform ℝP³ ({results['uniform_baseline']:.3f})")
    if g_cand_results is not None:
        ax3.axvline(g_cand_results['uniform_baseline'], color='red',
                    linestyle=':', alpha=0.5,
                    label=f"G-Cand uniform ℝP² ({g_cand_results['uniform_baseline']:.3f})")
    ax3.set_xlabel('Projective pairwise angle (radians)')
    ax3.set_ylabel('Density')
    ax3.set_title(f'Projective angle distribution\n'
                  f"H2a deviation: {results['deviation_from_uniform']:+.3f}")
    ax3.legend(fontsize=8)

    # Panel 4: Cluster silhouette across k
    ax4 = fig.add_subplot(2, 3, 4)
    ks_sils = (_silhouette_sweep(axes)
               if results['best_cluster_k'] is not None else [])
    if ks_sils:
        ks, sils = zip(*ks_sils)
        ax4.plot(ks, sils, 'o-', color='purple', markersize=8)
        ax4.axhline(STRONG_SILHOUETTE, color='red', linestyle='--', alpha=0.5,
                    label='strong cluster')
        ax4.axhline(WEAK_SILHOUETTE, color='orange', linestyle='--', alpha=0.5,
                    label='weak cluster')
        ax4.legend(fontsize=8)
    ax4.set_xlabel('k (number of clusters)')
    ax4.set_ylabel('silhouette score')
    ax4.set_title(f"Axis clustering\n"
                  f"verdict: {results['cluster_verdict']}")
    ax4.grid(alpha=0.3)

    # Panel 5: Singular values
    ax5 = fig.add_subplot(2, 3, 5)
    sv = np.linalg.svd(axes, compute_uv=False)
    ax5.bar([f'σ{i+1}' for i in range(len(sv))], sv,
            color=plt.cm.viridis(np.linspace(0.2, 0.8, len(sv))))
    ax5.set_ylabel('Singular value')
    ax5.set_title(f"Singular values of axis matrix\n"
                  f"effective rank: {results['effective_rank']:.2f} "
                  f"of {results['D']}")

    # Panel 6: Comparison verdict
    ax6 = fig.add_subplot(2, 3, 6)
    ax6.axis('off')
    verdict, explanation, color = _verdict_panel_text(results, len(pairs))
    ax6.text(0.5, 0.85, verdict, ha='center', va='top',
             fontsize=18, fontweight='bold',
             bbox=dict(boxstyle='round', facecolor=color, alpha=0.8))
    ax6.text(0.05, 0.55, explanation, ha='left', va='top',
             fontsize=10, wrap=True, family='monospace')

    metrics_summary = (
        f"\n\nKey metrics (H2a):\n"
        f" axes: {results['n_axes']}\n"
        f" proj angle mean: {results['proj_angle_mean']:.3f}\n"
        f" uniform baseline: {results['uniform_baseline']:.3f}\n"
        f" deviation: {results['deviation_from_uniform']:+.3f}\n"
        f" best cluster silhouette: {_as_float(results['best_silhouette']):.3f}\n"
        f" effective rank: {results['effective_rank']:.2f}/{results['D']}\n"
        f" secondary antipodal: {results['secondary_antipodal_pairs']}\n"
    )
    if g_cand_results is not None:
        metrics_summary += (
            f"\nG-Cand comparison:\n"
            f" axes: {g_cand_results['n_axes']}\n"
            f" deviation: {g_cand_results['deviation_from_uniform']:+.3f}\n"
            # A0's value may be null or a string in the JSON; coerce safely.
            f" best silhouette: {_as_float(g_cand_results['best_silhouette']):.3f}\n"
        )
    ax6.text(0.05, 0.30, metrics_summary, ha='left', va='top',
             fontsize=9, family='monospace')

    plt.tight_layout()
    plt.savefig(output_path, dpi=120, bbox_inches='tight')
    plt.show()


# ════════════════════════════════════════════════════════════════════
# Main
# ════════════════════════════════════════════════════════════════════

def main():
    """Run the full A1 probe: load, collapse, measure, save JSON + figure.

    Returns:
        The dict that was written to OUTPUT_JSON.
    """
    print("=" * 70)
    print("Projective re-probe of H2a (Q-rank02, V=32, D=4)")
    print("Tests whether projective interpretation generalizes from D=3 → D=4")
    print("=" * 70)

    print("\nLoading H2a checkpoint...")
    model, cfg = load_h2a()
    print(f" V={cfg.matrix_v}, D={cfg.D}, "
          f"params={sum(p.numel() for p in model.parameters()):,}")

    print("\nCollecting M tensor (512 gaussian samples)...")
    all_M = collect_per_sample_M(model, cfg)
    M_avg = all_M.mean(axis=0)
    print(f" M_avg shape: {M_avg.shape}")

    print("\nIdentifying antipodal pairs (cos < -0.9, mutual-strongest)...")
    pairs, unpaired = identify_antipodal_pairs(M_avg, threshold=-0.9)
    print(f" Found {len(pairs)} antipodal pairs")
    print(f" Unpaired rows: {len(unpaired)}")
    print(f" Total accounted: {2*len(pairs) + len(unpaired)} of {M_avg.shape[0]}")

    print("\nCollapsing to projective axes...")
    axes = collapse_to_axes(M_avg, pairs, unpaired)
    print(f" Axes: {axes.shape[0]} representatives in {axes.shape[1]}-D")

    results = test_axis_distribution(axes, "H2a projective axes")

    # Try to load A0 (G-Cand) results for side-by-side comparison
    g_cand_results = None
    g_cand_data = {}
    g_cand_json = OUTPUT_DIR / "A0_projective_reprobe.json"
    if g_cand_json.exists():
        with open(g_cand_json, encoding="utf-8") as f:
            g_cand_data = json.load(f)
        g_cand_results = g_cand_data['projective_metrics']
        print("\n (Loaded A0 G-Cand results for comparison)")

    output_data = {
        'config': {
            'variant': 'Q_rank02_h64_V32_D4_dp0_nx0_adam',
            'V': cfg.matrix_v,
            'D': cfg.D,
        },
        'antipodal_pairs_found': len(pairs),
        'unpaired_rows': len(unpaired),
        'total_axes': axes.shape[0],
        'projective_metrics': results,
        'pairs': [list(p) for p in pairs],
        'unpaired': unpaired,
    }
    with open(OUTPUT_JSON, 'w', encoding="utf-8") as f:
        json.dump(output_data, f, indent=2, default=str)
    print(f"\nSaved: {OUTPUT_JSON}")

    plot_projective(M_avg, axes, pairs, unpaired, results, OUTPUT_PLOT,
                    g_cand_results=g_cand_results)
    print(f"Saved: {OUTPUT_PLOT}")

    # Headline conclusion
    print("\n" + "=" * 70)
    print("CONCLUSION — generalization test")
    print("=" * 70)

    print("\n H2a (D=4, V=32):")
    print(f" {len(pairs)} antipodal pairs, {axes.shape[0]} total axes")
    print(f" Projective angle mean: {results['proj_angle_mean']:.3f}")
    print(f" ℝP³ uniform baseline: {results['uniform_baseline']:.3f}")
    print(f" Deviation: {results['deviation_from_uniform']:+.3f}")

    if g_cand_results is not None:
        print("\n G-Cand (D=3, V=32) for comparison:")
        print(f" {g_cand_data.get('antipodal_pairs_found', '?')} antipodal pairs, "
              f"{g_cand_data.get('total_axes', '?')} total axes")
        print(f" Projective angle mean: {g_cand_results['proj_angle_mean']:.3f}")
        print(f" ℝP² uniform baseline: {g_cand_results['uniform_baseline']:.3f}")
        print(f" Deviation: {g_cand_results['deviation_from_uniform']:+.3f}")

    print("\n" + "-" * 70)
    # Same classifier as the figure's verdict panel, so both always agree.
    outcome = classify_generalization(results, len(pairs))
    if outcome == 'uniform':
        print(" ✓ PROJECTIVE READING GENERALIZES")
        print(" H2a also collapses to uniform projective distribution.")
        print(" The polytope-implicit-in-sphere hypothesis is supported")
        print(" at D=4 too. Inference-projection framing is general.")
    elif outcome == 'spherical':
        print(" ✗ PROJECTIVE READING IS D=3-SPECIFIC")
        print(" H2a has very few antipodal pairs — most rows didn't")
        print(" collapse. The projective reading is a sphere-starvation")
        print(" symptom, not a general property of trained sphere-solvers.")
        print(" D=4 lives on S³ as designed.")
    else:
        print(" ? INTERMEDIATE RESULT")
        print(" H2a shows partial collapse with unclear interpretation.")
        print(" Need to think about whether the metric thresholds")
        print(" (uniform deviation, cluster silhouette) are appropriate")
        print(" at higher D where the unfilled space is much larger.")

    return output_data


if __name__ == '__main__':
    results = main()