# geolip-svae-implicit-solver-experiments / 12_test_claim_2_deeper.py
# AbstractPhil's picture
# Create 12_test_claim_2_deeper.py
# 9f3395f verified
"""
implicit_solver/A1_projective_reprobe_h2a.py
============================================
Same projective probe as A0, applied to H2a (Q-rank02, V=32, D=4).
Tests whether the projective interpretation generalizes:
- A0 found G-Cand (D=3) has uniform distribution on ℝP² when collapsed.
- A1 tests whether H2a (D=4) shows the same on ℝP³.
Predicted outcomes
------------------
A. UNIFORM ℝP³ ALSO: H2a's rows collapse to N axes uniformly distributed
on ℝP³ (deviation from baseline < 0.05). Projective reading is
GENERAL — works at any D. Polygonal omega derivation via sphere
training is validated as a method, not a D=3 quirk.
B. STILL SPHERICAL: H2a shows few antipodal pairs (< 4), and what few
axes get collapsed don't show uniform ℝP³ distribution. Projective
reading is D=3-SPECIFIC — sphere-starvation symptom. D=4 genuinely
lives on S³ as designed.
C. INTERMEDIATE: Some collapse but not full uniform. Mixed regime.
Cost: ~10 seconds (same checkpoint we already have).
Output
------
/content/implicit_solver_reports/A1_projective_reprobe_h2a.json
/content/implicit_solver_reports/A1_projective_reprobe_h2a.png
"""
import json
import math
from pathlib import Path
import numpy as np
import torch
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D # noqa
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
# Root directory of the Phase-Q run outputs and the specific rank02
# epoch-1 checkpoint file that load_h2a() restores.
CKPT_DIR = Path("/content/phaseQ_reports")
RANK02_CKPT = CKPT_DIR / "Q_rank02_h64_V32_D4_dp0_nx0_adam" / "epoch_1_checkpoint.pt"
# Report directory. NOTE: it is created (parents included) as soon as this
# module is imported, not only when main() runs.
OUTPUT_DIR = Path("/content/implicit_solver_reports")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
# Destinations of the figure and the JSON report produced by main().
OUTPUT_PLOT = OUTPUT_DIR / "A1_projective_reprobe_h2a.png"
OUTPUT_JSON = OUTPUT_DIR / "A1_projective_reprobe_h2a.json"
# ════════════════════════════════════════════════════════════════════
# Loading
# ════════════════════════════════════════════════════════════════════
def load_h2a():
    """Rebuild the rank02 Phase-Q model and restore its checkpoint weights.

    The first entry of ``get_phaseQ_configs()`` whose ``'variant'`` contains
    ``'rank02'`` supplies the run config and the per-variant overrides used
    to construct ``PatchSVAE_F_Ablation``.

    Returns:
        tuple: ``(model, cfg)`` where ``model`` is in eval mode with the
        weights from ``RANK02_CKPT`` loaded on CPU, and ``cfg`` is the object
        returned by ``build_run_config``.

    Raises:
        StopIteration: if no config entry has ``'rank02'`` in its variant.
    """
    rank02_entry = next(
        entry for entry in get_phaseQ_configs() if 'rank02' in entry['variant']
    )
    cfg = build_run_config(rank02_entry)
    overrides = rank02_entry['overrides']

    model_kwargs = dict(
        matrix_v=cfg.matrix_v,
        D=cfg.D,
        patch_size=cfg.patch_size,
        hidden=cfg.hidden,
        depth=cfg.depth,
        n_cross_layers=cfg.n_cross_layers,
        n_heads=cfg.n_heads,
        max_alpha=overrides.get('max_alpha', cfg.max_alpha),
        alpha_init=cfg.alpha_init,
        activation=overrides.get('activation', 'gelu'),
        row_norm=overrides.get('row_norm', 'sphere'),
        svd_mode=overrides.get('svd', 'fp64'),
        linear_readout=overrides.get('linear_readout', False),
        match_params=overrides.get('match_params', True),
        init_scheme=overrides.get('init', 'orthogonal'),
    )
    model = PatchSVAE_F_Ablation(**model_kwargs)

    # NOTE(review): weights_only=False lets torch.load unpickle arbitrary
    # objects, so this must only ever be pointed at a trusted checkpoint.
    ckpt = torch.load(RANK02_CKPT, map_location='cpu', weights_only=False)

    # The weights may sit under one of several keys; fall back to treating
    # the whole checkpoint object as the state dict when none is populated.
    state_dict = ckpt
    for key in ('model_state', 'model_state_dict', 'state_dict'):
        candidate = ckpt.get(key)
        if candidate:
            state_dict = candidate
            break

    model.load_state_dict(state_dict)
    model.eval()
    return model, cfg
def collect_per_sample_M(model, cfg, n_batches=8, batch_size=64):
    """Run the model on noise images and gather one M slice per sample.

    Args:
        model: Module whose forward output is indexable as
            ``out['svd']['M']``.
        cfg: Run config; only ``cfg.img_size`` is read here.
        n_batches: Number of batches to draw.
        batch_size: Samples per batch; the dataset holds
            ``n_batches * batch_size`` items.

    Returns:
        numpy.ndarray: the ``[:, 0]`` slice of ``out['svd']['M']`` for every
        batch, concatenated along the first dimension.
        (presumably index 0 of the second dimension is the first patch --
        TODO confirm against the model's output layout)
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)

    dataset = OmegaNoiseDataset(
        size=n_batches * batch_size,
        img_size=cfg.img_size,
        allowed_types=[0],
    )
    loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=False)

    # Inference only: no autograd graph is needed for the probe.
    with torch.no_grad():
        per_batch = [
            model(batch.to(device))['svd']['M'][:, 0].cpu()
            for batch, _ in loader
        ]
    return torch.cat(per_batch, dim=0).numpy()
# ════════════════════════════════════════════════════════════════════
# Antipodal pair identification + projective collapse (carry from A0)
# ════════════════════════════════════════════════════════════════════
def identify_antipodal_pairs(M_avg, threshold=-0.9):
    """Greedily pair rows of ``M_avg`` that point in opposite directions.

    Each row nominates the row it has the lowest cosine with. Nominations
    whose cosine is below ``threshold`` are processed from most to least
    antipodal, and a pair is accepted when neither row is already taken.

    Args:
        M_avg: 2-D array, one vector per row.
        threshold: Cosine below which two rows count as antipodal.

    Returns:
        tuple: ``(pairs, unpaired)`` where ``pairs`` is a list of
        ``(low_index, high_index)`` tuples and ``unpaired`` is the ascending
        list of row indices that belong to no pair.
    """
    row_norms = np.linalg.norm(M_avg, axis=1, keepdims=True)
    directions = M_avg / np.clip(row_norms, 1e-12, None)
    cos = directions @ directions.T
    # Put the self-similarity at +1 so a row never nominates itself.
    np.fill_diagonal(cos, 1.0)
    n_rows = M_avg.shape[0]

    nominations = []
    for row in range(n_rows):
        partner = int(cos[row].argmin())
        strength = float(cos[row, partner])
        if strength < threshold:
            nominations.append((strength, row, partner))
    # Ascending tuple order: most negative cosine first, ties by indices.
    ranked = sorted(nominations)

    taken = set()
    pairs = []
    for _, row, partner in ranked:
        if row in taken or partner in taken:
            continue
        # Accept if the partner nominates `row` back, or if the reverse
        # cosine is also below threshold (which holds whenever the cosine
        # matrix is symmetric).
        if cos[partner].argmin() != row and not (cos[partner, row] < threshold):
            continue
        pairs.append((min(row, partner), max(row, partner)))
        taken.update((row, partner))

    unpaired = [row for row in range(n_rows) if row not in taken]
    return pairs, unpaired
def _canonicalize_sign(vec, tol=1e-6):
    """Return ``vec`` or ``-vec`` so its first coordinate with |x| > tol is positive."""
    significant = np.flatnonzero(np.abs(vec) > tol)
    if significant.size and vec[significant[0]] < 0:
        return -vec
    return vec


def collapse_to_axes(M_avg, pairs, unpaired):
    """Collapse rows of ``M_avg`` to one sign-canonical unit vector per axis.

    Rows are first normalised to unit length. Each antipodal pair ``(i, j)``
    is merged symmetrically as ``unit[i] - unit[j]`` and re-normalised; each
    unpaired row is kept as its unit vector. Every representative is then
    sign-flipped, if needed, so that its first coordinate with magnitude
    above 1e-6 is positive.

    Args:
        M_avg: 2-D array, one vector per row.
        pairs: Iterable of ``(i, j)`` row-index pairs to merge.
        unpaired: Iterable of row indices kept on their own.

    Returns:
        numpy.ndarray: shape ``(len(pairs) + len(unpaired), D)``, pair
        representatives first, then unpaired rows, in the given order. When
        both inputs are empty the result has shape ``(0, D)`` so callers can
        still read ``axes.shape[1]``.
    """
    norms = np.linalg.norm(M_avg, axis=1, keepdims=True)
    # Clip guards against division by zero for all-zero rows.
    unit = M_avg / np.clip(norms, 1e-12, None)

    representatives = []
    for i, j in pairs:
        merged = unit[i] - unit[j]
        merged = merged / max(np.linalg.norm(merged), 1e-12)
        representatives.append(_canonicalize_sign(merged))
    for i in unpaired:
        representatives.append(_canonicalize_sign(unit[i].copy()))

    if not representatives:
        # np.array([]) would be 1-D; keep the column count explicit instead.
        return np.empty((0, unit.shape[1]), dtype=unit.dtype)
    return np.array(representatives)
# ════════════════════════════════════════════════════════════════════
# Projective metrics
# ════════════════════════════════════════════════════════════════════
def projective_pairwise_angles(axes):
    """Pairwise angles between rows of ``axes`` measured on ℝP^(D-1).

    The spherical angle θ in [0, π] is folded to [0, π/2] with
    ``min(θ, π - θ)``, so a vector and its negation are at angle 0.

    Args:
        axes: 2-D array of (assumed unit-length) vectors, one per row.

    Returns:
        numpy.ndarray: 1-D array with one angle per unordered row pair, in
        upper-triangle (row-major, ``k=1``) order.
    """
    count = axes.shape[0]
    rows, cols = np.triu_indices(count, k=1)
    # Clip so rounding just outside [-1, 1] cannot make arccos return NaN.
    gram = np.clip(axes @ axes.T, -1, 1)
    theta = np.arccos(gram[rows, cols])
    return np.minimum(theta, np.pi - theta)
def uniform_rp_pairwise_angle_baseline(D, n_axes, n_trials=10):
    """Empirical mean projective pairwise angle for uniformly random axes.

    For each trial, ``n_axes`` Gaussian vectors in ``D`` dimensions are
    normalised to unit length, sign-canonicalised, and passed to
    ``projective_pairwise_angles``; the per-trial means are then averaged.
    The generator is seeded with 0, so the result is deterministic for a
    given ``(D, n_axes, n_trials)``.

    Args:
        D: Ambient dimension of the vectors.
        n_axes: Number of axes sampled per trial.
        n_trials: Number of independent trials averaged.

    Returns:
        float: mean over trials of the mean projective pairwise angle.
    """
    rng = np.random.RandomState(0)
    trial_means = []
    for _ in range(n_trials):
        pts = rng.randn(n_axes, D)
        pts = pts / np.linalg.norm(pts, axis=1, keepdims=True)

        # Flip each row so its first non-zero coordinate is positive.
        for col in range(D):
            # np.all over zero columns is True for every row, so col == 0
            # selects purely on the first coordinate being non-zero.
            leading_zero = np.all(pts[:, :col] == 0, axis=1)
            flip_rows = (pts[:, col] != 0) & leading_zero
            if not flip_rows.any():
                break
            pts[flip_rows] = pts[flip_rows] * np.sign(pts[flip_rows, col:col + 1])

        trial_means.append(projective_pairwise_angles(pts).mean())
    return float(np.mean(trial_means))
def test_axis_distribution(axes, label):
    """Print and return uniformity diagnostics for a set of projective axes.

    Despite the ``test_`` prefix this is an analysis routine, not a unit
    test. It reports projective pairwise-angle statistics against the
    sampled uniform baseline, a KMeans/silhouette sweep over k = 2..7,
    the entropy-based effective rank of the axis matrix, and the number of
    axes that still have a near-antipodal partner (cosine < -0.9).

    Args:
        axes: 2-D array with one axis representative per row.
        label: Heading printed above the report.

    Returns:
        dict: metrics as plain Python values (ints, floats, strings, lists,
        or ``None`` for the cluster fields when no k could be scored), so
        the result can be written to JSON without custom encoders.

    Raises:
        ValueError: if ``axes`` is not 2-D or has fewer than two rows, since
            no pairwise angle exists in that case.
    """
    if axes.ndim != 2 or axes.shape[0] < 2:
        raise ValueError(
            "test_axis_distribution needs a 2-D array with at least two "
            f"axes; got shape {axes.shape}"
        )
    n, D = axes.shape
    print(f"\n[{label}]")
    print(f" Axes shape: {axes.shape}")

    proj_angles = projective_pairwise_angles(axes)
    print(f" Projective pairwise angles (radians, max π/2={math.pi/2:.3f}):")
    print(f" mean: {proj_angles.mean():.3f}")
    print(f" median: {np.median(proj_angles):.3f}")
    print(f" min: {proj_angles.min():.3f}")
    print(f" max: {proj_angles.max():.3f}")

    uniform_baseline = uniform_rp_pairwise_angle_baseline(D, n)
    deviation = proj_angles.mean() - uniform_baseline
    print(f" Uniform ℝP^{D-1} baseline (n={n}): {uniform_baseline:.3f}")
    print(f" Deviation: {deviation:+.3f} "
          f"({'CLOSE TO UNIFORM' if abs(deviation) < 0.05 else 'NON-UNIFORM'})")

    fraction_clustered = (proj_angles < 0.3).mean()
    print(f" Fraction near-zero (axes parallel): {fraction_clustered:.3f}")

    # Silhouette sweep. Clustering is best-effort: a failing k is reported
    # and skipped rather than aborting the whole probe.
    sils = []
    for k in range(2, min(8, n)):
        try:
            km = KMeans(n_clusters=k, n_init=10, random_state=42)
            labels = km.fit_predict(axes)
            if len(set(labels)) >= 2:
                sils.append((k, float(silhouette_score(axes, labels))))
        except Exception as exc:
            print(f" (clustering skipped for k={k}: {exc})")

    if sils:
        best_k, best_sil = max(sils, key=lambda x: x[1])
        print(f" Best cluster k={best_k}, silhouette={best_sil:.3f}")
        cluster_verdict = (
            'STRONG (real clusters)' if best_sil > 0.5 else
            'WEAK (some structure)' if best_sil > 0.3 else
            'NONE (continuous distribution)'
        )
        print(f" Cluster verdict: {cluster_verdict}")
    else:
        best_k, best_sil = None, None
        cluster_verdict = 'N/A'

    # Effective rank = exp(entropy) of the normalised singular values.
    sv = np.linalg.svd(axes, compute_uv=False)
    sv_norm = sv / sv.sum()
    erank = math.exp(-(sv_norm * np.log(sv_norm + 1e-12)).sum())
    print(f" Effective rank: {erank:.2f} of {D} possible "
          f"({erank/D*100:.0f}% utilization)")

    # Axes that still have a near-opposite partner after the collapse.
    cos_axes = axes @ axes.T
    np.fill_diagonal(cos_axes, 1.0)
    most_anti = cos_axes.min(axis=1)
    secondary_anti = (most_anti < -0.9).sum() // 2
    print(f" Secondary antipodal pairs: {secondary_anti}/{n//2}")

    return {
        'n_axes': int(n),
        'D': int(D),
        'proj_angle_mean': float(proj_angles.mean()),
        'proj_angle_median': float(np.median(proj_angles)),
        'proj_angle_min': float(proj_angles.min()),
        'proj_angle_max': float(proj_angles.max()),
        'uniform_baseline': uniform_baseline,
        'deviation_from_uniform': float(deviation),
        'fraction_clustered': float(fraction_clustered),
        'best_cluster_k': best_k,
        # Already a Python float (or None), so json.dump never has to fall
        # back to a string representation for it.
        'best_silhouette': best_sil,
        'cluster_verdict': cluster_verdict,
        'effective_rank': float(erank),
        'utilization': float(erank / D),
        'secondary_antipodal_pairs': int(secondary_anti),
        'proj_angles_subset': proj_angles[:200].tolist(),
    }
# ════════════════════════════════════════════════════════════════════
# Plotting
# ════════════════════════════════════════════════════════════════════
def _format_or_na(value, spec='.3f'):
    """Format ``value`` as a float using ``spec``; return 'n/a' if it is not numeric."""
    try:
        return format(float(value), spec)
    except (TypeError, ValueError):
        return 'n/a'


def _projective_verdict(results):
    """Choose the headline, explanation text and box colour for the verdict panel."""
    is_uniform = abs(results['deviation_from_uniform']) < 0.05
    is_clustered = (results['best_silhouette'] or 0) > 0.5
    has_secondary = results['secondary_antipodal_pairs'] >= 3
    full_rank = results['utilization'] > 0.95

    if is_uniform and not is_clustered and not has_secondary and full_rank:
        verdict = "✓ ALSO ℝP³ UNIFORM"
        explanation = (
            "H2a's collapsed axes are uniformly distributed on ℝP³.\n"
            "Projective interpretation GENERALIZES beyond D=3.\n\n"
            "Sphere-solvers in general are projective at the level of\n"
            "their geometric output. Polygonal omega derivation via\n"
            "sphere-trained anchors is validated as a method."
        )
        color = 'lightgreen'
    elif results['n_axes'] >= results['D'] * 6 and full_rank:
        # Many axes, full rank -> still strongly spherical
        verdict = "✗ STILL ESSENTIALLY SPHERICAL"
        explanation = (
            f"H2a has {results['n_axes']} axes (vs G-Cand's smaller count),\n"
            f"few antipodal pairs were identified, full rank utilization.\n\n"
            f"Projective collapse barely changes the picture at D=4.\n"
            f"D=3 was a special case — sphere-starvation symptom.\n"
            f"D=4 lives on S³ as designed."
        )
        color = 'lightyellow'
    elif is_uniform:
        verdict = "✓ MOSTLY ℝP³, full rank"
        explanation = (
            "H2a collapses to axes that are roughly uniform on ℝP³.\n"
            "Projective reading IS valid at D=4 too, with caveats."
        )
        color = 'palegreen'
    else:
        verdict = "? MIXED RESULT"
        explanation = (
            "H2a doesn't cleanly fit either ℝP³ uniform or pure spherical.\n"
            "Geometry is more complex than the simple projective hypothesis."
        )
        color = 'lightgray'
    return verdict, explanation, color


def plot_projective(M_avg, axes, pairs, unpaired, results, output_path,
                    g_cand_results=None):
    """Draw the 6-panel projective report, save it, and show it.

    Panels: (1) unit rows of ``M_avg`` with antipodal pairs linked,
    (2) collapsed axes, (3) projective-angle histogram with the uniform
    baseline, (4) silhouette score versus k, (5) singular values of the
    axis matrix, (6) textual verdict and key metrics. The two 3-D scatter
    panels plot only the first three coordinates, so ``M_avg`` and ``axes``
    need at least three columns.

    Args:
        M_avg: 2-D array of averaged rows before the collapse.
        axes: 2-D array of collapsed axis representatives.
        pairs: List of ``(i, j)`` row-index pairs from ``M_avg``.
        unpaired: List of row indices of ``M_avg`` that were not paired.
        results: Metrics dict as returned by ``test_axis_distribution``.
        output_path: Where the figure is written (dpi=120).
        g_cand_results: Optional metrics dict of the same layout from the
            A0 (G-Cand) run, overlaid for comparison. Its
            ``'best_silhouette'`` may be missing, ``None`` or non-numeric;
            it is then shown as ``n/a`` instead of raising.
    """
    fig = plt.figure(figsize=(18, 12))

    # Panel 1: Original M_avg projected to first 3 dims
    ax1 = fig.add_subplot(2, 3, 1, projection='3d')
    norms = np.linalg.norm(M_avg, axis=1, keepdims=True)
    unit = M_avg / np.clip(norms, 1e-12, None)
    # Reference unit sphere drawn as a faint wireframe.
    u = np.linspace(0, 2*np.pi, 20)
    v = np.linspace(0, np.pi, 20)
    x_s = np.outer(np.cos(u), np.sin(v))
    y_s = np.outer(np.sin(u), np.sin(v))
    z_s = np.outer(np.ones_like(u), np.cos(v))
    ax1.plot_wireframe(x_s, y_s, z_s, alpha=0.1, color='gray')
    pair_colors = plt.cm.tab20(np.linspace(0, 1, max(len(pairs), 1)))
    for k, (i, j) in enumerate(pairs):
        color = pair_colors[k]
        ax1.scatter(unit[i, 0], unit[i, 1], unit[i, 2],
                    c=[color], s=80, edgecolors='black', linewidths=0.5)
        ax1.scatter(unit[j, 0], unit[j, 1], unit[j, 2],
                    c=[color], s=80, edgecolors='black', linewidths=0.5)
        ax1.plot([unit[i, 0], unit[j, 0]],
                 [unit[i, 1], unit[j, 1]],
                 [unit[i, 2], unit[j, 2]],
                 color=color, alpha=0.3, linewidth=0.8)
    for i in unpaired:
        ax1.scatter(unit[i, 0], unit[i, 1], unit[i, 2],
                    c='blue', marker='o', s=80,
                    edgecolors='black', linewidths=0.5, alpha=0.7)
    ax1.set_title(f'H2a M_avg projected to first 3 dims\n'
                  f'{len(pairs)} antipodal pairs (colored), '
                  f'{len(unpaired)} unpaired (blue)')

    # Panel 2: Collapsed axes (first 3 dims)
    ax2 = fig.add_subplot(2, 3, 2, projection='3d')
    ax2.plot_wireframe(x_s, y_s, z_s, alpha=0.1, color='gray')
    for k, rep in enumerate(axes):
        rep_color = plt.cm.tab20(k % 20)
        ax2.scatter(rep[0], rep[1], rep[2], c=[rep_color],
                    s=120, edgecolors='black', linewidths=0.5)
        # Draw the full line through the origin: an axis has no direction.
        ax2.plot([-rep[0], rep[0]], [-rep[1], rep[1]], [-rep[2], rep[2]],
                 color=rep_color, alpha=0.4, linewidth=1.0)
    ax2.set_title(f'Collapsed axes (n={axes.shape[0]})\n'
                  f'D={axes.shape[1]} → projected to first 3 dims')

    # Panel 3: Projective angle distribution + uniform baseline + G-Cand overlay
    ax3 = fig.add_subplot(2, 3, 3)
    proj_angles = results['proj_angles_subset']
    ax3.hist(proj_angles, bins=30, density=True, alpha=0.7,
             color='steelblue', label=f'H2a projective (D={results["D"]})')
    if g_cand_results is not None:
        ax3.hist(g_cand_results['proj_angles_subset'], bins=30, density=True,
                 alpha=0.4, color='red', label='G-Cand projective (D=3)')
    ax3.axvline(results['uniform_baseline'], color='blue', linestyle='--',
                label=f"H2a uniform ℝP³ ({results['uniform_baseline']:.3f})")
    if g_cand_results is not None:
        ax3.axvline(g_cand_results['uniform_baseline'], color='red',
                    linestyle=':', alpha=0.5,
                    label=f"G-Cand uniform ℝP² ({g_cand_results['uniform_baseline']:.3f})")
    ax3.set_xlabel('Projective pairwise angle (radians)')
    ax3.set_ylabel('Density')
    ax3.set_title(f'Projective angle distribution\n'
                  f"H2a deviation: {results['deviation_from_uniform']:+.3f}")
    ax3.legend(fontsize=8)

    # Panel 4: Cluster silhouette across k
    ax4 = fig.add_subplot(2, 3, 4)
    if results['best_cluster_k'] is not None:
        # The sweep is recomputed here because `results` only stores the
        # best k; the fixed random_state keeps it consistent with the report.
        ks_sils = []
        for k in range(2, min(8, axes.shape[0])):
            try:
                km = KMeans(n_clusters=k, n_init=10, random_state=42)
                labels = km.fit_predict(axes)
                if len(set(labels)) >= 2:
                    ks_sils.append((k, silhouette_score(axes, labels)))
            except Exception as exc:
                # Best-effort panel: skip a failing k but say so.
                print(f" (silhouette panel: k={k} skipped: {exc})")
        if ks_sils:
            ks, sils = zip(*ks_sils)
            ax4.plot(ks, sils, 'o-', color='purple', markersize=8)
            ax4.axhline(0.5, color='red', linestyle='--', alpha=0.5,
                        label='strong cluster')
            ax4.axhline(0.3, color='orange', linestyle='--', alpha=0.5,
                        label='weak cluster')
    ax4.set_xlabel('k (number of clusters)')
    ax4.set_ylabel('silhouette score')
    ax4.set_title(f"Axis clustering\n"
                  f"verdict: {results['cluster_verdict']}")
    # Only build a legend when something labelled was actually drawn.
    legend_handles, _ = ax4.get_legend_handles_labels()
    if legend_handles:
        ax4.legend(fontsize=8)
    ax4.grid(alpha=0.3)

    # Panel 5: Singular values
    ax5 = fig.add_subplot(2, 3, 5)
    sv = np.linalg.svd(axes, compute_uv=False)
    ax5.bar([f'σ{i+1}' for i in range(len(sv))], sv,
            color=plt.cm.viridis(np.linspace(0.2, 0.8, len(sv))))
    ax5.set_ylabel('Singular value')
    ax5.set_title(f"Singular values of axis matrix\n"
                  f"effective rank: {results['effective_rank']:.2f} "
                  f"of {results['D']}")

    # Panel 6: Comparison verdict
    ax6 = fig.add_subplot(2, 3, 6)
    ax6.axis('off')
    verdict, explanation, color = _projective_verdict(results)
    ax6.text(0.5, 0.85, verdict, ha='center', va='top',
             fontsize=18, fontweight='bold',
             bbox=dict(boxstyle='round', facecolor=color, alpha=0.8))
    ax6.text(0.05, 0.55, explanation, ha='left', va='top', fontsize=10,
             wrap=True, family='monospace')
    metrics_summary = (
        f"\n\nKey metrics (H2a):\n"
        f" axes: {results['n_axes']}\n"
        f" proj angle mean: {results['proj_angle_mean']:.3f}\n"
        f" uniform baseline: {results['uniform_baseline']:.3f}\n"
        f" deviation: {results['deviation_from_uniform']:+.3f}\n"
        f" best cluster silhouette: {results['best_silhouette'] or 0:.3f}\n"
        f" effective rank: {results['effective_rank']:.2f}/{results['D']}\n"
        f" secondary antipodal: {results['secondary_antipodal_pairs']}\n"
    )
    if g_cand_results is not None:
        g_cand_sil = _format_or_na(g_cand_results.get('best_silhouette'))
        metrics_summary += (
            f"\nG-Cand comparison:\n"
            f" axes: {g_cand_results['n_axes']}\n"
            f" deviation: {g_cand_results['deviation_from_uniform']:+.3f}\n"
            f" best silhouette: {g_cand_sil}\n"
        )
    ax6.text(0.05, 0.30, metrics_summary, ha='left', va='top',
             fontsize=9, family='monospace')

    plt.tight_layout()
    plt.savefig(output_path, dpi=120, bbox_inches='tight')
    plt.show()
# ════════════════════════════════════════════════════════════════════
# Main
# ════════════════════════════════════════════════════════════════════
def main():
    """Run the H2a projective re-probe end to end.

    Steps: load the rank02 checkpoint, collect per-sample M slices and
    average them, pair antipodal rows, collapse them to axes, compute the
    distribution metrics, write the JSON report to ``OUTPUT_JSON``, render
    the figure to ``OUTPUT_PLOT``, and print a headline conclusion. If an
    A0 report exists in ``OUTPUT_DIR`` its metrics are shown alongside; an
    unreadable or incomplete A0 report only disables that comparison.

    Returns:
        dict: the report that was written to ``OUTPUT_JSON``.
    """
    print("=" * 70)
    print("Projective re-probe of H2a (Q-rank02, V=32, D=4)")
    print("Tests whether projective interpretation generalizes from D=3 → D=4")
    print("=" * 70)

    print("\nLoading H2a checkpoint...")
    model, cfg = load_h2a()
    print(f" V={cfg.matrix_v}, D={cfg.D}, "
          f"params={sum(p.numel() for p in model.parameters()):,}")

    print("\nCollecting M tensor (512 gaussian samples)...")
    all_M = collect_per_sample_M(model, cfg)
    M_avg = all_M.mean(axis=0)
    print(f" M_avg shape: {M_avg.shape}")

    print("\nIdentifying antipodal pairs (cos < -0.9, mutual-strongest)...")
    pairs, unpaired = identify_antipodal_pairs(M_avg, threshold=-0.9)
    print(f" Found {len(pairs)} antipodal pairs")
    print(f" Unpaired rows: {len(unpaired)}")
    print(f" Total accounted: {2*len(pairs) + len(unpaired)} of {M_avg.shape[0]}")

    print("\nCollapsing to projective axes...")
    axes = collapse_to_axes(M_avg, pairs, unpaired)
    print(f" Axes: {axes.shape[0]} representatives in {axes.shape[1]}-D")

    results = test_axis_distribution(axes, "H2a projective axes")

    # Try to load A0 (G-Cand) results for side-by-side comparison. This is
    # optional context, so a broken A0 report must not abort the A1 run.
    g_cand_data = {}
    g_cand_results = None
    g_cand_json = OUTPUT_DIR / "A0_projective_reprobe.json"
    if g_cand_json.exists():
        try:
            with open(g_cand_json) as f:
                g_cand_data = json.load(f)
        except json.JSONDecodeError as exc:
            g_cand_data = {}
            print(f"\n (Could not parse {g_cand_json}: {exc}; "
                  f"skipping comparison)")
        else:
            g_cand_results = g_cand_data.get('projective_metrics')
            if g_cand_results is not None:
                print("\n (Loaded A0 G-Cand results for comparison)")
            else:
                print("\n (A0 report has no 'projective_metrics'; "
                      "skipping comparison)")

    output_data = {
        'config': {
            'variant': 'Q_rank02_h64_V32_D4_dp0_nx0_adam',
            'V': cfg.matrix_v,
            'D': cfg.D,
        },
        'antipodal_pairs_found': len(pairs),
        'unpaired_rows': len(unpaired),
        'total_axes': axes.shape[0],
        'projective_metrics': results,
        'pairs': [list(p) for p in pairs],
        'unpaired': unpaired,
    }
    # NOTE: default=str turns any non-JSON-native value into its string
    # form instead of raising, so unexpected types end up as strings.
    with open(OUTPUT_JSON, 'w') as f:
        json.dump(output_data, f, indent=2, default=str)
    print(f"\nSaved: {OUTPUT_JSON}")

    plot_projective(M_avg, axes, pairs, unpaired, results, OUTPUT_PLOT,
                    g_cand_results=g_cand_results)
    print(f"Saved: {OUTPUT_PLOT}")

    # Headline conclusion
    print("\n" + "=" * 70)
    print("CONCLUSION — generalization test")
    print("=" * 70)
    is_uniform = abs(results['deviation_from_uniform']) < 0.05
    is_clustered = (results['best_silhouette'] or 0) > 0.5
    has_secondary = results['secondary_antipodal_pairs'] >= 3
    full_rank = results['utilization'] > 0.95

    print(f"\n H2a (D=4, V=32):")
    print(f" {len(pairs)} antipodal pairs, {axes.shape[0]} total axes")
    print(f" Projective angle mean: {results['proj_angle_mean']:.3f}")
    print(f" ℝP³ uniform baseline: {results['uniform_baseline']:.3f}")
    print(f" Deviation: {results['deviation_from_uniform']:+.3f}")
    if g_cand_results is not None:
        print(f"\n G-Cand (D=3, V=32) for comparison:")
        print(f" {g_cand_data.get('antipodal_pairs_found', '?')} antipodal pairs, "
              f"{g_cand_data.get('total_axes', '?')} total axes")
        print(f" Projective angle mean: {g_cand_results['proj_angle_mean']:.3f}")
        print(f" ℝP² uniform baseline: {g_cand_results['uniform_baseline']:.3f}")
        print(f" Deviation: {g_cand_results['deviation_from_uniform']:+.3f}")

    print("\n" + "-" * 70)
    if is_uniform and not is_clustered and not has_secondary and full_rank:
        print(" ✓ PROJECTIVE READING GENERALIZES")
        print(" H2a also collapses to uniform projective distribution.")
        print(" The polytope-implicit-in-sphere hypothesis is supported")
        print(" at D=4 too. Inference-projection framing is general.")
    elif len(pairs) <= 4 and full_rank:
        print(" ✗ PROJECTIVE READING IS D=3-SPECIFIC")
        print(" H2a has very few antipodal pairs — most rows didn't")
        print(" collapse. The projective reading is a sphere-starvation")
        print(" symptom, not a general property of trained sphere-solvers.")
        print(" D=4 lives on S³ as designed.")
    else:
        print(" ? INTERMEDIATE RESULT")
        print(" H2a shows partial collapse with unclear interpretation.")
        print(" Need to think about whether the metric thresholds")
        print(" (uniform deviation, cluster silhouette) are appropriate")
        print(" at higher D where the unfilled space is much larger.")
    return output_data


# Script entry point: run the probe and keep the report in `results`.
if __name__ == '__main__':
    results = main()