"""
ablation_configs.py
====================
The ablation matrix for the three-band SVAE validation sweep.

Each config is a dict of overrides on the baseline PatchSVAE_F trainer.
The trainer expects:
  - band: 'LOW' | 'MID' | 'HIGH' (selects the base architecture)
  - variant: unique identifier for this variant within the group
  - seed: random seed
  - phase: 1 (1000-batch triage) | 2 (full run)
  - overrides: dict of RunConfig field overrides

Three band representatives (kept constant across every test):
  LOW:  S=64, V=64, D=16, h=64, d=1, patch=16, 184K params, CV target ≈ 0.21
  MID:  S=64, V=64, D=8,  h=64, d=1, patch=16, 183K params, CV target ≈ 0.39
  HIGH: S=64, V=32, D=4,  h=64, d=1, patch=4,  41K params,  CV target ≈ 1.10

Phase 1 early-stop:
  - LOW/MID bands: train to batch 1000, record CV_ema, classify band
  - HIGH band: train to batch 100, record CV_ema, classify band

Phase 2 full run (originally planned as 30 epochs with up to 10 seeds per
variant; the builders in this file currently emit 1-epoch configs):
  - Group E (soft-hand): 1 epoch, 3 seeds per variant
  - Group H (SVD necessity): 1 epoch, 1-3 seeds per variant
"""
|
|
from collections import Counter
from dataclasses import dataclass, field, asdict
from itertools import product
from typing import Dict, List, Any
|
|
|
|
| |
| |
| |
|
|
# Base architecture settings for the three band representatives.
# img_size, hidden, depth and n_cross are identical for every band; only
# V, D, patch_size and the expected CV / parameter count vary per band.
BAND_REPS = {
    band_name: {
        'img_size': 64,
        'V': v_value,
        'D': d_value,
        'hidden': 64,
        'depth': 1,
        'patch_size': patch,
        'n_cross': 1,
        'expected_cv': target_cv,
        'expected_params': n_params,
    }
    for band_name, v_value, d_value, patch, target_cv, n_params in (
        ('LOW', 64, 16, 16, 0.21, 184_000),
        ('MID', 64, 8, 16, 0.39, 183_000),
        ('HIGH', 32, 4, 4, 1.10, 41_000),
    )
}
|
|
|
|
def band_classifier(cv_ema: float) -> str:
    """Map a final CV-EMA value onto a band label.

    Returns 'LOW' below 0.30, 'MID' from 0.30 up to (not including) 0.55,
    and 'HIGH' strictly above 0.80.  Anything else -- including the
    0.55-0.80 gap -- is reported as 'UNCLASSIFIED'.
    """
    if cv_ema < 0.30:
        return 'LOW'
    if cv_ema < 0.55:
        return 'MID'
    return 'HIGH' if cv_ema > 0.80 else 'UNCLASSIFIED'
|
|
|
|
def phase1_batch_limit(band: str) -> int:
    """Return the Phase 1 batch budget used before band classification.

    The 'HIGH' band stops after 100 batches; every other band value
    trains for 1000 batches.
    """
    return 100 if band == 'HIGH' else 1000
|
|
|
|
def phase2_batch_limit(config: Dict[str, Any]) -> int:
    """Return the number of batches per epoch for a Phase 2 config.

    Resolution order:

    1. An explicit ``'batch_limit'`` key on the config wins.  This lets the
       floor sweep (P group) cap at a few dozen batches without changing
       the defaults for the other Phase 2 configs.
    2. If ``config['overrides']['optimizer']`` is ``'lbfgs'`` the limit is
       2000 outer batches.  At batch_size=32 an epoch would normally be
       31250 batches, but LBFGS does 20 inner iterations per outer step, so
       2000 outer batches is ~40k gradient steps -- plenty for
       within-attractor convergence.
    3. Otherwise the limit is ``1_000_000 // batch_size``, with
       ``batch_size`` read from the top level of the config (default 256,
       i.e. ~3900 batches).
    """
    unset = object()
    explicit_limit = config.get('batch_limit', unset)
    if explicit_limit is not unset:
        return explicit_limit

    chosen_optimizer = config.get('overrides', {}).get('optimizer')
    if chosen_optimizer == 'lbfgs':
        return 2000

    return 1_000_000 // config.get('batch_size', 256)
|
|
|
|
| |
| |
| |
|
|
def group_A_seed_replication() -> List[Dict[str, Any]]:
    """Build Group A (reproducibility): 5 seeds x 3 bands = 15 runs.

    Tests whether each band reproducibly appears across random inits.
    Acceptance: >=4/5 seeds per band within +/-0.02 of expected CV.

    Returns:
        Phase 1 baseline configs (empty overrides), ordered band-major
        (LOW, MID, HIGH) with seeds 0..4 inside each band.
    """
    return [
        {
            'group': 'A',
            'variant': 'baseline',
            'band': band_name,
            'seed': seed_idx,
            'phase': 1,
            'overrides': {},
            'description': f'A-{band_name}-baseline-s{seed_idx}',
        }
        for band_name in ('LOW', 'MID', 'HIGH')
        for seed_idx in range(5)
    ]
|
|
|
|
def group_B_dataset_composition() -> List[Dict[str, Any]]:
    """Build Group B (noise-type dependence): 6 variants x 3 bands = 18 runs.

    Tests whether band structure is architecture-driven or data-driven by
    restricting training to different subsets of the noise-type indices.

    Returns:
        Phase 1 configs (seed 0), ordered variant-major then band
        (LOW, MID, HIGH).  Each config owns its own ``noise_types`` list.
    """
    variants = {
        'B1_all16': list(range(16)),
        'B2_gaussian_only': [0],
        'B3_structured': [3, 4, 5, 11, 13],
        'B4_heavy_tailed': [6, 7, 10],
        'B5_first_half': list(range(8)),
        'B6_even_indices': [0, 2, 4, 6, 8, 10, 12, 14],
    }
    configs = []
    for variant_name, types in variants.items():
        for band in ['LOW', 'MID', 'HIGH']:
            configs.append({
                'group': 'B',
                'variant': variant_name,
                'band': band,
                'seed': 0,
                'phase': 1,
                # Copy the list so the three band configs of one variant do
                # not share a single mutable noise_types object.
                'overrides': {'noise_types': list(types)},
                'description': f'B-{band}-{variant_name}',
            })
    return configs
|
|
|
|
def group_C_optimizer() -> List[Dict[str, Any]]:
    """Build Group C (optimizer dependence): 4 variants x 3 bands = 12 runs.

    Tests whether the attractor is Adam-specific.

    NOTE: LBFGS was originally included as C5 but removed 2026-04-20
    after empirical evidence that it is incompatible with the sphere-
    normed architecture as currently constructed. LBFGS's flat-space
    strong Wolfe line search drives parameters away from the sphere
    manifold during line search, producing ill-conditioned SVD inputs.
    Symptoms observed: D=16 crashed in torch.linalg.eigh with "failed
    to converge -- ill-conditioned or too many repeated eigenvalues";
    D=8 and D=4 completed but produced NaN MSE (CV measurements at
    intermediate batches were valid -- 0.3373 MID, 0.9435 HIGH -- but
    final test MSE was NaN, indicating parameters went non-finite
    during training).

    This is NOT a finding about LBFGS as an optimizer -- it's a finding
    about the LBFGS-sphere_norm interaction. Proper test requires
    Riemannian LBFGS with constraint-aware line search. See scratchpad
    entry 000080 for the dedicated LBFGS engineering pass TODO.

    Returns:
        Phase 1 configs (seed 0), ordered variant-major then band.  Each
        config gets its own copy of the variant's override dict.
    """
    variants = [
        ('C1_adam', {'optimizer': 'adam', 'lr': 1e-4, 'weight_decay': 0.0}),
        ('C2_sgd', {'optimizer': 'sgd', 'lr': 1e-2, 'momentum': 0.0}),
        ('C3_sgd_momentum', {'optimizer': 'sgd', 'lr': 1e-2, 'momentum': 0.9}),
        ('C4_adamw', {'optimizer': 'adamw', 'lr': 1e-4, 'weight_decay': 0.01}),
    ]
    configs = []
    for variant_name, overrides in variants:
        for band in ['LOW', 'MID', 'HIGH']:
            configs.append({
                'group': 'C',
                'variant': variant_name,
                'band': band,
                'seed': 0,
                'phase': 1,
                # Copy per config: mutating one run's overrides must not
                # leak into the other bands of the same variant.
                'overrides': dict(overrides),
                'description': f'C-{band}-{variant_name}',
            })
    return configs
|
|
|
|
def group_D_schedule() -> List[Dict[str, Any]]:
    """Build Group D (LR schedule): 5 variants x 3 bands = 15 runs.

    Returns:
        Phase 1 configs (seed 0), ordered variant-major then band
        (LOW, MID, HIGH).  Each config gets its own copy of the variant's
        override dict.
    """
    variants = [
        ('D1_cosine', {'scheduler': 'cosine'}),
        ('D2_constant', {'scheduler': 'constant'}),
        ('D3_linear_decay', {'scheduler': 'linear'}),
        ('D4_warm_restart', {'scheduler': 'cosine_warm_restarts', 'T_0': 1000}),
        ('D5_one_cycle', {'scheduler': 'one_cycle'}),
    ]
    return [
        {
            'group': 'D',
            'variant': variant_name,
            'band': band,
            'seed': 0,
            'phase': 1,
            # Copy per config so the bands of one variant never share a
            # mutable overrides dict.
            'overrides': dict(overrides),
            'description': f'D-{band}-{variant_name}',
        }
        for variant_name, overrides in variants
        for band in ('LOW', 'MID', 'HIGH')
    ]
|
|
|
|
def group_E_soft_hand() -> List[Dict[str, Any]]:
    """Build Group E (soft-hand guidance) -- PHASE 2.

    Each run is 1 epoch (~3900 batches at batch_size=256).

    Phase 1 E_preview already showed all four variants reach the same band
    at 1000 batches (all within 0.0014 CV). The Phase 2 question is NO
    LONGER "does the attractor survive" -- that's settled -- but rather:
    "what's the within-attractor reconstruction MSE under each soft-hand
    regime over a full epoch?"

    Primary comparison: E1 (full soft-hand) vs E2 (pure MSE). If MSE
    differs meaningfully, soft-hand is trading reconstruction quality
    for geometric coherence at an epoch-scale budget.

    4 variants x 3 bands x 3 seeds = 36 runs.

    Returns:
        Phase 2 configs ordered variant-major, then band, then seed.
        Each config gets its own copy of the variant's override dict.
    """
    variants = [
        ('E1_full_softhand', {'soft_hand': True, 'boost': 0.5, 'cv_penalty': 0.3}),
        ('E2_pure_mse', {'soft_hand': False, 'boost': 0.0, 'cv_penalty': 0.0}),
        ('E3_measure_only', {'soft_hand': False, 'boost': 0.0, 'cv_penalty': 0.0, 'cv_measurement_only': True}),
        # NOTE(review): hard_cv_target=0.21 equals the LOW band's
        # expected_cv but is applied to every band here -- confirm that a
        # single target (rather than a per-band one) is intended.
        ('E4_hard_cv_penalty', {'soft_hand': False, 'boost': 0.0, 'cv_penalty': 1.0, 'hard_cv_target': 0.21}),
    ]
    return [
        {
            'group': 'E',
            'variant': variant_name,
            'band': band,
            'seed': seed,
            'phase': 2,
            'num_epochs': 1,
            'batch_size': 256,
            # Copy per run so bands/seeds of one variant never share a
            # mutable overrides dict.
            'overrides': dict(overrides),
            'description': f'E-{band}-{variant_name}-s{seed}',
        }
        for variant_name, overrides in variants
        for band in ('LOW', 'MID', 'HIGH')
        for seed in range(3)
    ]
|
|
|
|
def group_E_subset_phase1() -> List[Dict[str, Any]]:
    """Build the Group E Phase 1 preview: 1 seed per variant, 1000 batches.

    Quick read on whether E2 even approaches the attractor before
    committing to full Phase 2 Group E. 4 variants x 3 bands = 12 runs.

    Returns:
        Phase 1 configs tagged with group 'E_preview' (seed 0), ordered
        variant-major then band.  Each config gets its own copy of the
        variant's override dict.
    """
    variants = [
        ('E1_full_softhand', {'soft_hand': True, 'boost': 0.5, 'cv_penalty': 0.3}),
        ('E2_pure_mse', {'soft_hand': False, 'boost': 0.0, 'cv_penalty': 0.0}),
        ('E3_measure_only', {'soft_hand': False, 'boost': 0.0, 'cv_penalty': 0.0, 'cv_measurement_only': True}),
        ('E4_hard_cv_penalty', {'soft_hand': False, 'boost': 0.0, 'cv_penalty': 1.0, 'hard_cv_target': 0.21}),
    ]
    configs = []
    for variant_name, overrides in variants:
        for band in ['LOW', 'MID', 'HIGH']:
            configs.append({
                'group': 'E_preview',
                'variant': variant_name,
                'band': band,
                'seed': 0,
                'phase': 1,
                # Copy per config: the bands of one variant must not share
                # a single mutable overrides dict.
                'overrides': dict(overrides),
                'description': f'Eprev-{band}-{variant_name}',
            })
    return configs
|
|
|
|
def group_F_activation() -> List[Dict[str, Any]]:
    """Build Group F (activation function): 5 variants x 3 bands = 15 runs.

    Returns:
        Phase 1 configs (seed 0), ordered variant-major then band
        (LOW, MID, HIGH).  Each config gets its own copy of the variant's
        override dict.
    """
    variants = [
        ('F1_gelu', {'activation': 'gelu'}),
        ('F2_relu', {'activation': 'relu'}),
        ('F3_silu', {'activation': 'silu'}),
        ('F4_tanh', {'activation': 'tanh'}),
        ('F5_identity', {'activation': 'identity'}),
    ]
    return [
        {
            'group': 'F',
            'variant': variant_name,
            'band': band,
            'seed': 0,
            'phase': 1,
            # Copy per config so no two configs share a mutable dict.
            'overrides': dict(overrides),
            'description': f'F-{band}-{variant_name}',
        }
        for variant_name, overrides in variants
        for band in ('LOW', 'MID', 'HIGH')
    ]
|
|
|
|
def group_G_sphere_norm() -> List[Dict[str, Any]]:
    """Build Group G (sphere-norm ablation): 4 variants x 3 bands = 12 runs.

    Expected per framework: G2 (no sphere-norm) reproduces charge-
    discharge catastrophe. G3/G4 may or may not preserve the band.

    Returns:
        Phase 1 configs (seed 0), ordered variant-major then band.  Each
        config gets its own copy of the variant's override dict.
    """
    variants = [
        ('G1_sphere_norm', {'row_norm': 'sphere'}),
        ('G2_no_norm', {'row_norm': 'none'}),
        ('G3_layer_norm', {'row_norm': 'layer_norm'}),
        ('G4_scale_only', {'row_norm': 'scale_only'}),
    ]
    configs = []
    for variant_name, overrides in variants:
        for band in ['LOW', 'MID', 'HIGH']:
            configs.append({
                'group': 'G',
                'variant': variant_name,
                'band': band,
                'seed': 0,
                'phase': 1,
                # Copy per config so no two configs share a mutable dict.
                'overrides': dict(overrides),
                'description': f'G-{band}-{variant_name}',
            })
    return configs
|
|
|
|
def group_H_svd_necessity() -> List[Dict[str, Any]]:
    """Build Group H (SVD necessity) -- PHASE 2.

    Each run is 1 epoch (~3900 batches at batch_size=256).

    Tests whether learned linear readout can match SVD, and whether
    fp64 SVD precision and per-batch SVD are load-bearing.

    Staged seed counts based on the question each variant answers:
      - H1/H2/H3 (3 seeds): core SVD-vs-linear comparison, needs variance
      - H4/H5 (2 seeds): precision/batching questions, binary yes/no
      - H6 (1 seed): expected-failure confirmation

    Total: 3x3 + 3x3 + 3x3 + 3x2 + 3x2 + 3x1 = 42 runs

    Returns:
        Phase 2 configs ordered by stage (full, probe, confirm), then
        variant, band and seed.  Each config gets its own copy of the
        variant's override dict.
    """
    variants_full = [
        ('H1_svd_fp64', {'svd': 'fp64'}),
        ('H2_linear_matched', {'svd': 'none', 'linear_readout': True, 'match_params': True}),
        ('H3_linear_unmatched', {'svd': 'none', 'linear_readout': True, 'match_params': False}),
    ]
    variants_probe = [
        ('H4_svd_fp32', {'svd': 'fp32'}),
        ('H5_batch_shared_svd', {'svd': 'batch_shared'}),
    ]
    variants_confirm = [
        ('H6_no_svd_direct', {'svd': 'none', 'linear_readout': False}),
    ]
    # (variant list, number of seeds) per stage, in emission order.
    stages = [(variants_full, 3), (variants_probe, 2), (variants_confirm, 1)]
    configs = []
    for variants, n_seeds in stages:
        for variant_name, overrides in variants:
            for band in ['LOW', 'MID', 'HIGH']:
                for seed in range(n_seeds):
                    configs.append({
                        'group': 'H',
                        'variant': variant_name,
                        'band': band,
                        'seed': seed,
                        'phase': 2,
                        'num_epochs': 1,
                        'batch_size': 256,
                        # Copy per run so bands/seeds of one variant never
                        # share a mutable overrides dict.
                        'overrides': dict(overrides),
                        'description': f'H-{band}-{variant_name}-s{seed}',
                    })
    return configs
|
|
|
|
def group_L2_lbfgs() -> List[Dict[str, Any]]:
    """Build Group L2 (LBFGS characterization) -- PHASE 2.

    Each run is 1 epoch at batch_size=32 (the value set on these configs).
    When these configs go through phase2_batch_limit, the
    optimizer='lbfgs' override caps them at 2000 outer batches.

    Front-loads LBFGS investigation after Phil's isolated test at 100
    batches showed LBFGS + pure MSE + no soft-hand reaches the HIGH
    attractor (CV 0.869) with better within-attractor reconstruction MSE
    (0.0644) than Adam + soft-hand achieves at 30 epochs (0.072).

    Phase 2 L2 tests whether this gap holds at epoch scale and whether
    MID band shows a similar effect.

    ===============================================================
    STIPEND: LOW band (D=16) OMITTED pending LBFGS engineering pass.
    ===============================================================
    Isolated test in Phase 1 session confirmed LBFGS + sphere_norm +
    D=16 crashes torch.linalg.eigh (error code 15, ill-conditioned
    Gram matrix). PyTorch LBFGS's flat-space strong Wolfe line search
    drives parameters off the sphere manifold, producing degenerate
    SVD inputs. Fix requires Riemannian (constraint-aware) line
    search -- see scratchpad entry 000080 for the engineering pass
    TODO. L2-LOW will be runnable once RLBFGS integration lands.

    Current scope: MID + HIGH only, pure MSE + no soft-hand
    (matching the Phil isolated test configuration that produced
    the 0.869/0.0644 data point).

    2 bands x 3 seeds = 6 runs.

    Returns:
        Phase 2 configs ordered band (MID, HIGH) then seed.  Each config
        gets its own copy of the variant's override dict.
    """
    variants = [
        ('L2_lbfgs_pure_mse', {
            'optimizer': 'lbfgs',
            'lr': 1.0,
            'batch_size': 32,
            'soft_hand': False,
            'boost': 0.0,
            'cv_penalty': 0.0,
        }),
    ]
    configs = []
    for variant_name, overrides in variants:
        for band in ['MID', 'HIGH']:
            for seed in range(3):
                configs.append({
                    'group': 'L2',
                    'variant': variant_name,
                    'band': band,
                    'seed': seed,
                    'phase': 2,
                    'num_epochs': 1,
                    'batch_size': 32,
                    # Copy per run so the six configs never share a single
                    # mutable overrides dict.
                    'overrides': dict(overrides),
                    'description': f'L2-{band}-{variant_name}-s{seed}',
                })
    return configs
|
|
|
|
def group_I_cross_attention() -> List[Dict[str, Any]]:
    """Build Group I (cross-attention necessity): 4 variants x 3 bands = 12 runs.

    Returns:
        Phase 1 configs (seed 0), ordered variant-major then band
        (LOW, MID, HIGH).  Each config gets its own copy of the variant's
        override dict.
    """
    variants = [
        ('I1_1layer', {'n_cross': 1, 'max_alpha': 0.2}),
        ('I2_0layers', {'n_cross': 0}),
        ('I3_2layers', {'n_cross': 2, 'max_alpha': 0.2}),
        ('I4_unbounded_alpha', {'n_cross': 1, 'max_alpha': 1.0}),
    ]
    return [
        {
            'group': 'I',
            'variant': variant_name,
            'band': band,
            'seed': 0,
            'phase': 1,
            # Copy per config so no two configs share a mutable dict.
            'overrides': dict(overrides),
            'description': f'I-{band}-{variant_name}',
        }
        for variant_name, overrides in variants
        for band in ('LOW', 'MID', 'HIGH')
    ]
|
|
|
|
def group_J_capacity_within_LOW() -> List[Dict[str, Any]]:
    """Build Group J (minimum on-attractor parameter count): LOW band only.

    Five variants, each overriding ``V`` and ``hidden`` on the LOW band.

    Returns:
        Five Phase 1 configs (seed 0), one per variant, in J1..J5 order.
    """
    size_table = (
        ('J1_V64_h64', 64, 64),
        ('J2_V32_h32', 32, 32),
        ('J3_V16_h32', 16, 32),
        ('J4_V64_h32', 64, 32),
        ('J5_V128_h128', 128, 128),
    )
    return [
        {
            'group': 'J',
            'variant': tag,
            'band': 'LOW',
            'seed': 0,
            'phase': 1,
            'overrides': {'V': v_size, 'hidden': hidden_size},
            'description': f'J-LOW-{tag}',
        }
        for tag, v_size, hidden_size in size_table
    ]
|
|
|
|
def group_K_batch_size() -> List[Dict[str, Any]]:
    """Build Group K (batch size sensitivity): 4 variants x 3 bands = 12 runs.

    Returns:
        Phase 1 configs (seed 0), ordered variant-major then band
        (LOW, MID, HIGH).  Each config gets its own copy of the variant's
        override dict.
    """
    variants = [
        ('K1_bs128', {'batch_size': 128}),
        ('K2_bs32', {'batch_size': 32}),
        ('K3_bs512', {'batch_size': 512}),
        ('K4_bs1024', {'batch_size': 1024}),
    ]
    configs = []
    for variant_name, overrides in variants:
        for band in ['LOW', 'MID', 'HIGH']:
            configs.append({
                'group': 'K',
                'variant': variant_name,
                'band': band,
                'seed': 0,
                'phase': 1,
                # Copy per config so no two configs share a mutable dict.
                'overrides': dict(overrides),
                'description': f'K-{band}-{variant_name}',
            })
    return configs
|
|
|
|
def group_L_initialization() -> List[Dict[str, Any]]:
    """Build Group L (initialization): 4 variants x 3 bands = 12 runs.

    Returns:
        Phase 1 configs (seed 0), ordered variant-major then band
        (LOW, MID, HIGH).  Each config gets its own copy of the variant's
        override dict.
    """
    variants = [
        ('L1_orthogonal', {'init': 'orthogonal'}),
        ('L2_kaiming', {'init': 'kaiming_normal'}),
        ('L3_xavier', {'init': 'xavier_uniform'}),
        ('L4_normal_small', {'init': 'normal_0_02'}),
    ]
    return [
        {
            'group': 'L',
            'variant': variant_name,
            'band': band,
            'seed': 0,
            'phase': 1,
            # Copy per config so no two configs share a mutable dict.
            'overrides': dict(overrides),
            'description': f'L-{band}-{variant_name}',
        }
        for variant_name, overrides in variants
        for band in ('LOW', 'MID', 'HIGH')
    ]
|
|
|
|
def group_M_brute_force_sgd() -> List[Dict[str, Any]]:
    """Build Group M (brute-force SGD stress): 3 variants x 3 bands = 9 runs.

    Returns:
        Phase 1 configs (seed 0), ordered variant-major then band
        (LOW, MID, HIGH).  Each config gets its own copy of the variant's
        override dict.
    """
    variants = [
        ('M1_sgd_aggressive', {'optimizer': 'sgd', 'lr': 1e-1, 'momentum': 0.0, 'warmup': 0}),
        ('M2_sgd_huge_lr', {'optimizer': 'sgd', 'lr': 1.0, 'momentum': 0.0, 'grad_clip': 1.0}),
        ('M3_sgd_high_momentum', {'optimizer': 'sgd', 'lr': 3e-3, 'momentum': 0.99}),
    ]
    configs = []
    for variant_name, overrides in variants:
        for band in ['LOW', 'MID', 'HIGH']:
            configs.append({
                'group': 'M',
                'variant': variant_name,
                'band': band,
                'seed': 0,
                'phase': 1,
                # Copy per config so no two configs share a mutable dict.
                'overrides': dict(overrides),
                'description': f'M-{band}-{variant_name}',
            })
    return configs
|
|
|
|
def group_N_uniformity_diagnostic() -> List[Dict[str, Any]]:
    """Attractor uniformity diagnostic -- deliberately yields no configs.

    Group N is not a standalone group.  It marks a diagnostic that is
    ADDED TO EVERY other variant's post-training analysis:
      1. Extract final sphere-normed rows
      2. Compute pentachoron CV at n_samples=2000
      3. Compare to uniform-sphere prediction for that D
      4. Record observed_CV, uniform_CV, deviation in final_report.json

    Returns:
        Always a new empty list.
    """
    no_standalone_runs: List[Dict[str, Any]] = []
    return no_standalone_runs
|
|
|
|
| |
| |
| |
|
|
def get_phase1_configs() -> List[Dict[str, Any]]:
    """Assemble the Phase 1 matrix: all band-classification ablations.

    Groups are concatenated in the recommended run order (most
    informative first):
      1. Group A (seed replication) -- foundational
      2. Group G (sphere-norm) -- framework verification
      3. Group E_preview (soft-hand 1000-batch preview)
      4. Groups B, C, D, F, I, J, K, L, M -- remaining ablations

    Returns:
        One flat list holding every group's configs in that order.
    """
    builders = (
        group_A_seed_replication,
        group_G_sphere_norm,
        group_E_subset_phase1,
        group_B_dataset_composition,
        group_C_optimizer,
        group_D_schedule,
        group_F_activation,
        group_I_cross_attention,
        group_J_capacity_within_LOW,
        group_K_batch_size,
        group_L_initialization,
        group_M_brute_force_sgd,
    )
    matrix = []
    for build_group in builders:
        matrix = matrix + build_group()
    return matrix
|
|
|
|
def group_P_small_battery_floor() -> List[Dict[str, Any]]:
    """Build Group P (small-battery floor sweep) -- PHASE 2, tiny batch budget.

    Grid-sweeps architecture at the H2_linear_matched baseline to find
    the smallest battery that still reconstructs gaussian within a
    reasonable multiplier of the h2-64 floor AND lands in a valid
    geometric attractor (CV in MID/HIGH range).

    Grid axes:
        hidden:    {4, 8, 16, 32, 64}     5
        V:         {2, 4, 8, 16, 32}      5
        D:         {2, 3, 4}              3
        depth:     {0, 1}                 2
        n_cross:   {0, 1}                 2
        optimizer: {'adam', 'lbfgs'}      2

    Full product: 5 x 5 x 3 x 2 x 2 x 2 = 600 runs.

    Pins (H2_linear_matched baseline):
        svd='none', linear_readout=True, match_params=True
        band='HIGH' (patch_size=4, img_size=64)
        batch_size=256
        batch_limit=20 (5120 samples seen -- matches floor-sweep budget)

    NOTE: smooth_mid is NOT varied here -- PatchSVAE_F_Ablation doesn't
    expose it as a parameter. All configs use the PatchSVAE_F_Ablation
    default BoundarySmooth. If smooth_mid variation is needed later,
    plumb it through the model class and add it as a grid axis.

    LIMITATION: cv_of() returns 0 for V<5 (pentachoron volume needs >=5
    points). V in {2,4} configs will have observed_sphere_cv=0, cv_ema=0,
    and predicted_band='LOW'. This is an architectural constraint of
    the geometric validity metric, not a training failure. Use
    test_mse_per_noise[0] and train_loss_trajectory as the primary
    quality metrics for those configs; CV-based analysis applies only
    to V>=8 configs.

    Records via run_ablation_config's full report: CV_ema, cv_last,
    S0, SD, ratio, erank, observed_sphere_cv, band_deviation,
    predicted_band, band_match, params_finite, cv_trajectory,
    train_loss_trajectory, test_mse, test_mse_per_noise, plus
    per-config wallclock and batches_completed.

    Returns:
        600 Phase 2 configs (seed 42).  The rightmost grid axis
        (optimizer) varies fastest and hidden varies slowest.
    """
    # product() iterates with the rightmost axis fastest, which matches
    # nesting the axes as loops in this order.
    grid = product(
        [4, 8, 16, 32, 64],    # hidden
        [2, 4, 8, 16, 32],     # V
        [2, 3, 4],             # D
        [0, 1],                # depth
        [0, 1],                # n_cross
        ['adam', 'lbfgs'],     # optimizer
    )
    configs = []
    for hidden, V, D, depth, n_cross, optimizer in grid:
        variant_name = (
            f"P_h{hidden}_V{V}_D{D}_dp{depth}"
            f"_nx{n_cross}_{optimizer}"
        )
        # Adam runs at lr=3e-3; the only other optimizer on the grid
        # (lbfgs) runs at lr=1.0.
        lr = 3e-3 if optimizer == 'adam' else 1.0
        configs.append({
            'group': 'P',
            'variant': variant_name,
            'band': 'HIGH',
            'seed': 42,
            'phase': 2,
            'num_epochs': 1,
            'batch_size': 256,
            'batch_limit': 20,
            'overrides': {
                # H2_linear_matched pins
                'svd': 'none',
                'linear_readout': True,
                'match_params': True,
                # swept architecture axes
                'hidden': hidden,
                'V': V,
                'D': D,
                'depth': depth,
                'n_cross': n_cross,
                'n_heads': 1,
                # optimizer axis
                'optimizer': optimizer,
                'lr': lr,
                'grad_clip': 1.0,
                'cv_measure_every': 2,
                'soft_hand': False,
                # train on noise type 0 only, test on all 16
                'noise_types': [0],
                'test_noise_types': list(range(16)),
                'test_samples_per_noise': 256,
                'test_batch_size': 64,
            },
            'description': (
                f'P-HIGH-{variant_name} '
                f'(floor sweep, 20-batch budget)'
            ),
        })
    return configs
|
|
|
|
def group_implicit_solver_A_d5_spherical() -> List[Dict[str, Any]]:
    """Build the implicit-solver A-set: D=5 spherical reference batteries.

    Three configs to test the projective-axis hypothesis at D=5:
        A3a: V=16, D=5 -- minimal V, may force more antipodal collapses
        A3b: V=32, D=5 -- direct comparator to H2a (V=32, D=4)
        A3c: V=64, D=5 -- extra V room, may reduce antipodal pair count

    All configs match Q-rank02 (H2a) baseline:
        H2_linear_matched: svd=none, linear_readout=True, match_params=True
        Adam @ lr=3e-3, depth=0, n_cross=0, n_heads=1
        1000 batches, gaussian-only training
        Per-noise test on all 16 noise types

    Predicted (if 000101 generalizes to D=5):
        - All three converge with finite MSE
        - All three show projective-uniform distribution on RP^4
        - Axis count grows with V; antipodal pair count grows with V/D
        - Effective rank stays near full (~4.95/5)

    A3b is the critical test (matches H2a config except D bumped to 5).

    Returns:
        Three Phase 2 configs (seed 42, batch_limit=1000) in A3a, A3b,
        A3c order.
    """
    # (V, D, label) per reference battery.
    reference_batteries = (
        (16, 5, 'A3a_V16_D5'),
        (32, 5, 'A3b_V32_D5'),
        (64, 5, 'A3c_V64_D5'),
    )

    def make_config(n_rows: int, dim: int, label: str) -> Dict[str, Any]:
        # One config dict for a single (V, D) reference battery.
        tag = f"{label}_h64_dp0_nx0_adam"
        run_overrides = {
            'svd': 'none',
            'linear_readout': True,
            'match_params': True,
            'hidden': 64,
            'V': n_rows,
            'D': dim,
            'depth': 0,
            'n_cross': 0,
            'n_heads': 1,
            'optimizer': 'adam',
            'lr': 3e-3,
            'grad_clip': 1.0,
            'cv_measure_every': 50,
            'soft_hand': False,
            'noise_types': [0],
            'test_noise_types': list(range(16)),
            'test_samples_per_noise': 256,
            'test_batch_size': 64,
        }
        return {
            'group': 'implicit_solver_A',
            'variant': tag,
            'band': 'HIGH',
            'seed': 42,
            'phase': 2,
            'num_epochs': 1,
            'batch_size': 256,
            'batch_limit': 1000,
            'overrides': run_overrides,
            'description': (
                f'implicit_solver_A-{tag} '
                f'(D=5 spherical reference, projective probe target)'
            ),
        }

    return [make_config(*battery) for battery in reference_batteries]
|
|
|
|
def get_implicit_solver_A_configs() -> List[Dict[str, Any]]:
    """Return the implicit-solver A-set Stage 1 configs (D=5 spherical references).

    Thin entry point that delegates to
    group_implicit_solver_A_d5_spherical().
    """
    stage1_configs = group_implicit_solver_A_d5_spherical()
    return stage1_configs
|
|
|
|
def group_R_packed_polytope_test() -> List[Dict[str, Any]]:
    """Build Group R (sphere-packing prediction test): does V x D matter geometrically?

    Hypothesis (from G-Class probe v3): the 32-row x D=3 G-Class behavior
    (rotating antipodal frame) emerged because 32 points cannot be
    uniformly arranged on S^2 -- geometric frustration. When V matches a
    natural polytope vertex count for S^(D-1), training should produce
    STATIC sphere-solver rows instead.

    Three test configs (each predicted to produce H2-LIKE static rows):
        - D=4, V=16: 16-cell (4-orthoplex) vertex count on S^3
        - D=4, V=8:  16-cell again (8 vertices = 4D cross-polytope subset)
                     or 8-cell (tesseract) -- 8 is canonical for both
        - D=3, V=20: dodecahedron vertex count on S^2

    All else matches H2a (Q-rank02): adam, lr=3e-3, depth=0, n_cross=0,
    H2_linear_matched (svd=none, linear_readout=True, match_params=True).
    1000 batches, gaussian-only training, 16-noise per-noise test.

    Predicted result: all three produce row_stability > 0.85, antipodal
    pair fraction < 0.55 -- i.e. H2-LIKE character on the v3 probe.

    Returns:
        Three Phase 2 configs (seed 42, batch_limit=1000), one per
        polytope, in the order listed above.
    """
    # (V, D, polytope label) per test case.
    polytope_table = (
        (16, 4, '16cell_orthoplex'),
        (8, 4, '8cell_or_16cell_subset'),
        (20, 3, 'dodecahedron'),
    )

    def make_config(n_rows: int, dim: int, polytope_label: str) -> Dict[str, Any]:
        # One config dict for a single (V, D, polytope) test case.
        tag = f"R_h64_V{n_rows}_D{dim}_{polytope_label}_adam"
        run_overrides = {
            'svd': 'none',
            'linear_readout': True,
            'match_params': True,
            'hidden': 64,
            'V': n_rows,
            'D': dim,
            'depth': 0,
            'n_cross': 0,
            'n_heads': 1,
            'optimizer': 'adam',
            'lr': 3e-3,
            'grad_clip': 1.0,
            'cv_measure_every': 50,
            'soft_hand': False,
            'noise_types': [0],
            'test_noise_types': list(range(16)),
            'test_samples_per_noise': 256,
            'test_batch_size': 64,
        }
        return {
            'group': 'R',
            'variant': tag,
            'band': 'HIGH',
            'seed': 42,
            'phase': 2,
            'num_epochs': 1,
            'batch_size': 256,
            'batch_limit': 1000,
            'overrides': run_overrides,
            'description': (
                f'R-HIGH-{tag} '
                f'(packing test, predicted H2-LIKE)'
            ),
        }

    return [make_config(*case) for case in polytope_table]
|
|
|
|
def get_phaseR_configs() -> List[Dict[str, Any]]:
    """Return the Phase R configs: the sphere-packing prediction test (3 configs).

    Thin entry point that delegates to group_R_packed_polytope_test().
    """
    packing_configs = group_R_packed_polytope_test()
    return packing_configs
|
|
|
|
def group_Q_h2_candidates() -> List[Dict[str, Any]]:
    """Build Group Q: top-10 P-sweep winners extended to 1000 batches.

    These are the 10 configs flagged by the P-sweep analyzer's
    continued-training-potential ranking. Each is re-run with the
    same architecture and optimizer but with batch_limit=1000 (50x
    the P sweep's 20-batch budget).

    Purpose: answer the classification questions the P sweep couldn't:
        - What's the actual convergence floor per config?
        - Does Adam catch LBFGS with enough budget?  (The table below
          holds 7 Adam and 3 LBFGS entries.)
        - Where does the loss trajectory flatten?
        - Does discrimination ratio sharpen with more training?
        - Does final CV land in the valid band (0.13-0.30)?

    Results feed into H2 class-rank assignment.

    cv_measure_every=50 so we get ~20 CV measurements across the run
    (P sweep used 2, which would be 500 measurements at 1000 batches --
    too many).

    Returns:
        Ten Phase 2 configs (seed 42) in rank order; the 1-based rank is
        embedded in each variant name as ``Q_rankNN``.
    """
    # (hidden, V, D, depth, n_cross, optimizer), listed by rank 1..10.
    ranked_winners = (
        (64, 32, 4, 1, 0, 'lbfgs'),
        (64, 32, 4, 0, 0, 'adam'),
        (64, 32, 4, 0, 1, 'adam'),
        (64, 32, 4, 0, 1, 'lbfgs'),
        (64, 16, 4, 1, 1, 'lbfgs'),
        (64, 32, 3, 1, 1, 'adam'),
        (64, 32, 3, 0, 1, 'adam'),
        (64, 32, 4, 1, 1, 'adam'),
        (64, 32, 3, 0, 0, 'adam'),
        (64, 32, 2, 0, 1, 'adam'),
    )

    def make_config(position: int, spec: tuple) -> Dict[str, Any]:
        # One config dict for the winner at 1-based rank `position`.
        width, n_rows, dim, n_layers, cross_layers, opt_name = spec
        tag = (
            f"Q_rank{position:02d}_h{width}_V{n_rows}_D{dim}_dp{n_layers}"
            f"_nx{cross_layers}_{opt_name}"
        )
        # Adam runs at lr=3e-3; every other optimizer in the table
        # (i.e. lbfgs) runs at lr=1.0.
        if opt_name == 'adam':
            step_size = 3e-3
        else:
            step_size = 1.0
        run_overrides = {
            'svd': 'none',
            'linear_readout': True,
            'match_params': True,
            'hidden': width,
            'V': n_rows,
            'D': dim,
            'depth': n_layers,
            'n_cross': cross_layers,
            'n_heads': 1,
            'optimizer': opt_name,
            'lr': step_size,
            'grad_clip': 1.0,
            'cv_measure_every': 50,
            'soft_hand': False,
            'noise_types': [0],
            'test_noise_types': list(range(16)),
            'test_samples_per_noise': 256,
            'test_batch_size': 64,
        }
        return {
            'group': 'Q',
            'variant': tag,
            'band': 'HIGH',
            'seed': 42,
            'phase': 2,
            'num_epochs': 1,
            'batch_size': 256,
            'batch_limit': 1000,
            'overrides': run_overrides,
            'description': (
                f'Q-HIGH-{tag} '
                f'(H2 candidate extended sweep, 1000 batches)'
            ),
        }

    return [
        make_config(position, spec)
        for position, spec in enumerate(ranked_winners, start=1)
    ]
|
|
|
|
def get_phaseQ_configs() -> List[Dict[str, Any]]:
    """Return the Phase Q configs: top-10 P winners at 1000 batches.

    Used for H2 class-rank assignment.  Thin entry point that delegates to
    group_Q_h2_candidates().
    """
    candidate_configs = group_Q_h2_candidates()
    return candidate_configs
|
|
|
|
def get_phaseP_configs() -> List[Dict[str, Any]]:
    """Return the Phase P (floor sweep) configs: 600 configs at 20 batches each.

    Thin entry point that delegates to group_P_small_battery_floor().
    """
    floor_sweep_configs = group_P_small_battery_floor()
    return floor_sweep_configs
|
|
|
|
def get_phase2_configs() -> List[Dict[str, Any]]:
    """Assemble the Phase 2 matrix: 1 epoch per config, resume-capable.

    Groups E and H run at batch_size=256 (~3900 batches per epoch); the
    L2 configs set batch_size=32.

    Revised from original 174-config design after Phase 1 settled the
    "does the attractor survive" question. Phase 2 now characterizes
    WITHIN-ATTRACTOR behavior over one full epoch:

      - Group E (36 runs): within-attractor MSE under each soft-hand regime
      - Group H (42 runs): SVD necessity (vs learned linear readout)
      - Group L2 (6 runs): LBFGS within-attractor MSE characterization
                           (MID + HIGH only; LOW stipended pending RLBFGS
                           engineering pass -- see group_L2_lbfgs docstring)

    Total: 84 runs. Intriguing cases can be continued to epoch 3 or 5
    using the orchestrator's continue_training() function.

    Returns:
        One flat list: Group E configs, then Group H, then Group L2.
    """
    matrix = []
    for build_group in (group_E_soft_hand, group_H_svd_necessity, group_L2_lbfgs):
        matrix = matrix + build_group()
    return matrix
|
|
|
|
def summarize(configs: List[Dict[str, Any]]) -> None:
    """Print a breakdown of a config matrix for sanity-checking.

    Counts configs per ``'group'``, ``'band'`` and ``'phase'`` and prints
    the total followed by each breakdown, with keys in sorted order.

    Args:
        configs: Config dicts; each must carry 'group', 'band' and 'phase'.

    Raises:
        KeyError: If a config lacks one of those keys.  All counting is
            done before anything is printed, so no partial output appears.
    """
    by_group = Counter()
    by_band = Counter()
    by_phase = Counter()
    # Single pass, so any missing key surfaces before the first print.
    for cfg in configs:
        by_group[cfg['group']] += 1
        by_band[cfg['band']] += 1
        by_phase[cfg['phase']] += 1

    print(f"Total configs: {len(configs)}")
    print("\nBy group:")
    for group_name, count in sorted(by_group.items()):
        print(f" {group_name}: {count}")
    print("\nBy band:")
    for band_name, count in sorted(by_band.items()):
        print(f" {band_name}: {count}")
    print("\nBy phase:")
    for phase_id, count in sorted(by_phase.items()):
        print(f" Phase {phase_id}: {count}")
|
|
|
|
if __name__ == '__main__':
    # Script entry point: print a sanity-check summary of both matrices,
    # each under a 60-character banner, with one blank line between them.
    banner = "=" * 60
    sections = (
        ("PHASE 1 MATRIX", get_phase1_configs),
        ("PHASE 2 MATRIX", get_phase2_configs),
    )
    for position, (title, build_matrix) in enumerate(sections):
        if position > 0:
            print()
        print(banner)
        print(title)
        print(banner)
        summarize(build_matrix())