Create 5_big_finder_sweep_600_configs.py

Browse files

Files changed (1) hide show

5_big_finder_sweep_600_configs.py +119 -0

5_big_finder_sweep_600_configs.py ADDED Viewed

	@@ -0,0 +1,119 @@

+"""
+cell_q_runner.py — Phase Q H2-candidate extended sweep
+Takes the top 10 configs flagged by the P-sweep analyzer's
+continued-training-potential metric and re-runs each with
+batch_limit=1000 (50× the P sweep's 20-batch budget).
+Purpose: produce the data needed to assign H2 class ranks —
+actual convergence floors, trajectory shapes at full budget,
+Adam-vs-LBFGS parity question, sharpened discrimination ratios.
+Output: /content/phaseQ_reports/results_phaseQ.json
+"""
+import json
+import time
+import traceback
+from pathlib import Path
+OUTPUT_ROOT = Path("/content/phaseQ_reports")
+OUTPUT_ROOT.mkdir(parents=True, exist_ok=True)
+AGGREGATE_PATH = OUTPUT_ROOT / "results_phaseQ.json"
+def run_sweep():
+    configs = get_phaseQ_configs()
+    print(f"Phase Q: {len(configs)} configs at 1000 batches each")
+    print(f"Output: {OUTPUT_ROOT}\n")
+    # Print the config lineup so we know what's running
+    print("Config lineup:")
+    for cfg in configs:
+        ov = cfg['overrides']
+        print(f"  {cfg['variant']:<45} "
+              f"h={ov['hidden']} V={ov['V']} D={ov['D']} "
+              f"dp={ov['depth']} nx={ov['n_cross']} opt={ov['optimizer']}")
+    print()
+    # Resume support
+    results = []
+    done_variants = set()
+    if AGGREGATE_PATH.exists():
+        with open(AGGREGATE_PATH) as f:
+            results = json.load(f)
+        done_variants = {r.get('variant') for r in results}
+        if done_variants:
+            print(f"Resuming: {len(done_variants)} configs already complete")
+    sweep_t0 = time.time()
+    for i, cfg in enumerate(configs):
+        if cfg['variant'] in done_variants:
+            print(f"  [{i+1}/{len(configs)}] {cfg['variant']}  (skipped — already done)")
+            continue
+        config_output_dir = OUTPUT_ROOT / cfg['variant']
+        config_output_dir.mkdir(exist_ok=True)
+        batch_limit = phase2_batch_limit(cfg)
+        t0 = time.time()
+        print(f"  [{i+1}/{len(configs)}] {cfg['variant']}  "
+              f"(batch_limit={batch_limit}) ...", end=' ', flush=True)
+        try:
+            report = run_ablation_config(
+                ablation_config=cfg,
+                output_dir=str(config_output_dir),
+                batch_limit=batch_limit,
+                num_epochs=cfg.get('num_epochs', 1),
+            )
+            report['_sweep_status'] = 'ok'
+            elapsed = time.time() - t0
+            final_mse = report.get('test_mse_per_noise', {}).get(0,
+                        report.get('test_mse_per_noise', {}).get('0', 'N/A'))
+            cv = report.get('observed_sphere_cv', 0.0)
+            finite = report.get('params_finite', False)
+            status_ind = "OK " if finite else "NaN"
+            print(f"{status_ind} ({elapsed:.0f}s, "
+                  f"G-MSE={final_mse if isinstance(final_mse, str) else f'{final_mse:.5f}'}, "
+                  f"CV={cv:.3f})")
+        except Exception as e:
+            report = {
+                '_sweep_status': f'error: {type(e).__name__}: {str(e)[:300]}',
+                '_traceback': traceback.format_exc()[:2000],
+                'config': cfg,
+                'variant': cfg['variant'],
+            }
+            print(f"ERROR: {type(e).__name__}: {str(e)[:80]}")
+        report['variant'] = cfg['variant']
+        report['wallclock_outer_s'] = time.time() - t0
+        results.append(report)
+        # Checkpoint after every run (only 10 configs, cheap)
+        with open(AGGREGATE_PATH, 'w') as f:
+            json.dump(results, f, indent=2, default=str)
+    total = time.time() - sweep_t0
+    print(f"\nPhase Q complete: {len(results)} reports in {total/60:.1f} min")
+    print(f"Aggregate: {AGGREGATE_PATH}")
+    # Quick summary
+    print(f"\nQuick summary (by rank):")
+    print(f"  {'Rank':<6} {'Variant':<45} {'G-MSE':>9} {'CV':>6} {'Finite':>7}")
+    print(f"  {'-'*75}")
+    for r in results:
+        v = r.get('variant', '?')
+        g_mse = r.get('test_mse_per_noise', {}).get(0)
+        if g_mse is None:
+            g_mse = r.get('test_mse_per_noise', {}).get('0', float('nan'))
+        cv = r.get('observed_sphere_cv', 0.0)
+        finite = r.get('params_finite', False)
+        print(f"  {v[:5]:<6} {v[:45]:<45} "
+              f"{g_mse:>9.5f} {cv:>6.3f} {'YES' if finite else 'NO':>7}")
+    return results
+if __name__ == '__main__':
+    results = run_sweep()