# Spaces: Runtime error
"""SDR Composition Analysis v3 — using cached retina.npz.

Loads the ``sdr`` matrix from the cached retina archive, samples a subset of
tokens with a fixed seed, and reports three statistics on the sample:

* pairwise Jaccard similarity between all sampled token codes,
* "union generalization": for random token pairs, the best Jaccard match
  between the union of the two codes and any *third* sampled token,
* intersection sparsity: how many active bits random token pairs share.

The summary is printed and written to ``results_sdr_composition.json`` in
``OUT_DIR``.
"""
import json
import os  # not used below; kept because the original script imported it
from pathlib import Path

import numpy as np

OUT_DIR = Path(__file__).resolve().parents[1] / "docs"
RETINA = Path.home() / ".cache" / "autoresearch" / "retina.npz"

SEED = 42
SAMPLE_SIZE = 500
N_UNION_PAIRS = 100
N_INTERSECTION_PAIRS = 500
MATCH_THRESHOLD = 0.3  # a union "matches" a third token above this Jaccard


def _jaccard(inter, union):
    """Return ``inter / union`` elementwise, treating an empty union as size 1."""
    return inter / np.maximum(union, 1)


def _pairwise_jaccard(active):
    """Return the Jaccard similarity of every row pair ``i < j`` of *active*.

    *active* is a 2-D boolean matrix (one row per sampled token). The result
    is a 1-D float array ordered like the nested loop
    ``for i in range(n) for j in range(i + 1, n)``.
    """
    weights = active.astype(np.float64)
    counts = active.sum(axis=1)
    # The dot product of two 0/1 rows is the size of their intersection;
    # rint guards the integer conversion against any floating-point fuzz.
    inter = np.rint(weights @ weights.T).astype(np.int64)
    union = counts[:, None] + counts[None, :] - inter
    rows, cols = np.triu_indices(len(active), k=1)
    return _jaccard(inter[rows, cols], union[rows, cols])


def _draw_distinct_pair(rng, n):
    """Draw two different indices in ``[0, n)`` from *rng* (requires n >= 2)."""
    while True:
        i, j = rng.randint(n, size=2)
        if i != j:
            return int(i), int(j)


def _union_generalization(active, token_ids, rng, n_pairs):
    """Score how well the union of two codes matches some third sampled token.

    For each of *n_pairs* random distinct pairs, the two rows are OR-ed and
    compared (Jaccard) with every other row of *active*; the best score is
    kept. Returns a list of ``{"i", "j", "best_union_jaccard"}`` dicts where
    ``i``/``j`` are the original token ids taken from *token_ids*.
    Requires at least three rows so that a third token exists.
    """
    weights = active.astype(np.float64)
    counts = active.sum(axis=1)
    results = []
    for _ in range(n_pairs):
        i, j = _draw_distinct_pair(rng, len(active))
        union_code = active[i] | active[j]
        inter = np.rint(weights @ union_code.astype(np.float64)).astype(np.int64)
        scores = _jaccard(inter, int(union_code.sum()) + counts - inter)
        # The pair's own members trivially overlap the union; exclude them.
        scores[[i, j]] = -1.0
        results.append({
            "i": int(token_ids[i]),
            "j": int(token_ids[j]),
            "best_union_jaccard": float(scores.max()),
        })
    return results


def _intersection_sizes(active, rng, n_pairs):
    """Return the shared-active-bit count for *n_pairs* random distinct pairs."""
    sizes = []
    for _ in range(n_pairs):
        # Distinct indices only: a token paired with itself would report its
        # whole code as "shared" and inflate the mean and max.
        i, j = _draw_distinct_pair(rng, len(active))
        sizes.append(int(np.count_nonzero(active[i] & active[j])))
    return sizes


def analyze(sdr, seed=SEED, sample_size=SAMPLE_SIZE):
    """Compute the composition statistics for an SDR matrix.

    Args:
        sdr: 2-D array with one row per token and one column per bit;
            non-zero entries are treated as active bits.
        seed: seed for the ``RandomState`` used for all sampling.
        sample_size: maximum number of tokens to sample (clamped to the
            number of rows in *sdr*).

    Returns:
        A JSON-serializable dict with the keys ``pairwise_jaccard``,
        ``union_generalization``, ``intersection`` and ``sparsity``.

    Raises:
        ValueError: if fewer than three tokens can be sampled (the union
            test needs a pair plus at least one other token).
    """
    n_tok, n_bits = sdr.shape
    sample_n = min(sample_size, n_tok)
    if sample_n < 3:
        raise ValueError(f"need at least 3 sampled tokens, got {sample_n}")

    n_active = int(sdr.sum(axis=1).mean())
    print(f"[SDR] {n_tok} tokens x {n_bits} bits, ~{n_active} active/token "
          f"({n_active/n_bits*100:.2f}% density)")

    # Sample tokens (without replacement) for the pairwise statistics.
    rng = np.random.RandomState(seed)
    idx = rng.choice(n_tok, sample_n, replace=False)
    active = sdr[idx].astype(bool)

    jaccards = _pairwise_jaccard(active)
    print(f"[SDR] Jaccard: mean={jaccards.mean():.4f} median={np.median(jaccards):.4f} "
          f"P95={np.percentile(jaccards, 95):.4f} any_overlap={(jaccards > 0).mean()*100:.1f}%")

    pair_results = _union_generalization(active, idx, rng, N_UNION_PAIRS)
    best_scores = [p["best_union_jaccard"] for p in pair_results]
    mean_best = np.mean(best_scores)
    pct_match = sum(1 for s in best_scores if s > MATCH_THRESHOLD) / len(best_scores) * 100
    print(f"[SDR] Union: mean_best={mean_best:.4f} pct_match_third_token={pct_match:.1f}%")

    inters = _intersection_sizes(active, rng, N_INTERSECTION_PAIRS)
    print(f"[SDR] Intersection: mean={np.mean(inters):.1f} bits "
          f"median={np.median(inters):.1f} max={max(inters)}")

    return {
        "pairwise_jaccard": {
            "mean": float(jaccards.mean()),
            "median": float(np.median(jaccards)),
            "p95": float(np.percentile(jaccards, 95)),
            "min": float(jaccards.min()),
            "max": float(jaccards.max()),
            "pct_with_any_overlap": float((jaccards > 0).mean() * 100),
        },
        "union_generalization": {
            "n_pairs": len(pair_results),
            "mean_best_union_jaccard": float(mean_best),
            "pct_union_matches_third_token": float(pct_match),
        },
        "intersection": {
            "mean_active_shared": float(np.mean(inters)),
            "median_active_shared": float(np.median(inters)),
            "max_active_shared": int(max(inters)),
        },
        "sparsity": {
            "n_tokens": int(n_tok),
            "sdr_dim": int(n_bits),
            "active_bits": int(n_active),
            "density_pct": float(n_active / n_bits * 100),
        },
    }


def main(retina_path=RETINA, out_dir=OUT_DIR):
    """Load the retina archive, run the analysis, and save the JSON summary.

    Args:
        retina_path: path to an ``.npz`` archive containing an ``sdr`` array.
        out_dir: directory for ``results_sdr_composition.json``; created if
            it does not exist.

    Returns:
        The results dict that was written to disk.
    """
    print("[SDR] Loading retina...")
    # Context manager closes the archive's file handle once the array is read.
    with np.load(retina_path) as data:
        # The original note describes this as a (65536, 16384) bool matrix.
        sdr = data["sdr"]

    results = analyze(sdr)

    out_dir = Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    (out_dir / "results_sdr_composition.json").write_text(json.dumps(results, indent=2))
    print("[SDR] Saved results_sdr_composition.json")
    return results


if __name__ == "__main__":
    main()