Spaces:

Executor-Tyrant-Framework
/

NuWave

Running

App Files Files Community

Executor-Tyrant-Framework committed on 22 days ago

Commit

ffd85b6

verified ·

1 Parent(s): a3050b3

Sync from GitHub: 52d4a30210d1002c573156d4be8b5e38ee852595

Browse files

Files changed (1) hide show

app.py +37 -11

app.py CHANGED Viewed

@@ -119,6 +119,7 @@ from nuwave.kiss import KISSFilter, KISSConfig
 from nuwave.pith import PithPipeline, PithConfig
 from nuwave.benchmark_loader import sample_pairs as _sample_benchmark_pairs
 from nuwave.benchmark_loader import describe_sample as _describe_benchmark_sample
 from nuwave.splat_engine import decompose_layer, SplatConfig, GaussianSplats
 from nuwave.lenia_splat import LeniaSplatEngine, LeniaSplatConfig
@@ -1222,7 +1223,15 @@ def on_interleaved_benchmark(
     # and same-cat pairs [(0,8), (1,9), ..., (7,15)]. Shadows the module-level
     # INTERLEAVED_QUESTIONS / _INTERLEAVED_SAME_CAT_PAIRS for this function's
     # scope; downstream code reads the locals via Python scoping.
     _pool_interleaved, _pool_same_cat_pairs, _pool_meta = _sample_benchmark_pairs(
         n_pairs=8,
     )
     INTERLEAVED_QUESTIONS = _pool_interleaved
@@ -1251,25 +1260,42 @@ def on_interleaved_benchmark(
         nw_organism._benchmark_category_registry = {}
     cat_registry = nw_organism._benchmark_category_registry
-    # ── Option G: similarity-based categorization ──────────────────────
-    # Build one centroid per category by averaging the embeddings of all
-    # INTERLEAVED_QUESTIONS prompts in that category (q1 + q2). Then any
-    # node with a stored embedding can be categorized post-hoc by cosine
-    # similarity, regardless of when it was deposited. Replaces the
-    # registry-only logic that couldn't see pre-instrumentation nodes.
     _category_centroids: dict = {}
     _CATEGORY_SIM_THRESHOLD = 0.30  # cosine sim floor to assign category
     try:
         _per_cat_embs: dict = {}
-        for _cat, _prompt in INTERLEAVED_QUESTIONS:
-            _emb = np.asarray(nw_organism._embed_fn(_prompt), dtype=np.float32)
-            _per_cat_embs.setdefault(_cat, []).append(_emb)
         for _cat, _embs in _per_cat_embs.items():
             _centroid = np.mean(_embs, axis=0)
             _norm = np.linalg.norm(_centroid) + 1e-8
             _category_centroids[_cat] = _centroid / _norm
-        logger.info("Built %d category centroids for similarity tagging",
-                    len(_category_centroids))
     except Exception as exc:
         logger.warning("Category centroid build failed: %s", exc)

 from nuwave.pith import PithPipeline, PithConfig
 from nuwave.benchmark_loader import sample_pairs as _sample_benchmark_pairs
 from nuwave.benchmark_loader import describe_sample as _describe_benchmark_sample
+from nuwave.benchmark_loader import load_pool as _load_benchmark_pool
 from nuwave.splat_engine import decompose_layer, SplatConfig, GaussianSplats
 from nuwave.lenia_splat import LeniaSplatEngine, LeniaSplatConfig
     # and same-cat pairs [(0,8), (1,9), ..., (7,15)]. Shadows the module-level
     # INTERLEAVED_QUESTIONS / _INTERLEAVED_SAME_CAT_PAIRS for this function's
     # scope; downstream code reads the locals via Python scoping.
+    # Load the full pool once. It is passed to the sampler AND used
+    # downstream to build category centroids from ALL 80 prompts (not just
+    # the per-run sampled subset). Run 43 surfaced a bug where centroids
+    # built from `INTERLEAVED_QUESTIONS` (a per-run sample of 6-8
+    # categories) caused old substrate nodes from non-sampled categories to
+    # be force-mapped to whichever centroid was closest, garbling the
+    # diagnostic metrics.
+    _full_benchmark_pool = _load_benchmark_pool()
     _pool_interleaved, _pool_same_cat_pairs, _pool_meta = _sample_benchmark_pairs(
+        pool=_full_benchmark_pool,
         n_pairs=8,
     )
     INTERLEAVED_QUESTIONS = _pool_interleaved
         nw_organism._benchmark_category_registry = {}
     cat_registry = nw_organism._benchmark_category_registry
+    # ── Option G: similarity-based categorization (full-pool centroids) ─
+    # Build one centroid per category by averaging the embeddings of ALL
+    # prompts in the FULL pool's q1 and q2 layers (8 prompts per category
+    # × 10 categories = 80 embeddings, 10 centroids). Earlier this iterated
+    # only over the per-run-sampled INTERLEAVED_QUESTIONS, which produced
+    # centroids for just 6-8 categories — substrate nodes from non-sampled
+    # categories (physics, biology, math, etc. when they weren't in the
+    # current run's draw) got force-mapped to whatever centroid happened to
+    # be closest, garbling per-turn category diagnostics (Run 43 surfaced
+    # this: gravitational-collapse nodes were tagged "music",
+    # prime-factorization nodes were tagged "computing", etc.).
+    #
+    # Building from the full pool means tagging is stable regardless of
+    # which subset gets sampled this run, AND the centroid quality is
+    # better (8 prompts averaged per category vs 2 in the old benchmark).
+    # Cost: ~10 seconds of embedding at benchmark startup; one-time per run.
     _category_centroids: dict = {}
     _CATEGORY_SIM_THRESHOLD = 0.30  # cosine sim floor to assign category
     try:
         _per_cat_embs: dict = {}
+        for _layer_key in ("q1_layer", "q2_layer"):
+            for _entry in _full_benchmark_pool.get(_layer_key, []):
+                _emb = np.asarray(
+                    nw_organism._embed_fn(_entry["text"]), dtype=np.float32,
+                )
+                _per_cat_embs.setdefault(_entry["category"], []).append(_emb)
         for _cat, _embs in _per_cat_embs.items():
             _centroid = np.mean(_embs, axis=0)
             _norm = np.linalg.norm(_centroid) + 1e-8
             _category_centroids[_cat] = _centroid / _norm
+        logger.info(
+            "Built %d category centroids from full pool "
+            "(%d prompts averaged per centroid)",
+            len(_category_centroids),
+            sum(len(v) for v in _per_cat_embs.values()) // max(1, len(_per_cat_embs)),
+        )
     except Exception as exc:
         logger.warning("Category centroid build failed: %s", exc)