Executor-Tyrant-Framework committed on
Commit
ffd85b6
·
verified ·
1 Parent(s): a3050b3

Sync from GitHub: 52d4a30210d1002c573156d4be8b5e38ee852595

Browse files
Files changed (1) hide show
  1. app.py +37 -11
app.py CHANGED
@@ -119,6 +119,7 @@ from nuwave.kiss import KISSFilter, KISSConfig
119
  from nuwave.pith import PithPipeline, PithConfig
120
  from nuwave.benchmark_loader import sample_pairs as _sample_benchmark_pairs
121
  from nuwave.benchmark_loader import describe_sample as _describe_benchmark_sample
 
122
  from nuwave.splat_engine import decompose_layer, SplatConfig, GaussianSplats
123
  from nuwave.lenia_splat import LeniaSplatEngine, LeniaSplatConfig
124
 
@@ -1222,7 +1223,15 @@ def on_interleaved_benchmark(
1222
  # and same-cat pairs [(0,8), (1,9), ..., (7,15)]. Shadows the module-level
1223
  # INTERLEAVED_QUESTIONS / _INTERLEAVED_SAME_CAT_PAIRS for this function's
1224
  # scope; downstream code reads the locals via Python scoping.
 
 
 
 
 
 
 
1225
  _pool_interleaved, _pool_same_cat_pairs, _pool_meta = _sample_benchmark_pairs(
 
1226
  n_pairs=8,
1227
  )
1228
  INTERLEAVED_QUESTIONS = _pool_interleaved
@@ -1251,25 +1260,42 @@ def on_interleaved_benchmark(
1251
  nw_organism._benchmark_category_registry = {}
1252
  cat_registry = nw_organism._benchmark_category_registry
1253
 
1254
- # ── Option G: similarity-based categorization ──────────────────────
1255
- # Build one centroid per category by averaging the embeddings of all
1256
- # INTERLEAVED_QUESTIONS prompts in that category (q1 + q2). Then any
1257
- # node with a stored embedding can be categorized post-hoc by cosine
1258
- # similarity, regardless of when it was deposited. Replaces the
1259
- # registry-only logic that couldn't see pre-instrumentation nodes.
 
 
 
 
 
 
 
 
 
 
1260
  _category_centroids: dict = {}
1261
  _CATEGORY_SIM_THRESHOLD = 0.30 # cosine sim floor to assign category
1262
  try:
1263
  _per_cat_embs: dict = {}
1264
- for _cat, _prompt in INTERLEAVED_QUESTIONS:
1265
- _emb = np.asarray(nw_organism._embed_fn(_prompt), dtype=np.float32)
1266
- _per_cat_embs.setdefault(_cat, []).append(_emb)
 
 
 
1267
  for _cat, _embs in _per_cat_embs.items():
1268
  _centroid = np.mean(_embs, axis=0)
1269
  _norm = np.linalg.norm(_centroid) + 1e-8
1270
  _category_centroids[_cat] = _centroid / _norm
1271
- logger.info("Built %d category centroids for similarity tagging",
1272
- len(_category_centroids))
 
 
 
 
1273
  except Exception as exc:
1274
  logger.warning("Category centroid build failed: %s", exc)
1275
 
 
119
  from nuwave.pith import PithPipeline, PithConfig
120
  from nuwave.benchmark_loader import sample_pairs as _sample_benchmark_pairs
121
  from nuwave.benchmark_loader import describe_sample as _describe_benchmark_sample
122
+ from nuwave.benchmark_loader import load_pool as _load_benchmark_pool
123
  from nuwave.splat_engine import decompose_layer, SplatConfig, GaussianSplats
124
  from nuwave.lenia_splat import LeniaSplatEngine, LeniaSplatConfig
125
 
 
1223
  # and same-cat pairs [(0,8), (1,9), ..., (7,15)]. Shadows the module-level
1224
  # INTERLEAVED_QUESTIONS / _INTERLEAVED_SAME_CAT_PAIRS for this function's
1225
  # scope; downstream code reads the locals via Python scoping.
1226
+ # Load the full pool once — passed to the sampler AND used downstream to
1227
+ # build category centroids from ALL 80 prompts (not just the per-run
1228
+ # sampled subset). Run 43 surfaced the bug where centroids built from
1229
+ # `INTERLEAVED_QUESTIONS` (per-run sample of 6-8 cats) caused old
1230
+ # substrate nodes from non-sampled categories to be force-mapped to
1231
+ # whatever centroid was closest, garbling the diagnostic metrics.
1232
+ _full_benchmark_pool = _load_benchmark_pool()
1233
  _pool_interleaved, _pool_same_cat_pairs, _pool_meta = _sample_benchmark_pairs(
1234
+ pool=_full_benchmark_pool,
1235
  n_pairs=8,
1236
  )
1237
  INTERLEAVED_QUESTIONS = _pool_interleaved
 
1260
  nw_organism._benchmark_category_registry = {}
1261
  cat_registry = nw_organism._benchmark_category_registry
1262
 
1263
+ # ── Option G: similarity-based categorization (full-pool centroids) ─
1264
+ # Build one centroid per category by averaging the embeddings of ALL
1265
+ # prompts in the FULL pool's q1 and q2 layers (8 prompts per category
1266
+ # × 10 categories = 80 embeddings, 10 centroids). Earlier this iterated
1267
+ # only over the per-run-sampled INTERLEAVED_QUESTIONS, which produced
1268
+ # centroids for just 6-8 categories — substrate nodes from non-sampled
1269
+ # categories (physics, biology, math, etc. when they weren't in the
1270
+ # current run's draw) got force-mapped to whatever centroid happened to
1271
+ # be closest, garbling per-turn category diagnostics (Run 43 surfaced
1272
+ # this — gravitational-collapse nodes were tagged "music," prime-
1273
+ # factorization nodes tagged "computing," etc.).
1274
+ #
1275
+ # Building from the full pool means tagging is stable regardless of
1276
+ # which subset gets sampled this run, AND the centroid quality is
1277
+ # better (8 prompts averaged per category vs 2 in the old benchmark).
1278
+ # Cost: ~10 seconds of embedding at benchmark startup; one-time per run.
1279
  _category_centroids: dict = {}
1280
  _CATEGORY_SIM_THRESHOLD = 0.30 # cosine sim floor to assign category
1281
  try:
1282
  _per_cat_embs: dict = {}
1283
+ for _layer_key in ("q1_layer", "q2_layer"):
1284
+ for _entry in _full_benchmark_pool.get(_layer_key, []):
1285
+ _emb = np.asarray(
1286
+ nw_organism._embed_fn(_entry["text"]), dtype=np.float32,
1287
+ )
1288
+ _per_cat_embs.setdefault(_entry["category"], []).append(_emb)
1289
  for _cat, _embs in _per_cat_embs.items():
1290
  _centroid = np.mean(_embs, axis=0)
1291
  _norm = np.linalg.norm(_centroid) + 1e-8
1292
  _category_centroids[_cat] = _centroid / _norm
1293
+ logger.info(
1294
+ "Built %d category centroids from full pool "
1295
+ "(%d prompts averaged per centroid)",
1296
+ len(_category_centroids),
1297
+ sum(len(v) for v in _per_cat_embs.values()) // max(1, len(_per_cat_embs)),
1298
+ )
1299
  except Exception as exc:
1300
  logger.warning("Category centroid build failed: %s", exc)
1301