Sync from GitHub: 52d4a30210d1002c573156d4be8b5e38ee852595
Browse files
app.py
CHANGED
|
@@ -119,6 +119,7 @@ from nuwave.kiss import KISSFilter, KISSConfig
|
|
| 119 |
from nuwave.pith import PithPipeline, PithConfig
|
| 120 |
from nuwave.benchmark_loader import sample_pairs as _sample_benchmark_pairs
|
| 121 |
from nuwave.benchmark_loader import describe_sample as _describe_benchmark_sample
|
|
|
|
| 122 |
from nuwave.splat_engine import decompose_layer, SplatConfig, GaussianSplats
|
| 123 |
from nuwave.lenia_splat import LeniaSplatEngine, LeniaSplatConfig
|
| 124 |
|
|
@@ -1222,7 +1223,15 @@ def on_interleaved_benchmark(
|
|
| 1222 |
# and same-cat pairs [(0,8), (1,9), ..., (7,15)]. Shadows the module-level
|
| 1223 |
# INTERLEAVED_QUESTIONS / _INTERLEAVED_SAME_CAT_PAIRS for this function's
|
| 1224 |
# scope; downstream code reads the locals via Python scoping.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1225 |
_pool_interleaved, _pool_same_cat_pairs, _pool_meta = _sample_benchmark_pairs(
|
|
|
|
| 1226 |
n_pairs=8,
|
| 1227 |
)
|
| 1228 |
INTERLEAVED_QUESTIONS = _pool_interleaved
|
|
@@ -1251,25 +1260,42 @@ def on_interleaved_benchmark(
|
|
| 1251 |
nw_organism._benchmark_category_registry = {}
|
| 1252 |
cat_registry = nw_organism._benchmark_category_registry
|
| 1253 |
|
| 1254 |
-
# ── Option G: similarity-based categorization ─
|
| 1255 |
-
# Build one centroid per category by averaging the embeddings of
|
| 1256 |
-
#
|
| 1257 |
-
#
|
| 1258 |
-
#
|
| 1259 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1260 |
_category_centroids: dict = {}
|
| 1261 |
_CATEGORY_SIM_THRESHOLD = 0.30 # cosine sim floor to assign category
|
| 1262 |
try:
|
| 1263 |
_per_cat_embs: dict = {}
|
| 1264 |
-
for
|
| 1265 |
-
|
| 1266 |
-
|
|
|
|
|
|
|
|
|
|
| 1267 |
for _cat, _embs in _per_cat_embs.items():
|
| 1268 |
_centroid = np.mean(_embs, axis=0)
|
| 1269 |
_norm = np.linalg.norm(_centroid) + 1e-8
|
| 1270 |
_category_centroids[_cat] = _centroid / _norm
|
| 1271 |
-
logger.info(
|
| 1272 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1273 |
except Exception as exc:
|
| 1274 |
logger.warning("Category centroid build failed: %s", exc)
|
| 1275 |
|
|
|
|
| 119 |
from nuwave.pith import PithPipeline, PithConfig
|
| 120 |
from nuwave.benchmark_loader import sample_pairs as _sample_benchmark_pairs
|
| 121 |
from nuwave.benchmark_loader import describe_sample as _describe_benchmark_sample
|
| 122 |
+
from nuwave.benchmark_loader import load_pool as _load_benchmark_pool
|
| 123 |
from nuwave.splat_engine import decompose_layer, SplatConfig, GaussianSplats
|
| 124 |
from nuwave.lenia_splat import LeniaSplatEngine, LeniaSplatConfig
|
| 125 |
|
|
|
|
| 1223 |
# and same-cat pairs [(0,8), (1,9), ..., (7,15)]. Shadows the module-level
|
| 1224 |
# INTERLEAVED_QUESTIONS / _INTERLEAVED_SAME_CAT_PAIRS for this function's
|
| 1225 |
# scope; downstream code reads the locals via Python scoping.
|
| 1226 |
+
# Load the full pool once — passed to the sampler AND used downstream to
|
| 1227 |
+
# build category centroids from ALL 80 prompts (not just the per-run
|
| 1228 |
+
# sampled subset). Run 43 surfaced the bug where centroids built from
|
| 1229 |
+
# `INTERLEAVED_QUESTIONS` (per-run sample of 6-8 cats) caused old
|
| 1230 |
+
# substrate nodes from non-sampled categories to be force-mapped to
|
| 1231 |
+
# whatever centroid was closest, garbling the diagnostic metrics.
|
| 1232 |
+
_full_benchmark_pool = _load_benchmark_pool()
|
| 1233 |
_pool_interleaved, _pool_same_cat_pairs, _pool_meta = _sample_benchmark_pairs(
|
| 1234 |
+
pool=_full_benchmark_pool,
|
| 1235 |
n_pairs=8,
|
| 1236 |
)
|
| 1237 |
INTERLEAVED_QUESTIONS = _pool_interleaved
|
|
|
|
| 1260 |
nw_organism._benchmark_category_registry = {}
|
| 1261 |
cat_registry = nw_organism._benchmark_category_registry
|
| 1262 |
|
| 1263 |
+
# ── Option G: similarity-based categorization (full-pool centroids) ─
|
| 1264 |
+
# Build one centroid per category by averaging the embeddings of ALL
|
| 1265 |
+
# prompts in the FULL pool's q1 and q2 layers (8 prompts per category
|
| 1266 |
+
# × 10 categories = 80 embeddings, 10 centroids). Earlier this iterated
|
| 1267 |
+
# only over the per-run-sampled INTERLEAVED_QUESTIONS, which produced
|
| 1268 |
+
# centroids for just 6-8 categories — substrate nodes from non-sampled
|
| 1269 |
+
# categories (physics, biology, math, etc. when they weren't in the
|
| 1270 |
+
# current run's draw) got force-mapped to whatever centroid happened to
|
| 1271 |
+
# be closest, garbling per-turn category diagnostics (Run 43 surfaced
|
| 1272 |
+
# this — gravitational-collapse nodes were tagged "music," prime-
|
| 1273 |
+
# factorization nodes tagged "computing," etc.).
|
| 1274 |
+
#
|
| 1275 |
+
# Building from the full pool means tagging is stable regardless of
|
| 1276 |
+
# which subset gets sampled this run, AND the centroid quality is
|
| 1277 |
+
# better (8 prompts averaged per category vs 2 in the old benchmark).
|
| 1278 |
+
# Cost: ~10 seconds of embedding at benchmark startup; one-time per run.
|
| 1279 |
_category_centroids: dict = {}
|
| 1280 |
_CATEGORY_SIM_THRESHOLD = 0.30 # cosine sim floor to assign category
|
| 1281 |
try:
|
| 1282 |
_per_cat_embs: dict = {}
|
| 1283 |
+
for _layer_key in ("q1_layer", "q2_layer"):
|
| 1284 |
+
for _entry in _full_benchmark_pool.get(_layer_key, []):
|
| 1285 |
+
_emb = np.asarray(
|
| 1286 |
+
nw_organism._embed_fn(_entry["text"]), dtype=np.float32,
|
| 1287 |
+
)
|
| 1288 |
+
_per_cat_embs.setdefault(_entry["category"], []).append(_emb)
|
| 1289 |
for _cat, _embs in _per_cat_embs.items():
|
| 1290 |
_centroid = np.mean(_embs, axis=0)
|
| 1291 |
_norm = np.linalg.norm(_centroid) + 1e-8
|
| 1292 |
_category_centroids[_cat] = _centroid / _norm
|
| 1293 |
+
logger.info(
|
| 1294 |
+
"Built %d category centroids from full pool "
|
| 1295 |
+
"(%d prompts averaged per centroid)",
|
| 1296 |
+
len(_category_centroids),
|
| 1297 |
+
sum(len(v) for v in _per_cat_embs.values()) // max(1, len(_per_cat_embs)),
|
| 1298 |
+
)
|
| 1299 |
except Exception as exc:
|
| 1300 |
logger.warning("Category centroid build failed: %s", exc)
|
| 1301 |
|