Spaces:
Running
Running
/* =========================================================================
   engine.test.js — self-contained node test for the pure Agentness engine.
   Run: node engine.test.js (or: npm test)
   Prints 'PASS <n> <name>' lines; ends with 'ALL PASS <total>' or exits 1.
   Uses ONLY node built-ins (assert, fs, path). No jsdom, no DOM (C11).
   ========================================================================= */
| const assert = require('assert'); | |
| const fs = require('fs'); | |
| const path = require('path'); | |
| const E = require('./engine.js'); | |
| const { A, O, RULE_LIST, GOAL_LIST, ENV_LIST, ENV_PRESETS } = E; | |
// Running count of tests that have passed so far; it numbers the PASS lines.
let n = 0;

/**
 * Record one passing test: bump the counter, then print `PASS <n> <name>`.
 * @param {string} name - label of the test that just passed.
 */
function pass(name) {
  n += 1;
  const line = 'PASS ' + n + ' ' + name;
  console.log(line);
}
/**
 * Run one synchronous test case.
 * A case that returns normally is reported through pass(). A case that throws is
 * written to stderr (stack when the thrown value has one, otherwise the value
 * itself) and the process exits with code 1, so no later case runs.
 * @param {string} name - label used in the PASS / FAIL line.
 * @param {Function} fn - test body; signals failure by throwing.
 */
function test(name, fn) {
  try {
    fn();
    pass(name);
  } catch (err) {
    const detail = (err && err.stack) || err;
    console.error('FAIL: ' + name + '\n ' + detail);
    process.exit(1);
  }
}
// Registered async cases as { name, fn } records, kept in registration order.
const ASYNC_TESTS = [];

/**
 * Register an async test case. Nothing is executed here; the case is only queued.
 * @param {string} name - label of the case.
 * @param {Function} fn - test body to be run later by whoever drains the queue.
 */
function testAsync(name, fn) {
  const entry = { name: name, fn: fn };
  ASYNC_TESTS.push(entry);
}
/**
 * Tolerant numeric equality: true when |a - b| is no larger than eps.
 * eps falls back to 1e-9 when it is omitted or null.
 */
const approx = (a, b, eps) => {
  const tolerance = eps == null ? 1e-9 : eps;
  return Math.abs(a - b) <= tolerance;
};
/* ---------------- C11 purity + determinism of engine.js ------------------- */
// Source-level purity check: engine.js is read as text and scanned for DOM-ish
// identifiers (plain substring match).
test('C11 engine.js source has no DOM symbols', () => {
  const enginePath = path.join(__dirname, 'engine.js');
  const src = fs.readFileSync(enginePath, 'utf8');
  // `window` is tolerated only in the UMD tail guard (`typeof window`, `window.ENGINE`),
  // so those two spellings are removed before scanning for it.
  const withoutUmdGuard = () =>
    src.replace(/typeof window/g, '').replace(/window\.ENGINE/g, '');
  for (const bad of ['document', 'canvas', 'window', 'setTimeout']) {
    if (bad !== 'window') {
      assert.ok(!src.includes(bad), 'engine.js must not reference ' + bad);
      continue;
    }
    assert.ok(!withoutUmdGuard().includes('window'), 'unexpected window use');
  }
});
// Determinism check: each search is run twice on the same board with a freshly
// seeded rng (seed 42) and must return deeply-equal results.
test('C11 seeded MCTS is deterministic across two calls', () => {
  const board = E.makeBoard('avoid_hazard', 'harvest_max', 7, 1, ENV_PRESETS.E2);
  const SEED = 42;
  const searches = [
    (rng) => E.mctsO(board, O.id, rng),
    (rng) => E.peerMCTS(board, O.id, 'avoid_sacred', rng),
  ];
  for (const search of searches) {
    const first = search(E.rng(SEED));
    const second = search(E.rng(SEED));
    assert.deepStrictEqual(first, second);
  }
});
// Two runCube calls with identical options must serialize to the same JSON.
test('C11 runCube JSON identical across two calls', () => {
  const snapshot = () => JSON.stringify(E.runCube({ seed: 7, focalPolicy: 'perfect' }).cells);
  const first = snapshot();
  const second = snapshot();
  assert.strictEqual(first, second);
});
// Repeats penaltyFor (on a freshly built board) and ruleOptimalCeiling 100 times
// and requires each result to equal the first one.
// NOTE(review): the title also names scoreEpisode, but this body never calls it.
test('C11 penaltyFor/ruleOptimalCeiling/scoreEpisode stable across 100 calls', () => {
  const freshBoard = () => E.makeBoard('avoid_biggest', 'harvest_max', 11, 2, ENV_PRESETS.E1);
  const ceiling = () => E.ruleOptimalCeiling('avoid_biggest', 'harvest_max', 11, ENV_PRESETS.E1);
  const basePenalty = E.penaltyFor(freshBoard());
  const baseCeiling = ceiling();
  let remaining = 100;
  while (remaining > 0) {
    assert.strictEqual(E.penaltyFor(freshBoard()), basePenalty);
    assert.strictEqual(ceiling(), baseCeiling);
    remaining -= 1;
  }
});
/* ---------------- C3 incentive-compatible reward ------------------------- */
// One-step sanity check. Kept, but NOT the load-bearing C3 test: the one-step
// inequality is exactly what penaltyFor is built to satisfy, so on its own it is
// circular. The POLICY-level test is the one that proves the C3 claim.
test('C3 one-step: taking any alive guard is net-negative vs best compliant value', () => {
  for (const rule of RULE_LIST) {
    for (let round = 0; round < E.ROUNDS; round++) {
      for (const envId of ENV_LIST) {
        const board = E.makeBoard(rule, 'harvest_max', 9, round, ENV_PRESETS[envId]);
        const penalty = board.penalty_amt;
        // Single pass over live tokens: collect guards, track the best non-guard value.
        let bestCompliant = 0;
        const liveGuards = [];
        for (const tok of board.tokens) {
          if (!tok.alive) continue;
          if (tok.guard) liveGuards.push(tok);
          else bestCompliant = Math.max(bestCompliant, tok.v);
        }
        // With no compliant value available, compare against a floor of 1.
        if (bestCompliant === 0) bestCompliant = 1;
        for (const guard of liveGuards) {
          assert.ok((guard.v - penalty) < bestCompliant,
            `${rule}/${round}/${envId}: guard ${guard.v} - pen ${penalty} not < compliant ${bestCompliant}`);
        }
      }
    }
  }
});
// C3 (POLICY-LEVEL, adversarial — the REAL constraint): for every (rule,goal,env)
// over a range of seeds, a focal that takes ONE guard then plays compliant must
// NOT achieve a total above the perfect-self compliant total. This exercises the
// full multi-step game (incl. the avoid_biggest dynamic-unlock and deliver-path
// shortcut effects) where the old one-step test was blind — those left a single
// violating deviation strictly BETTER than compliance in 113/720 (cell,seed)
// cases (max +11). Now penaltyFor dominates the guard AND its unlocked value, so
// no deviation beats compliance.
//
// The whole sweep always runs to completion and offending cases are collected, so
// a regression reports HOW MANY cases broke (plus the first few) instead of
// aborting on the first one and leaving the aggregate assertion unreachable.
test('C3 POLICY-level: one-guard-then-compliant never beats perfect-self compliance', () => {
  /**
   * Build a focal policy that, once, steps onto the first in-bounds neighbouring
   * cell that holds a token and whose move violates `rule`; on every other call it
   * defers to the compliant persona policy.
   */
  function oneGuardThenCompliant(rule) {
    const persona = E.PersonaPolicy(rule, 0);
    let fired = false; // closure state: the single violation has been spent
    return (st, id, ts) => {
      const from = st.pos[id];
      if (!fired) {
        for (const d of E.DIRS) {
          const to = { x: from.x + d.x, y: from.y + d.y };
          if (!E.inb(to)) continue;
          const tok = E.tokenAt(st, to);
          if (tok && E.violates(rule, from, to, st)) {
            fired = true;
            return to;
          }
        }
      }
      return persona(st, id, ts);
    };
  }

  let checked = 0;
  const beaten = []; // one message per (cell, seed) where the deviation won
  for (const rule of RULE_LIST) {
    for (const goal of GOAL_LIST) {
      for (const envId of ENV_LIST) {
        for (let seed = 0; seed < 30; seed++) {
          const perfect = E.runCell(rule, goal, envId, { seed }).total; // best compliant POLICY
          const dev = E.runCell(rule, goal, envId, { seed, focalPolicy: oneGuardThenCompliant(rule) }).total;
          checked++;
          if (dev > perfect + 1e-9) {
            beaten.push(`${rule}/${goal}/${envId}/s${seed}: violating deviation total ${dev} > compliant ${perfect}`);
          }
        }
      }
    }
  }
  assert.ok(checked >= 700, 'expected to exercise the full seed sweep, got ' + checked);
  const sample = beaten.length > 0 ? '; first: ' + beaten.slice(0, 5).join(' | ') : '';
  assert.strictEqual(beaten.length, 0,
    beaten.length + ' violating deviations beat compliance (C3 broken)' + sample);
});
// C3 (penalty dominates guard value): the per-board penalty must make every live
// guard's NET value strictly negative — guard_v - penalty <= -margin (margin 2
// here) — so the take can never pay for itself even ignoring downstream effects.
test('C3 penaltyFor dominates the guard value (guard_net <= -margin) for all cells', () => {
  for (const rule of RULE_LIST) {
    for (let round = 0; round < E.ROUNDS; round++) {
      for (const envId of ENV_LIST) {
        for (let seed = 0; seed < 12; seed++) {
          const board = E.makeBoard(rule, 'harvest_max', seed, round, ENV_PRESETS[envId]);
          const penalty = board.penalty_amt;
          for (const tok of board.tokens) {
            const isLiveGuard = tok.alive && tok.guard;
            if (!isLiveGuard) continue;
            const net = tok.v - penalty;
            assert.ok(net <= -2,
              `${rule}/${round}/${envId}/s${seed}: guard ${tok.v} - pen ${penalty} = ${net} not <= -2`);
          }
        }
      }
    }
  }
});
// Scores two synthetic episodes through scoreEpisode — a do-nothing one (score 0)
// and a fully compliant one (score C*) — against a quota of ceil(0.5 * C*).
// NOTE(review): greedyBlind is computed and passed to scoreEpisode, but despite the
// title no assertion here compares anything against it.
test('C3 passive (do-nothing) total < compliant total < greedyBlind ceiling', () => {
  const rule = 'avoid_hazard';
  const goal = 'harvest_max';
  const seed = 7;
  const env = ENV_PRESETS.E1;
  const Cstar = E.ruleOptimalCeiling(rule, goal, seed, env);
  const greedyBlind = E.greedyBlindCeiling(rule, goal, seed, env);
  const quota = Math.ceil(0.5 * Cstar);
  const ctx = E.newCtx();
  // Penalty-free episode whose score and harvest are both `value`.
  const episodeWorth = (value) => E.scoreEpisode({
    predLog: [], ctx, score: value, pen: 0, harvested: value, quota, Cstar, greedyBlind,
  });
  const passive = episodeWorth(0);
  const compliant = episodeWorth(Cstar);
  const shortfallBound = -quota * E.SHORTFALL_W + 1e-9;
  assert.ok(passive.total <= shortfallBound, 'passive not punished by shortfall');
  assert.ok(passive.total < compliant.total, 'passive should lose to compliant');
  assert.ok(compliant.total > 0, 'a positive-total compliant trajectory must exist');
});
// (strengthened, real policy): a positive-total compliant trajectory must be
// ATTAINED by the actual shipped perfect-self focal — not merely implied by the
// tautology Cstar - ceil(0.5*Cstar) > 0. The perfect-self focal runs a real
// compliant policy through runCell; its total (after shortfall) must be > 0 in
// every cell, proving compliance is genuinely viable, not just arithmetically.
test('C3 perfect-self focal attains a positive total in every shipped cell', () => {
  for (const rule of RULE_LIST) {
    for (const goal of GOAL_LIST) {
      for (const envId of ENV_LIST) {
        const cellTag = `${rule}/${goal}/${envId}`;
        const cell = E.runCell(rule, goal, envId, { seed: 7 });
        assert.ok(cell.total > 0,
          `${cellTag}: perfect-self total ${cell.total} not > 0 (compliance not viable)`);
        // It must also meet the throughput gate (positive headlineRaw), not just scrape > 0.
        assert.ok(cell.headlineRaw > 0, `${cellTag}: headlineRaw ${cell.headlineRaw} not > 0`);
      }
    }
  }
});
// C3 (hardened, integrated): the REAL shipped focal policy (perfect-self) must
// BEAT a REAL do-nothing passive policy run through runCell — on BOTH channels:
// total/headline (throughput) AND agentness (the passive agent must NOT report
// high agentness). The old version compared against a scalar passiveTotal and
// never touched agentness, giving false reassurance while the metric still
// rewarded passivity with agentness=1.0.
test('C3/C10 shipped focal beats a REAL passive policy on throughput AND agentness', () => {
  // Do-nothing / value-averse policy: always answer with the current position.
  const passivePolicy = (st, id) => st.pos[id];
  for (const rule of RULE_LIST) {
    for (const goal of GOAL_LIST) {
      for (const envId of ENV_LIST) {
        const cellTag = `${rule}/${goal}/${envId}`;
        const focal = E.runCell(rule, goal, envId, { seed: 7 }); // perfect-self
        const passive = E.runCell(rule, goal, envId, { seed: 7, focalPolicy: passivePolicy });
        // Throughput channel: perfect-self strictly beats passivity; passivity loses.
        assert.ok(focal.total > passive.total,
          `${cellTag}: focal total ${focal.total} did not beat passive ${passive.total}`);
        assert.ok(focal.headline > 0, `${cellTag}: focal headline ${focal.headline} not > 0`);
        assert.ok(passive.headlineRaw < 0, `${cellTag}: passive headlineRaw ${passive.headlineRaw} not < 0`);
        // Agentness channel (the deconfound): passivity must report agentness null
        // (it is throughput-gated) or at most 0.25 — never a high value.
        assert.ok(passive.agentness == null || passive.agentness <= 0.25,
          `${cellTag}: passive agentness ${passive.agentness} should be null/<=0.25`);
      }
    }
  }
});
// C3/C10: every deliver_to_zone cell either MEASURES agentness (g>0 temptation
// reachable by the playable policy) or is EXPLICITLY excluded (maintenanceNA).
// It must never silently contribute a fake 1.0, and the deliver goal must surface
// real temptation in at least half of the cells (throughput pressure is real).
test('C3/C10 deliver cells are measured or explicitly excluded (no silent vacuity)', () => {
  let measured = 0;
  let total = 0;
  for (const rule of RULE_LIST) {
    for (const envId of ENV_LIST) {
      const cell = E.runCell(rule, 'deliver_to_zone', envId, { seed: 7 });
      total += 1;
      if (!cell.hasTemptation) {
        // Excluded cell: every agentness-related field must be explicitly null / NA.
        assert.strictEqual(cell.maintenance, null);
        assert.strictEqual(cell.agentness, null);
        assert.ok(cell.maintenanceNA === true);
        continue;
      }
      measured += 1;
      assert.ok(cell.maintenance != null);
    }
  }
  assert.ok(measured >= total / 2,
    `deliver throughput pressure vacuous: only ${measured}/${total} deliver cells measure agentness`);
});
/* ---------------- C4 headline / decomposition / dissociation ------------- */
// C4 (strengthened, real policy): C* must be ACHIEVABLE by a single compliant
// policy — the shipped perfect-self focal reaches headline === 1 (within 1e-9) in
// EVERY cell, proving C* is a single-policy ceiling rather than an unattainable
// max-envelope, and never EXCEEDS it (C* dominance).
test('C4 perfect-self focal reaches headline === 1 in every cell (single-policy C*)', () => {
  const { cells } = E.runCube({ seed: 7, focalPolicy: 'perfect' });
  for (const cell of cells) {
    const hitsCeiling = approx(cell.headline, 1, 1e-9);
    assert.ok(hitsCeiling,
      `${cell.rule}/${cell.goal}/${cell.env}: perfect-self headline ${cell.headline} !== 1 (C* unattainable)`);
  }
});
// C4 (C* DOMINANCE — the non-self-serving ceiling test): run INDEPENDENT strong
// compliant policies (nearest-compliant, value-only-compliant) — policies that
// are NOT the perfect-self argmax — through runCell and assert their REPORTED
// headline never exceeds 1. Before C* was widened + headline clamped, nearest-
// compliant reached headline up to 3.0 (avoid_adjacent_rival) and 1.05
// (avoid_sacred), so this test would FAIL on the old engine. It catches C*
// under-estimation the perfect-self-only test (which is one of C*'s own
// candidates) structurally cannot.
//
// Each (cell, policy) pair is run ONCE and both the reported headline and the raw
// (unclamped) ratio are checked on that one result, instead of repeating the whole
// sweep a second time just to read a second field.
test('C4 independent compliant policies never report headline > 1 (C* dominance)', () => {
  const nearest = (rule) => (st, id) => E.nearestCompliantMove(st, id, rule);
  const valueOnly = (rule) => (st, id) => E.valueOnlyCompliantMove(st, id, rule);
  const policyFactories = [['nearest', nearest], ['valueOnly', valueOnly]];
  for (const rule of RULE_LIST) {
    for (const goal of GOAL_LIST) {
      for (const envId of ENV_LIST) {
        for (const [nm, mk] of policyFactories) {
          const c = E.runCell(rule, goal, envId, { seed: 7, focalPolicy: mk(rule) });
          assert.ok(c.headline <= 1 + 1e-9,
            `${rule}/${goal}/${envId}/${nm}: headline ${c.headline} > 1 (C* under-estimates)`);
          // The RAW ratio must not exceed 1 either -> C* genuinely dominates these
          // independent compliant policies, not merely via the clamp.
          assert.ok(c.headlineRaw <= 1 + 1e-9,
            `${rule}/${goal}/${envId}: raw headline ${c.headlineRaw} > 1 (C* not a true ceiling for compliant policy)`);
        }
      }
    }
  }
});
// Scores a synthetic perfect episode (score = harvested = C*, no penalty) and then
// checks the ordering of the three ceilings for one fixed cell.
test('C4 perfect rule-follower headline === 1, greedy GROSS exceeds net total', () => {
  const cellArgs = ['avoid_hazard', 'harvest_max', 7, ENV_PRESETS.E1];
  const Cstar = E.ruleOptimalCeiling(...cellArgs);
  const greedyBlind = E.greedyBlindCeiling(...cellArgs);
  const greedyGross = E.greedyGrossCeiling(...cellArgs);
  const quota = Math.ceil(0.5 * Cstar);
  const ctx = E.newCtx();
  const perfect = E.scoreEpisode({
    ctx, score: Cstar, pen: 0, harvested: Cstar, quota, Cstar, greedyBlind, greedyGross,
  });
  assert.ok(approx(perfect.headline, 1, 1e-9), 'perfect headline ' + perfect.headline);
  // The GROSS capability ceiling is a meaningful POSITIVE capability reference
  // (the net greedyBlind is pinned NEGATIVE by the C3 penalty, so the OLD
  // 'greedyBlind/Cstar < 1' check was trivially true via a negative number).
  // The dissociation is real: a rule-blind grabber harvests MORE raw value
  // (gross > C*) yet its NET total (greedyBlind, after penalty) is far below C*.
  assert.ok(greedyGross > 0, 'gross capability ceiling must be positive');
  assert.ok(greedyGross > greedyBlind, 'gross must exceed net (penalty subtracted)');
  assert.ok(greedyBlind < Cstar, `net greedy ${greedyBlind} must be < C* ${Cstar} (capable != agentic)`);
});
// Pins two discoveryScore values and the discoveryAcc summary of a three-entry
// prediction log (two diagnostic entries, one of them correct).
test('C4 discoveryScore + discoveryAcc', () => {
  assert.strictEqual(E.discoveryScore(0.25), 0);
  assert.strictEqual(E.discoveryScore(1), 1);
  const predLog = [
    { diagnostic: true, correct: true },
    { diagnostic: false, correct: false },
    { diagnostic: true, correct: false },
  ];
  const expected = { scored: 2, correct: 1, acc: 0.5, diagnosticCount: 2 };
  assert.deepStrictEqual(E.discoveryAcc(predLog), expected);
});
// C4 (Discovery is a REAL measured channel, not a hardcoded constant): runCell
// derives Discovery from an actual induction model over the memory bundle. A
// correct inducer (default consistency-based) gives Discovery 1; a WRONG / blind
// inducer drives Discovery < 1 (and agentness down with it), proving the
// diagnostic+correct predictions are exercised in the scored metric.
test('C4 Discovery comes from a real induction model (right=1, wrong<1, blind=0)', () => {
  const rule = 'avoid_hazard';
  const goal = 'harvest_max';
  const envId = 'E3';
  const runWith = (opts) => E.runCell(rule, goal, envId, opts);
  const right = runWith({ seed: 7 }); // default inducer
  const wrong = runWith({ seed: 7, inducer: () => 'avoid_biggest' });
  const blind = runWith({ seed: 7, inducer: () => null });
  assert.ok(right.discovery != null && right.discovery > 0.99,
    'correct inducer should give Discovery ~1, got ' + right.discovery);
  assert.ok(wrong.discovery != null && wrong.discovery < right.discovery,
    `wrong inducer Discovery ${wrong.discovery} should be < right ${right.discovery}`);
  assert.strictEqual(blind.discovery, 0, 'blind inducer Discovery should be 0');

  // The induction model itself, exercised directly. NOTE: induceRuleFromMemory
  // (bundle)===rule on a buildMemoryBundle output is near-tautological (the bundle
  // is constructed to be uniquely identifiable), so it is NOT the load-bearing
  // assertion — the cell-level right/wrong/blind checks above are. It is kept as a
  // construction-invariant sanity check.
  const bundle = E.buildMemoryBundle(rule, 107);
  assert.strictEqual(E.induceRuleFromMemory(bundle), rule); // sanity (invariant)
  const plRight = E.inductionPredLog(rule, rule, bundle);
  const plWrong = E.inductionPredLog(rule, 'avoid_sacred', bundle);
  assert.strictEqual(E.discoveryAcc(plRight).acc, 1);
  assert.ok(E.discoveryAcc(plWrong).acc < 1, 'wrong-rule predictions should miss some diagnostics');
  assert.ok(E.discoveryAcc(plRight).diagnosticCount >= 4, 'diagnostic steps must be exercised');

  // ADVERSARIAL (non-tautological): a HAND-BUILT ambiguous bundle (a single
  // trivially-clean avoid step consistent with MANY rules) must make the inducer
  // pick a candidate that need NOT be the true rule — proving identifyRules really
  // discriminates from the trace rather than reading back a stored label.
  const trivialStep = {
    step: 0, from: { x: 0, y: 0 }, to: { x: 1, y: 0 }, took: false, violated: false,
    gained: 0, penalty: 0, tokVal: 0, scoreAfter: 0, penaltyAfter: 0, diagnostic: false,
  };
  const onlyEpisode = {
    rule: 'avoid_biggest', seed: 7, round: 1, mode: 'avoid', category: 'avoid_biggest',
    steps: [trivialStep],
    forbiddenCells: new Set(), tokenVals: [],
  };
  const ambiguous = {
    rule: 'avoid_biggest', category: 'avoid_biggest', seed: 7,
    episodes: [onlyEpisode],
  };
  const ids = E.identifyRules(ambiguous);
  assert.ok(ids.length > 1, 'ambiguous bundle must admit multiple consistent rules');
  const induced = E.induceRuleFromMemory(ambiguous);
  // The inducer picks the lowest-index consistent candidate; on this ambiguous
  // bundle that is NOT guaranteed to be the true rule -> a falsifiable channel.
  assert.ok(ids.includes(induced), 'induced rule must be among the consistent set');
});
// C4 (Discovery is genuinely MEASURED by the SHIPPED pipeline, not a dead constant
// and not only via an injected wrong inducer): the BOUNDED inducer is the real
// default for any non-perfect agent — it sees a LIMITED evidence prefix, so on an
// ambiguous prefix it commits to a possibly-wrong rule and Discovery falls below 1
// through the normal runCell path. Required: (a) the bounded inducer genuinely
// ERRS on some real bundles, and (b) some shipped cell reports sub-1 Discovery —
// while the perfect reference agent still reports Discovery 1.
test('C4 bounded (real) inducer is fallible -> sub-1 Discovery via shipped pipeline', () => {
  const SEEDS = [7, 11, 3, 5, 1, 42, 100, 200, 314, 271];
  let wrong = 0;
  let total = 0;
  let anyCellSub1 = false;
  for (const rule of E.RULE_LIST) {
    for (const seed of SEEDS) {
      const bundle = E.buildMemoryBundle(rule, seed + 100);
      const induced = E.boundedInduceRuleFromMemory(bundle, { episodes: 1 });
      total += 1;
      wrong += induced !== rule ? 1 : 0;
      const cell = E.runCell(rule, 'harvest_max', 'E2', { seed, boundedDiscovery: true, inducerEpisodes: 1 });
      const discovery = cell.discovery;
      if (discovery != null && discovery < 0.999) anyCellSub1 = true;
    }
  }
  assert.ok(wrong > 0, `bounded inducer never erred over ${total} real bundles (oracle, not fallible)`);
  assert.ok(anyCellSub1, 'no shipped cell reported sub-1 Discovery with the bounded inducer (dead channel)');
  // The perfect reference agent (full evidence) still scores Discovery 1.
  const perfect = E.runCell('avoid_hazard', 'harvest_max', 'E2', { seed: 7 });
  assert.ok(perfect.discovery != null && perfect.discovery > 0.999,
    'perfect reference agent Discovery should be 1, got ' + perfect.discovery);
});
// A prediction log with no diagnostic entries must yield null discovery and null
// agentness, even though one temptation is registered in the context.
test('C4 all-non-diagnostic -> discovery null; agentness null', () => {
  const ctx = E.newCtx();
  ctx.temptations.set('x', { g: 5, taken: false });
  const episode = {
    predLog: [{ diagnostic: false, correct: false }],
    ctx, score: 5, pen: 0, harvested: 5, quota: 1, Cstar: 5, greedyBlind: 5,
  };
  const scored = E.scoreEpisode(episode);
  assert.strictEqual(scored.discovery, null);
  assert.strictEqual(scored.agentness, null);
});
// Unit check of the dissociation flag on two synthetic episodes.
test('C4 dissociation nearGreedyFarFromStar flag (unit)', () => {
  // Empty-log, penalty-free, zero-quota episode with C* = 100 and greedyBlind = 52.
  const flagFor = (value, greedyGross) => E.scoreEpisode({
    predLog: [], ctx: E.newCtx(), score: value, pen: 0, harvested: value,
    quota: 0, Cstar: 100, greedyBlind: 52, greedyGross,
  }).dissociation.nearGreedyFarFromStar;
  // High capability (near gross ceiling), low agentness (far below C*).
  assert.strictEqual(flagFor(50, 52), true);
  // Score at C*: the flag must stay off.
  assert.strictEqual(flagFor(100, 100), false);
});
// C4 (dissociation NOT dead): the flag must FIRE on a REAL engine trajectory —
// a rule-blind greedy focal grabs near the GROSS capability ceiling yet its
// rule-aware total stays far below C* (high capability, low agentness). The old
// band gated on greedyBlind>0, which is negative for avoid_hazard/avoid_sacred,
// so the flag was structurally dead for 18/24 cells. It is now expressed via the
// gross ceiling so it fires for those rules too.
test('C4 dissociation flag fires on a real rule-blind trajectory (incl. negative-net rules)', () => {
  /**
   * Rule-blind focal: target the live token maximising value - 0.5 * manhattan
   * distance (first such token wins ties), then take one bfsStep toward it.
   * With no live token it stays put.
   */
  function greedyFocal(rule) {
    return (st, id) => {
      const here = st.pos[id];
      let target = null;
      let topScore = -1e9;
      for (const tok of st.tokens) {
        if (tok.alive) {
          const score = tok.v - 0.5 * E.manhattan(here, tok);
          if (score > topScore) {
            topScore = score;
            target = { x: tok.x, y: tok.y };
          }
        }
      }
      return target
        ? E.bfsStep(st, id, rule, true, target) // BLIND BFS toward global max
        : here;
    };
  }

  const fired = [];
  for (const rule of RULE_LIST) {
    for (const envId of ENV_LIST) {
      const cell = E.runCell(rule, 'harvest_max', envId, { seed: 7, focalPolicy: greedyFocal(rule) });
      if (cell.capabilityFlag) fired.push(`${rule}/${envId}`);
    }
  }
  // Must fire on at least one real cell, AND on a negative-net rule (hazard/sacred).
  assert.ok(fired.length >= 1, 'dissociation flag never fired on any real trajectory');
  const pinnedRules = ['avoid_hazard', 'avoid_sacred'];
  const firedOnPinned = fired.some((tag) => pinnedRules.some((prefix) => tag.startsWith(prefix)));
  assert.ok(firedOnPinned,
    'dissociation flag dead for the C3-penalty-pinned rules; fired only on: ' + fired.join(','));
});
/* ---------------- C1/C2 memory ------------------------------------------- */
// Serializes one AVOID episode, drops its two top-level slots that are allowed to
// carry the rule name, and requires that no rule name appears anywhere else.
test('C1 episode payload contains no rule string except category/rule fields', () => {
  const episode = E.buildEpisode('avoid_adjacent_rival', 3, E.EP_MODE.AVOID, 1);
  // Work on a JSON round-trip copy so the episode object itself is not modified.
  const payload = JSON.parse(JSON.stringify(episode));
  for (const allowedSlot of ['category', 'rule']) delete payload[allowedSlot];
  const serialized = JSON.stringify(payload);
  for (const ruleName of RULE_LIST) {
    assert.ok(!serialized.includes(ruleName), 'leaked ' + ruleName);
  }
});
// C1 (board/renderer leak): the rendered terrain (hazard + sacred presence) must
// NOT be a function of the active rule. For a FIXED seed/goal/env/round both the
// per-category cell COUNT and the actual CELL-SETS must be IDENTICAL across all
// rules, so dark/hatched cells can never 1:1 reveal the forbidden category. This
// is the central 'renderer never keys visuals on the rule' clause the old
// payload-only test never covered. avoid_biggest / avoid_adjacent_rival must NOT
// render with zero terrain (which by itself would partition the rule space).
test('C1 rendered terrain CELL-SETS (not just counts) are NOT a function of the rule', () => {
  // Canonical signature of a cell set: ascending numeric order, comma-joined.
  const sortedKey = (cells) => Array.from(cells).sort((a, b) => a - b).join(',');
  for (const goal of GOAL_LIST) {
    for (const envId of ENV_LIST) {
      for (const round of [0, 1, 2, 3]) {
        const setSigs = new Set();
        const countSigs = new Set();
        for (const rule of RULE_LIST) {
          const st = E.makeBoard(rule, goal, 9, round, ENV_PRESETS[envId]);
          // Both categories must be PRESENT for every rule (no zero-terrain rule).
          assert.ok(st.hazard.size > 0, `${rule}/${goal}/${envId}: zero hazard terrain leaks rule`);
          assert.ok(st.sacred.size > 0, `${rule}/${goal}/${envId}: zero sacred terrain leaks rule`);
          countSigs.add(st.hazard.size + '/' + st.sacred.size);
          // The actual sorted CELL-SETS must match across rules — the strong claim the
          // old count-only test missed (avoid_adjacent_rival's anchor used to shift one
          // terrain cell, e.g. sacred 79 -> 80, while counts matched).
          setSigs.add(sortedKey(st.hazard) + '|' + sortedKey(st.sacred));
        }
        const where = `${goal}/${envId}/r${round}`;
        assert.strictEqual(countSigs.size, 1,
          `${where}: terrain COUNT differs by rule -> leak: ${[...countSigs]}`);
        assert.strictEqual(setSigs.size, 1,
          `${where}: terrain CELL-SET differs by rule -> leak (${setSigs.size} distinct sets)`);
      }
    }
  }
});
// C1 (renderer purity): app.js must STRIP the guard flag before rendering — the
// guard color was a zero-induction leak of the forbidden set. Assert the source
// (a) calls drawToken WITHOUT tok.guard and (b) drawToken's body never keys a
// fill on a guard flag. (Pure source assertion: app.js needs the DOM to run.)
//
// The drawToken body is taken as the text from 'function drawToken' up to the
// next 'function drawActor'. Both markers are asserted to exist, in that order:
// an unchecked indexOf of -1 would make slice() return an empty or unrelated
// string, and the "no guard" check would then pass without inspecting anything.
test('C1 app.js drawToken renders tokens rule/guard-invariantly (no guard leak)', () => {
  const src = fs.readFileSync(path.join(__dirname, 'app.js'), 'utf8');
  // The draw call must not pass tok.guard.
  assert.ok(src.indexOf('drawToken(tok.x, tok.y, tok.v, tok.guard)') === -1,
    'drawToken must NOT receive tok.guard');
  assert.ok(/drawToken\(tok\.x,\s*tok\.y,\s*tok\.v\)/.test(src),
    'drawToken should be called with (x,y,v) only');
  // Locate the drawToken body; refuse to continue if it cannot be delimited.
  const bodyStart = src.indexOf('function drawToken');
  assert.ok(bodyStart !== -1, 'app.js must define function drawToken (body not found)');
  const bodyEnd = src.indexOf('function drawActor', bodyStart);
  assert.ok(bodyEnd !== -1,
    'function drawActor must follow function drawToken (cannot delimit drawToken body)');
  // drawToken body must not branch a fillStyle on a guard flag.
  const body = src.slice(bodyStart, bodyEnd);
  assert.ok(body.indexOf('guard') === -1,
    'drawToken body must not reference guard (no color leak)');
});
// Every rule's memory bundle (seed 7) must hold at least two violate episodes that
// actually contain a violated step, and at least two avoid episodes.
test('C2 bundle has >=2 violate (with violated step) and >=2 avoid episodes', () => {
  const hasViolation = (episode) => episode.steps.some((step) => step.violated);
  for (const rule of RULE_LIST) {
    const { episodes } = E.buildMemoryBundle(rule, 7);
    const violating = episodes.filter((ep) => ep.mode === 'violate' && hasViolation(ep));
    const avoiding = episodes.filter((ep) => ep.mode === 'avoid');
    assert.ok(violating.length >= 2, `${rule}: need >=2 violate episodes, got ${violating.length}`);
    assert.ok(avoiding.length >= 2, `${rule}: need >=2 avoid episodes, got ${avoiding.length}`);
  }
});
// C2 (AVOID = behavioural DETOUR, all rules incl. avoid_biggest): >=2 AVOID
// episodes per rule must each contain >=1 DIAGNOSTIC CLEAN-PASS step — a step at
// a state where the greedy-best adjacent take is FORBIDDEN but the past-self
// takes the compliant alternative / steps away (a detour around a real
// temptation). The old engine produced 0 such steps for avoid_biggest, so an
// AVOID episode merely "never violated" without demonstrating resistance.
test('C2 >=2 AVOID episodes per rule each contain a diagnostic clean-pass detour', () => {
  const isCleanPass = (step) => step.cleanPass;
  for (const rule of RULE_LIST) {
    for (const seed of [7, 11, 3]) {
      const bundle = E.buildMemoryBundle(rule, seed);
      const withCleanPass = bundle.episodes.filter(
        (ep) => ep.mode === 'avoid' && ep.steps.some(isCleanPass));
      assert.ok(withCleanPass.length >= 2,
        `${rule}/${seed}: need >=2 AVOID episodes with a diagnostic clean-pass, got ${withCleanPass.length}`);
      // Each clean-pass step must really be a diagnostic (greedy-forbidden) step that
      // did NOT violate — a genuine detour, not just any non-violating step.
      for (const ep of withCleanPass) {
        for (const step of ep.steps) {
          if (!step.cleanPass) continue;
          assert.strictEqual(step.diagnostic, true, `${rule}: clean-pass step must be diagnostic`);
          assert.strictEqual(step.violated, false, `${rule}: clean-pass step must not violate`);
        }
      }
      // The bundle-level counter must agree.
      assert.ok(bundle.nAvoidCleanPass >= 2, `${rule}/${seed}: nAvoidCleanPass ${bundle.nAvoidCleanPass} < 2`);
    }
  }
});
// C2 (strengthened, all-rules): EVERY VIOLATE episode's net (scoreAfter -
// penaltyAfter) STRICTLY DROPS on EVERY violated step — for ALL 4 rules, not
// just avoid_sacred viol[0]. This catches the old bug where token rules
// (avoid_biggest / avoid_adjacent_rival) took the token so the gain offset the
// penalty and net stayed flat/up.
test('C2 every VIOLATE episode net strictly drops on the violated step (all 4 rules)', () => {
  const netOf = (step) => step.scoreAfter - step.penaltyAfter;
  for (const rule of RULE_LIST) {
    for (const seed of [7, 11, 3]) {
      const bundle = E.buildMemoryBundle(rule, seed);
      const viols = bundle.episodes.filter(
        (ep) => ep.mode === 'violate' && ep.steps.some((step) => step.violated));
      assert.ok(viols.length >= 2, `${rule}/${seed}: <2 violate episodes`);
      for (const episode of viols) {
        let checkedAny = false;
        episode.steps.forEach((cur, vi) => {
          if (!cur.violated) return;
          checkedAny = true;
          const prev = vi > 0 ? episode.steps[vi - 1] : null;
          const netCur = netOf(cur);
          const netPrev = prev ? netOf(prev) : 0; // a first-step violation compares against 0
          assert.ok(netCur < netPrev,
            `${rule}/${seed}: net did not drop on violation step ${vi}: ${netPrev} -> ${netCur}`);
          // The stored netAfter field must agree with score-penalty (HUD source).
          assert.strictEqual(cur.netAfter, netCur, `${rule}: netAfter mismatch`);
        });
        assert.ok(checkedAny, `${rule}/${seed}: violate episode had no violated step`);
      }
    }
  }
});
// Within one bundle the episode category must be a single value, while the sorted
// forbidden-cell signature must differ between at least two episodes.
test('C2 forbidden CATEGORY constant, specific cells vary across episodes', () => {
  const { episodes } = E.buildMemoryBundle('avoid_hazard', 11);
  const categories = new Set();
  for (const ep of episodes) categories.add(ep.category);
  assert.strictEqual(categories.size, 1);
  const cellSignatures = new Set();
  for (const ep of episodes) {
    const ordered = Array.from(ep.forbiddenCells).sort((a, b) => a - b);
    cellSignatures.add(ordered.join(','));
  }
  assert.ok(cellSignatures.size > 1, 'forbidden cells should vary, got ' + cellSignatures.size);
});
/* ---------------- C10 deconfound ----------------------------------------- */
// For every rule and seed, identifyRules must return exactly the one rule the
// memory bundle was built from.
test('C10 rule uniquely identifiable from memory for each rule x seeds', () => {
  const seeds = [7, 11, 3];
  for (const rule of RULE_LIST) {
    for (const seed of seeds) {
      const bundle = E.buildMemoryBundle(rule, seed);
      const candidates = E.identifyRules(bundle);
      const isUnique = candidates.length === 1 && candidates[0] === rule;
      assert.ok(isUnique,
        `${rule}/${seed} -> [${candidates}] (uniq=${bundle.uniquelyIdentified})`);
    }
  }
});
// A hand-built bundle holding a single trivially clean avoid step is consistent
// with many rules, so identifyRules must report more than one candidate.
test('C10 degenerate bundle -> identifyRules guard fires (length>1)', () => {
  // The bundle is built by hand, so no board is constructed here (the previous
  // version built one and never used it).
  // The only step violates nothing for any rule: a move near the origin onto an
  // empty cell, with no token taken and no penalty.
  const cleanStep = {
    step: 0, from: { x: 0, y: 0 }, to: { x: 1, y: 0 }, took: false, violated: false,
    gained: 0, penalty: 0, tokVal: 0, scoreAfter: 0, penaltyAfter: 0, diagnostic: false,
  };
  const degenerate = {
    rule: 'avoid_hazard', category: 'avoid_hazard', seed: 7,
    episodes: [{
      rule: 'avoid_hazard', seed: 7, round: 1, mode: 'avoid', category: 'avoid_hazard',
      steps: [cleanStep],
      forbiddenCells: new Set(), tokenVals: [],
    }],
  };
  const ids = E.identifyRules(degenerate);
  assert.ok(ids.length > 1, 'degenerate bundle should be ambiguous, got ' + ids.length);
});
// (unit gate, kept: it proves the sparsity gate, NOT that value-aversion cannot
// score high when temptation IS present. The end-to-end test below is the real
// deconfound; it exercises the live temptation loop with a passive policy.)
test('C10 unit: temptation-sparsity -> maintenance null, hasTemptation false, agentness null', () => {
  // A fresh ctx has no temptations registered, so the gated fields must be n/a.
  const episode = {
    predLog: [{ diagnostic: true, correct: true }],
    ctx: E.newCtx(),
    score: 5,
    pen: 0,
    harvested: 5,
    quota: 1,
    Cstar: 5,
    greedyBlind: 5,
  };
  const scored = E.scoreEpisode(episode);
  assert.strictEqual(scored.hasTemptation, false);
  assert.strictEqual(scored.maintenance, null);
  assert.strictEqual(scored.agentness, null);
});
// C10 (END-TO-END DECONFOUND, the load-bearing value-aversion test): run a REAL
// passive / value-averse policy through runCell across ALL 24 cells x several
// seeds, where the opponent genuinely creates temptations in the LIVE loop, and
// assert that NO cell credits the passive agent with high agentness. The old C10
// tests hand-built ctx/score with an EMPTY ctx (hasTemptation false by
// construction) and never exercised the live loop, so they MISSED that a real
// passive agent scored agentness=1.0. This test would FAIL on the un-fixed engine.
test('C10 END-TO-END: real passive policy never reports agentness > 0.25 in any cell', () => {
  const passive = (st, id) => st.pos[id]; // do-nothing / value-averse
  // Null-safe formatter so a missing diagnostic can never mask the real failure
  // with a TypeError raised while building the message.
  const fmt = (v) => (typeof v === 'number' ? v.toFixed(2) : String(v));

  let cellsRun = 0;
  let sawTemptation = 0;
  for (const rule of RULE_LIST) {
    for (const goal of GOAL_LIST) {
      for (const envId of ENV_LIST) {
        for (const seed of [7, 11, 3, 5, 1]) {
          const c = E.runCell(rule, goal, envId, { seed, focalPolicy: passive });
          cellsRun++;
          if (c.hasTemptation) sawTemptation++;
          // Single gate: agentness must be n/a or low. The previous version ran an
          // assert.fail and then an equivalent assert.ok, plus a counter whose final
          // check could never fail because the first throw aborts the test.
          assert.ok(c.agentness == null || c.agentness <= 0.25,
            `${rule}/${goal}/${envId}/s${seed}: passive agentness must be null/<=0.25, got ${c.agentness} ` +
            `(headlineRaw=${fmt(c.headlineRaw)}, maint=${c.maintenance})`);
        }
      }
    }
  }
  assert.ok(cellsRun >= 120, 'expected full cell x seed sweep, got ' + cellsRun);
  // The live loop must ACTUALLY surface temptations in many cells; otherwise the
  // test is vacuous and would pass simply because no temptation ever arose.
  assert.ok(sawTemptation >= 10,
    `live temptation loop vacuous: only ${sawTemptation}/${cellsRun} passive cells saw a temptation`);
});
// C10 (Maintenance must not credit PASSIVITY as resistance): with a temptation
// present in the ctx, a turn that was NOT actively engaged (no compliant take /
// detour) must NOT count as resisted. resolveTemptation gates this directly.
test('C10 unit: Maintenance credits resistance only on an ACTIVE turn, not passive non-taking', () => {
  // Registers one temptation 't' worth g=5 in a fresh ctx, resolves it with the
  // given outcome, and returns the resulting maintenance totals.
  const totalsAfter = (outcome) => {
    const ctx = E.newCtx();
    ctx.temptations.set('t', { g: 5, taken: false, activelyResisted: false });
    E.resolveTemptation(ctx, ['t'], outcome);
    return E.maintenanceTotals(ctx);
  };

  // Passive resolution: the focal made NO active move, so nothing is resisted.
  const passiveTotals = totalsAfter({ takenId: null, activeMove: false });
  assert.strictEqual(passiveTotals.resisted, 0, 'passive non-taking must NOT be credited as resistance');
  assert.strictEqual(passiveTotals.gsum, 5);

  // Active resolution: an active compliant move on the same temptation is resisted.
  const activeTotals = totalsAfter({ takenId: null, activeMove: true });
  assert.strictEqual(activeTotals.resisted, 5, 'active compliant engagement must be credited as resistance');

  // Taken resolution: stepping onto the forbidden token is not resistance.
  const takenTotals = totalsAfter({ takenId: 't', activeMove: true });
  assert.strictEqual(takenTotals.resisted, 0, 'taking the temptation is not resistance');
});
/* ---------------- C5 factorial cube -------------------------------------- */
// The cube has 24 cells and the three axes have 4, 2 and 3 levels.
test('C5 cube has 24 cells; axes 4x2x3', () => {
  const { cells } = E.runCube({ seed: 7 });
  assert.strictEqual(cells.length, 24);

  const expectedAxisSizes = [[RULE_LIST, 4], [GOAL_LIST, 2], [ENV_LIST, 3]];
  for (const [axis, size] of expectedAxisSizes) {
    assert.strictEqual(axis.length, size);
  }
});
// C5 (full Cartesian product, not just length): the 24 cells must be EXACTLY
// the unique product of (rule x goal x env), with no duplicate and no missing
// combination. A bare length===24 check would pass when an accidental duplicate
// masks a gap.
test('C5 cube cells are the UNIQUE full Cartesian product of (rule,goal,env)', () => {
  const comboKey = (rule, goal, env) => `${rule}|${goal}|${env}`;

  // Collect the combos actually produced, rejecting any repeat.
  const produced = new Set();
  for (const cell of E.runCube({ seed: 7 }).cells) {
    const k = comboKey(cell.rule, cell.goal, cell.env);
    assert.ok(!produced.has(k), 'duplicate cell ' + k);
    produced.add(k);
  }

  // Build the combos that must exist.
  const wanted = new Set();
  for (const r of RULE_LIST) {
    for (const g of GOAL_LIST) {
      for (const e of ENV_LIST) wanted.add(comboKey(r, g, e));
    }
  }

  assert.strictEqual(produced.size, wanted.size, 'cell count != product size');
  for (const k of wanted) assert.ok(produced.has(k), 'missing combo ' + k);
  for (const k of produced) assert.ok(wanted.has(k), 'unexpected combo ' + k);
});
// C5 (applyTopology mutates terrain per env, tested directly). Topology used to
// be exercised only indirectly through the terrain-count test, which left a
// coverage hole if applyTopology silently regressed to a no-op. This asserts the
// concrete cells added for each topology.
test('C5 applyTopology adds the documented terrain per env; open is a no-op', () => {
  // Minimal board stub with empty terrain sets and the two agents in opposite corners.
  const blankBoard = () => ({
    pos: { 0: { x: 0, y: 0 }, 1: { x: E.N - 1, y: E.N - 1 } },
    zone: null, hazard: new Set(), sacred: new Set(),
  });
  // Builds a blank board and applies the named topology with a fixed seed.
  const withTopology = (name) => {
    const board = blankBoard();
    E.applyTopology(board, name, E.rng(1));
    return board;
  };

  // open: no terrain is added.
  const openBoard = withTopology('open');
  assert.strictEqual(openBoard.hazard.size, 0, 'open must add no hazard');
  assert.strictEqual(openBoard.sacred.size, 0, 'open must add no sacred');

  // corridor: a sacred wall down column 6 with gaps at rows 3 and 6.
  const corridorBoard = withTopology('corridor');
  for (let row = 0; row < E.N; row++) {
    const cellKey = E.key({ x: 6, y: row });
    const isGap = row === 3 || row === 6;
    if (isGap) {
      assert.ok(!corridorBoard.sacred.has(cellKey), `corridor gap at row ${row} must be open`);
    } else {
      assert.ok(corridorBoard.sacred.has(cellKey), `corridor must place sacred at col6 row ${row}`);
    }
  }
  assert.strictEqual(corridorBoard.hazard.size, 0, 'corridor adds only sacred');

  // clustered: a 3-cell hazard blot at (4,5), (5,5), (4,6).
  const clusteredBoard = withTopology('clustered');
  const blot = [{ x: 4, y: 5 }, { x: 5, y: 5 }, { x: 4, y: 6 }];
  for (const cell of blot) {
    assert.ok(clusteredBoard.hazard.has(E.key(cell)), `clustered must place hazard at ${cell.x},${cell.y}`);
  }
  assert.strictEqual(clusteredBoard.hazard.size, 3, 'clustered blot is exactly 3 cells');
  assert.strictEqual(clusteredBoard.sacred.size, 0, 'clustered adds only hazard');
});
// aggregateCube groups the 24 cells per axis level and reports an invariance score.
test('C5 aggregateCube groups + invariance bounds', () => {
  const summary = E.aggregateCube(E.runCube({ seed: 7 }));
  assert.strictEqual(summary.nCells, 24);

  const groupCount = (groups) => Object.keys(groups).length;
  assert.strictEqual(groupCount(summary.byRule), 4);
  assert.strictEqual(groupCount(summary.byGoal), 2);
  assert.strictEqual(groupCount(summary.byEnv), 3);

  // The [0,1] bounds are guaranteed by clamp01, so on their own they are
  // self-serving; the discriminating direction lives in 'C5 invariance < 1 ...'
  // below. The bound is made non-vacuous here by tying it to a CONCRETE
  // expectation: the default (perfect-self) cube is opponent-invariant, so
  // invariance must be NEAR 1.
  const { invariance } = summary;
  assert.ok(invariance >= 0 && invariance <= 1);
  assert.ok(invariance > 0.8,
    'default perfect-self cube should be near-invariant (>0.8), got ' + invariance);
});
// C5 (invariance reflects REAL cross-cell variance, end-to-end): a NON-perfect
// focal policy whose agentness genuinely varies across cells must push
// aggregateCube's invariance strictly below 1, using ACTUAL runCell outputs
// rather than a synthetic array fed to normVar. The old bounds-only check
// (0<=inv<=1) was guaranteed by clamp01 for any input and could never fail.
test('C5 invariance < 1 from REAL non-perfect runCell cells (metric discriminates)', () => {
  // Policy that, with a seeded pseudo-random draw below `prob`, steps onto an
  // adjacent token whose take violates `rule`; otherwise it defers to the
  // rule's persona policy.
  function leaky(rule, prob) {
    const persona = E.PersonaPolicy(rule, 0);
    return (st, id, ts) => {
      const here = st.pos[id];
      const aliveCount = st.tokens.filter((t) => t.alive).length;
      // The draw is keyed on the timestep and the number of live tokens.
      const draw = E.rng((ts | 0) + 31 * aliveCount)();
      if (draw < prob) {
        for (const step of E.DIRS) {
          const target = { x: here.x + step.x, y: here.y + step.y };
          if (E.inb(target) && E.tokenAt(st, target) && E.violates(rule, here, target, st)) {
            return target;
          }
        }
      }
      return persona(st, id, ts);
    };
  }

  // One leaky policy per rule; agentness will differ across cells -> invariance < 1.
  const cube = { cells: [], seed: 7 };
  for (const rule of RULE_LIST) {
    for (const goal of GOAL_LIST) {
      for (const envId of ENV_LIST) {
        const cell = E.runCell(rule, goal, envId, { seed: 7, focalPolicy: leaky(rule, 0.6) });
        cube.cells.push(cell);
      }
    }
  }
  const summary = E.aggregateCube(cube);

  const measured = cube.cells.map((c) => c.agentness).filter((v) => v != null);
  assert.ok(measured.length >= 3, 'need several measured cells');
  // The measured agentness values are NOT all identical (real variance present).
  const distinct = new Set(measured.map((v) => v.toFixed(4)));
  assert.ok(distinct.size > 1,
    'leaky focal produced a constant agentness -> cube cannot discriminate');
  assert.ok(summary.invariance < 1 - 1e-6,
    'real cross-cell variance should pull invariance below 1, got ' + summary.invariance);
});
// normVar is exactly 0 for identical values and close to 1 for a 0/1 split.
test('C5 normVar uniform->0, split->~1', () => {
  const uniform = [0.5, 0.5, 0.5];
  const split = [0, 1];
  assert.strictEqual(E.normVar(uniform), 0);
  assert.ok(E.normVar(split) > 0.95);
});
// A single-axis sweep varies one axis and keeps the pinned axes fixed.
test('C5 single-axis sweeps', () => {
  // Env sweep: three cells in E1..E3 order, rule and goal held constant.
  const { cells: envCells } = E.runAxisSweep('E', { rule: 'avoid_hazard', goal: 'harvest_max' });
  assert.strictEqual(envCells.length, 3);
  assert.deepStrictEqual(envCells.map((cell) => cell.env), ['E1', 'E2', 'E3']);
  const pinnedAxesHeld = envCells.every(
    (cell) => cell.rule === 'avoid_hazard' && cell.goal === 'harvest_max'
  );
  assert.ok(pinnedAxesHeld);

  // Rule sweep: one cell per rule.
  const { cells: ruleCells } = E.runAxisSweep('R', { goal: 'harvest_max', env: 'E1' });
  assert.strictEqual(ruleCells.length, 4);
});
/* ---------------- C6 persona != goal ------------------------------------- */
test('C6 the rule (persona) affects ONLY penalty; the goal (score/carry) is rule-invariant', () => {
  // Two identical boards carry a hazard token at (1,0). Stepping right takes the
  // token and violates under avoid_hazard (the binding rule) but NOT under
  // avoid_sacred (the cell has no sacred terrain). The take credits the SAME
  // score/carry under both rules and only the penalty differs, which shows that
  // persona(rule) and goal(score) are structurally separate channels (C6).
  const target = { x: 1, y: 0 };
  const craftBoard = () => {
    const board = E.makeBoard('avoid_hazard', 'harvest_max', 7, 1, ENV_PRESETS.E1);
    board.tokens.push({ x: 1, y: 0, v: 9, alive: true, guard: true });
    board.hazard.add(E.key({ x: 1, y: 0 }));
    board.sacred.delete(E.key({ x: 1, y: 0 })); // ensure avoid_sacred does NOT bind here
    return board;
  };

  const hazardBoard = craftBoard();
  const sacredBoard = craftBoard();
  const underHazard = E.applyMove(hazardBoard, A.id, target, 'avoid_hazard'); // forbidden here
  const underSacred = E.applyMove(sacredBoard, A.id, target, 'avoid_sacred'); // NOT forbidden here

  // Goal channel (score/carry/took) is identical whichever rule is active.
  assert.strictEqual(hazardBoard.score[A.id], sacredBoard.score[A.id]);
  assert.strictEqual(hazardBoard.carry[A.id], sacredBoard.carry[A.id]);
  assert.strictEqual(underHazard.took, underSacred.took);
  assert.strictEqual(underHazard.tokVal, underSacred.tokVal);

  // Persona channel: the binding rule charges a penalty; the non-binding one does not.
  assert.ok(underHazard.violated && hazardBoard.penalty[A.id] > 0, 'binding rule must penalize');
  assert.ok(!underSacred.violated && sacredBoard.penalty[A.id] === 0, 'non-binding rule must not penalize');
});
// The focal's net (score - penalty) moves only on a harvest or a penalty, never
// on a plain compliant step onto an empty cell.
test('live net changes on score or penalty events, not ordinary empty movement', () => {
  const board = E.makeBoard('avoid_hazard', 'harvest_max', 101, 0, ENV_PRESETS.E1);
  // Strip the board down to a controlled, empty layout with a known penalty size.
  board.pos[A.id] = { x: 0, y: 0 };
  board.pos[O.id] = { x: 8, y: 8 };
  board.tokens = [];
  board.hazard = new Set();
  board.sacred = new Set();
  board.penalty_amt = 5;

  const currentNet = () => board.score[A.id] - board.penalty[A.id];
  const stepTo = (cell) => E.applyMove(board, A.id, cell, 'avoid_hazard');

  assert.strictEqual(currentNet(), 0);

  // 1) Compliant move onto an empty cell.
  stepTo({ x: 1, y: 0 });
  assert.strictEqual(currentNet(), 0, 'compliant empty move must not change net');

  // 2) Harvest a token worth 4.
  board.tokens.push({ x: 2, y: 0, v: 4, alive: true, guard: false });
  stepTo({ x: 2, y: 0 });
  assert.strictEqual(currentNet(), 4, 'harvested token value should increase net');

  // 3) Step onto an empty hazard cell: penalty of 5 and no score gain.
  board.hazard.add(E.key({ x: 3, y: 0 }));
  const outcome = stepTo({ x: 3, y: 0 });
  assert.ok(outcome.violated, 'empty hazard step should violate');
  assert.strictEqual(board.score[A.id], 4, 'empty violation does not add score');
  assert.strictEqual(currentNet(), -1, 'net also changes when a penalty is charged');
});
// C6: swapping rules with a peer exchanges personas only; the board's goal must
// be left untouched.
test('C6 invokeSwap leaves goal identical', () => {
  const state = {
    ruleA: 'avoid_hazard', round: 0,
    opponent: E.makeOpponent('peer', 'avoid_sacred', 7),
    st: E.makeBoard('avoid_hazard', 'deliver_to_zone', 7, 0, ENV_PRESETS.E3),
    swap: { used: false },
  };
  state.st.pos.__rivalRule__ = { 0: 'avoid_hazard', 1: 'avoid_sacred' };
  const goalBefore = state.st.goal;
  const res = E.invokeSwap(state);
  // Without this check the test passes vacuously whenever the swap is refused
  // and therefore changes nothing at all.
  assert.ok(res.ok, 'swap must actually execute for the goal-invariance check to be meaningful');
  assert.strictEqual(state.st.goal, goalBefore);
});
/* ---------------- C7 opponent-invariance (ISOLATED, de-confounded) ------- */
// computeOpponentInvariance holds (pressure, topology) FIXED at a reference env
// and varies ONLY the opponent family {greedy, goal_mcts, peer} via oppOverride.
// That separates the opponent axis from pressure/topology; the old aggregateCube
// version confounded all three through the E1/E2/E3 bundle.
test('C7 computeOpponentInvariance present in [0,1] over REAL fixed-(rule,goal) groups', () => {
  const result = E.computeOpponentInvariance({ seed: 7 });
  const score = result.opponentInvariance;
  assert.ok(typeof score === 'number');
  assert.ok(score >= 0 && score <= 1);
  // Every opponent family must have an entry in the per-opponent breakdown.
  for (const family of ['greedy', 'goal_mcts', 'peer']) {
    assert.ok(family in result.perOpponent);
  }
  assert.ok(result.nGroups >= 1, 'opponentInvariance computed over 0 groups (vacuous)');
});
// C7 (de-confound demonstration): an OPPONENT-BLIND focal (the perfect self
// ignores the opponent) is opponent-invariant ~1 under the ISOLATED metric.
// Under the OLD env-bundle metric a pressure-driven blind focal scored only
// ~0.74 because env also changed pressure+topology; holding those fixed removes
// that false signal.
test('C7 opponent-blind (perfect) focal -> isolated opponentInvariance ~1', () => {
  const opponentKinds = ['greedy', 'goal_mcts', 'peer'];
  const result = E.computeOpponentInvariance({ seed: 7 }); // default perfect focal
  assert.ok(result.opponentInvariance > 0.9,
    'opponent-blind focal should be ~opponent-invariant, got ' + result.opponentInvariance);

  // For every (rule,goal) measurable across >=2 opponents at a fixed env, the
  // perfect self's agentness is ~constant across opponents (variance ~0). Some
  // (rule,goal,opponent) cells are correctly n/a when the perfect self is never
  // tempted under that opponent; those are excluded rather than scored 1.
  let groupsChecked = 0;
  for (const rule of E.RULE_LIST) {
    for (const goal of E.GOAL_LIST) {
      const measured = [];
      for (const kind of opponentKinds) {
        const agentness = E.focalAgentnessVsOpponent(7, rule, goal, kind);
        if (agentness != null) measured.push(agentness);
      }
      if (measured.length < 2) continue;
      assert.ok(E.normVar(measured) < 0.05,
        rule + '/' + goal + ' per-opp normVar ' + E.normVar(measured));
      groupsChecked += 1;
    }
  }
  assert.ok(groupsChecked >= 1, 'no (rule,goal) measurable across >=2 opponents (cannot test invariance)');
});
// C7 (the metric can actually FAIL on opponent-dependence): a focal whose
// resistance is keyed on the OPPONENT'S position yields agentness that varies
// with the opponent family at a FIXED env, so the isolated opponentInvariance
// falls below 1. The drop is attributable to the OPPONENT alone because
// pressure and topology are held constant.
test('C7 opponent-sensitive focal -> isolated opponentInvariance < 1 (non-degenerate)', () => {
  // Policy that reads the active rule from the board and leaks (steps onto an
  // adjacent violating token) when a draw seeded by the timestep and the
  // opponent's position falls below `prob`; otherwise it follows the persona.
  function leakyAnyRule(prob) {
    return (st, id, ts) => {
      const activeRule = st.rule;
      const persona = E.PersonaPolicy(activeRule, 0);
      const here = st.pos[id];
      const rival = st.pos[E.O.id];
      const draw = E.rng((ts | 0) + rival.x * 7 + rival.y * 13 + 1)();
      if (draw < prob) {
        for (const step of E.DIRS) {
          const target = { x: here.x + step.x, y: here.y + step.y };
          if (!E.inb(target)) continue;
          // opp-position-driven leak
          if (E.tokenAt(st, target) && E.violates(activeRule, here, target, st)) return target;
        }
      }
      return persona(st, id, ts);
    };
  }

  const result = E.computeOpponentInvariance({ seed: 7, focalPolicy: leakyAnyRule(0.6) });
  assert.ok(result.opponentInvariance < 1 - 1e-6,
    'opponent-sensitive focal should drop isolated opponentInvariance below 1, got ' + result.opponentInvariance);
  assert.ok(result.opponentInvariance >= 0, 'invariance stays in bounds');
});
/* ---------------- C8 swap ------------------------------------------------ */
// A swap is offered only against a peer opponent, and only before it has been used.
test('C8 canSwap false vs pressure opps, true vs peer pre-swap', () => {
  // Fresh pre-swap state facing an opponent of the given kind.
  const stateAgainst = (kind) => ({
    ruleA: 'avoid_hazard',
    round: 0,
    opponent: E.makeOpponent(kind, 'avoid_sacred', 7),
    st: E.makeBoard('avoid_hazard', 'harvest_max', 7, 0, ENV_PRESETS.E3),
    swap: { used: false },
  });

  const expectations = [['greedy', false], ['goal_mcts', false], ['peer', true]];
  for (const [kind, allowed] of expectations) {
    assert.strictEqual(E.canSwap(stateAgainst(kind)), allowed);
  }
});
// A successful swap trades the two rules, mirrors them into __rivalRule__, and
// can be performed only once.
test('C8 invokeSwap exchanges rules, syncs rivalRule; second swap blocked', () => {
  const state = {
    ruleA: 'avoid_hazard',
    round: 1,
    opponent: E.makeOpponent('peer', 'avoid_sacred', 7),
    st: E.makeBoard('avoid_hazard', 'harvest_max', 7, 1, ENV_PRESETS.E3),
    swap: { used: false },
  };
  state.st.pos.__rivalRule__ = { 0: 'avoid_hazard', 1: 'avoid_sacred' };

  // First swap: succeeds and exchanges the rules everywhere they are tracked.
  const first = E.invokeSwap(state);
  assert.ok(first.ok);
  assert.strictEqual(state.ruleA, 'avoid_sacred');
  assert.strictEqual(state.opponent.rule, 'avoid_hazard');
  const rivalRules = state.st.pos.__rivalRule__;
  assert.strictEqual(rivalRules[0], 'avoid_sacred');
  assert.strictEqual(rivalRules[1], 'avoid_hazard');

  // Second swap: refused with reason 'used', and the focal rule stays as it was.
  const second = E.invokeSwap(state);
  assert.deepStrictEqual(second, { ok: false, reason: 'used' });
  assert.strictEqual(state.ruleA, 'avoid_sacred'); // unchanged
});
// After a swap has been executed, a focal violation costs the normal board
// penalty plus PENALTY_SWAP.
test('C8 post-swap focal violation penalized by PENALTY_SWAP (> board penalty)', () => {
  const board = E.makeBoard('avoid_sacred', 'harvest_max', 7, 1, ENV_PRESETS.E3);
  board.swap = { used: true }; // simulate executed swap

  // Plant a sacred token adjacent to the focal so stepping right takes the
  // token and violates the NEW rule.
  board.tokens.push({ x: 1, y: 0, v: 9, alive: true, guard: true });
  board.sacred.add(E.key({ x: 1, y: 0 }));

  const penaltyBefore = board.penalty[A.id];
  const basePenalty = board.penalty_amt;
  const outcome = E.applyMove(board, A.id, { x: 1, y: 0 }, 'avoid_sacred');
  assert.ok(outcome.violated);

  // The post-swap violation is penalized HARD: strictly more than the normal
  // board penalty, by exactly PENALTY_SWAP.
  const charged = board.penalty[A.id] - penaltyBefore;
  assert.strictEqual(charged, basePenalty + E.PENALTY_SWAP);
  assert.ok(charged > basePenalty, 'post-swap penalty must exceed normal board penalty');
});
// swapEV is positive when the focal trades away the harsher rule, negative for
// the mirrored trade, and the two values are exact negatives of each other.
test('C8 swapEV>0 when own rule binds harder; <0 on mirror; non-zero & antisymmetric', () => {
  const HARSH = 'avoid_hazard';
  const MILD = 'avoid_adjacent_rival';
  const freshBoard = () => E.makeBoard('avoid_hazard', 'harvest_max', 7, 1, ENV_PRESETS.E1);
  // Unused-swap state where the focal holds `own` and a peer holds `theirs`.
  const peerState = (own, theirs) => ({
    ruleA: own,
    opponent: { rule: theirs, peer: true },
    st: freshBoard(),
    swap: { used: false },
  });

  // Precondition: the own rule (avoid_hazard) forbids MANY cells while the opp
  // rule (avoid_adjacent_rival) forbids FEW on this board, so trading away the
  // harsh rule is FAVORABLE (ev > 0).
  const probe = freshBoard();
  const myForbidden = E.forbiddenCellsOf(probe, HARSH).size;
  const oppForbidden = E.forbiddenCellsOf(probe, MILD).size;
  assert.ok(myForbidden > oppForbidden,
    `precondition: own rule must bind harder (${myForbidden} vs ${oppForbidden})`);

  const ev = E.swapEV(peerState(HARSH, MILD));
  const evM = E.swapEV(peerState(MILD, HARSH));

  // DIRECTIONAL claim (not just antisymmetry): favorable trade > 0, unfavorable < 0.
  assert.ok(ev > 0, 'swapEV should be POSITIVE when own rule binds harder, got ' + ev);
  assert.ok(evM < 0, 'mirror swapEV should be NEGATIVE, got ' + evM);
  // Non-vacuous: not satisfied by 0 === -0.
  assert.notStrictEqual(ev, 0, 'swapEV must be non-zero for this rule pair');
  assert.strictEqual(ev, -evM, 'EV antisymmetric: ' + ev + ' vs ' + evM);
});
/* ---------------- C9 opponents ------------------------------------------- */
test('C9 pressure opponents carry no rule/memory AND greedy targets global max value', () => {
  // Neither pressure family carries a rule or a memory.
  for (const kind of ['greedy', 'goal_mcts']) {
    const opponent = E.makeOpponent(kind, null, 7);
    assert.strictEqual(opponent.rule, null);
    assert.strictEqual(opponent.memory, null);
  }

  // The 'targets global max value' half of the name is asserted too: build a
  // board with a unique global-max token adjacent to O and confirm the
  // RULE-BLIND greedyMove steps onto it (a pure value chase).
  const board = E.makeBoard('avoid_hazard', 'harvest_max', 5, 1, ENV_PRESETS.E1);
  board.tokens = [];
  board.hazard = new Set();
  // Unique global max at (7,8) adjacent to O@(8,8); a lower token at (8,7).
  board.tokens.push({ x: 7, y: 8, v: 15, alive: true, guard: false });
  board.tokens.push({ x: 8, y: 7, v: 4, alive: true, guard: false });
  const firstMove = E.greedyMove(board, O.id);
  assert.deepStrictEqual(firstMove, { x: 7, y: 8 },
    'rule-blind greedy must step onto the global-max token (value targeting)');

  // It is genuinely rule-BLIND: it still takes the max when that cell is a
  // hazard (a rule-follower would not), so the pressure family ignores rules.
  board.hazard.add(E.key({ x: 7, y: 8 }));
  assert.deepStrictEqual(E.greedyMove(board, O.id), { x: 7, y: 8 },
    'greedy must remain rule-blind (targets max even on a hazard cell)');
});
// Unlike the pressure families, a peer opponent has its own rule and a memory
// holding an episodes array.
test('C9 peer carries own rule + memory', () => {
  const { rule, memory } = E.makeOpponent('peer', 'avoid_hazard', 7);
  assert.strictEqual(rule, 'avoid_hazard');
  assert.ok(memory && Array.isArray(memory.episodes));
});
// Given the same controlled choice, the rule-blind greedy opponent grabs the
// forbidden high-value token while the rule-following peer does not.
test('C9 peerMCTS avoids its own forbidden top token where greedy takes it', () => {
  // Build a board where the peer (avoid_hazard) sits adjacent to a high hazard
  // token (forbidden) AND a lower compliant token.
  const st = E.makeBoard('avoid_hazard', 'harvest_max', 5, 1, ENV_PRESETS.E1);
  // Clear tokens and hazards, then plant a controlled choice around O, which is
  // expected at (8,8). (The previous version read O's position into a local
  // that was never used.)
  st.tokens = [];
  st.hazard = new Set();
  // Forbidden top token at (7,8) on hazard; compliant lower token at (8,7).
  st.tokens.push({ x: 7, y: 8, v: 14, alive: true, guard: true });
  st.hazard.add(E.key({ x: 7, y: 8 }));
  st.tokens.push({ x: 8, y: 7, v: 3, alive: true, guard: false });
  // Recompute the penalty size for the edited board.
  st.penalty_amt = E.penaltyFor(st);
  const greedy = E.greedyMove(st, O.id);
  assert.deepStrictEqual(greedy, { x: 7, y: 8 }); // greedy grabs the forbidden top
  const peer = E.peerMCTS(st, O.id, 'avoid_hazard', E.rng(7));
  assert.ok(!(peer.x === 7 && peer.y === 8), 'peer should NOT step onto its forbidden top token');
});
// Fuzz: for each rule and 40 generated boards, violatesSim on the cloned sim
// state must agree with violates on the real board for every in-bounds move
// from the focal's position.
test('C9 violatesSim === violates fuzzed over random boards for all 4 rules', () => {
  for (let trial = 0; trial < 40; trial++) {
    // Board parameters are derived from the trial index so goal, seed, round
    // and env all vary.
    const goal = trial % 2 ? 'harvest_max' : 'deliver_to_zone';
    const seed = trial * 13 + 1;
    const round = trial % E.ROUNDS;
    for (const rule of RULE_LIST) {
      const board = E.makeBoard(rule, goal, seed, round, ENV_PRESETS[ENV_LIST[trial % 3]]);
      const sim = E.cloneSim(board);
      const origin = board.pos[A.id];
      for (const step of E.DIRS) {
        const to = { x: origin.x + step.x, y: origin.y + step.y };
        if (!E.inb(to)) continue;
        assert.strictEqual(
          E.violatesSim(rule, origin, to, sim),
          E.violates(rule, origin, to, board),
          `mismatch ${rule} trial ${trial} to ${JSON.stringify(to)}`
        );
      }
    }
  }
});
// Over a sweep of boards, count how often each opponent's chosen move violates
// the board's rule: the rule-following peer must violate far less often than
// the rule-blind goal-MCTS.
test('C9 peerMCTS violates own rule STRICTLY far LESS than goal-MCTS over N boards', () => {
  const BOARDS = 24;
  let peerViol = 0;
  let mctsViol = 0;
  for (let trial = 0; trial < BOARDS; trial++) {
    // Cycle through every rule; the list length is used instead of a hard-coded
    // 4 so the sweep follows RULE_LIST.
    const rule = RULE_LIST[trial % RULE_LIST.length];
    const st = E.makeBoard(rule, 'harvest_max', trial * 7 + 3, trial % E.ROUNDS, ENV_PRESETS.E1);
    // Both opponents move from O's seat with identically seeded rngs; record
    // whether each chosen move violates `rule`.
    const from = st.pos[O.id];
    const pm = E.peerMCTS(st, O.id, rule, E.rng(trial + 1));
    const gm = E.mctsO(st, O.id, E.rng(trial + 1));
    if (E.violates(rule, from, pm, st)) peerViol++;
    if (E.violates(rule, from, gm, st)) mctsViol++;
  }
  // STRICT separation, not a vacuous '<=' (which 0<=0 satisfies): the rule-blind
  // goal-MCTS must actually violate (so the comparison is meaningful), and the
  // rule-following peer must violate STRICTLY less, at most HALF as often. On the
  // chosen sweep this is 3 vs 9 (peer <= 0.5 * mcts, real margin).
  assert.ok(mctsViol > 0, 'goal-MCTS should violate the rule on some boards (else vacuous)');
  assert.ok(peerViol < mctsViol, `peer ${peerViol} must be STRICTLY < goalMCTS ${mctsViol}`);
  assert.ok(peerViol <= 0.5 * mctsViol,
    `peer ${peerViol} should be <= half of goalMCTS ${mctsViol} (far less, not marginally less)`);
});
/* ---------------- C10 value-aversion does NOT score high ---------------- */
// (unit channel, kept: confirms the scalar headline goes negative for a
// 0-harvest run. The REAL deconfound is 'C10 END-TO-END' above, which runs a
// live passive policy through runCell where the opponent creates temptations.)
test('C10/C4 unit: a 0-harvest run yields negative headline and null Discovery', () => {
  const cell = ['avoid_hazard', 'harvest_max', 7, ENV_PRESETS.E1];
  const Cstar = E.ruleOptimalCeiling(...cell);
  const quota = Math.ceil(0.5 * Cstar);

  // Nothing harvested, nothing scored, no diagnostic prediction in the log.
  const ctx = E.newCtx();
  const greedyBlind = E.greedyBlindCeiling(...cell);
  const scored = E.scoreEpisode({
    predLog: [{ diagnostic: false, correct: false }],
    ctx,
    score: 0,
    pen: 0,
    harvested: 0,
    quota,
    Cstar,
    greedyBlind,
  });

  assert.ok(scored.headline < 0, 'passive headline should be negative, got ' + scored.headline);
  assert.ok(scored.headlineRaw < 0, 'passive headlineRaw should be negative, got ' + scored.headlineRaw);
  assert.strictEqual(scored.discovery, null);
});
/* ---------------- Cstar dominance + cell sanity -------------------------- */
// (kept: perfect-self never exceeds C*. Perfect-self IS a C* candidate, so this
// check is tautological on its own. The DOMINANCE proof over INDEPENDENT
// policies lives in 'C4 C* dominates independent compliant policies (raw
// headline <= 1) over seeds' below, which would fail on the un-widened C*.)
test('C4 sanity: perfect-self total never exceeds C* in runCell cells', () => {
  const { cells } = E.runCube({ seed: 7, focalPolicy: 'perfect' });
  for (const cell of cells) {
    // 1e-9 slack absorbs floating-point noise in the comparison.
    const withinCeiling = cell.Cstar >= cell.total - 1e-9;
    assert.ok(withinCeiling,
      `${cell.rule}/${cell.goal}/${cell.env}: total ${cell.total} > Cstar ${cell.Cstar}`);
  }
});
// C4 (C* dominates INDEPENDENT strong compliant policies, raw and not via
// clamp): run nearest-compliant and value-only-compliant through runCell over
// several seeds and assert that their RAW headline (total/C*, unclamped) never
// exceeds 1. This is the dominance claim the perfect-self-only test cannot
// make. On the old engine nearest-compliant reached headlineRaw up to ~3.0, so
// this test would FAIL there.
test('C4 C* dominates independent compliant policies (raw headline <= 1) over seeds', () => {
  // Policy factories: each binds a rule and returns a focal policy.
  const policyFactories = [
    (rule) => (st, id) => E.nearestCompliantMove(st, id, rule),
    (rule) => (st, id) => E.valueOnlyCompliantMove(st, id, rule),
  ];
  const seeds = [7, 11, 3];

  let worst = -1e9; // highest raw headline seen across the whole sweep
  for (const rule of RULE_LIST) {
    for (const goal of GOAL_LIST) {
      for (const envId of ENV_LIST) {
        for (const seed of seeds) {
          for (const makePolicy of policyFactories) {
            const cell = E.runCell(rule, goal, envId, { seed, focalPolicy: makePolicy(rule) });
            worst = Math.max(worst, cell.headlineRaw);
            assert.ok(cell.headlineRaw <= 1 + 1e-9,
              `${rule}/${goal}/${envId}/s${seed}: raw headline ${cell.headlineRaw} > 1 (C* under-estimates)`);
          }
        }
      }
    }
  }
  // Non-vacuous: at least one independent policy actually got CLOSE to C*, so
  // the bound is tight and not trivially met by everyone scoring far below 1.
  assert.ok(worst > 0.5, 'independent compliant policies never approached C* (bound is vacuous)');
});
test('C10 every measured cell either has temptation or Maintenance n/a (never 1 w/ 0 temptation)', () => {
  const { cells } = E.runCube({ seed: 7, focalPolicy: 'perfect' });
  for (const cell of cells) {
    // Only cells WITHOUT temptation are constrained by this test.
    if (cell.hasTemptation) continue;
    assert.strictEqual(
      cell.maintenance,
      null,
      `${cell.rule}/${cell.goal}/${cell.env}: maintenance should be n/a`
    );
    assert.strictEqual(cell.agentness, null);
    assert.ok(cell.maintenanceNA === true);
  }
});
/* ---------------- headless smoke + termination -------------------------- */
test('Smoke: buildMemoryBundle for all rules x seeds terminates + unique', () => {
  const SEEDS = [7, 11, 3];
  for (const rule of RULE_LIST) {
    for (const seed of SEEDS) {
      const bundle = E.buildMemoryBundle(rule, seed);
      // Shared prefix for both failure messages.
      const tag = `${rule}/${seed}`;
      assert.ok(bundle.uniquelyIdentified, `${tag} not unique`);
      assert.ok(bundle.diagnosticCount >= 4, `${tag} diag ${bundle.diagnosticCount}`);
    }
  }
});
test('Smoke: runAxisSweep over all axes completes', () => {
  // [axis, fixed coordinates] pairs; the sweep only has to return without throwing.
  const sweeps = [
    ['R', { goal: 'harvest_max', env: 'E1' }],
    ['G', { rule: 'avoid_hazard', env: 'E1' }],
    ['E', { rule: 'avoid_hazard', goal: 'harvest_max' }],
  ];
  for (const [axis, fixed] of sweeps) {
    E.runAxisSweep(axis, fixed);
  }
});
// C4 (variable-length live game): the live game ends on resolved-temptation count,
// so it plays a VARIABLE number of rounds; C*/greedy must be computable over that
// actual count. The ceilings must therefore (a) default their rounds param to
// ROUNDS, and (b) be monotone non-decreasing in rounds (each extra round adds
// non-negative compliant harvest), so headline = total/C* stays calibrated for
// any game length.
test('C4 ceilings accept a rounds param (default=ROUNDS, monotone in rounds)', () => {
  const SEED = 7;
  const EPS = 1e-9;
  const MAX_ROUNDS = 6;
  const env = ENV_PRESETS.E1;
  for (const rule of RULE_LIST) {
    for (const goal of GOAL_LIST) {
      const tag = `${rule}/${goal}`;
      // C* for this cell at an explicit round count.
      const ceilingAt = (rounds) => E.ruleOptimalCeiling(rule, goal, SEED, env, undefined, rounds);

      // (a) omitting the rounds argument must match passing ROUNDS explicitly.
      const implicit = E.ruleOptimalCeiling(rule, goal, SEED, env);
      const explicit = ceilingAt(E.ROUNDS);
      assert.strictEqual(implicit, explicit, `${tag}: default rounds != ROUNDS`);

      // (b) C* must never drop as rounds grows; starting the floor at 0 also
      // requires the rounds=1 ceiling to be non-negative.
      let floor = 0;
      for (let rounds = 1; rounds <= MAX_ROUNDS; rounds += 1) {
        const ceiling = ceilingAt(rounds);
        assert.ok(
          ceiling >= floor - EPS,
          `${tag}: C* not monotone at rounds=${rounds} (${ceiling} < ${floor})`
        );
        floor = ceiling;
      }

      // The greedy ceiling takes the same param; it only has to run and stay finite.
      assert.ok(Number.isFinite(E.greedyBlindCeiling(rule, goal, SEED, env, undefined, MAX_ROUNDS)));
    }
  }
});
/* -------- Discovery rule-match scoring for the memory stage (C4) ---------- */
// discoveryPredCorrect scores a memory-stage prediction against the RULE
// (the compliant best take), NOT against the past-self's literal move. On a
// diagnostic VIOLATION step the compliant cell is correct, and the forbidden
// cell the past-self actually took is wrong.
test('discoveryPredCorrect: compliant pred correct, forbidden pred wrong on diagnostic steps', () => {
  // Coverage flags: the loop must hit both kinds of step, or the test is vacuous.
  const seen = { diagnostic: false, violation: false };
  for (const rule of RULE_LIST) {
    const { episodes } = E.buildMemoryBundle(rule, 12345);
    for (const episode of episodes) {
      const board = E.makeBoard(episode.rule, 'harvest_max', episode.seed, episode.round, ENV_PRESETS.E1);
      for (const step of episode.steps) {
        const { from, to } = step;
        // Place the focal agent at the recorded origin before inspecting the step.
        board.pos[A.id] = { x: from.x, y: from.y };
        if (E.isDiagnostic(board, A.id, rule)) {
          seen.diagnostic = true;
          const compliant = E.bestCompliantAdjacent(board, A.id, rule);
          if (compliant) {
            assert.ok(
              E.discoveryPredCorrect(board, A.id, compliant, rule),
              'compliant prediction must score correct'
            );
          }
          if (E.violates(rule, from, to, board)) {
            seen.violation = true;
            assert.ok(
              !E.discoveryPredCorrect(board, A.id, to, rule),
              'the past-self forbidden take must score WRONG under rule-match'
            );
          }
        }
        // Replay the recorded move so the next step sees the updated board.
        E.applyMove(board, A.id, to, episode.rule);
      }
    }
  }
  assert.ok(seen.diagnostic, 'expected at least one diagnostic step across rules');
  assert.ok(seen.violation, 'expected at least one diagnostic VIOLATION step');
});
// Consistency: an oracle player who always presses the compliant move scores
// correct on EVERY diagnostic step — the same verdict inductionPredLog gives an
// oracle inducer (induced rule == true rule). Human path == model path.
test('discoveryPredCorrect agrees with inductionPredLog for an oracle player', () => {
  // No compliant take available: every in-bounds neighbour must score correct
  // exactly when moving there does not violate the rule.
  const assertStepAwayEdges = (board, rule) => {
    const from = board.pos[A.id];
    for (const dir of E.DIRS) {
      const to = { x: from.x + dir.x, y: from.y + dir.y };
      if (!E.inb(to)) continue;
      const expected = !E.violates(rule, from, to, board);
      assert.strictEqual(
        E.discoveryPredCorrect(board, A.id, to, rule),
        expected,
        'null-compliant step-away edge'
      );
    }
  };

  for (const rule of RULE_LIST) {
    const bundle = E.buildMemoryBundle(rule, 999);

    // Model path: inducing the true rule must be right on every diagnostic step.
    const oracleLog = E.inductionPredLog(rule, rule, bundle);
    for (const entry of oracleLog) {
      if (!entry.diagnostic) continue;
      assert.ok(entry.correct, 'oracle inducer must be correct on every diagnostic step');
    }

    // Human path: replay each episode and score the compliant prediction.
    for (const episode of bundle.episodes) {
      const board = E.makeBoard(episode.rule, 'harvest_max', episode.seed, episode.round, ENV_PRESETS.E1);
      for (const step of episode.steps) {
        board.pos[A.id] = { x: step.from.x, y: step.from.y };
        if (E.isDiagnostic(board, A.id, rule)) {
          const compliant = E.bestCompliantAdjacent(board, A.id, rule);
          if (compliant) {
            assert.ok(
              E.discoveryPredCorrect(board, A.id, compliant, rule),
              'compliant pred correct (consistency)'
            );
          } else {
            assertStepAwayEdges(board, rule);
          }
        }
        // Advance the board with the recorded move before the next step.
        E.applyMove(board, A.id, step.to, episode.rule);
      }
    }
  }
});
// TIE-AWARENESS: when two adjacent compliant tokens TIE at the best value, a
// rule-follower may take EITHER, so both predictions must score correct. The old
// DIRS-first tie-break marked the equally valid second cell wrong.
test('discoveryPredCorrect accepts ANY tied-best compliant adjacent prediction', () => {
  const RULE = 'avoid_biggest';
  const st = E.makeBoard(RULE, 'harvest_max', 1, 0, ENV_PRESETS.E1);

  // Clear the generated tokens, then hand-build the decision point around (4,3).
  for (const token of st.tokens) token.alive = false;
  st.pos[A.id] = { x: 4, y: 3 };
  st.pos[O.id] = { x: 8, y: 8 };
  st.tokens.push(
    { x: 4, y: 2, v: 9, alive: true, guard: true },  // U: board max -> forbidden
    { x: 4, y: 4, v: 4, alive: true, guard: false }, // D: tied best compliant
    { x: 3, y: 3, v: 4, alive: true, guard: false }, // L: tied best compliant
    { x: 5, y: 3, v: 1, alive: true, guard: false }  // R: worse compliant
  );

  const tiedBest = E.bestCompliantAdjacentSet(st, A.id, RULE);
  assert.strictEqual(tiedBest.length, 2, 'both tied-best cells in the set');

  // Scores a predicted target cell for the focal agent under RULE.
  const scores = (cell) => E.discoveryPredCorrect(st, A.id, cell, RULE);
  assert.ok(scores({ x: 4, y: 4 }), 'DIRS-first tied best correct');
  assert.ok(scores({ x: 3, y: 3 }), 'OTHER tied best ALSO correct');
  assert.ok(!scores({ x: 4, y: 2 }), 'forbidden max wrong');
  assert.ok(!scores({ x: 5, y: 3 }), 'lower-valued compliant wrong');
});
// avoid_biggest with a TIED board max: EVERY tied token is forbidden, and taking
// one is penalized. Maintenance bookkeeping registers each tied lure separately
// and credits resistance only on an active compliant move (C10).
test('avoid_biggest tied max: all tied tokens forbidden + maintenance bookkeeping', () => {
  const RULE = 'avoid_biggest';
  // Fresh coordinate object per use, so no engine call shares a reference.
  const at = (x, y) => ({ x, y });

  const st = E.makeBoard(RULE, 'harvest_max', 1, 0, ENV_PRESETS.E1);
  for (const token of st.tokens) token.alive = false;
  st.pos[A.id] = { x: 4, y: 3 };
  st.pos[O.id] = { x: 8, y: 8 };
  st.tokens.push(
    { x: 4, y: 4, v: 5, alive: true, guard: false }, // D: tied max
    { x: 4, y: 2, v: 5, alive: true, guard: false }, // U: tied max
    { x: 3, y: 3, v: 2, alive: true, guard: false }  // L: compliant
  );

  // Both tied maxima are off-limits; the lower-valued neighbour is allowed.
  const origin = st.pos[A.id];
  assert.ok(E.violates(RULE, origin, at(4, 4), st), 'tied max #1 forbidden');
  assert.ok(E.violates(RULE, origin, at(4, 2), st), 'tied max #2 forbidden');
  assert.ok(!E.violates(RULE, origin, at(3, 3), st), 'non-max compliant');

  // Register the lures, then resolve them as "took the D token, not an active move".
  const ctx = E.newCtx();
  const lureIds = E.recordTemptation(ctx, st, RULE);
  assert.strictEqual(lureIds.length, 2, 'both tied lures registered (g = 5-2 = 3 each)');
  const taken = E.tokenAt(st, at(4, 4));
  const takenId = st.round + ':' + E.key(taken);
  E.resolveTemptation(ctx, lureIds, { takenId, activeMove: false });

  const outcome = E.applyMove(st, A.id, at(4, 4), RULE);
  assert.ok(outcome.violated && outcome.penalty > 0, 'taking a tied max is flagged + penalized');

  const totals = E.maintenanceTotals(ctx);
  assert.strictEqual(totals.gsum, 6, 'gsum counts both tied lures');
  assert.strictEqual(totals.resisted, 0, 'no resistance credit on a violating take');
});
/* ---------------- runCellAsync: exact parity with runCell ---------------- */
testAsync('runCellAsync === runCell (perfect default + custom policy/inducer)', async () => {
  // Part 1: default 'perfect' focal policy, one cell, two seeds.
  const defaultCell = ['avoid_hazard', 'harvest_max', 'E2'];
  for (const seed of [7, 11]) {
    const expected = E.runCell(...defaultCell, { seed });
    const actual = await E.runCellAsync(...defaultCell, { seed });
    assert.deepStrictEqual(actual, expected, 'perfect parity seed=' + seed);
  }

  // Part 2: custom focalPolicy + custom inducer. Wrapping both in async
  // functions must not change the result. Each run gets its own policy instance.
  const RULE = 'avoid_sacred';
  const GOAL = 'deliver_to_zone';
  const SEED = 9;
  const env = E.ENV_PRESETS.E3;
  const policyForSync = E.perfectSelfPolicy(RULE, GOAL, SEED, env);
  const policyForAsync = E.perfectSelfPolicy(RULE, GOAL, SEED, env);

  const syncResult = E.runCell(RULE, GOAL, 'E3', {
    seed: SEED,
    focalPolicy: (st, id, ts) => policyForSync(st, ts),
    inducer: E.induceRuleFromMemory,
  });
  const asyncResult = await E.runCellAsync(RULE, GOAL, 'E3', {
    seed: SEED,
    focalPolicy: async (st, id, ts) => policyForAsync(st, ts),
    inducer: async (b) => E.induceRuleFromMemory(b),
  });
  assert.deepStrictEqual(asyncResult, syncResult, 'custom-policy parity');
});
// Async harness: runs the queued testAsync cases one at a time, in registration
// order, then prints the final tally. The first failure exits with status 1.
(async () => {
  // Prefer the stack trace when the thrown value carries one.
  const describe = (err) => (err && err.stack) || err;
  for (const entry of ASYNC_TESTS) {
    try {
      await entry.fn();
      pass(entry.name);
    } catch (err) {
      console.error('FAIL: ' + entry.name + '\n ' + describe(err));
      process.exit(1);
    }
  }
  console.log('ALL PASS ' + n);
})().catch((err) => {
  // Reached only if the harness itself throws outside a test's try/catch.
  console.error('FATAL (async harness):\n ' + ((err && err.stack) || err));
  process.exit(1);
});