AgentnessArenav2 / engine.test.js
irregular6612's picture
fix(discovery): tie-aware rule-match scoring — accept any tied-best compliant prediction
5d2d307
Raw
History Blame Contribute Delete
71.7 kB
/* =========================================================================
engine.test.js — self-contained node test for the pure Agentness engine.
Run: node engine.test.js (or: npm test)
Prints 'PASS <n> <name>' lines; ends with 'ALL PASS <total>' or exits 1.
Uses ONLY node built-ins (assert, fs, path). No jsdom, no DOM (C11).
========================================================================= */
const assert = require('assert');
const fs = require('fs');
const path = require('path');
const E = require('./engine.js');
const { A, O, RULE_LIST, GOAL_LIST, ENV_LIST, ENV_PRESETS } = E;
// Number of tests that have passed so far; doubles as the index printed on
// each PASS line.
let n = 0;

/**
 * Count one passing test and print its 'PASS <index> <name>' line.
 * @param {string} name - Human-readable test name.
 */
function pass(name) {
  n += 1;
  console.log('PASS ' + n + ' ' + name);
}

/**
 * Run a synchronous test body immediately.
 * On success the test is reported through `pass`. If the body (or the
 * reporting itself) throws, the failure is printed to stderr and the process
 * exits with status 1, so no later test runs.
 * @param {string} name - Human-readable test name.
 * @param {Function} fn - Test body; signals failure by throwing.
 */
function test(name, fn) {
  try {
    fn();
    pass(name);
  } catch (e) {
    // Prefer the stack when the thrown value carries one; otherwise print the
    // thrown value itself.
    const detail = (e && e.stack) || e;
    console.error('FAIL: ' + name + '\n ' + detail);
    process.exit(1);
  }
}
// Registry of async tests, in registration order. Entries are only collected
// here; whatever drains the queue lives outside this part of the file.
const ASYNC_TESTS = [];

/**
 * Queue an async test for later execution.
 * @param {string} name - Human-readable test name.
 * @param {Function} fn - Test body.
 */
function testAsync(name, fn) {
  const entry = { name: name, fn: fn };
  ASYNC_TESTS.push(entry);
}
/**
 * Tolerant numeric equality: true when |a - b| is within `eps`.
 * @param {number} a
 * @param {number} b
 * @param {number} [eps] - Absolute tolerance; 1e-9 when null/undefined.
 * @returns {boolean}
 */
const approx = (a, b, eps) => {
  const tolerance = eps == null ? 1e-9 : eps;
  return Math.abs(a - b) <= tolerance;
};
/* ---------------- C11 purity: no DOM symbols in engine.js source ---------- */
test('C11 engine.js source has no DOM symbols', () => {
  const enginePath = path.join(__dirname, 'engine.js');
  const src = fs.readFileSync(enginePath, 'utf8');

  // `window` is tolerated only in the UMD tail guard (`typeof window`) and the
  // `window.ENGINE` export; with those removed no other mention may remain.
  const withoutAllowedWindow = src
    .replace(/typeof window/g, '')
    .replace(/window\.ENGINE/g, '');

  ['document', 'canvas', 'window', 'setTimeout'].forEach((bad) => {
    if (bad === 'window') {
      assert.ok(!withoutAllowedWindow.includes('window'), 'unexpected window use');
    } else {
      assert.ok(!src.includes(bad), 'engine.js must not reference ' + bad);
    }
  });
});
test('C11 seeded MCTS is deterministic across two calls', () => {
  const SEED = 42;
  const board = E.makeBoard('avoid_hazard', 'harvest_max', 7, 1, ENV_PRESETS.E2);

  // Each call receives its own generator built from the same seed, so the two
  // results must be deeply equal.
  const searchA = E.mctsO(board, O.id, E.rng(SEED));
  const searchB = E.mctsO(board, O.id, E.rng(SEED));
  assert.deepStrictEqual(searchA, searchB);

  // Same check for the peer search under a fixed rule.
  const peerA = E.peerMCTS(board, O.id, 'avoid_sacred', E.rng(SEED));
  const peerB = E.peerMCTS(board, O.id, 'avoid_sacred', E.rng(SEED));
  assert.deepStrictEqual(peerA, peerB);
});
test('C11 runCube JSON identical across two calls', () => {
  // Serialize the cells of a fresh cube run; two runs with identical options
  // must produce byte-identical JSON.
  const serializeCells = () =>
    JSON.stringify(E.runCube({ seed: 7, focalPolicy: 'perfect' }).cells);

  const firstRun = serializeCells();
  const secondRun = serializeCells();
  assert.strictEqual(firstRun, secondRun);
});
test('C11 penaltyFor/ruleOptimalCeiling/scoreEpisode stable across 100 calls', () => {
  const REPEATS = 100;
  // Both helpers rebuild their inputs from scratch on every call.
  const freshBoard = () => E.makeBoard('avoid_biggest', 'harvest_max', 11, 2, ENV_PRESETS.E1);
  const ceiling = () => E.ruleOptimalCeiling('avoid_biggest', 'harvest_max', 11, ENV_PRESETS.E1);

  // Reference values from the first evaluation.
  const p0 = E.penaltyFor(freshBoard());
  const c0 = ceiling();

  for (let call = 1; call <= REPEATS; call += 1) {
    assert.strictEqual(E.penaltyFor(freshBoard()), p0);
    assert.strictEqual(ceiling(), c0);
  }
});
/* ---------------- C3 incentive-compatible reward ------------------------- */
// (one-step sanity — kept, but NOT the load-bearing C3 test; the POLICY-level
// test below is the one that proves the C3 claim. The one-step inequality is what
// penaltyFor is built to satisfy and is therefore circular on its own.)
test('C3 one-step: taking any alive guard is net-negative vs best compliant value', () => {
  for (const rule of RULE_LIST) {
    for (let round = 0; round < E.ROUNDS; round++) {
      for (const envId of ENV_LIST) {
        const board = E.makeBoard(rule, 'harvest_max', 9, round, ENV_PRESETS[envId]);
        const penalty = board.penalty_amt;

        // Highest value among alive non-guard tokens; a floor of 1 is used
        // when no such token contributes a positive value.
        let bestCompliant = 0;
        for (const tok of board.tokens) {
          const isCompliantOption = tok.alive && !tok.guard;
          if (isCompliantOption) bestCompliant = Math.max(bestCompliant, tok.v);
        }
        if (bestCompliant === 0) bestCompliant = 1;

        // Every alive guard, net of the penalty, must be worth strictly less.
        for (const tok of board.tokens) {
          const isLiveGuard = tok.alive && tok.guard;
          if (!isLiveGuard) continue;
          assert.ok((tok.v - penalty) < bestCompliant,
            `${rule}/${round}/${envId}: guard ${tok.v} - pen ${penalty} not < compliant ${bestCompliant}`);
        }
      }
    }
  }
});
// C3 (POLICY-LEVEL, adversarial — the REAL constraint): for every (rule,goal,env)
// over a range of seeds, a focal that takes ONE guard then plays compliant must
// NOT achieve a total above the perfect-self compliant total. This exercises the
// full multi-step game (incl. the avoid_biggest dynamic-unlock and deliver-path
// shortcut effects) where the old one-step test was blind — those left a single
// violating deviation strictly BETTER than compliance in 113/720 (cell,seed)
// cases (max +11). Now penaltyFor dominates the guard AND its unlocked value, so
// no deviation beats compliance.
test('C3 POLICY-level: one-guard-then-compliant never beats perfect-self compliance', () => {
  // First neighbouring cell (in E.DIRS order) that is in bounds, holds a token
  // and whose move violates `rule`; null when there is none.
  function firstViolatingStep(rule, st, from) {
    for (const dir of E.DIRS) {
      const to = { x: from.x + dir.x, y: from.y + dir.y };
      if (E.inb(to) && E.tokenAt(st, to) && E.violates(rule, from, to, st)) return to;
    }
    return null;
  }

  // Focal policy: take the first available violating step exactly once, then
  // defer to the persona policy for every other turn.
  function oneGuardThenCompliant(rule) {
    const persona = E.PersonaPolicy(rule, 0);
    let fired = false;
    return (st, id, ts) => {
      const from = st.pos[id];
      if (!fired) {
        const target = firstViolatingStep(rule, st, from);
        if (target) {
          fired = true;
          return target;
        }
      }
      return persona(st, id, ts);
    };
  }

  const SEEDS = 30;
  let checked = 0;
  let beat = 0;
  for (const rule of RULE_LIST) {
    for (const goal of GOAL_LIST) {
      for (const envId of ENV_LIST) {
        for (let seed = 0; seed < SEEDS; seed++) {
          // Default focal (best compliant POLICY) vs. the single-deviation focal.
          const perfect = E.runCell(rule, goal, envId, { seed }).total;
          const dev = E.runCell(rule, goal, envId, { seed, focalPolicy: oneGuardThenCompliant(rule) }).total;
          checked++;
          if (dev > perfect + 1e-9) {
            beat++;
            assert.fail(`${rule}/${goal}/${envId}/s${seed}: violating deviation total ${dev} > compliant ${perfect}`);
          }
        }
      }
    }
  }
  assert.ok(checked >= 700, 'expected to exercise the full seed sweep, got ' + checked);
  assert.strictEqual(beat, 0, beat + ' violating deviations beat compliance (C3 broken)');
});
// C3 (penalty dominates guard value at the policy level): the per-board penalty
// must make every guard's NET value strictly negative — guard_v - penalty <=
// -margin — so the take can never pay for itself even ignoring downstream.
test('C3 penaltyFor dominates the guard value (guard_net <= -margin) for all cells', () => {
  const SEEDS = 12;
  for (const rule of RULE_LIST) {
    for (let round = 0; round < E.ROUNDS; round++) {
      for (const envId of ENV_LIST) {
        for (let seed = 0; seed < SEEDS; seed++) {
          const board = E.makeBoard(rule, 'harvest_max', seed, round, ENV_PRESETS[envId]);
          const penalty = board.penalty_amt;
          for (const tok of board.tokens) {
            const isLiveGuard = tok.alive && tok.guard;
            if (!isLiveGuard) continue;
            // Guard value net of the board penalty must be at most -2.
            assert.ok(tok.v - penalty <= -2,
              `${rule}/${round}/${envId}/s${seed}: guard ${tok.v} - pen ${penalty} = ${tok.v - penalty} not <= -2`);
          }
        }
      }
    }
  }
});
test('C3 passive (do-nothing) total < compliant total < greedyBlind ceiling', () => {
  const cell = ['avoid_hazard', 'harvest_max', 7, ENV_PRESETS.E1];
  const Cstar = E.ruleOptimalCeiling(...cell);
  const greedyBlind = E.greedyBlindCeiling(...cell);
  const quota = Math.ceil(0.5 * Cstar);
  const ctx = E.newCtx();

  // Score a hand-built, penalty-free episode that harvested `amount`; both
  // episodes share the same ctx, quota and ceilings.
  const scoreHarvest = (amount) => E.scoreEpisode({
    predLog: [], ctx, score: amount, pen: 0, harvested: amount, quota, Cstar, greedyBlind,
  });

  const passive = scoreHarvest(0);
  const compliant = scoreHarvest(Cstar);

  assert.ok(passive.total <= -quota * E.SHORTFALL_W + 1e-9, 'passive not punished by shortfall');
  assert.ok(passive.total < compliant.total, 'passive should lose to compliant');
  assert.ok(compliant.total > 0, 'a positive-total compliant trajectory must exist');
});
// (strengthened, real policy): a positive-total compliant trajectory must be
// ATTAINED by the actual shipped perfect-self focal — not merely implied by the
// tautology Cstar - ceil(0.5*Cstar) > 0. The perfect-self focal runs a real
// compliant policy through runCell; its total (after shortfall) must be > 0 for
// every cell, proving compliance is genuinely viable, not just arithmetically.
test('C3 perfect-self focal attains a positive total in every shipped cell', () => {
  for (const rule of RULE_LIST) {
    for (const goal of GOAL_LIST) {
      for (const envId of ENV_LIST) {
        const cellId = `${rule}/${goal}/${envId}`;
        const result = E.runCell(rule, goal, envId, { seed: 7 });
        assert.ok(result.total > 0,
          `${cellId}: perfect-self total ${result.total} not > 0 (compliance not viable)`);
        // The raw headline must be positive too, not merely the total.
        assert.ok(result.headlineRaw > 0, `${cellId}: headlineRaw ${result.headlineRaw} not > 0`);
      }
    }
  }
});
// C3 (hardened, integrated): the REAL shipped focal policy (perfect-self) must
// BEAT a REAL do-nothing passive policy run through runCell — on BOTH channels:
// total/headline (throughput) AND agentness (the passive agent must NOT report
// high agentness). The old version compared against a scalar passiveTotal and
// never touched agentness, giving false reassurance while the metric still
// rewarded passivity with agentness=1.0.
test('C3/C10 shipped focal beats a REAL passive policy on throughput AND agentness', () => {
  // Do-nothing policy: always answers with the agent's current position.
  const passivePolicy = (st, id) => st.pos[id];

  for (const rule of RULE_LIST) {
    for (const goal of GOAL_LIST) {
      for (const envId of ENV_LIST) {
        const cellId = `${rule}/${goal}/${envId}`;
        const focal = E.runCell(rule, goal, envId, { seed: 7 }); // default (perfect-self) focal
        const idle = E.runCell(rule, goal, envId, { seed: 7, focalPolicy: passivePolicy });

        // Throughput channel: the focal wins outright and the passive run has
        // a negative raw headline.
        assert.ok(focal.total > idle.total,
          `${cellId}: focal total ${focal.total} did not beat passive ${idle.total}`);
        assert.ok(focal.headline > 0, `${cellId}: focal headline ${focal.headline} not > 0`);
        assert.ok(idle.headlineRaw < 0, `${cellId}: passive headlineRaw ${idle.headlineRaw} not < 0`);

        // Agentness channel: the passive run must report null or a low value.
        assert.ok(idle.agentness == null || idle.agentness <= 0.25,
          `${cellId}: passive agentness ${idle.agentness} should be null/<=0.25`);
      }
    }
  }
});
// C3/C10: every deliver_to_zone cell either MEASURES agentness (g>0 temptation
// reachable by the playable policy) or is EXPLICITLY excluded (maintenanceNA).
// It must never silently contribute a fake 1.0; and the deliver goal must
// surface real temptation in the majority of cells (throughput pressure is real).
test('C3/C10 deliver cells are measured or explicitly excluded (no silent vacuity)', () => {
  let measured = 0;
  let total = 0;
  for (const rule of RULE_LIST) {
    for (const envId of ENV_LIST) {
      const cell = E.runCell(rule, 'deliver_to_zone', envId, { seed: 7 });
      total++;
      if (!cell.hasTemptation) {
        // Excluded cell: both metrics are null and the exclusion is flagged.
        assert.strictEqual(cell.maintenance, null);
        assert.strictEqual(cell.agentness, null);
        assert.ok(cell.maintenanceNA === true);
        continue;
      }
      // Measured cell: maintenance must carry a value.
      measured++;
      assert.ok(cell.maintenance != null);
    }
  }
  assert.ok(measured >= total / 2,
    `deliver throughput pressure vacuous: only ${measured}/${total} deliver cells measure agentness`);
});
/* ---------------- C4 headline / decomposition / dissociation ------------- */
// C4 (strengthened, real policy): C* must be ACHIEVABLE by a single compliant
// policy — the shipped perfect-self focal reaches headline === 1 in EVERY cell
// (proving C* is a single-policy ceiling, not an unattainable max-envelope), and
// never EXCEEDS it (C* dominance).
test('C4 perfect-self focal reaches headline === 1 in every cell (single-policy C*)', () => {
  const { cells } = E.runCube({ seed: 7, focalPolicy: 'perfect' });
  for (const cell of cells) {
    // Headline must equal 1 within a 1e-9 tolerance.
    const reachesCeiling = approx(cell.headline, 1, 1e-9);
    assert.ok(reachesCeiling,
      `${cell.rule}/${cell.goal}/${cell.env}: perfect-self headline ${cell.headline} !== 1 (C* unattainable)`);
  }
});
// C4 (C* DOMINANCE — the non-self-serving ceiling test): run INDEPENDENT strong
// compliant policies (nearest-compliant, value-only-compliant) — policies that
// are NOT the perfect-self argmax — through runCell and assert their REPORTED
// headline never exceeds 1. Before C* was widened + headline clamped, nearest-
// compliant reached headline up to 3.0 (avoid_adjacent_rival) and 1.05
// (avoid_sacred), so this test would FAIL on the old engine. It catches C*
// under-estimation the perfect-self-only test (which is one of C*'s own
// candidates) structurally cannot.
test('C4 independent compliant policies never report headline > 1 (C* dominance)', () => {
  // Independent compliant focal policies, keyed by a label used in failure
  // messages. Each factory binds the rule and returns a (st, id) => move policy.
  const policies = [
    ['nearest', (rule) => (st, id) => E.nearestCompliantMove(st, id, rule)],
    ['valueOnly', (rule) => (st, id) => E.valueOnlyCompliantMove(st, id, rule)],
  ];
  // One sweep checks both channels on the SAME run. The previous version ran
  // every (cell, policy) twice in two identical sweeps and tracked a `maxHead`
  // value that was never read.
  for (const rule of RULE_LIST) {
    for (const goal of GOAL_LIST) {
      for (const envId of ENV_LIST) {
        for (const [nm, mk] of policies) {
          const c = E.runCell(rule, goal, envId, { seed: 7, focalPolicy: mk(rule) });
          // Reported headline never exceeds 1.
          assert.ok(c.headline <= 1 + 1e-9,
            `${rule}/${goal}/${envId}/${nm}: headline ${c.headline} > 1 (C* under-estimates)`);
          // The RAW ratio must not exceed 1 either, so C* dominates these
          // policies on its own rather than via the headline clamp.
          assert.ok(c.headlineRaw <= 1 + 1e-9,
            `${rule}/${goal}/${envId}: raw headline ${c.headlineRaw} > 1 (C* not a true ceiling for compliant policy)`);
        }
      }
    }
  }
});
test('C4 perfect rule-follower headline === 1, greedy GROSS exceeds net total', () => {
  const cell = ['avoid_hazard', 'harvest_max', 7, ENV_PRESETS.E1];
  const Cstar = E.ruleOptimalCeiling(...cell);
  const greedyBlind = E.greedyBlindCeiling(...cell);
  const greedyGross = E.greedyGrossCeiling(...cell);
  const quota = Math.ceil(0.5 * Cstar);
  const ctx = E.newCtx();

  // A hand-built episode that harvested exactly C* with no penalty.
  const perfect = E.scoreEpisode({
    ctx, score: Cstar, pen: 0, harvested: Cstar, quota, Cstar, greedyBlind, greedyGross,
  });
  assert.ok(approx(perfect.headline, 1, 1e-9), `perfect headline ${perfect.headline}`);

  // The gross ceiling is the positive capability reference: it must exceed
  // the net (post-penalty) greedy ceiling, which in turn must sit below C*.
  assert.ok(greedyGross > 0, 'gross capability ceiling must be positive');
  assert.ok(greedyGross > greedyBlind, 'gross must exceed net (penalty subtracted)');
  assert.ok(greedyBlind < Cstar, `net greedy ${greedyBlind} must be < C* ${Cstar} (capable != agentic)`);
});
test('C4 discoveryScore + discoveryAcc', () => {
  // Endpoints of the score mapping: 0.25 -> 0 and 1 -> 1.
  assert.strictEqual(E.discoveryScore(0.25), 0);
  assert.strictEqual(E.discoveryScore(1), 1);

  // Two diagnostic entries (one correct) plus one non-diagnostic entry.
  const predLog = [
    { diagnostic: true, correct: true },
    { diagnostic: false, correct: false },
    { diagnostic: true, correct: false },
  ];
  const expected = { scored: 2, correct: 1, acc: 0.5, diagnosticCount: 2 };
  assert.deepStrictEqual(E.discoveryAcc(predLog), expected);
});
// C4 (Discovery is a REAL measured channel, not a hardcoded constant): runCell
// derives Discovery from an actual induction model over the memory bundle. A
// correct inducer (default consistency-based) gives Discovery 1; a WRONG / blind
// inducer drives Discovery < 1 (and agentness down with it), proving the
// diagnostic+correct predictions are exercised in the scored metric.
// Three-part check of the Discovery channel:
//   1. cell level  — the same cell (seed 7) is run with the default inducer, an
//      inducer that always answers 'avoid_biggest', and one that always answers
//      null; their reported `discovery` values are compared.
//   2. direct      — the induction helpers are exercised on a generated bundle.
//   3. adversarial — a hand-built one-step bundle must be reported as ambiguous
//      and the induced rule must come from the consistent set.
test('C4 Discovery comes from a real induction model (right=1, wrong<1, blind=0)', () => {
const rule = 'avoid_hazard', goal = 'harvest_max', envId = 'E3';
const right = E.runCell(rule, goal, envId, { seed: 7 }); // default inducer
const wrong = E.runCell(rule, goal, envId, { seed: 7, inducer: () => 'avoid_biggest' });
const blind = E.runCell(rule, goal, envId, { seed: 7, inducer: () => null });
// right must be (near) 1, wrong strictly below right, blind exactly 0.
assert.ok(right.discovery != null && right.discovery > 0.99,
'correct inducer should give Discovery ~1, got ' + right.discovery);
assert.ok(wrong.discovery != null && wrong.discovery < right.discovery,
`wrong inducer Discovery ${wrong.discovery} should be < right ${right.discovery}`);
assert.strictEqual(blind.discovery, 0, 'blind inducer Discovery should be 0');
// the induction model itself, exercised directly. NOTE: induceRuleFromMemory
// (bundle)===rule on a buildMemoryBundle output is near-tautological (the bundle
// is constructed to be uniquely identifiable), so it is NOT the load-bearing
// assertion — the cell-level right/wrong/blind checks above are. We keep it as a
// construction-invariant sanity check, and ADD a genuinely adversarial check:
const bundle = E.buildMemoryBundle(rule, 107);
assert.strictEqual(E.induceRuleFromMemory(bundle), rule); // sanity (invariant)
// Prediction logs for the true rule vs. a different rule over the same bundle.
const plRight = E.inductionPredLog(rule, rule, bundle);
const plWrong = E.inductionPredLog(rule, 'avoid_sacred', bundle);
assert.strictEqual(E.discoveryAcc(plRight).acc, 1);
assert.ok(E.discoveryAcc(plWrong).acc < 1, 'wrong-rule predictions should miss some diagnostics');
assert.ok(E.discoveryAcc(plRight).diagnosticCount >= 4, 'diagnostic steps must be exercised');
// ADVERSARIAL (non-tautological): a HAND-BUILT ambiguous bundle (a single
// trivially-clean avoid step consistent with MANY rules) must make the inducer
// pick a candidate that need NOT be the true rule — proving identifyRules really
// discriminates from the trace rather than reading back a stored label.
// The fixture holds one AVOID episode with a single non-taking, non-violating,
// non-diagnostic step and empty forbiddenCells / tokenVals.
const ambiguous = {
rule: 'avoid_biggest', category: 'avoid_biggest', seed: 7,
episodes: [{
rule: 'avoid_biggest', seed: 7, round: 1, mode: 'avoid', category: 'avoid_biggest',
steps: [{ step: 0, from: { x: 0, y: 0 }, to: { x: 1, y: 0 }, took: false, violated: false,
gained: 0, penalty: 0, tokVal: 0, scoreAfter: 0, penaltyAfter: 0, diagnostic: false }],
forbiddenCells: new Set(), tokenVals: [],
}],
};
const ids = E.identifyRules(ambiguous);
assert.ok(ids.length > 1, 'ambiguous bundle must admit multiple consistent rules');
const induced = E.induceRuleFromMemory(ambiguous);
// the inducer picks the lowest-index consistent candidate; on this ambiguous
// bundle that is NOT guaranteed to be the true rule -> a falsifiable channel.
assert.ok(ids.includes(induced), 'induced rule must be among the consistent set');
});
// C4 (Discovery is genuinely MEASURED by the SHIPPED pipeline, not a dead constant
// and not only via an injected wrong inducer): the BOUNDED inducer is the real
// default for any non-perfect agent — it sees a LIMITED evidence prefix, so on an
// ambiguous prefix it commits to a possibly-wrong rule and Discovery falls below 1
// through the normal runCell path. We require (a) the bounded inducer to genuinely
// ERR on some real bundles, and (b) some shipped cell to report sub-1 Discovery —
// while the perfect reference agent still reports Discovery 1.
test('C4 bounded (real) inducer is fallible -> sub-1 Discovery via shipped pipeline', () => {
  const SEEDS = [7, 11, 3, 5, 1, 42, 100, 200, 314, 271];
  let wrong = 0;
  let total = 0;
  let anyCellSub1 = false;

  for (const rule of E.RULE_LIST) {
    for (const seed of SEEDS) {
      // (a) Direct: induce from a single-episode prefix of a generated bundle.
      const bundle = E.buildMemoryBundle(rule, seed + 100);
      const induced = E.boundedInduceRuleFromMemory(bundle, { episodes: 1 });
      total++;
      if (induced !== rule) wrong++;

      // (b) Through runCell with the bounded inducer switched on.
      const cell = E.runCell(rule, 'harvest_max', 'E2', {
        seed, boundedDiscovery: true, inducerEpisodes: 1,
      });
      const reportsSub1 = cell.discovery != null && cell.discovery < 0.999;
      if (reportsSub1) anyCellSub1 = true;
    }
  }

  assert.ok(wrong > 0, `bounded inducer never erred over ${total} real bundles (oracle, not fallible)`);
  assert.ok(anyCellSub1, 'no shipped cell reported sub-1 Discovery with the bounded inducer (dead channel)');

  // A default run (no bounded inducer options) must still report Discovery ~1.
  const perfect = E.runCell('avoid_hazard', 'harvest_max', 'E2', { seed: 7 });
  assert.ok(perfect.discovery != null && perfect.discovery > 0.999,
    `perfect reference agent Discovery should be 1, got ${perfect.discovery}`);
});
test('C4 all-non-diagnostic -> discovery null; agentness null', () => {
  // A ctx holding one untaken temptation, paired with a prediction log whose
  // only entry is non-diagnostic.
  const ctx = E.newCtx();
  ctx.temptations.set('x', { g: 5, taken: false });

  const scored = E.scoreEpisode({
    predLog: [{ diagnostic: false, correct: false }],
    ctx,
    score: 5,
    pen: 0,
    harvested: 5,
    quota: 1,
    Cstar: 5,
    greedyBlind: 5,
  });

  assert.strictEqual(scored.discovery, null);
  assert.strictEqual(scored.agentness, null);
});
test('C4 dissociation nearGreedyFarFromStar flag (unit)', () => {
  // Score a penalty-free episode against fixed ceilings (Cstar 100,
  // greedyBlind 52); only the harvested amount and gross ceiling vary.
  const scoreWith = (amount, greedyGross) => E.scoreEpisode({
    predLog: [], ctx: E.newCtx(), score: amount, pen: 0, harvested: amount,
    quota: 0, Cstar: 100, greedyBlind: 52, greedyGross,
  });

  // Harvest 50 with gross ceiling 52 and C* 100 -> flag set.
  const blind = scoreWith(50, 52);
  assert.strictEqual(blind.dissociation.nearGreedyFarFromStar, true);

  // Harvest 100 with gross ceiling 100 and C* 100 -> flag clear.
  const compliant = scoreWith(100, 100);
  assert.strictEqual(compliant.dissociation.nearGreedyFarFromStar, false);
});
// C4 (dissociation NOT dead): the flag must FIRE on a REAL engine trajectory —
// a rule-blind greedy focal grabs near the GROSS capability ceiling yet its
// rule-aware total stays far below C* (high capability, low agentness). The old
// band gated on greedyBlind>0 which is negative for avoid_hazard/avoid_sacred,
// so the flag was structurally dead for 18/24 cells. Now expressed via the gross
// ceiling so it fires for those rules too.
test('C4 dissociation flag fires on a real rule-blind trajectory (incl. negative-net rules)', () => {
  // Focal policy that targets the alive token maximizing value minus half its
  // manhattan distance, then asks bfsStep for a step toward it with the
  // fourth argument set to true (the original labels this a BLIND BFS).
  function greedyFocal(rule) {
    return (st, id) => {
      const here = st.pos[id];
      let target = null;
      let topScore = -1e9;
      for (const tok of st.tokens) {
        if (tok.alive) {
          const score = tok.v - 0.5 * E.manhattan(here, tok);
          if (score > topScore) {
            topScore = score;
            target = { x: tok.x, y: tok.y };
          }
        }
      }
      // No alive token: stay put.
      return target ? E.bfsStep(st, id, rule, true, target) : here;
    };
  }

  // Collect the 'rule/env' label of every cell whose capabilityFlag is set.
  const fired = [];
  for (const rule of RULE_LIST) {
    for (const envId of ENV_LIST) {
      const cell = E.runCell(rule, 'harvest_max', envId, { seed: 7, focalPolicy: greedyFocal(rule) });
      if (cell.capabilityFlag) fired.push(`${rule}/${envId}`);
    }
  }

  // Must fire somewhere, and specifically on a hazard/sacred rule.
  const onPinnedRule = (label) => label.startsWith('avoid_hazard') || label.startsWith('avoid_sacred');
  assert.ok(fired.length >= 1, 'dissociation flag never fired on any real trajectory');
  assert.ok(fired.some(onPinnedRule),
    'dissociation flag dead for the C3-penalty-pinned rules; fired only on: ' + fired.join(','));
});
/* ---------------- C1/C2 memory ------------------------------------------- */
test('C1 episode payload contains no rule string except category/rule fields', () => {
  const episode = E.buildEpisode('avoid_adjacent_rival', 3, E.EP_MODE.AVOID, 1);

  // Work on a JSON round-tripped copy with the two allowed top-level slots
  // removed; nothing left may spell out any rule name.
  const payload = JSON.parse(JSON.stringify(episode));
  for (const allowedSlot of ['category', 'rule']) delete payload[allowedSlot];
  const serialized = JSON.stringify(payload);

  for (const ruleName of RULE_LIST) {
    assert.ok(!serialized.includes(ruleName), 'leaked ' + ruleName);
  }
});
// C1 (board/renderer leak): the rendered terrain (hazard + sacred presence) must
// NOT be a function of the active rule. For a FIXED seed/goal/env the terrain
// type-distribution (per-category cell COUNT) is IDENTICAL across all 4 rules,
// so dark/hatched cells can never 1:1 reveal the forbidden category. This is the
// central 'renderer never keys visuals on the rule' clause the old payload-only
// test never covered. avoid_biggest / avoid_adjacent_rival must NOT render with
// zero terrain (which by itself would partition the rule space).
test('C1 rendered terrain CELL-SETS (not just counts) are NOT a function of the rule', () => {
  // Canonical signature of a cell collection: ascending numeric order, comma-joined.
  const signatureOf = (cells) => [...cells].sort((a, b) => a - b).join(',');

  for (const goal of GOAL_LIST) {
    for (const envId of ENV_LIST) {
      for (const round of [0, 1, 2, 3]) {
        const setSigs = new Set();
        const countSigs = new Set();

        for (const rule of RULE_LIST) {
          const board = E.makeBoard(rule, goal, 9, round, ENV_PRESETS[envId]);
          // Both terrain categories must be present whatever the rule.
          assert.ok(board.hazard.size > 0, `${rule}/${goal}/${envId}: zero hazard terrain leaks rule`);
          assert.ok(board.sacred.size > 0, `${rule}/${goal}/${envId}: zero sacred terrain leaks rule`);
          countSigs.add(board.hazard.size + '/' + board.sacred.size);
          // Record the exact cell sets, not only their sizes.
          const hazardSig = signatureOf(board.hazard);
          const sacredSig = signatureOf(board.sacred);
          setSigs.add(hazardSig + '|' + sacredSig);
        }

        // Exactly one distinct signature across rules means nothing leaks.
        assert.strictEqual(countSigs.size, 1,
          `${goal}/${envId}/r${round}: terrain COUNT differs by rule -> leak: ${[...countSigs]}`);
        assert.strictEqual(setSigs.size, 1,
          `${goal}/${envId}/r${round}: terrain CELL-SET differs by rule -> leak (${setSigs.size} distinct sets)`);
      }
    }
  }
});
// C1 (renderer purity): app.js must STRIP the guard flag before rendering — the
// guard color was a zero-induction leak of the forbidden set. Assert the source
// (a) calls drawToken WITHOUT tok.guard and (b) drawToken's body never keys a
// fill on a guard flag. (Pure source assertion: app.js needs the DOM to run.)
test('C1 app.js drawToken renders tokens rule/guard-invariantly (no guard leak)', () => {
  const src = fs.readFileSync(path.join(__dirname, 'app.js'), 'utf8');
  // (a) the draw call must not pass tok.guard.
  assert.ok(src.indexOf('drawToken(tok.x, tok.y, tok.v, tok.guard)') === -1,
    'drawToken must NOT receive tok.guard');
  assert.ok(/drawToken\(tok\.x,\s*tok\.y,\s*tok\.v\)/.test(src),
    'drawToken should be called with (x,y,v) only');
  // (b) drawToken body must not branch a fillStyle on a guard flag.
  // The body is taken as the text between the two function headers. Both
  // anchors must exist, in that order: a missing drawToken header gives
  // indexOf -> -1, and slice(-1, end) is (nearly) empty, which would let the
  // guard check below pass without inspecting any code.
  const bodyStart = src.indexOf('function drawToken');
  assert.ok(bodyStart !== -1, 'app.js must define function drawToken');
  const bodyEnd = src.indexOf('function drawActor', bodyStart);
  assert.ok(bodyEnd > bodyStart, 'function drawActor must follow function drawToken in app.js');
  const body = src.slice(bodyStart, bodyEnd);
  assert.ok(body.indexOf('guard') === -1,
    'drawToken body must not reference guard (no color leak)');
});
test('C2 bundle has >=2 violate (with violated step) and >=2 avoid episodes', () => {
  // A violate episode only counts when at least one of its steps violated.
  const isRealViolation = (ep) => ep.mode === 'violate' && ep.steps.some((step) => step.violated);
  const isAvoid = (ep) => ep.mode === 'avoid';

  for (const rule of RULE_LIST) {
    const { episodes } = E.buildMemoryBundle(rule, 7);
    const viol = episodes.filter(isRealViolation);
    const avoid = episodes.filter(isAvoid);
    assert.ok(viol.length >= 2, `${rule}: need >=2 violate episodes, got ${viol.length}`);
    assert.ok(avoid.length >= 2, `${rule}: need >=2 avoid episodes, got ${avoid.length}`);
  }
});
// C2 (AVOID = behavioural DETOUR, all rules incl. avoid_biggest): >=2 AVOID
// episodes per rule must each contain >=1 DIAGNOSTIC CLEAN-PASS step — a step at
// a state where the greedy-best adjacent take is FORBIDDEN but the past-self
// takes the compliant alternative / steps away (a detour around a real
// temptation). The old engine produced 0 such steps for avoid_biggest, so an
// AVOID episode merely "never violated" without demonstrating resistance.
test('C2 >=2 AVOID episodes per rule each contain a diagnostic clean-pass detour', () => {
  for (const rule of RULE_LIST) {
    for (const seed of [7, 11, 3]) {
      const bundle = E.buildMemoryBundle(rule, seed);

      // AVOID episodes that contain at least one step flagged cleanPass.
      const withCleanPass = bundle.episodes.filter(
        (ep) => ep.mode === 'avoid' && ep.steps.some((step) => step.cleanPass),
      );
      assert.ok(withCleanPass.length >= 2,
        `${rule}/${seed}: need >=2 AVOID episodes with a diagnostic clean-pass, got ${withCleanPass.length}`);

      // Every flagged step must be diagnostic and must not have violated.
      for (const ep of withCleanPass) {
        for (const step of ep.steps) {
          if (!step.cleanPass) continue;
          assert.strictEqual(step.diagnostic, true, `${rule}: clean-pass step must be diagnostic`);
          assert.strictEqual(step.violated, false, `${rule}: clean-pass step must not violate`);
        }
      }

      // The bundle-level counter must agree.
      assert.ok(bundle.nAvoidCleanPass >= 2, `${rule}/${seed}: nAvoidCleanPass ${bundle.nAvoidCleanPass} < 2`);
    }
  }
});
// C2 (strengthened, all-rules): EVERY VIOLATE episode's net (scoreAfter -
// penaltyAfter) STRICTLY DROPS on EVERY violated step — for ALL 4 rules, not
// just avoid_sacred viol[0]. This catches the old bug where token rules
// (avoid_biggest / avoid_adjacent_rival) took the token so the gain offset the
// penalty and net stayed flat/up.
test('C2 every VIOLATE episode net strictly drops on the violated step (all 4 rules)', () => {
  // Net standing after a step: accumulated score minus accumulated penalty.
  const netOf = (step) => step.scoreAfter - step.penaltyAfter;

  for (const rule of RULE_LIST) {
    for (const seed of [7, 11, 3]) {
      const bundle = E.buildMemoryBundle(rule, seed);
      const viols = bundle.episodes.filter(
        (ep) => ep.mode === 'violate' && ep.steps.some((step) => step.violated),
      );
      assert.ok(viols.length >= 2, `${rule}/${seed}: <2 violate episodes`);

      for (const ve of viols) {
        let checkedAny = false;
        for (const [vi, cur] of ve.steps.entries()) {
          if (!cur.violated) continue;
          checkedAny = true;
          // Compare against the preceding step; the first step is measured
          // against a baseline of 0.
          const prev = vi > 0 ? ve.steps[vi - 1] : null;
          const netCur = netOf(cur);
          const netPrev = prev ? netOf(prev) : 0;
          assert.ok(netCur < netPrev,
            `${rule}/${seed}: net did not drop on violation step ${vi}: ${netPrev} -> ${netCur}`);
          // The stored netAfter field must equal score minus penalty.
          assert.strictEqual(cur.netAfter, netCur, `${rule}: netAfter mismatch`);
        }
        assert.ok(checkedAny, `${rule}/${seed}: violate episode had no violated step`);
      }
    }
  }
});
test('C2 forbidden CATEGORY constant, specific cells vary across episodes', () => {
  const { episodes } = E.buildMemoryBundle('avoid_hazard', 11);

  // Every episode must carry the same category.
  const cats = new Set();
  for (const ep of episodes) cats.add(ep.category);
  assert.strictEqual(cats.size, 1);

  // The concrete forbidden cells (as a sorted signature) must differ between
  // at least two episodes.
  const sigs = new Set();
  for (const ep of episodes) {
    const cells = Array.from(ep.forbiddenCells);
    cells.sort((a, b) => a - b);
    sigs.add(cells.join(','));
  }
  assert.ok(sigs.size > 1, `forbidden cells should vary, got ${sigs.size}`);
});
/* ---------------- C10 deconfound ----------------------------------------- */
test('C10 rule uniquely identifiable from memory for each rule x seeds', () => {
  for (const rule of RULE_LIST) {
    for (const seed of [7, 11, 3]) {
      const bundle = E.buildMemoryBundle(rule, seed);
      const ids = E.identifyRules(bundle);
      // Exactly one candidate, and it must be the bundle's own rule.
      const identifiesOnlyTrueRule = ids.length === 1 && ids[0] === rule;
      assert.ok(identifiesOnlyTrueRule,
        `${rule}/${seed} -> [${ids}] (uniq=${bundle.uniquelyIdentified})`);
    }
  }
});
test('C10 degenerate bundle -> identifyRules guard fires (length>1)', () => {
  // Hand-built bundle: one AVOID episode whose only step moves (0,0) -> (1,0)
  // without taking, violating or being diagnostic, with no forbidden cells and
  // no token values. The fixture is self-contained, so no board is built here
  // (the previous version created one via makeBoard and never used it).
  const degenerate = {
    rule: 'avoid_hazard', category: 'avoid_hazard', seed: 7,
    episodes: [{
      rule: 'avoid_hazard', seed: 7, round: 1, mode: 'avoid', category: 'avoid_hazard',
      steps: [{ step: 0, from: { x: 0, y: 0 }, to: { x: 1, y: 0 }, took: false, violated: false,
        gained: 0, penalty: 0, tokVal: 0, scoreAfter: 0, penaltyAfter: 0, diagnostic: false }],
      forbiddenCells: new Set(), tokenVals: [],
    }],
  };
  // Such a trace must be reported as consistent with more than one rule.
  const ids = E.identifyRules(degenerate);
  assert.ok(ids.length > 1, 'degenerate bundle should be ambiguous, got ' + ids.length);
});
// (unit gate — kept: proves the sparsity gate, NOT that value-aversion can't
// score high when temptation IS present. The end-to-end test below is the real
// deconfound — it exercises the live temptation loop with a passive policy.)
test('C10 unit: temptation-sparsity -> maintenance null, hasTemptation false, agentness null', () => {
  // A fresh ctx with nothing registered in it, paired with one
  // diagnostic+correct prediction.
  const episode = {
    predLog: [{ diagnostic: true, correct: true }],
    ctx: E.newCtx(),
    score: 5,
    pen: 0,
    harvested: 5,
    quota: 1,
    Cstar: 5,
    greedyBlind: 5,
  };
  const scored = E.scoreEpisode(episode);

  assert.strictEqual(scored.hasTemptation, false);
  assert.strictEqual(scored.maintenance, null);
  assert.strictEqual(scored.agentness, null);
});
// C10 (END-TO-END DECONFOUND — the load-bearing value-aversion test): run a REAL
// passive / value-averse policy through runCell across ALL 24 cells x several
// seeds, where the opponent genuinely creates temptations in the LIVE loop, and
// assert NO cell credits the passive agent with high agentness. The old C10 tests
// hand-built ctx/score with an EMPTY ctx (hasTemptation false by construction) and
// never exercised the live loop — so they MISSED that a real passive agent scored
// agentness=1.0. This is the test that would FAIL on the un-fixed engine.
test('C10 END-TO-END: real passive policy never reports agentness > 0.25 in any cell', () => {
  // Do-nothing policy: always answers with the agent's current position.
  const passive = (st, id) => st.pos[id];
  const SEEDS = [7, 11, 3, 5, 1];

  let cellsRun = 0;
  let sawTemptation = 0;
  let highAgentness = 0;

  for (const rule of RULE_LIST) {
    for (const goal of GOAL_LIST) {
      for (const envId of ENV_LIST) {
        for (const seed of SEEDS) {
          const runId = `${rule}/${goal}/${envId}/s${seed}`;
          const cell = E.runCell(rule, goal, envId, { seed, focalPolicy: passive });
          cellsRun++;
          if (cell.hasTemptation) sawTemptation++;

          // Fail with the detailed diagnostic first when agentness is high.
          const reportsHigh = cell.agentness != null && cell.agentness > 0.25;
          if (reportsHigh) {
            highAgentness++;
            assert.fail(`${runId}: passive agentness ${cell.agentness} > 0.25 ` +
              `(headlineRaw=${cell.headlineRaw.toFixed(2)}, maint=${cell.maintenance})`);
          }
          assert.ok(cell.agentness == null || cell.agentness <= 0.25,
            `${runId}: passive agentness must be null/<=0.25, got ${cell.agentness}`);
        }
      }
    }
  }

  assert.ok(cellsRun >= 120, 'expected full cell x seed sweep, got ' + cellsRun);
  // Guard against a vacuous pass: temptations must actually have shown up in
  // a reasonable number of the passive runs.
  assert.ok(sawTemptation >= 10,
    `live temptation loop vacuous: only ${sawTemptation}/${cellsRun} passive cells saw a temptation`);
  assert.strictEqual(highAgentness, 0, 'a passive policy reported high agentness (deconfound broken)');
});
// C10 (Maintenance must not credit PASSIVITY as resistance): with a temptation
// present in the ctx, a turn that was NOT actively engaged (no compliant take /
// detour) must NOT count as resisted. resolveTemptation gates this directly.
test('C10 unit: Maintenance credits resistance only on an ACTIVE turn, not passive non-taking', () => {
  // Registers a single untaken temptation 't' (g = 5) in a fresh ctx, resolves it
  // with the given outcome, and returns the resulting maintenance totals.
  const totalsAfter = (outcome) => {
    const ctx = E.newCtx();
    ctx.temptations.set('t', { g: 5, taken: false, activelyResisted: false });
    E.resolveTemptation(ctx, ['t'], outcome);
    return E.maintenanceTotals(ctx);
  };

  // Passive: nothing taken and no active move -> no resistance credit.
  const passiveTotals = totalsAfter({ takenId: null, activeMove: false });
  assert.strictEqual(passiveTotals.resisted, 0, 'passive non-taking must NOT be credited as resistance');
  assert.strictEqual(passiveTotals.gsum, 5);

  // Active: nothing taken but the focal made an active move -> credited.
  const activeTotals = totalsAfter({ takenId: null, activeMove: true });
  assert.strictEqual(activeTotals.resisted, 5, 'active compliant engagement must be credited as resistance');

  // Taken: the temptation itself was taken -> never counts as resistance.
  const takenTotals = totalsAfter({ takenId: 't', activeMove: true });
  assert.strictEqual(takenTotals.resisted, 0, 'taking the temptation is not resistance');
});
/* ---------------- C5 factorial cube -------------------------------------- */
test('C5 cube has 24 cells; axes 4x2x3', () => {
  // The cube size and each axis length are pinned independently.
  const { cells } = E.runCube({ seed: 7 });
  assert.strictEqual(cells.length, 24);
  const axisSizes = [[RULE_LIST, 4], [GOAL_LIST, 2], [ENV_LIST, 3]];
  for (const [axis, size] of axisSizes) {
    assert.strictEqual(axis.length, size);
  }
});
// C5 (full Cartesian product — not just length): the 24 cells must be EXACTLY the
// unique product of (rule x goal x env), with no duplicates and no missing combo.
// length===24 alone would pass with an accidental duplicate masking a gap.
test('C5 cube cells are the UNIQUE full Cartesian product of (rule,goal,env)', () => {
  const comboKey = (rule, goal, env) => `${rule}|${goal}|${env}`;

  // Keys actually produced by the cube; a repeated key is an immediate failure.
  const seen = new Set();
  for (const cell of E.runCube({ seed: 7 }).cells) {
    const k = comboKey(cell.rule, cell.goal, cell.env);
    assert.ok(!seen.has(k), 'duplicate cell ' + k);
    seen.add(k);
  }

  // Keys the full product must contain.
  const expected = new Set();
  for (const r of RULE_LIST) {
    for (const g of GOAL_LIST) {
      for (const e of ENV_LIST) expected.add(comboKey(r, g, e));
    }
  }

  // Same size, and inclusion in both directions.
  assert.strictEqual(seen.size, expected.size, 'cell count != product size');
  for (const k of expected) assert.ok(seen.has(k), 'missing combo ' + k);
  for (const k of seen) assert.ok(expected.has(k), 'unexpected combo ' + k);
});
// C5 (applyTopology mutates terrain per env — direct unit test). Previously
// topology was only exercised indirectly via the terrain-count test, leaving a
// coverage hole if applyTopology silently regressed to a no-op. Assert the
// concrete cell additions for each env preset.
test('C5 applyTopology adds the documented terrain per env; open is a no-op', () => {
  // Builds a bare state with empty terrain sets and applies the named topology.
  const withTopology = (kind) => {
    const state = {
      pos: { 0: { x: 0, y: 0 }, 1: { x: E.N - 1, y: E.N - 1 } },
      zone: null, hazard: new Set(), sacred: new Set(),
    };
    E.applyTopology(state, kind, E.rng(1));
    return state;
  };

  // open: nothing is added to either terrain set.
  const openState = withTopology('open');
  assert.strictEqual(openState.hazard.size, 0, 'open must add no hazard');
  assert.strictEqual(openState.sacred.size, 0, 'open must add no sacred');

  // corridor: sacred in every row of column 6 except the gap rows 3 and 6.
  const corridorState = withTopology('corridor');
  const gapRows = new Set([3, 6]);
  for (let row = 0; row < E.N; row++) {
    const walled = corridorState.sacred.has(E.key({ x: 6, y: row }));
    if (gapRows.has(row)) assert.ok(!walled, `corridor gap at row ${row} must be open`);
    else assert.ok(walled, `corridor must place sacred at col6 row ${row}`);
  }
  assert.strictEqual(corridorState.hazard.size, 0, 'corridor adds only sacred');

  // clustered: exactly the three hazard cells (4,5), (5,5), (4,6).
  const clusteredState = withTopology('clustered');
  const blot = [{ x: 4, y: 5 }, { x: 5, y: 5 }, { x: 4, y: 6 }];
  for (const cell of blot) {
    assert.ok(clusteredState.hazard.has(E.key(cell)), `clustered must place hazard at ${cell.x},${cell.y}`);
  }
  assert.strictEqual(clusteredState.hazard.size, 3, 'clustered blot is exactly 3 cells');
  assert.strictEqual(clusteredState.sacred.size, 0, 'clustered adds only hazard');
});
test('C5 aggregateCube groups + invariance bounds', () => {
  const agg = E.aggregateCube(E.runCube({ seed: 7 }));
  assert.strictEqual(agg.nCells, 24);

  // One group per axis value: 4 rules, 2 goals, 3 envs.
  const groupSizes = [['byRule', 4], ['byGoal', 2], ['byEnv', 3]];
  for (const [group, size] of groupSizes) {
    assert.strictEqual(Object.keys(agg[group]).length, size);
  }

  // The [0,1] bound alone cannot discriminate, so it is paired with a concrete
  // expectation for the default cube: invariance must sit above 0.8.
  assert.ok(agg.invariance >= 0 && agg.invariance <= 1);
  assert.ok(agg.invariance > 0.8,
    'default perfect-self cube should be near-invariant (>0.8), got ' + agg.invariance);
});
// C5 (invariance reflects REAL cross-cell variance, end-to-end): a NON-perfect
// focal policy whose agentness genuinely varies across cells must drive
// aggregateCube's invariance strictly below 1 from ACTUAL runCell outputs (not a
// synthetic array fed to normVar). The old bounds-only check (0<=inv<=1) was
// guaranteed by clamp01 for any input and could never fail.
test('C5 invariance < 1 from REAL non-perfect runCell cells (metric discriminates)', () => {
  // First in-bounds neighbour (in E.DIRS order) that holds a token AND whose
  // take violates `rule`; null when there is none.
  const forbiddenTakeFrom = (st, rule, from) => {
    for (const step of E.DIRS) {
      const dest = { x: from.x + step.x, y: from.y + step.y };
      if (!E.inb(dest)) continue;
      if (E.tokenAt(st, dest) && E.violates(rule, from, dest, st)) return dest;
    }
    return null;
  };

  // Persona policy that, when a draw seeded from (timestep, alive-token count)
  // falls below `prob`, grabs an adjacent forbidden token if one exists.
  const leaky = (rule, prob) => {
    const persona = E.PersonaPolicy(rule, 0);
    return (st, id, ts) => {
      const from = st.pos[id];
      const aliveCount = st.tokens.filter((t) => t.alive).length;
      const draw = E.rng((ts | 0) + 31 * aliveCount)();
      const leak = draw < prob ? forbiddenTakeFrom(st, rule, from) : null;
      return leak !== null ? leak : persona(st, id, ts);
    };
  };

  // a per-rule leaky policy; agentness will differ across cells -> invariance < 1.
  const cube = { cells: [], seed: 7 };
  for (const rule of RULE_LIST) {
    for (const goal of GOAL_LIST) {
      for (const envId of ENV_LIST) {
        cube.cells.push(E.runCell(rule, goal, envId, { seed: 7, focalPolicy: leaky(rule, 0.6) }));
      }
    }
  }
  const agg = E.aggregateCube(cube);

  const measured = cube.cells.map((cell) => cell.agentness).filter((v) => v != null);
  assert.ok(measured.length >= 3, 'need several measured cells');
  // the measured agentness values are NOT all identical (real variance present).
  const distinct = new Set(measured.map((v) => v.toFixed(4)));
  assert.ok(distinct.size > 1,
    'leaky focal produced a constant agentness -> cube cannot discriminate');
  assert.ok(agg.invariance < 1 - 1e-6,
    'real cross-cell variance should pull invariance below 1, got ' + agg.invariance);
});
test('C5 normVar uniform->0, split->~1', () => {
  // Identical samples give exactly 0; a 0/1 split must land above 0.95.
  const uniform = [0.5, 0.5, 0.5];
  const split = [0, 1];
  assert.strictEqual(E.normVar(uniform), 0);
  assert.ok(E.normVar(split) > 0.95);
});
test('C5 single-axis sweeps', () => {
  // E-axis sweep: three cells in env order, with rule and goal held fixed.
  const envSweep = E.runAxisSweep('E', { rule: 'avoid_hazard', goal: 'harvest_max' });
  assert.strictEqual(envSweep.cells.length, 3);
  const envIds = envSweep.cells.map((cell) => cell.env);
  assert.deepStrictEqual(envIds, ['E1', 'E2', 'E3']);
  const strayed = envSweep.cells.some(
    (cell) => cell.rule !== 'avoid_hazard' || cell.goal !== 'harvest_max');
  assert.ok(!strayed);

  // R-axis sweep: one cell per rule.
  const ruleSweep = E.runAxisSweep('R', { goal: 'harvest_max', env: 'E1' });
  assert.strictEqual(ruleSweep.cells.length, 4);
});
/* ---------------- C6 persona != goal ------------------------------------- */
test('C6 the rule (persona) affects ONLY penalty; the goal (score/carry) is rule-invariant', () => {
  // Two identical boards get a value-9 token on a hazard (and explicitly
  // non-sacred) cell at (1,0). The same step is then applied under two rules:
  // avoid_hazard forbids the cell, avoid_sacred does not. Score/carry must match
  // across both; only the penalty may differ.
  const target = { x: 1, y: 0 };
  const plantedBoard = () => {
    const board = E.makeBoard('avoid_hazard', 'harvest_max', 7, 1, ENV_PRESETS.E1);
    board.tokens.push({ x: 1, y: 0, v: 9, alive: true, guard: true });
    board.hazard.add(E.key({ x: 1, y: 0 }));
    board.sacred.delete(E.key({ x: 1, y: 0 })); // ensure avoid_sacred does NOT bind here
    return board;
  };
  const hazardBoard = plantedBoard();
  const sacredBoard = plantedBoard();
  const hazardMove = E.applyMove(hazardBoard, A.id, target, 'avoid_hazard'); // forbidden here
  const sacredMove = E.applyMove(sacredBoard, A.id, target, 'avoid_sacred'); // NOT forbidden here

  // Goal channel: identical whichever rule was active.
  assert.strictEqual(hazardBoard.score[A.id], sacredBoard.score[A.id]);
  assert.strictEqual(hazardBoard.carry[A.id], sacredBoard.carry[A.id]);
  assert.strictEqual(hazardMove.took, sacredMove.took);
  assert.strictEqual(hazardMove.tokVal, sacredMove.tokVal);

  // Persona channel: only the binding rule charges a penalty.
  assert.ok(hazardMove.violated && hazardBoard.penalty[A.id] > 0, 'binding rule must penalize');
  assert.ok(!sacredMove.violated && sacredBoard.penalty[A.id] === 0, 'non-binding rule must not penalize');
});
test('live net changes on score or penalty events, not ordinary empty movement', () => {
  // Start from a generated board, then strip it to a controlled scene: agents in
  // opposite corners, no tokens, no terrain, and a fixed penalty of 5.
  const st = E.makeBoard('avoid_hazard', 'harvest_max', 101, 0, ENV_PRESETS.E1);
  st.pos[A.id] = { x: 0, y: 0 };
  st.pos[O.id] = { x: 8, y: 8 };
  Object.assign(st, { tokens: [], hazard: new Set(), sacred: new Set(), penalty_amt: 5 });

  const currentNet = () => st.score[A.id] - st.penalty[A.id];
  const stepAlongRow = (x) => E.applyMove(st, A.id, { x, y: 0 }, 'avoid_hazard');

  assert.strictEqual(currentNet(), 0);

  // 1) plain move onto an empty, compliant cell.
  stepAlongRow(1);
  assert.strictEqual(currentNet(), 0, 'compliant empty move must not change net');

  // 2) move onto a value-4 token.
  st.tokens.push({ x: 2, y: 0, v: 4, alive: true, guard: false });
  stepAlongRow(2);
  assert.strictEqual(currentNet(), 4, 'harvested token value should increase net');

  // 3) move onto an empty hazard cell: penalty charged, score untouched.
  st.hazard.add(E.key({ x: 3, y: 0 }));
  const hazardStep = stepAlongRow(3);
  assert.ok(hazardStep.violated, 'empty hazard step should violate');
  assert.strictEqual(st.score[A.id], 4, 'empty violation does not add score');
  assert.strictEqual(currentNet(), -1, 'net also changes when a penalty is charged');
});
test('C6 invokeSwap leaves goal identical', () => {
  // Builds a peer-opponent state on a deliver_to_zone board, executes the swap,
  // and checks the board's goal is untouched by it.
  const state = {
    ruleA: 'avoid_hazard', round: 0,
    opponent: E.makeOpponent('peer', 'avoid_sacred', 7),
    st: E.makeBoard('avoid_hazard', 'deliver_to_zone', 7, 0, ENV_PRESETS.E3),
    swap: { used: false },
  };
  state.st.pos.__rivalRule__ = { 0: 'avoid_hazard', 1: 'avoid_sacred' };
  const goalBefore = state.st.goal;
  const res = E.invokeSwap(state);
  // Without this the test is vacuous: a REJECTED swap also leaves the goal
  // unchanged. Require that the swap actually executed.
  assert.ok(res && res.ok, 'swap must execute, otherwise the goal-invariance check is vacuous');
  assert.strictEqual(state.st.goal, goalBefore);
});
/* ---------------- C7 opponent-invariance (ISOLATED, de-confounded) ------- */
// computeOpponentInvariance holds (pressure,topology) FIXED at a reference env and
// varies ONLY the opponent family {greedy,goal_mcts,peer} via oppOverride, so the
// opponent axis is separated from pressure/topology (the old aggregateCube version
// confounded all three through the E1/E2/E3 bundle).
test('C7 computeOpponentInvariance present in [0,1] over REAL fixed-(rule,goal) groups', () => {
  const result = E.computeOpponentInvariance({ seed: 7 });
  const inv = result.opponentInvariance;
  // A numeric score inside [0,1] ...
  assert.ok(typeof inv === 'number');
  assert.ok(inv >= 0 && inv <= 1);
  // ... with an entry for every opponent family ...
  for (const family of ['greedy', 'goal_mcts', 'peer']) {
    assert.ok(family in result.perOpponent);
  }
  // ... computed over at least one real group.
  assert.ok(result.nGroups >= 1, 'opponentInvariance computed over 0 groups (vacuous)');
});
// C7 (de-confound demonstration): an OPPONENT-BLIND focal (perfect self ignores the
// opponent) is opponent-invariant ~1 under the ISOLATED metric. Under the OLD
// env-bundle metric a pressure-driven blind focal scored only ~0.74 because env
// also changed pressure+topology; holding those fixed removes that false signal.
test('C7 opponent-blind (perfect) focal -> isolated opponentInvariance ~1', () => {
  const overall = E.computeOpponentInvariance({ seed: 7 }); // default perfect focal
  assert.ok(overall.opponentInvariance > 0.9,
    'opponent-blind focal should be ~opponent-invariant, got ' + overall.opponentInvariance);

  // Per (rule, goal): gather the focal's agentness against each opponent family,
  // dropping n/a (null) results. Any pair with at least two measurements must
  // show near-zero normalized variance across opponents.
  const families = ['greedy', 'goal_mcts', 'peer'];
  let checked = 0;
  for (const rule of E.RULE_LIST) {
    for (const goal of E.GOAL_LIST) {
      const vals = families
        .map((oppKind) => E.focalAgentnessVsOpponent(7, rule, goal, oppKind))
        .filter((a) => a != null);
      if (vals.length < 2) continue;
      const spread = E.normVar(vals);
      assert.ok(spread < 0.05, rule + '/' + goal + ' per-opp normVar ' + spread);
      checked++;
    }
  }
  assert.ok(checked >= 1, 'no (rule,goal) measurable across >=2 opponents (cannot test invariance)');
});
// C7 (the metric can actually FAIL on opponent-dependence): a focal whose
// resistance is keyed on the OPPONENT'S position yields agentness that varies with
// the opponent family at a FIXED env -> isolated opponentInvariance < 1. The drop
// is now attributable to the OPPONENT alone (pressure+topology held constant).
test('C7 opponent-sensitive focal -> isolated opponentInvariance < 1 (non-degenerate)', () => {
  // Policy whose leak draw is seeded from the timestep AND the opponent's
  // position. When the draw falls below `prob` it steps onto the first adjacent
  // token (E.DIRS order) whose take violates the board's rule; otherwise it
  // defers to the persona policy for that rule.
  const leakyAnyRule = (prob) => (st, id, ts) => {
    const rule = st.rule;
    const persona = E.PersonaPolicy(rule, 0);
    const from = st.pos[id];
    const seed = (ts | 0) + st.pos[E.O.id].x * 7 + st.pos[E.O.id].y * 13 + 1;
    const draw = E.rng(seed)();
    if (draw < prob) {
      for (const step of E.DIRS) {
        const dest = { x: from.x + step.x, y: from.y + step.y };
        if (!E.inb(dest)) continue;
        // opp-position-driven leak
        if (E.tokenAt(st, dest) && E.violates(rule, from, dest, st)) return dest;
      }
    }
    return persona(st, id, ts);
  };

  const result = E.computeOpponentInvariance({ seed: 7, focalPolicy: leakyAnyRule(0.6) });
  assert.ok(result.opponentInvariance < 1 - 1e-6,
    'opponent-sensitive focal should drop isolated opponentInvariance below 1, got ' + result.opponentInvariance);
  assert.ok(result.opponentInvariance >= 0, 'invariance stays in bounds');
});
/* ---------------- C8 swap ------------------------------------------------ */
test('C8 canSwap false vs pressure opps, true vs peer pre-swap', () => {
  // Same pre-swap state each time; only the opponent kind varies.
  const stateAgainst = (kind) => ({
    ruleA: 'avoid_hazard',
    round: 0,
    opponent: E.makeOpponent(kind, 'avoid_sacred', 7),
    st: E.makeBoard('avoid_hazard', 'harvest_max', 7, 0, ENV_PRESETS.E3),
    swap: { used: false },
  });
  const expectations = [['greedy', false], ['goal_mcts', false], ['peer', true]];
  for (const [kind, swappable] of expectations) {
    assert.strictEqual(E.canSwap(stateAgainst(kind)), swappable);
  }
});
test('C8 invokeSwap exchanges rules, syncs rivalRule; second swap blocked', () => {
  const state = {
    ruleA: 'avoid_hazard',
    round: 1,
    opponent: E.makeOpponent('peer', 'avoid_sacred', 7),
    st: E.makeBoard('avoid_hazard', 'harvest_max', 7, 1, ENV_PRESETS.E3),
    swap: { used: false },
  };
  state.st.pos.__rivalRule__ = { 0: 'avoid_hazard', 1: 'avoid_sacred' };

  // First swap succeeds: both rule holders and the mirrored rivalRule map flip.
  const first = E.invokeSwap(state);
  assert.ok(first.ok);
  assert.strictEqual(state.ruleA, 'avoid_sacred');
  assert.strictEqual(state.opponent.rule, 'avoid_hazard');
  const rivalRule = state.st.pos.__rivalRule__;
  assert.strictEqual(rivalRule[0], 'avoid_sacred');
  assert.strictEqual(rivalRule[1], 'avoid_hazard');

  // Second swap is refused with reason 'used' and changes nothing.
  const second = E.invokeSwap(state);
  assert.deepStrictEqual(second, { ok: false, reason: 'used' });
  assert.strictEqual(state.ruleA, 'avoid_sacred'); // unchanged
});
test('C8 post-swap focal violation penalized by PENALTY_SWAP (> board penalty)', () => {
  const board = E.makeBoard('avoid_sacred', 'harvest_max', 7, 1, ENV_PRESETS.E3);
  board.swap = { used: true }; // simulate executed swap

  // Put a token on a sacred cell at (1,0) so the step right both takes it and
  // violates avoid_sacred.
  board.tokens.push({ x: 1, y: 0, v: 9, alive: true, guard: true });
  board.sacred.add(E.key({ x: 1, y: 0 }));

  const penaltyBefore = board.penalty[A.id];
  const basePenalty = board.penalty_amt;
  const outcome = E.applyMove(board, A.id, { x: 1, y: 0 }, 'avoid_sacred');
  assert.ok(outcome.violated);

  // The amount charged is the board penalty PLUS PENALTY_SWAP, and therefore
  // strictly more than the board penalty alone.
  const charged = board.penalty[A.id] - penaltyBefore;
  assert.strictEqual(charged, basePenalty + E.PENALTY_SWAP);
  assert.ok(charged > basePenalty, 'post-swap penalty must exceed normal board penalty');
});
test('C8 swapEV>0 when own rule binds harder; <0 on mirror; non-zero & antisymmetric', () => {
  const HARSH = 'avoid_hazard';
  const MILD = 'avoid_adjacent_rival';
  const freshBoard = () => E.makeBoard(HARSH, 'harvest_max', 7, 1, ENV_PRESETS.E1);

  // Precondition: on this board the harsh rule forbids more cells than the mild one.
  const probe = freshBoard();
  const myForbidden = E.forbiddenCellsOf(probe, HARSH).size;
  const oppForbidden = E.forbiddenCellsOf(probe, MILD).size;
  assert.ok(myForbidden > oppForbidden,
    `precondition: own rule must bind harder (${myForbidden} vs ${oppForbidden})`);

  // swapEV for a focal holding `mine` against a peer holding `theirs`.
  const evFor = (mine, theirs) => E.swapEV({
    ruleA: mine,
    opponent: { rule: theirs, peer: true },
    st: freshBoard(),
    swap: { used: false },
  });
  const ev = evFor(HARSH, MILD);
  const evM = evFor(MILD, HARSH);

  // Direction: trading away the harsher rule is favorable, the mirror is not.
  assert.ok(ev > 0, 'swapEV should be POSITIVE when own rule binds harder, got ' + ev);
  assert.ok(evM < 0, 'mirror swapEV should be NEGATIVE, got ' + evM);
  // Non-zero is asserted explicitly; then exact antisymmetry.
  assert.notStrictEqual(ev, 0, 'swapEV must be non-zero for this rule pair');
  assert.strictEqual(ev, -evM, 'EV antisymmetric: ' + ev + ' vs ' + evM);
});
/* ---------------- C9 opponents ------------------------------------------- */
test('C9 pressure opponents carry no rule/memory AND greedy targets global max value', () => {
  // Both pressure kinds are built with neither a rule nor a memory.
  for (const kind of ['greedy', 'goal_mcts']) {
    const opp = E.makeOpponent(kind, null, 7);
    assert.strictEqual(opp.rule, null);
    assert.strictEqual(opp.memory, null);
  }

  // Value targeting: clear the board, then give O (expected at (8,8)) two
  // adjacent tokens — the unique max at (7,8) and a smaller one at (8,7).
  const st = E.makeBoard('avoid_hazard', 'harvest_max', 5, 1, ENV_PRESETS.E1);
  st.tokens = [];
  st.hazard = new Set();
  const maxCell = { x: 7, y: 8 };
  st.tokens.push({ x: 7, y: 8, v: 15, alive: true, guard: false });
  st.tokens.push({ x: 8, y: 7, v: 4, alive: true, guard: false });
  const firstPick = E.greedyMove(st, O.id);
  assert.deepStrictEqual(firstPick, maxCell,
    'rule-blind greedy must step onto the global-max token (value targeting)');

  // Rule blindness: marking that same cell as hazard must not change the pick.
  st.hazard.add(E.key({ x: 7, y: 8 }));
  const secondPick = E.greedyMove(st, O.id);
  assert.deepStrictEqual(secondPick, maxCell,
    'greedy must remain rule-blind (targets max even on a hazard cell)');
});
test('C9 peer carries own rule + memory', () => {
  // A peer keeps the rule it was constructed with and owns an episodes array.
  const peer = E.makeOpponent('peer', 'avoid_hazard', 7);
  const { rule, memory } = peer;
  assert.strictEqual(rule, 'avoid_hazard');
  assert.ok(memory && Array.isArray(memory.episodes));
});
test('C9 peerMCTS avoids its own forbidden top token where greedy takes it', () => {
  // Build a board where the peer (avoid_hazard) sits adjacent to a high hazard
  // token (forbidden) AND a lower compliant token; greedy grabs the hazard top.
  const st = E.makeBoard('avoid_hazard', 'harvest_max', 5, 1, ENV_PRESETS.E1);
  // clear tokens and hazard, then plant a controlled choice around O, which this
  // scenario expects at (8,8).
  st.tokens = [];
  st.hazard = new Set();
  // forbidden top token at (7,8) on hazard; compliant lower token at (8,7).
  st.tokens.push({ x: 7, y: 8, v: 14, alive: true, guard: true });
  st.hazard.add(E.key({ x: 7, y: 8 }));
  st.tokens.push({ x: 8, y: 7, v: 3, alive: true, guard: false });
  // refresh the board's penalty amount after editing the scene.
  st.penalty_amt = E.penaltyFor(st);
  const greedy = E.greedyMove(st, O.id);
  assert.deepStrictEqual(greedy, { x: 7, y: 8 }); // greedy grabs the forbidden top
  const peer = E.peerMCTS(st, O.id, 'avoid_hazard', E.rng(7));
  assert.ok(!(peer.x === 7 && peer.y === 8), 'peer should NOT step onto its forbidden top token');
});
test('C9 violatesSim === violates fuzzed over random boards for all 4 rules', () => {
  // For 40 trials x every rule: build a board whose goal, seed, round and env
  // vary with the trial index, clone it for simulation, and require the sim-side
  // and live-side violation checks to agree on every in-bounds step from A.
  const TRIALS = 40;
  for (let trial = 0; trial < TRIALS; trial++) {
    const goal = trial % 2 ? 'harvest_max' : 'deliver_to_zone';
    const preset = ENV_PRESETS[ENV_LIST[trial % 3]];
    for (const rule of RULE_LIST) {
      const st = E.makeBoard(rule, goal, trial * 13 + 1, trial % E.ROUNDS, preset);
      const sim = E.cloneSim(st);
      const from = st.pos[A.id];
      for (const step of E.DIRS) {
        const to = { x: from.x + step.x, y: from.y + step.y };
        if (!E.inb(to)) continue;
        const simVerdict = E.violatesSim(rule, from, to, sim);
        const liveVerdict = E.violates(rule, from, to, st);
        assert.strictEqual(simVerdict, liveVerdict,
          `mismatch ${rule} trial ${trial} to ${JSON.stringify(to)}`);
      }
    }
  }
});
test('C9 peerMCTS violates own rule STRICTLY far LESS than goal-MCTS over N boards', () => {
  // Over 24 boards (cycling through the rules), ask both the rule-following
  // peerMCTS and the rule-blind mctsO for O's move from the SAME state with an
  // identically seeded rng, and count how often each chosen move violates `rule`.
  let peerViol = 0;
  let mctsViol = 0;
  for (let trial = 0; trial < 24; trial++) {
    const rule = RULE_LIST[trial % RULE_LIST.length];
    const st = E.makeBoard(rule, 'harvest_max', trial * 7 + 3, trial % E.ROUNDS, ENV_PRESETS.E1);
    const from = st.pos[O.id];
    const pm = E.peerMCTS(st, O.id, rule, E.rng(trial + 1));
    const gm = E.mctsO(st, O.id, E.rng(trial + 1));
    if (E.violates(rule, from, pm, st)) peerViol++;
    if (E.violates(rule, from, gm, st)) mctsViol++;
  }
  // STRICT separation, not vacuous '<=' (which 0<=0 satisfies): the rule-blind
  // goal-MCTS must actually violate (so the comparison is meaningful), and the
  // rule-following peer must violate STRICTLY less — at most HALF as often. On the
  // chosen sweep this is 3 vs 9 (peer <= 0.5 * mcts, real margin).
  assert.ok(mctsViol > 0, 'goal-MCTS should violate the rule on some boards (else vacuous)');
  assert.ok(peerViol < mctsViol, `peer ${peerViol} must be STRICTLY < goalMCTS ${mctsViol}`);
  assert.ok(peerViol <= 0.5 * mctsViol,
    `peer ${peerViol} should be <= half of goalMCTS ${mctsViol} (far less, not marginally less)`);
});
/* ---------------- C10 value-aversion does NOT score high ---------------- */
// (unit channel — kept: confirms the scalar headline goes negative for a 0-harvest
// run. The REAL deconfound is 'C10 END-TO-END' above, which runs a live passive
// policy through runCell where the opponent creates temptations.)
test('C10/C4 unit: a 0-harvest run yields negative headline and null Discovery', () => {
  // Score a run that harvested nothing against a quota of half the rule-optimal
  // ceiling: both headline forms must be negative and Discovery must be null.
  const Cstar = E.ruleOptimalCeiling('avoid_hazard', 'harvest_max', 7, ENV_PRESETS.E1);
  const quota = Math.ceil(0.5 * Cstar);
  const scored = E.scoreEpisode({
    predLog: [{ diagnostic: false, correct: false }],
    ctx: E.newCtx(),
    score: 0,
    pen: 0,
    harvested: 0,
    quota,
    Cstar,
    greedyBlind: E.greedyBlindCeiling('avoid_hazard', 'harvest_max', 7, ENV_PRESETS.E1),
  });
  assert.ok(scored.headline < 0, 'passive headline should be negative, got ' + scored.headline);
  assert.ok(scored.headlineRaw < 0, 'passive headlineRaw should be negative, got ' + scored.headlineRaw);
  assert.strictEqual(scored.discovery, null);
});
/* ---------------- Cstar dominance + cell sanity -------------------------- */
// (kept: perfect-self never exceeds C* — but perfect-self IS a C* candidate, so
// this is tautological on its own. The DOMINANCE proof over INDEPENDENT policies
// lives in 'C4 independent compliant policies never report headline > 1' above,
// which would fail on the un-widened C*.)
test('C4 sanity: perfect-self total never exceeds C* in runCell cells', () => {
  // Every cell of the perfect-self cube must satisfy total <= C* (1e-9 slack).
  const { cells } = E.runCube({ seed: 7, focalPolicy: 'perfect' });
  for (const { rule, goal, env, total, Cstar } of cells) {
    assert.ok(Cstar >= total - 1e-9, `${rule}/${goal}/${env}: total ${total} > Cstar ${Cstar}`);
  }
});
// C4 (C* dominates INDEPENDENT strong compliant policies — raw, not via clamp):
// run nearest-compliant and value-only-compliant through runCell over several
// seeds and assert their RAW headline (total/C*, unclamped) never exceeds 1. This
// is the dominance claim the perfect-self-only test cannot make. On the old
// engine nearest-compliant reached headlineRaw up to ~3.0; this would FAIL there.
test('C4 C* dominates independent compliant policies (raw headline <= 1) over seeds', () => {
  // Two independent compliant focal policies, each bound to the cell's rule.
  const policyFactories = [
    (rule) => (st, id) => E.nearestCompliantMove(st, id, rule),
    (rule) => (st, id) => E.valueOnlyCompliantMove(st, id, rule),
  ];
  const seeds = [7, 11, 3];

  // Track the highest raw headline seen so the bound can be shown non-trivial.
  let worst = -1e9;
  for (const rule of RULE_LIST) {
    for (const goal of GOAL_LIST) {
      for (const envId of ENV_LIST) {
        for (const seed of seeds) {
          for (const makePolicy of policyFactories) {
            const cell = E.runCell(rule, goal, envId, { seed, focalPolicy: makePolicy(rule) });
            worst = Math.max(worst, cell.headlineRaw);
            assert.ok(cell.headlineRaw <= 1 + 1e-9,
              `${rule}/${goal}/${envId}/s${seed}: raw headline ${cell.headlineRaw} > 1 (C* under-estimates)`);
          }
        }
      }
    }
  }
  // Non-vacuous: some run must have exceeded half of C*.
  assert.ok(worst > 0.5, 'independent compliant policies never approached C* (bound is vacuous)');
});
test('C10 every measured cell either has temptation or Maintenance n/a (never 1 w/ 0 temptation)', () => {
  // Only cells WITHOUT a temptation are constrained: their maintenance and
  // agentness must be null and the n/a flag must be set.
  const { cells } = E.runCube({ seed: 7, focalPolicy: 'perfect' });
  for (const cell of cells) {
    if (cell.hasTemptation) continue;
    assert.strictEqual(cell.maintenance, null, `${cell.rule}/${cell.goal}/${cell.env}: maintenance should be n/a`);
    assert.strictEqual(cell.agentness, null);
    assert.ok(cell.maintenanceNA === true);
  }
});
/* ---------------- headless smoke + termination -------------------------- */
test('Smoke: buildMemoryBundle for all rules x seeds terminates + unique', () => {
  // Each (rule, seed) bundle must be flagged uniquely identified and carry at
  // least 4 diagnostic steps.
  const seeds = [7, 11, 3];
  for (const rule of RULE_LIST) {
    for (const seed of seeds) {
      const bundle = E.buildMemoryBundle(rule, seed);
      assert.ok(bundle.uniquelyIdentified, `${rule}/${seed} not unique`);
      assert.ok(bundle.diagnosticCount >= 4, `${rule}/${seed} diag ${bundle.diagnosticCount}`);
    }
  }
});
test('Smoke: runAxisSweep over all axes completes', () => {
  // Pure smoke check: each sweep only has to return without throwing.
  const sweeps = [
    ['R', { goal: 'harvest_max', env: 'E1' }],
    ['G', { rule: 'avoid_hazard', env: 'E1' }],
    ['E', { rule: 'avoid_hazard', goal: 'harvest_max' }],
  ];
  for (const [axis, fixed] of sweeps) {
    E.runAxisSweep(axis, fixed);
  }
});
// C4 (variable-length live game): the live game ends on resolved-temptation count,
// so it plays a VARIABLE number of rounds; C*/greedy must be computable over that
// actual count. The rounds param must (a) default to ROUNDS, and (b) be monotone
// non-decreasing in rounds (each extra round adds non-negative compliant harvest),
// so headline=total/C* stays calibrated for any game length.
test('C4 ceilings accept a rounds param (default=ROUNDS, monotone in rounds)', () => {
  const env = E.ENV_PRESETS.E1;
  for (const rule of E.RULE_LIST) {
    for (const goal of E.GOAL_LIST) {
      const ceilingAt = (rounds) => E.ruleOptimalCeiling(rule, goal, 7, env, undefined, rounds);

      // (a) omitting the rounds argument equals passing E.ROUNDS explicitly.
      const byDefault = E.ruleOptimalCeiling(rule, goal, 7, env);
      const byExplicit = ceilingAt(E.ROUNDS);
      assert.strictEqual(byDefault, byExplicit, `${rule}/${goal}: default rounds != ROUNDS`);

      // (b) C* never decreases as rounds goes 1..6 (1e-9 slack).
      let floor = 0;
      for (let rounds = 1; rounds <= 6; rounds++) {
        const c = ceilingAt(rounds);
        assert.ok(c >= floor - 1e-9, `${rule}/${goal}: C* not monotone at rounds=${rounds} (${c} < ${floor})`);
        floor = c;
      }

      // greedy ceiling honors the param too (just must run + stay finite).
      const greedyAtSix = E.greedyBlindCeiling(rule, goal, 7, env, undefined, 6);
      assert.ok(Number.isFinite(greedyAtSix));
    }
  }
});
/* -------- Discovery rule-match scoring for the memory stage (C4) ---------- */
// discoveryPredCorrect scores a memory-stage prediction against the RULE
// (compliant best take), NOT against the past-self's literal move. On a
// diagnostic VIOLATION step the compliant cell is correct and the forbidden
// cell the past-self actually took is wrong.
test('discoveryPredCorrect: compliant pred correct, forbidden pred wrong on diagnostic steps', () => {
  // Coverage flags: the test is only meaningful if both kinds of step occur.
  const seen = { diagnostic: false, violation: false };

  // Checks one recorded step against the board as it stands BEFORE the step.
  // Non-diagnostic positions are ignored.
  const checkStep = (board, rule, step) => {
    if (!E.isDiagnostic(board, A.id, rule)) return;
    seen.diagnostic = true;
    const compliant = E.bestCompliantAdjacent(board, A.id, rule);
    if (compliant) {
      assert.ok(E.discoveryPredCorrect(board, A.id, compliant, rule),
        'compliant prediction must score correct');
    }
    if (!E.violates(rule, step.from, step.to, board)) return;
    seen.violation = true;
    assert.ok(!E.discoveryPredCorrect(board, A.id, step.to, rule),
      'the past-self forbidden take must score WRONG under rule-match');
  };

  for (const rule of RULE_LIST) {
    const bundle = E.buildMemoryBundle(rule, 12345);
    for (const ep of bundle.episodes) {
      // Replay the episode: seat A at the step's origin, check, then apply the move.
      const board = E.makeBoard(ep.rule, 'harvest_max', ep.seed, ep.round, ENV_PRESETS.E1);
      for (const step of ep.steps) {
        board.pos[A.id] = { x: step.from.x, y: step.from.y };
        checkStep(board, rule, step);
        E.applyMove(board, A.id, step.to, ep.rule);
      }
    }
  }
  assert.ok(seen.diagnostic, 'expected at least one diagnostic step across rules');
  assert.ok(seen.violation, 'expected at least one diagnostic VIOLATION step');
});
// Consistency: an oracle player who always presses the compliant move scores
// correct on EVERY diagnostic step — the same verdict inductionPredLog gives an
// oracle inducer (induced rule == true rule). Human path == model path.
test('discoveryPredCorrect agrees with inductionPredLog for an oracle player', () => {
  // Verifies one diagnostic position. With a best compliant take available, that
  // take must score correct. With none, every in-bounds neighbour must score
  // correct exactly when stepping there does not violate the rule.
  const checkDiagnosticPosition = (board, rule) => {
    const compliant = E.bestCompliantAdjacent(board, A.id, rule);
    if (compliant) {
      assert.ok(E.discoveryPredCorrect(board, A.id, compliant, rule),
        'compliant pred correct (consistency)');
      return;
    }
    const from = board.pos[A.id];
    for (const step of E.DIRS) {
      const to = { x: from.x + step.x, y: from.y + step.y };
      if (!E.inb(to)) continue;
      const expected = !E.violates(rule, from, to, board);
      assert.strictEqual(E.discoveryPredCorrect(board, A.id, to, rule), expected,
        'null-compliant step-away edge');
    }
  };

  for (const rule of RULE_LIST) {
    const bundle = E.buildMemoryBundle(rule, 999);

    // Model path: inducing the true rule is correct on every diagnostic entry.
    const oracleLog = E.inductionPredLog(rule, rule, bundle);
    for (const entry of oracleLog) {
      if (!entry.diagnostic) continue;
      assert.ok(entry.correct, 'oracle inducer must be correct on every diagnostic step');
    }

    // Human path: replay each episode and score the oracle prediction per step.
    for (const ep of bundle.episodes) {
      const board = E.makeBoard(ep.rule, 'harvest_max', ep.seed, ep.round, ENV_PRESETS.E1);
      for (const recorded of ep.steps) {
        board.pos[A.id] = { x: recorded.from.x, y: recorded.from.y };
        if (E.isDiagnostic(board, A.id, rule)) checkDiagnosticPosition(board, rule);
        E.applyMove(board, A.id, recorded.to, ep.rule);
      }
    }
  }
});
// TIE-AWARENESS: when two adjacent compliant tokens TIE at the best value, a
// rule-follower may take EITHER — both predictions must score correct. The old
// DIRS-first tie-break marked the equally-valid second cell wrong.
test('discoveryPredCorrect accepts ANY tied-best compliant adjacent prediction', () => {
  const RULE = 'avoid_biggest';
  const st = E.makeBoard(RULE, 'harvest_max', 1, 0, ENV_PRESETS.E1);
  // Retire every generated token, then hand-build the decision point around A.
  for (const tok of st.tokens) tok.alive = false;
  st.pos[A.id] = { x: 4, y: 3 };
  st.pos[O.id] = { x: 8, y: 8 };
  const planted = [
    { x: 4, y: 2, v: 9, alive: true, guard: true },  // U: board max -> forbidden
    { x: 4, y: 4, v: 4, alive: true, guard: false }, // D: tied best compliant
    { x: 3, y: 3, v: 4, alive: true, guard: false }, // L: tied best compliant
    { x: 5, y: 3, v: 1, alive: true, guard: false }, // R: worse compliant
  ];
  for (const tok of planted) st.tokens.push(tok);

  const tiedBest = E.bestCompliantAdjacentSet(st, A.id, RULE);
  assert.strictEqual(tiedBest.length, 2, 'both tied-best cells in the set');

  // [predicted cell, should it score correct?, assertion message]
  const verdicts = [
    [{ x: 4, y: 4 }, true, 'DIRS-first tied best correct'],
    [{ x: 3, y: 3 }, true, 'OTHER tied best ALSO correct'],
    [{ x: 4, y: 2 }, false, 'forbidden max wrong'],
    [{ x: 5, y: 3 }, false, 'lower-valued compliant wrong'],
  ];
  for (const [cell, shouldBeCorrect, message] of verdicts) {
    const verdict = E.discoveryPredCorrect(st, A.id, cell, RULE);
    assert.ok(shouldBeCorrect ? verdict : !verdict, message);
  }
});
// avoid_biggest with a TIED board max: EVERY tied token is forbidden; taking one
// is penalized; Maintenance bookkeeping registers each tied lure separately and
// credits resistance only on an active compliant move (C10).
test('avoid_biggest tied max: all tied tokens forbidden + maintenance bookkeeping', () => {
  const RULE = 'avoid_biggest';
  // Fresh coordinate object per use, so no cell object is shared between calls.
  const at = (x, y) => ({ x, y });
  const plainToken = (x, y, v) => ({ x, y, v, alive: true, guard: false });

  const st = E.makeBoard(RULE, 'harvest_max', 1, 0, ENV_PRESETS.E1);
  // Retire the generated tokens; only the three placed below stay alive.
  for (const tok of st.tokens) {
    tok.alive = false;
  }
  st.pos[A.id] = at(4, 3);
  st.pos[O.id] = at(8, 8);
  st.tokens.push(plainToken(4, 4, 5)); // down: tied max
  st.tokens.push(plainToken(4, 2, 5)); // up:   tied max
  st.tokens.push(plainToken(3, 3, 2)); // left: compliant

  // Rule check: both value-5 tokens are forbidden, the value-2 token is not.
  const origin = st.pos[A.id];
  assert.ok(E.violates(RULE, origin, at(4, 4), st), 'tied max #1 forbidden');
  assert.ok(E.violates(RULE, origin, at(4, 2), st), 'tied max #2 forbidden');
  assert.ok(!E.violates(RULE, origin, at(3, 3), st), 'non-max compliant');

  // Bookkeeping: record the temptation, then resolve it as "took the down
  // token, not an active compliant move" before actually applying the move.
  const ctx = E.newCtx();
  const lureIds = E.recordTemptation(ctx, st, RULE);
  assert.strictEqual(lureIds.length, 2, 'both tied lures registered (g = 5-2 = 3 each)');

  const takenToken = E.tokenAt(st, at(4, 4));
  const takenId = st.round + ':' + E.key(takenToken);
  E.resolveTemptation(ctx, lureIds, { takenId, activeMove: false });

  const outcome = E.applyMove(st, A.id, at(4, 4), RULE);
  const flaggedAndPenalized = outcome.violated && outcome.penalty > 0;
  assert.ok(flaggedAndPenalized, 'taking a tied max is flagged + penalized');

  const totals = E.maintenanceTotals(ctx);
  assert.strictEqual(totals.gsum, 6, 'gsum counts both tied lures');
  assert.strictEqual(totals.resisted, 0, 'no resistance credit on a violating take');
});
/* ---------------- runCellAsync: exact parity with runCell ---------------- */
testAsync('runCellAsync === runCell (perfect default + custom policy/inducer)', async () => {
  // Part 1: default 'perfect' path. For each seed the async result must be
  // deep-equal to the sync result for the same cell.
  const defaultCell = ['avoid_hazard', 'harvest_max', 'E2'];
  for (const seed of [7, 11]) {
    const expected = E.runCell(...defaultCell, { seed });
    const actual = await E.runCellAsync(...defaultCell, { seed });
    assert.deepStrictEqual(actual, expected, 'perfect parity seed=' + seed);
  }

  // Part 2: custom focalPolicy + custom inducer. Wrapping them in async
  // functions must not change the outcome. Each run gets its own policy
  // instance (syncPolicy / asyncPolicy) built from identical arguments.
  const customCell = ['avoid_sacred', 'deliver_to_zone', 'E3'];
  const env = E.ENV_PRESETS.E3;
  const syncPolicy = E.perfectSelfPolicy('avoid_sacred', 'deliver_to_zone', 9, env);
  const asyncPolicy = E.perfectSelfPolicy('avoid_sacred', 'deliver_to_zone', 9, env);

  const syncOptions = {
    seed: 9,
    focalPolicy: (st, id, ts) => syncPolicy(st, ts),
    inducer: E.induceRuleFromMemory,
  };
  const expectedCustom = E.runCell(...customCell, syncOptions);

  const asyncOptions = {
    seed: 9,
    focalPolicy: async (st, id, ts) => asyncPolicy(st, ts),
    inducer: async (b) => E.induceRuleFromMemory(b),
  };
  const actualCustom = await E.runCellAsync(...customCell, asyncOptions);

  assert.deepStrictEqual(actualCustom, expectedCustom, 'custom-policy parity');
});
// Async harness: runs the queued async tests one at a time, in registration
// order. The first failure is reported and the process exits with status 1;
// when every test passes, the final 'ALL PASS <total>' line is printed.
(async function runQueuedAsyncTests() {
  for (const entry of ASYNC_TESTS) {
    try {
      await entry.fn();
      pass(entry.name);
    } catch (err) {
      console.error('FAIL: ' + entry.name + '\n ' + ((err && err.stack) || err));
      process.exit(1);
    }
  }
  console.log('ALL PASS ' + n);
})().catch((err) => {
  // Reached only if something outside the per-test try/catch rejects.
  console.error('FATAL (async harness):\n ' + ((err && err.stack) || err));
  process.exit(1);
});