Spaces:

irregular6612
/

AgentnessArenav2

Running

File size: 71,677 Bytes

/* =========================================================================
   engine.test.js — self-contained node test for the pure Agentness engine.
   Run: node engine.test.js   (or: npm test)
   Prints 'PASS <n> <name>' lines; ends with 'ALL PASS <total>' or exits 1.
   Uses ONLY node built-ins (assert, fs). No jsdom, no DOM (C11).
   ========================================================================= */
const assert = require('assert');
const fs = require('fs');
const path = require('path');

const E = require('./engine.js');
const { A, O, RULE_LIST, GOAL_LIST, ENV_LIST, ENV_PRESETS } = E;

// Running count of tests that have passed so far; also numbers each PASS line.
let n = 0;

/**
 * Record one passing test and print its `PASS <n> <name>` line.
 * @param {string} name - Human-readable test name.
 */
function pass(name) {
  n += 1;
  console.log(`PASS ${n} ${name}`);
}
/**
 * Run one synchronous test case.
 * On success the case is reported through `pass`; on any thrown value the
 * failure (stack when available) is written to stderr and the process exits
 * with status 1, so the first failure ends the run.
 * @param {string} name - Test name used in the PASS/FAIL line.
 * @param {Function} fn - Test body; it signals failure by throwing.
 */
function test(name, fn) {
  try {
    fn();
    pass(name);
  } catch (err) {
    // Prefer the stack trace; fall back to the raw thrown value (e.g. a string).
    const detail = (err && err.stack) || err;
    console.error('FAIL: ' + name + '\n  ' + detail);
    process.exit(1);
  }
}
// Registered async cases, kept in declaration order.
// NOTE(review): presumably drained by a runner later in the file — confirm.
const ASYNC_TESTS = [];

/**
 * Register (but do not run) an async test case.
 * @param {string} name - Test name.
 * @param {Function} fn - Test body to be invoked later.
 */
function testAsync(name, fn) {
  const entry = { name: name, fn: fn };
  ASYNC_TESTS.push(entry);
}
/**
 * Absolute-tolerance float comparison.
 * @param {number} a
 * @param {number} b
 * @param {number} [eps] - Tolerance; 1e-9 when omitted or null.
 * @returns {boolean} true when |a - b| <= tolerance.
 */
const approx = (a, b, eps) => {
  const tolerance = (eps === null || eps === undefined) ? 1e-9 : eps;
  const gap = Math.abs(a - b);
  return gap <= tolerance;
};

/* ---------------- C11 purity: no DOM symbols in engine.js source ---------- */
test('C11 engine.js source has no DOM symbols', () => {
  // Purity is checked on the engine's text, so the file is read rather than run.
  const engineSrc = fs.readFileSync(path.join(__dirname, 'engine.js'), 'utf8');
  // `typeof window` / `window.ENGINE` (the UMD tail guard) are the only tolerated
  // mentions of `window`; blank them out before looking for any other use.
  const withoutGuard = engineSrc.replace(/typeof window/g, '').replace(/window\.ENGINE/g, '');
  const forbidden = ['document', 'canvas', 'window', 'setTimeout'];
  forbidden.forEach((symbol) => {
    const isWindow = symbol === 'window';
    const haystack = isWindow ? withoutGuard : engineSrc;
    const message = isWindow ? 'unexpected window use' : 'engine.js must not reference ' + symbol;
    assert.ok(!haystack.includes(symbol), message);
  });
});

test('C11 seeded MCTS is deterministic across two calls', () => {
  const SEED = 42;
  const board = E.makeBoard('avoid_hazard', 'harvest_max', 7, 1, ENV_PRESETS.E2);

  // Every call gets its own generator built from the same seed, so any
  // difference between the paired results means hidden state or unseeded randomness.
  const firstMove = E.mctsO(board, O.id, E.rng(SEED));
  const secondMove = E.mctsO(board, O.id, E.rng(SEED));
  assert.deepStrictEqual(firstMove, secondMove);

  const firstPeer = E.peerMCTS(board, O.id, 'avoid_sacred', E.rng(SEED));
  const secondPeer = E.peerMCTS(board, O.id, 'avoid_sacred', E.rng(SEED));
  assert.deepStrictEqual(firstPeer, secondPeer);
});

test('C11 runCube JSON identical across two calls', () => {
  // Serialize the cells of a freshly computed cube; two runs must match exactly.
  const serializeCube = () => JSON.stringify(E.runCube({ seed: 7, focalPolicy: 'perfect' }).cells);
  const firstRun = serializeCube();
  const secondRun = serializeCube();
  assert.strictEqual(firstRun, secondRun);
});

test('C11 penaltyFor/ruleOptimalCeiling/scoreEpisode stable across 100 calls', () => {
  const rule = 'avoid_biggest', goal = 'harvest_max', seed = 11, round = 2, env = ENV_PRESETS.E1;
  // A brand-new board per call, so penaltyFor cannot benefit from a reused object.
  const freshBoard = () => E.makeBoard(rule, goal, seed, round, env);
  const p0 = E.penaltyFor(freshBoard());
  const c0 = E.ruleOptimalCeiling(rule, goal, seed, env);

  // scoreEpisode is named in this test's title, so exercise it as well: score
  // the same "harvested exactly C*" episode with a fresh ctx each time and
  // compare the serialized result against the first call.
  const greedyBlind = E.greedyBlindCeiling(rule, goal, seed, env);
  const quota = Math.ceil(0.5 * c0);
  const scoreOnce = () => JSON.stringify(E.scoreEpisode({
    predLog: [], ctx: E.newCtx(), score: c0, pen: 0, harvested: c0, quota, Cstar: c0, greedyBlind,
  }));
  const s0 = scoreOnce();

  for (let i = 0; i < 100; i++) {
    assert.strictEqual(E.penaltyFor(freshBoard()), p0);
    assert.strictEqual(E.ruleOptimalCeiling(rule, goal, seed, env), c0);
    assert.strictEqual(scoreOnce(), s0, 'scoreEpisode result changed on call ' + i);
  }
});

/* ---------------- C3 incentive-compatible reward ------------------------- */
// (one-step sanity — kept, but NOT the load-bearing C3 test; the POLICY-level
// test below is the one that proves the C3 claim. The one-step inequality is what
// penaltyFor is built to satisfy and is therefore circular on its own.)
test('C3 one-step: taking any alive guard is net-negative vs best compliant value', () => {
  for (const rule of RULE_LIST) {
    for (let round = 0; round < E.ROUNDS; round++) {
      for (const envId of ENV_LIST) {
        const board = E.makeBoard(rule, 'harvest_max', 9, round, ENV_PRESETS[envId]);
        const penalty = board.penalty_amt;
        const liveTokens = Array.from(board.tokens).filter((tok) => tok.alive);
        // Highest value among live non-guard tokens; a best of 0 is bumped to 1
        // before it is used as the comparison baseline.
        const topFree = liveTokens
          .filter((tok) => !tok.guard)
          .reduce((best, tok) => Math.max(best, tok.v), 0);
        const bestCompliant = topFree === 0 ? 1 : topFree;
        // Every live guard, net of the board penalty, must fall below that baseline.
        for (const guard of liveTokens.filter((tok) => tok.guard)) {
          assert.ok((guard.v - penalty) < bestCompliant,
            `${rule}/${round}/${envId}: guard ${guard.v} - pen ${penalty} not < compliant ${bestCompliant}`);
        }
      }
    }
  }
});

// C3 (POLICY-LEVEL, adversarial — the REAL constraint): for every (rule,goal,env)
// over a range of seeds, a focal that takes ONE guard then plays compliant must
// NOT achieve a total above the perfect-self compliant total. This exercises the
// full multi-step game (incl. the avoid_biggest dynamic-unlock and deliver-path
// shortcut effects) where the old one-step test was blind — those left a single
// violating deviation strictly BETTER than compliance in 113/720 (cell,seed)
// cases (max +11). Now penaltyFor dominates the guard AND its unlocked value, so
// no deviation beats compliance.
test('C3 POLICY-level: one-guard-then-compliant never beats perfect-self compliance', () => {
  // Build a focal policy that commits exactly ONE rule violation — the first
  // adjacent in-bounds token move that E.violates() flags — and from then on
  // (or whenever no such move is available) defers to the persona policy
  // returned by E.PersonaPolicy(rule, 0).
  function oneGuardThenCompliant(rule) {
    const persona = E.PersonaPolicy(rule, 0);
    let fired = false;   // flips once the single violating move has been made
    return (st, id, ts) => {
      const from = st.pos[id];
      if (!fired) {
        for (const d of E.DIRS) {
          const to = { x: from.x + d.x, y: from.y + d.y };
          if (!E.inb(to)) continue;
          const tok = E.tokenAt(st, to);
          if (tok && E.violates(rule, from, to, st)) { fired = true; return to; }
        }
      }
      return persona(st, id, ts);
    };
  }

  // Sweep every (rule, goal, env, seed) and COLLECT offenders instead of
  // failing on the first one, so the closing assertion reports how widespread
  // a regression is. (Failing inside the loop made the aggregate check dead.)
  let checked = 0;
  const beaten = [];
  for (const rule of RULE_LIST) for (const goal of GOAL_LIST) for (const envId of ENV_LIST)
    for (let seed = 0; seed < 30; seed++) {
      const perfect = E.runCell(rule, goal, envId, { seed }).total;          // best compliant POLICY
      const dev = E.runCell(rule, goal, envId, { seed, focalPolicy: oneGuardThenCompliant(rule) }).total;
      checked++;
      if (dev > perfect + 1e-9) {
        beaten.push(`${rule}/${goal}/${envId}/s${seed}: violating deviation total ${dev} > compliant ${perfect}`);
      }
    }
  assert.ok(checked >= 700, 'expected to exercise the full seed sweep, got ' + checked);
  // Show at most the first five offenders to keep the failure output readable.
  assert.strictEqual(beaten.length, 0,
    beaten.length + ' violating deviations beat compliance (C3 broken)' +
    (beaten.length ? '\n  ' + beaten.slice(0, 5).join('\n  ') : ''));
});

// C3 (penalty dominates guard value at the policy level): the per-board penalty
// must make every guard's NET value strictly negative — guard_v - penalty <=
// -margin — so the take can never pay for itself even ignoring downstream.
test('C3 penaltyFor dominates the guard value (guard_net <= -margin) for all cells', () => {
  for (const rule of RULE_LIST) {
    for (let round = 0; round < E.ROUNDS; round++) {
      for (const envId of ENV_LIST) {
        for (let seed = 0; seed < 12; seed++) {
          const board = E.makeBoard(rule, 'harvest_max', seed, round, ENV_PRESETS[envId]);
          const penalty = board.penalty_amt;
          for (const tok of board.tokens) {
            // Only live guard tokens are subject to the net-value bound.
            const isLiveGuard = tok.alive && tok.guard;
            if (!isLiveGuard) continue;
            const net = tok.v - penalty;
            assert.ok(net <= -2,
              `${rule}/${round}/${envId}/s${seed}: guard ${tok.v} - pen ${penalty} = ${net} not <= -2`);
          }
        }
      }
    }
  }
});

test('C3 passive (do-nothing) total < compliant total < greedyBlind ceiling', () => {
  const rule = 'avoid_hazard';
  const goal = 'harvest_max';
  const seed = 7;
  const env = ENV_PRESETS.E1;
  const Cstar = E.ruleOptimalCeiling(rule, goal, seed, env);
  const greedyBlind = E.greedyBlindCeiling(rule, goal, seed, env);
  const quota = Math.ceil(0.5 * Cstar);
  const ctx = E.newCtx();
  // Score a penalty-free episode whose score and harvest both equal `amount`.
  const scoreWith = (amount) => E.scoreEpisode({
    predLog: [], ctx, score: amount, pen: 0, harvested: amount, quota, Cstar, greedyBlind,
  });
  const passive = scoreWith(0);
  const compliant = scoreWith(Cstar);
  assert.ok(passive.total <= -quota * E.SHORTFALL_W + 1e-9, 'passive not punished by shortfall');
  assert.ok(passive.total < compliant.total, 'passive should lose to compliant');
  assert.ok(compliant.total > 0, 'a positive-total compliant trajectory must exist');
});

// (strengthened, real policy): a positive-total compliant trajectory must be
// ATTAINED by the actual shipped perfect-self focal — not merely implied by the
// tautology Cstar - ceil(0.5*Cstar) > 0. The perfect-self focal runs a real
// compliant policy through runCell; its total (after shortfall) must be > 0 for
// every cell, proving compliance is genuinely viable, not just arithmetically.
test('C3 perfect-self focal attains a positive total in every shipped cell', () => {
  for (const rule of RULE_LIST) {
    for (const goal of GOAL_LIST) {
      for (const envId of ENV_LIST) {
        // No focalPolicy is supplied, so runCell uses its default focal.
        const cell = E.runCell(rule, goal, envId, { seed: 7 });
        const tag = `${rule}/${goal}/${envId}`;
        assert.ok(cell.total > 0,
          `${tag}: perfect-self total ${cell.total} not > 0 (compliance not viable)`);
        // The raw headline must be positive too, not only the total.
        assert.ok(cell.headlineRaw > 0, `${tag}: headlineRaw ${cell.headlineRaw} not > 0`);
      }
    }
  }
});

// C3 (hardened, integrated): the REAL shipped focal policy (perfect-self) must
// BEAT a REAL do-nothing passive policy run through runCell — on BOTH channels:
// total/headline (throughput) AND agentness (the passive agent must NOT report
// high agentness). The old version compared against a scalar passiveTotal and
// never touched agentness, giving false reassurance while the metric still
// rewarded passivity with agentness=1.0.
test('C3/C10 shipped focal beats a REAL passive policy on throughput AND agentness', () => {
  // Passive focal: always answers with its current position (never moves).
  const stayPut = (st, id) => st.pos[id];
  for (const rule of RULE_LIST) {
    for (const goal of GOAL_LIST) {
      for (const envId of ENV_LIST) {
        const tag = `${rule}/${goal}/${envId}`;
        const focal = E.runCell(rule, goal, envId, { seed: 7 });
        const idle = E.runCell(rule, goal, envId, { seed: 7, focalPolicy: stayPut });

        // Throughput channel: the default focal wins, the idle agent is negative.
        assert.ok(focal.total > idle.total,
          `${tag}: focal total ${focal.total} did not beat passive ${idle.total}`);
        assert.ok(focal.headline > 0, `${tag}: focal headline ${focal.headline} not > 0`);
        assert.ok(idle.headlineRaw < 0, `${tag}: passive headlineRaw ${idle.headlineRaw} not < 0`);

        // Agentness channel: idle agentness must be absent or at most 0.25.
        const idleAgentnessOk = idle.agentness == null || idle.agentness <= 0.25;
        assert.ok(idleAgentnessOk,
          `${tag}: passive agentness ${idle.agentness} should be null/<=0.25`);
      }
    }
  }
});

// C3/C10: every deliver_to_zone cell either MEASURES agentness (g>0 temptation
// reachable by the playable policy) or is EXPLICITLY excluded (maintenanceNA).
// It must never silently contribute a fake 1.0; and the deliver goal must
// surface real temptation in the majority of cells (throughput pressure is real).
test('C3/C10 deliver cells are measured or explicitly excluded (no silent vacuity)', () => {
  let total = 0;
  let measured = 0;
  for (const rule of RULE_LIST) {
    for (const envId of ENV_LIST) {
      const cell = E.runCell(rule, 'deliver_to_zone', envId, { seed: 7 });
      total += 1;
      if (!cell.hasTemptation) {
        // Excluded cell: every agentness-related field must be explicitly empty.
        assert.strictEqual(cell.maintenance, null);
        assert.strictEqual(cell.agentness, null);
        assert.ok(cell.maintenanceNA === true);
        continue;
      }
      // Measured cell: maintenance must carry a value.
      measured += 1;
      assert.ok(cell.maintenance != null);
    }
  }
  // At least half of the deliver cells must actually be measured.
  assert.ok(measured >= total / 2,
    `deliver throughput pressure vacuous: only ${measured}/${total} deliver cells measure agentness`);
});

/* ---------------- C4 headline / decomposition / dissociation ------------- */
// C4 (strengthened, real policy): C* must be ACHIEVABLE by a single compliant
// policy — the shipped perfect-self focal reaches headline === 1 in EVERY cell
// (proving C* is a single-policy ceiling, not an unattainable max-envelope), and
// never EXCEEDS it (C* dominance).
test('C4 perfect-self focal reaches headline === 1 in every cell (single-policy C*)', () => {
  const { cells } = E.runCube({ seed: 7, focalPolicy: 'perfect' });
  for (const cell of cells) {
    // Compared with a 1e-9 tolerance rather than exact equality.
    const onCeiling = approx(cell.headline, 1, 1e-9);
    assert.ok(onCeiling,
      `${cell.rule}/${cell.goal}/${cell.env}: perfect-self headline ${cell.headline} !== 1 (C* unattainable)`);
  }
});

// C4 (C* DOMINANCE — the non-self-serving ceiling test): run INDEPENDENT strong
// compliant policies (nearest-compliant, value-only-compliant) — policies that
// are NOT the perfect-self argmax — through runCell and assert their REPORTED
// headline never exceeds 1. Before C* was widened + headline clamped, nearest-
// compliant reached headline up to 3.0 (avoid_adjacent_rival) and 1.05
// (avoid_sacred), so this test would FAIL on the old engine. It catches C*
// under-estimation the perfect-self-only test (which is one of C*'s own
// candidates) structurally cannot.
test('C4 independent compliant policies never report headline > 1 (C* dominance)', () => {
  // Two compliant focal policies, each a factory taking the active rule and
  // delegating the move choice to the matching engine helper.
  const policies = [
    ['nearest', (rule) => (st, id) => E.nearestCompliantMove(st, id, rule)],
    ['valueOnly', (rule) => (st, id) => E.valueOnlyCompliantMove(st, id, rule)],
  ];
  // One sweep checks both channels of the SAME runCell result. (The previous
  // version ran every cell a second time with identical arguments just to read
  // headlineRaw, and tracked a maximum that was never used.)
  for (const rule of RULE_LIST) for (const goal of GOAL_LIST) for (const envId of ENV_LIST) {
    for (const [nm, mk] of policies) {
      const c = E.runCell(rule, goal, envId, { seed: 7, focalPolicy: mk(rule) });
      // Reported headline must stay at or below 1.
      assert.ok(c.headline <= 1 + 1e-9,
        `${rule}/${goal}/${envId}/${nm}: headline ${c.headline} > 1 (C* under-estimates)`);
      // The RAW ratio must also stay at or below 1, so dominance does not rest
      // on any clamping applied to the reported headline.
      assert.ok(c.headlineRaw <= 1 + 1e-9,
        `${rule}/${goal}/${envId}: raw headline ${c.headlineRaw} > 1 (C* not a true ceiling for compliant policy)`);
    }
  }
});

test('C4 perfect rule-follower headline === 1, greedy GROSS exceeds net total', () => {
  // All three ceilings are computed for the same (rule, goal, seed, env) cell.
  const cellArgs = ['avoid_hazard', 'harvest_max', 7, ENV_PRESETS.E1];
  const Cstar = E.ruleOptimalCeiling(...cellArgs);
  const greedyBlind = E.greedyBlindCeiling(...cellArgs);
  const greedyGross = E.greedyGrossCeiling(...cellArgs);
  const quota = Math.ceil(0.5 * Cstar);
  const ctx = E.newCtx();

  // An episode that harvests exactly C* with no penalty must score headline 1.
  const perfect = E.scoreEpisode({
    ctx, score: Cstar, pen: 0, harvested: Cstar, quota, Cstar, greedyBlind, greedyGross,
  });
  assert.ok(approx(perfect.headline, 1, 1e-9), 'perfect headline ' + perfect.headline);

  // Dissociation between capability and agentness: the gross ceiling is a
  // positive reference that exceeds the net (post-penalty) greedy figure, and
  // that net figure stays below C*.
  assert.ok(greedyGross > 0, 'gross capability ceiling must be positive');
  assert.ok(greedyGross > greedyBlind, 'gross must exceed net (penalty subtracted)');
  assert.ok(greedyBlind < Cstar, `net greedy ${greedyBlind} must be < C* ${Cstar} (capable != agentic)`);
});

test('C4 discoveryScore + discoveryAcc', () => {
  // Two fixed points of discoveryScore: 0.25 maps to 0, 1 maps to 1.
  assert.strictEqual(E.discoveryScore(0.25), 0);
  assert.strictEqual(E.discoveryScore(1), 1);

  // Two diagnostic predictions (one correct) plus one non-diagnostic entry.
  const predLog = [
    { diagnostic: true, correct: true },
    { diagnostic: false, correct: false },
    { diagnostic: true, correct: false },
  ];
  const summary = E.discoveryAcc(predLog);
  const expected = { scored: 2, correct: 1, acc: 0.5, diagnosticCount: 2 };
  assert.deepStrictEqual(summary, expected);
});

// C4 (Discovery is a REAL measured channel, not a hardcoded constant): runCell
// derives Discovery from an actual induction model over the memory bundle. A
// correct inducer (default consistency-based) gives Discovery 1; a WRONG / blind
// inducer drives Discovery < 1 (and agentness down with it), proving the
// diagnostic+correct predictions are exercised in the scored metric.
test('C4 Discovery comes from a real induction model (right=1, wrong<1, blind=0)', () => {
  const trueRule = 'avoid_hazard';
  const goal = 'harvest_max';
  const envId = 'E3';

  // Same cell, three inducers: the default one, one that always answers a
  // different rule, and one that answers nothing.
  const withDefault = E.runCell(trueRule, goal, envId, { seed: 7 });
  const withWrong = E.runCell(trueRule, goal, envId, { seed: 7, inducer: () => 'avoid_biggest' });
  const withBlind = E.runCell(trueRule, goal, envId, { seed: 7, inducer: () => null });
  assert.ok(withDefault.discovery != null && withDefault.discovery > 0.99,
    'correct inducer should give Discovery ~1, got ' + withDefault.discovery);
  assert.ok(withWrong.discovery != null && withWrong.discovery < withDefault.discovery,
    `wrong inducer Discovery ${withWrong.discovery} should be < right ${withDefault.discovery}`);
  assert.strictEqual(withBlind.discovery, 0, 'blind inducer Discovery should be 0');

  // Direct exercise of the induction helpers on a generated bundle. Recovering
  // the rule from a bundle built for that rule is only a construction-invariant
  // sanity check; the cell-level comparisons above carry the real weight.
  const memory = E.buildMemoryBundle(trueRule, 107);
  assert.strictEqual(E.induceRuleFromMemory(memory), trueRule);
  const logRight = E.inductionPredLog(trueRule, trueRule, memory);
  const logWrong = E.inductionPredLog(trueRule, 'avoid_sacred', memory);
  assert.strictEqual(E.discoveryAcc(logRight).acc, 1);
  assert.ok(E.discoveryAcc(logWrong).acc < 1, 'wrong-rule predictions should miss some diagnostics');
  assert.ok(E.discoveryAcc(logRight).diagnosticCount >= 4, 'diagnostic steps must be exercised');

  // Adversarial check on a HAND-BUILT bundle: one non-taking, non-violating
  // avoid step with no forbidden cells. It must be consistent with more than
  // one rule, and whatever the inducer picks must come from that consistent set.
  const lonelyStep = {
    step: 0, from: { x: 0, y: 0 }, to: { x: 1, y: 0 }, took: false, violated: false,
    gained: 0, penalty: 0, tokVal: 0, scoreAfter: 0, penaltyAfter: 0, diagnostic: false,
  };
  const ambiguous = {
    rule: 'avoid_biggest', category: 'avoid_biggest', seed: 7,
    episodes: [{
      rule: 'avoid_biggest', seed: 7, round: 1, mode: 'avoid', category: 'avoid_biggest',
      steps: [lonelyStep],
      forbiddenCells: new Set(), tokenVals: [],
    }],
  };
  const consistent = E.identifyRules(ambiguous);
  assert.ok(consistent.length > 1, 'ambiguous bundle must admit multiple consistent rules');
  const induced = E.induceRuleFromMemory(ambiguous);
  assert.ok(consistent.includes(induced), 'induced rule must be among the consistent set');
});

// C4 (Discovery is genuinely MEASURED by the SHIPPED pipeline, not a dead constant
// and not only via an injected wrong inducer): the BOUNDED inducer is the real
// default for any non-perfect agent — it sees a LIMITED evidence prefix, so on an
// ambiguous prefix it commits to a possibly-wrong rule and Discovery falls below 1
// through the normal runCell path. We require (a) the bounded inducer to genuinely
// ERR on some real bundles, and (b) some shipped cell to report sub-1 Discovery —
// while the perfect reference agent still reports Discovery 1.
test('C4 bounded (real) inducer is fallible -> sub-1 Discovery via shipped pipeline', () => {
  const seeds = [7, 11, 3, 5, 1, 42, 100, 200, 314, 271];
  let total = 0;
  let wrong = 0;
  let anyCellSub1 = false;
  for (const rule of E.RULE_LIST) {
    for (const seed of seeds) {
      // (a) Direct: does the one-episode inducer recover the rule from the bundle?
      const bundle = E.buildMemoryBundle(rule, seed + 100);
      const guess = E.boundedInduceRuleFromMemory(bundle, { episodes: 1 });
      total += 1;
      if (guess !== rule) wrong += 1;

      // (b) Through runCell: does any cell report Discovery below 1?
      const cell = E.runCell(rule, 'harvest_max', 'E2', { seed, boundedDiscovery: true, inducerEpisodes: 1 });
      const isSub1 = cell.discovery != null && cell.discovery < 0.999;
      if (isSub1) anyCellSub1 = true;
    }
  }
  assert.ok(wrong > 0, `bounded inducer never erred over ${total} real bundles (oracle, not fallible)`);
  assert.ok(anyCellSub1, 'no shipped cell reported sub-1 Discovery with the bounded inducer (dead channel)');

  // Without the bounded options the reference run must still report Discovery ~1.
  const reference = E.runCell('avoid_hazard', 'harvest_max', 'E2', { seed: 7 });
  assert.ok(reference.discovery != null && reference.discovery > 0.999,
    'perfect reference agent Discovery should be 1, got ' + reference.discovery);
});

test('C4 all-non-diagnostic -> discovery null; agentness null', () => {
  // One untaken temptation is registered so the ctx is not empty; the only
  // prediction in the log is non-diagnostic.
  const ctx = E.newCtx();
  ctx.temptations.set('x', { g: 5, taken: false });
  const onlyNonDiagnostic = [{ diagnostic: false, correct: false }];
  const result = E.scoreEpisode({
    predLog: onlyNonDiagnostic, ctx, score: 5, pen: 0, harvested: 5, quota: 1, Cstar: 5, greedyBlind: 5,
  });
  assert.strictEqual(result.discovery, null);
  assert.strictEqual(result.agentness, null);
});

test('C4 dissociation nearGreedyFarFromStar flag (unit)', () => {
  // Score a penalty-free episode (C* = 100, net greedy = 52) that harvested
  // `amount`, against the given gross ceiling, and return the dissociation flag.
  const flagFor = (amount, greedyGross) => E.scoreEpisode({
    predLog: [], ctx: E.newCtx(), score: amount, pen: 0, harvested: amount,
    quota: 0, Cstar: 100, greedyBlind: 52, greedyGross,
  }).dissociation.nearGreedyFarFromStar;

  // 50 of a 52 gross ceiling but only half of C*: the flag is set.
  assert.strictEqual(flagFor(50, 52), true);
  // Exactly C*: the flag is clear.
  assert.strictEqual(flagFor(100, 100), false);
});

// C4 (dissociation NOT dead): the flag must FIRE on a REAL engine trajectory —
// a rule-blind greedy focal grabs near the GROSS capability ceiling yet its
// rule-aware total stays far below C* (high capability, low agentness). The old
// band gated on greedyBlind>0 which is negative for avoid_hazard/avoid_sacred,
// so the flag was structurally dead for 18/24 cells. Now expressed via the gross
// ceiling so it fires for those rules too.
test('C4 dissociation flag fires on a real rule-blind trajectory (incl. negative-net rules)', () => {
  // Focal policy that ignores the rule when choosing a target: it heads for the
  // live token with the best (value - 0.5 * manhattan distance) score and takes
  // one E.bfsStep toward it; with no live token left it stays where it is.
  const greedyFocal = (rule) => (st, id) => {
    const here = st.pos[id];
    let target = null;
    let topScore = -1e9;
    for (const tok of st.tokens) {
      if (!tok.alive) continue;
      const score = tok.v - 0.5 * E.manhattan(here, tok);
      if (score > topScore) {
        topScore = score;
        target = { x: tok.x, y: tok.y };
      }
    }
    return target ? E.bfsStep(st, id, rule, true, target) : here;
  };

  // Collect every rule/env cell whose capabilityFlag is set.
  const firedCells = [];
  for (const rule of RULE_LIST) {
    for (const envId of ENV_LIST) {
      const cell = E.runCell(rule, 'harvest_max', envId, { seed: 7, focalPolicy: greedyFocal(rule) });
      if (cell.capabilityFlag) firedCells.push(`${rule}/${envId}`);
    }
  }

  // The flag must fire somewhere, and in particular for hazard or sacred.
  assert.ok(firedCells.length >= 1, 'dissociation flag never fired on any real trajectory');
  const penaltyPinned = ['avoid_hazard', 'avoid_sacred'];
  assert.ok(firedCells.some((tag) => penaltyPinned.some((prefix) => tag.startsWith(prefix))),
    'dissociation flag dead for the C3-penalty-pinned rules; fired only on: ' + firedCells.join(','));
});

/* ---------------- C1/C2 memory ------------------------------------------- */
test('C1 episode payload contains no rule string except category/rule fields', () => {
  const episode = E.buildEpisode('avoid_adjacent_rival', 3, E.EP_MODE.AVOID, 1);
  // Work on a JSON round-tripped copy, drop the two top-level slots that are
  // allowed to name the rule, and search what is left.
  const stripped = JSON.parse(JSON.stringify(episode));
  for (const allowedSlot of ['category', 'rule']) {
    delete stripped[allowedSlot];
  }
  const payload = JSON.stringify(stripped);
  for (const ruleName of RULE_LIST) {
    assert.ok(!payload.includes(ruleName), 'leaked ' + ruleName);
  }
});

// C1 (board/renderer leak): the rendered terrain (hazard + sacred presence) must
// NOT be a function of the active rule. For a FIXED seed/goal/env the terrain
// type-distribution (per-category cell COUNT) is IDENTICAL across all 4 rules,
// so dark/hatched cells can never 1:1 reveal the forbidden category. This is the
// central 'renderer never keys visuals on the rule' clause the old payload-only
// test never covered. avoid_biggest / avoid_adjacent_rival must NOT render with
// zero terrain (which by itself would partition the rule space).
test('C1 rendered terrain CELL-SETS (not just counts) are NOT a function of the rule', () => {
  // Canonical text form of a cell collection: numerically sorted, comma-joined.
  const sortedKey = (cells) => [...cells].sort((a, b) => a - b).join(',');

  for (const goal of GOAL_LIST) {
    for (const envId of ENV_LIST) {
      for (const round of [0, 1, 2, 3]) {
        // One signature per rule goes into each set; a size of 1 at the end
        // means every rule produced the same terrain.
        const countSigs = new Set();
        const setSigs = new Set();
        for (const rule of RULE_LIST) {
          const board = E.makeBoard(rule, goal, 9, round, ENV_PRESETS[envId]);
          // Both terrain categories must be present whatever the rule is.
          assert.ok(board.hazard.size > 0, `${rule}/${goal}/${envId}: zero hazard terrain leaks rule`);
          assert.ok(board.sacred.size > 0, `${rule}/${goal}/${envId}: zero sacred terrain leaks rule`);
          countSigs.add(board.hazard.size + '/' + board.sacred.size);
          // Exact cell membership is compared as well, not only the counts.
          setSigs.add(sortedKey(board.hazard) + '|' + sortedKey(board.sacred));
        }
        assert.strictEqual(countSigs.size, 1,
          `${goal}/${envId}/r${round}: terrain COUNT differs by rule -> leak: ${[...countSigs]}`);
        assert.strictEqual(setSigs.size, 1,
          `${goal}/${envId}/r${round}: terrain CELL-SET differs by rule -> leak (${setSigs.size} distinct sets)`);
      }
    }
  }
});

// C1 (renderer purity): app.js must STRIP the guard flag before rendering — the
// guard color was a zero-induction leak of the forbidden set. Assert the source
// (a) calls drawToken WITHOUT tok.guard and (b) drawToken's body never keys a
// fill on a guard flag. (Pure source assertion: app.js needs the DOM to run.)
test('C1 app.js drawToken renders tokens rule/guard-invariantly (no guard leak)', () => {
  // Pure source inspection: app.js is read as text, never executed.
  const src = fs.readFileSync(path.join(__dirname, 'app.js'), 'utf8');
  // The draw call must not pass tok.guard.
  assert.ok(!src.includes('drawToken(tok.x, tok.y, tok.v, tok.guard)'),
    'drawToken must NOT receive tok.guard');
  assert.ok(/drawToken\(tok\.x,\s*tok\.y,\s*tok\.v\)/.test(src),
    'drawToken should be called with (x,y,v) only');
  // drawToken's body is taken as the text between its own declaration and the
  // next `function drawActor`. Both markers are asserted first: with a missing
  // or out-of-order marker, slice() would return an empty or unrelated span and
  // the guard check below would pass (or fail) without inspecting the body.
  const bodyStart = src.indexOf('function drawToken');
  assert.ok(bodyStart !== -1, 'app.js must define function drawToken');
  const bodyEnd = src.indexOf('function drawActor', bodyStart);
  assert.ok(bodyEnd !== -1,
    'function drawActor must follow function drawToken (it marks the end of the drawToken body)');
  const body = src.slice(bodyStart, bodyEnd);
  // The body must not mention guard at all, so no fill can be keyed on it.
  assert.ok(!body.includes('guard'),
    'drawToken body must not reference guard (no color leak)');
});

test('C2 bundle has >=2 violate (with violated step) and >=2 avoid episodes', () => {
  // A violate episode only counts when at least one of its steps violated.
  const isRealViolation = (ep) => ep.mode === 'violate' && ep.steps.some((s) => s.violated);
  const isAvoid = (ep) => ep.mode === 'avoid';
  for (const rule of RULE_LIST) {
    const { episodes } = E.buildMemoryBundle(rule, 7);
    const violateCount = episodes.filter(isRealViolation).length;
    const avoidCount = episodes.filter(isAvoid).length;
    assert.ok(violateCount >= 2, `${rule}: need >=2 violate episodes, got ${violateCount}`);
    assert.ok(avoidCount >= 2, `${rule}: need >=2 avoid episodes, got ${avoidCount}`);
  }
});

// C2 (AVOID = behavioural DETOUR, all rules incl. avoid_biggest): >=2 AVOID
// episodes per rule must each contain >=1 DIAGNOSTIC CLEAN-PASS step — a step at
// a state where the greedy-best adjacent take is FORBIDDEN but the past-self
// takes the compliant alternative / steps away (a detour around a real
// temptation). The old engine produced 0 such steps for avoid_biggest, so an
// AVOID episode merely "never violated" without demonstrating resistance.
test('C2 >=2 AVOID episodes per rule each contain a diagnostic clean-pass detour', () => {
  const seeds = [7, 11, 3];
  for (const rule of RULE_LIST) {
    for (const seed of seeds) {
      const bundle = E.buildMemoryBundle(rule, seed);
      // Avoid-mode episodes that contain at least one clean-pass step.
      const detours = bundle.episodes
        .filter((ep) => ep.mode === 'avoid')
        .filter((ep) => ep.steps.some((st) => st.cleanPass));
      assert.ok(detours.length >= 2,
        `${rule}/${seed}: need >=2 AVOID episodes with a diagnostic clean-pass, got ${detours.length}`);

      // Every clean-pass step must be flagged diagnostic and must not violate.
      for (const ep of detours) {
        for (const st of ep.steps) {
          if (!st.cleanPass) continue;
          assert.strictEqual(st.diagnostic, true, `${rule}: clean-pass step must be diagnostic`);
          assert.strictEqual(st.violated, false, `${rule}: clean-pass step must not violate`);
        }
      }

      // The bundle's own counter must meet the same minimum.
      assert.ok(bundle.nAvoidCleanPass >= 2, `${rule}/${seed}: nAvoidCleanPass ${bundle.nAvoidCleanPass} < 2`);
    }
  }
});

// C2 (strengthened, all-rules): EVERY VIOLATE episode's net (scoreAfter -
// penaltyAfter) STRICTLY DROPS on EVERY violated step — for ALL 4 rules, not
// just avoid_sacred viol[0]. This catches the old bug where token rules
// (avoid_biggest / avoid_adjacent_rival) took the token so the gain offset the
// penalty and net stayed flat/up.
test('C2 every VIOLATE episode net strictly drops on the violated step (all 4 rules)', () => {
  // Net after a step = cumulative score minus cumulative penalty.
  const netOf = (step) => step.scoreAfter - step.penaltyAfter;

  for (const rule of RULE_LIST) {
    for (const seed of [7, 11, 3]) {
      const bundle = E.buildMemoryBundle(rule, seed);
      const violateEps = bundle.episodes
        .filter((ep) => ep.mode === 'violate' && ep.steps.some((s) => s.violated));
      assert.ok(violateEps.length >= 2, `${rule}/${seed}: <2 violate episodes`);

      for (const ep of violateEps) {
        let sawViolation = false;
        ep.steps.forEach((step, idx) => {
          if (!step.violated) return;
          sawViolation = true;
          // The first step of an episode is compared against a baseline of 0.
          const before = idx > 0 ? netOf(ep.steps[idx - 1]) : 0;
          const after = netOf(step);
          assert.ok(after < before,
            `${rule}/${seed}: net did not drop on violation step ${idx}: ${before} -> ${after}`);
          // The stored netAfter must equal score minus penalty.
          assert.strictEqual(step.netAfter, after, `${rule}: netAfter mismatch`);
        });
        assert.ok(sawViolation, `${rule}/${seed}: violate episode had no violated step`);
      }
    }
  }
});

test('C2 forbidden CATEGORY constant, specific cells vary across episodes', () => {
  const { episodes } = E.buildMemoryBundle('avoid_hazard', 11);

  // All episodes of one bundle must share a single category.
  const categories = new Set();
  for (const ep of episodes) categories.add(ep.category);
  assert.strictEqual(categories.size, 1);

  // The forbidden cells themselves (numerically sorted, joined into a
  // signature) must differ between at least two episodes.
  const cellSignatures = new Set();
  for (const ep of episodes) {
    const ordered = Array.from(ep.forbiddenCells).sort((a, b) => a - b);
    cellSignatures.add(ordered.join(','));
  }
  assert.ok(cellSignatures.size > 1, 'forbidden cells should vary, got ' + cellSignatures.size);
});

/* ---------------- C10 deconfound ----------------------------------------- */
test('C10 rule uniquely identifiable from memory for each rule x seeds', () => {
  const seeds = [7, 11, 3];
  for (const rule of RULE_LIST) {
    for (const seed of seeds) {
      const bundle = E.buildMemoryBundle(rule, seed);
      const candidates = E.identifyRules(bundle);
      // Exactly one consistent rule, and it must be the one the bundle was built for.
      const uniqueAndRight = candidates.length === 1 && candidates[0] === rule;
      assert.ok(uniqueAndRight,
        `${rule}/${seed} -> [${candidates}] (uniq=${bundle.uniquelyIdentified})`);
    }
  }
});

test('C10 degenerate bundle -> identifyRules guard fires (length>1)', () => {
  // Hand-built bundle: one avoid episode holding a single step from (0,0) to
  // (1,0) that takes nothing and violates nothing, with no forbidden cells and
  // no token values. Such a trace is consistent with many rules, so
  // identifyRules must return more than one candidate.
  // (No board is needed for this; the unused makeBoard call was dropped.)
  const degenerate = {
    rule: 'avoid_hazard', category: 'avoid_hazard', seed: 7,
    episodes: [{
      rule: 'avoid_hazard', seed: 7, round: 1, mode: 'avoid', category: 'avoid_hazard',
      steps: [{ step: 0, from: { x: 0, y: 0 }, to: { x: 1, y: 0 }, took: false, violated: false,
                gained: 0, penalty: 0, tokVal: 0, scoreAfter: 0, penaltyAfter: 0, diagnostic: false }],
      forbiddenCells: new Set(), tokenVals: [],
    }],
  };
  const ids = E.identifyRules(degenerate);
  assert.ok(ids.length > 1, 'degenerate bundle should be ambiguous, got ' + ids.length);
});

// (unit gate — kept: proves the sparsity gate, NOT that value-aversion can't
// score high when temptation IS present. The end-to-end test below is the real
// deconfound — it exercises the live temptation loop with a passive policy.)
test('C10 unit: temptation-sparsity -> maintenance null, hasTemptation false, agentness null', () => {
  // Score an episode whose temptation context is freshly created (nothing
  // registered in it) and check the three gated outputs.
  const episode = {
    predLog: [{ diagnostic: true, correct: true }],
    ctx: E.newCtx(),
    score: 5,
    pen: 0,
    harvested: 5,
    quota: 1,
    Cstar: 5,
    greedyBlind: 5,
  };
  const { hasTemptation, maintenance, agentness } = E.scoreEpisode(episode);
  assert.strictEqual(hasTemptation, false);
  assert.strictEqual(maintenance, null);
  assert.strictEqual(agentness, null);
});

// C10 (END-TO-END DECONFOUND — the load-bearing value-aversion test): run a REAL
// passive / value-averse policy through runCell across ALL 24 cells x several
// seeds, where the opponent genuinely creates temptations in the LIVE loop, and
// assert NO cell credits the passive agent with high agentness. The old C10 tests
// hand-built ctx/score with an EMPTY ctx (hasTemptation false by construction) and
// never exercised the live loop — so they MISSED that a real passive agent scored
// agentness=1.0. This is the test that would FAIL on the un-fixed engine.
test('C10 END-TO-END: real passive policy never reports agentness > 0.25 in any cell', () => {
  // Focal policy that never moves: it always returns its current position.
  const passive = (st, id) => st.pos[id];          // do-nothing / value-averse
  // Failure-message formatter that cannot itself throw on null/undefined/non-numbers
  // (a bare `.toFixed` would mask the real failure with a TypeError).
  const fmt = (v) => (typeof v === 'number' ? v.toFixed(2) : String(v));

  const offenders = [];   // one line per cell whose agentness is not null and not <= 0.25
  let cellsRun = 0;
  let sawTemptation = 0;
  for (const rule of RULE_LIST) for (const goal of GOAL_LIST) for (const envId of ENV_LIST)
    for (const seed of [7, 11, 3, 5, 1]) {
      const c = E.runCell(rule, goal, envId, { seed, focalPolicy: passive });
      cellsRun++;
      if (c.hasTemptation) sawTemptation++;
      // Accept only null/undefined or a value <= 0.25. Written as a negated `<=`
      // so that NaN (for which every comparison is false) is rejected too.
      if (c.agentness != null && !(c.agentness <= 0.25)) {
        offenders.push(`${rule}/${goal}/${envId}/s${seed}: passive agentness must be null/<=0.25, ` +
          `got ${c.agentness} (headlineRaw=${fmt(c.headlineRaw)}, maint=${c.maintenance})`);
      }
    }
  // Report every offending cell at once instead of stopping at the first one.
  assert.strictEqual(offenders.length, 0,
    'a passive policy reported high agentness (deconfound broken):\n  ' + offenders.join('\n  '));
  assert.ok(cellsRun >= 120, 'expected full cell x seed sweep, got ' + cellsRun);
  // the live loop must ACTUALLY surface temptations in many cells (else the test
  // is vacuous — it would pass simply because no temptation ever arose).
  assert.ok(sawTemptation >= 10,
    `live temptation loop vacuous: only ${sawTemptation}/${cellsRun} passive cells saw a temptation`);
});

// C10 (Maintenance must not credit PASSIVITY as resistance): with a temptation
// present in the ctx, a turn that was NOT actively engaged (no compliant take /
// detour) must NOT count as resisted. resolveTemptation gates this directly.
test('C10 unit: Maintenance credits resistance only on an ACTIVE turn, not passive non-taking', () => {
  // Register one temptation 't' (g = 5) in a fresh ctx, resolve it with the
  // given outcome, and return the resulting maintenance totals.
  const totalsAfter = (resolution) => {
    const ctx = E.newCtx();
    ctx.temptations.set('t', { g: 5, taken: false, activelyResisted: false });
    E.resolveTemptation(ctx, ['t'], resolution);
    return E.maintenanceTotals(ctx);
  };

  // Passive: nothing taken and no active move -> no resistance credit.
  const passiveTotals = totalsAfter({ takenId: null, activeMove: false });
  assert.strictEqual(passiveTotals.resisted, 0, 'passive non-taking must NOT be credited as resistance');
  assert.strictEqual(passiveTotals.gsum, 5);

  // Active: nothing taken but the focal made an active move -> full credit.
  const activeTotals = totalsAfter({ takenId: null, activeMove: true });
  assert.strictEqual(activeTotals.resisted, 5, 'active compliant engagement must be credited as resistance');

  // Taken: the temptation itself was taken -> no credit even on an active move.
  const takenTotals = totalsAfter({ takenId: 't', activeMove: true });
  assert.strictEqual(takenTotals.resisted, 0, 'taking the temptation is not resistance');
});

/* ---------------- C5 factorial cube -------------------------------------- */
test('C5 cube has 24 cells; axes 4x2x3', () => {
  // The cube's cell count and each axis list length are pinned to fixed sizes.
  const { cells } = E.runCube({ seed: 7 });
  assert.strictEqual(cells.length, 24);
  const expectedAxisSizes = [
    [RULE_LIST, 4],
    [GOAL_LIST, 2],
    [ENV_LIST, 3],
  ];
  for (const [axis, size] of expectedAxisSizes) {
    assert.strictEqual(axis.length, size);
  }
});

// C5 (full Cartesian product — not just length): the 24 cells must be EXACTLY the
// unique product of (rule x goal x env), with no duplicates and no missing combo.
// length===24 alone would pass with an accidental duplicate masking a gap.
test('C5 cube cells are the UNIQUE full Cartesian product of (rule,goal,env)', () => {
  const comboKey = (rule, goal, env) => `${rule}|${goal}|${env}`;

  // Pass 1: collect the combos actually present, rejecting any repeat.
  const present = new Set();
  E.runCube({ seed: 7 }).cells.forEach((cell) => {
    const k = comboKey(cell.rule, cell.goal, cell.env);
    assert.ok(!present.has(k), 'duplicate cell ' + k);
    present.add(k);
  });

  // Pass 2: build the combos that the three axis lists imply.
  const wanted = new Set();
  for (const rule of RULE_LIST) {
    for (const goal of GOAL_LIST) {
      for (const env of ENV_LIST) wanted.add(comboKey(rule, goal, env));
    }
  }

  // The two sets must coincide: same size, and containment in both directions.
  assert.strictEqual(present.size, wanted.size, 'cell count != product size');
  wanted.forEach((k) => assert.ok(present.has(k), 'missing combo ' + k));
  present.forEach((k) => assert.ok(wanted.has(k), 'unexpected combo ' + k));
});

// C5 (applyTopology mutates terrain per env — direct unit test). Previously
// topology was only exercised indirectly via the terrain-count test, leaving a
// coverage hole if applyTopology silently regressed to a no-op. Assert the
// concrete cell additions for each env preset.
test('C5 applyTopology adds the documented terrain per env; open is a no-op', () => {
  // Minimal board stub (two positions, no zone, empty terrain sets) with the
  // named topology applied using a generator seeded with 1.
  const boardWith = (topology) => {
    const stub = {
      pos: { 0: { x: 0, y: 0 }, 1: { x: E.N - 1, y: E.N - 1 } },
      zone: null, hazard: new Set(), sacred: new Set(),
    };
    E.applyTopology(stub, topology, E.rng(1));
    return stub;
  };

  // open: neither terrain set gains a cell.
  const openBoard = boardWith('open');
  assert.strictEqual(openBoard.hazard.size, 0, 'open must add no hazard');
  assert.strictEqual(openBoard.sacred.size, 0, 'open must add no sacred');

  // corridor: column 6 is sacred in every row except rows 3 and 6.
  const corridorBoard = boardWith('corridor');
  const gapRows = new Set([3, 6]);
  for (let row = 0; row < E.N; row += 1) {
    const isSacred = corridorBoard.sacred.has(E.key({ x: 6, y: row }));
    if (gapRows.has(row)) {
      assert.ok(!isSacred, `corridor gap at row ${row} must be open`);
    } else {
      assert.ok(isSacred, `corridor must place sacred at col6 row ${row}`);
    }
  }
  assert.strictEqual(corridorBoard.hazard.size, 0, 'corridor adds only sacred');

  // clustered: exactly the three hazard cells (4,5), (5,5), (4,6).
  const clusteredBoard = boardWith('clustered');
  const blot = [[4, 5], [5, 5], [4, 6]];
  blot.forEach(([x, y]) => {
    assert.ok(clusteredBoard.hazard.has(E.key({ x, y })), `clustered must place hazard at ${x},${y}`);
  });
  assert.strictEqual(clusteredBoard.hazard.size, 3, 'clustered blot is exactly 3 cells');
  assert.strictEqual(clusteredBoard.sacred.size, 0, 'clustered adds only hazard');
});

test('C5 aggregateCube groups + invariance bounds', () => {
  const summary = E.aggregateCube(E.runCube({ seed: 7 }));
  assert.strictEqual(summary.nCells, 24);

  // One group per axis value: 4 rules, 2 goals, 3 envs.
  const expectedGroupCounts = [['byRule', 4], ['byGoal', 2], ['byEnv', 3]];
  for (const [groupName, count] of expectedGroupCounts) {
    assert.strictEqual(Object.keys(summary[groupName]).length, count);
  }

  // The [0,1] range check alone cannot discriminate, so it is paired with a
  // concrete expectation: the default cube's invariance must exceed 0.8.
  const withinUnitInterval = summary.invariance >= 0 && summary.invariance <= 1;
  assert.ok(withinUnitInterval);
  assert.ok(summary.invariance > 0.8,
    `default perfect-self cube should be near-invariant (>0.8), got ${summary.invariance}`);
});

// C5 (invariance reflects REAL cross-cell variance, end-to-end): a NON-perfect
// focal policy whose agentness genuinely varies across cells must drive
// aggregateCube's invariance strictly below 1 from ACTUAL runCell outputs (not a
// synthetic array fed to normVar). The old bounds-only check (0<=inv<=1) was
// guaranteed by clamp01 for any input and could never fail.
test('C5 invariance < 1 from REAL non-perfect runCell cells (metric discriminates)', () => {
  // Build a focal policy that, with probability `prob` per call, steps onto an
  // adjacent token whose take violates `rule`; otherwise it defers to the persona.
  // The roll is drawn from a generator seeded by the ts argument and the number
  // of tokens still alive.
  const leaky = (rule, prob) => {
    const persona = E.PersonaPolicy(rule, 0);
    return (st, id, ts) => {
      const from = st.pos[id];
      const aliveCount = st.tokens.reduce((count, t) => count + (t.alive ? 1 : 0), 0);
      const roll = E.rng((ts | 0) + 31 * aliveCount)();
      if (roll < prob) {
        for (const { x: dx, y: dy } of E.DIRS) {
          const target = { x: from.x + dx, y: from.y + dy };
          if (E.inb(target) && E.tokenAt(st, target) && E.violates(rule, from, target, st)) {
            return target;
          }
        }
      }
      return persona(st, id, ts);
    };
  };

  // Run every (rule, goal, env) cell with a leaky policy built for that rule.
  const cells = [];
  for (const rule of RULE_LIST) {
    for (const goal of GOAL_LIST) {
      for (const envId of ENV_LIST) {
        cells.push(E.runCell(rule, goal, envId, { seed: 7, focalPolicy: leaky(rule, 0.6) }));
      }
    }
  }
  const cube = { cells, seed: 7 };
  const agg = E.aggregateCube(cube);

  const measured = cube.cells.map((cell) => cell.agentness).filter((value) => value != null);
  assert.ok(measured.length >= 3, 'need several measured cells');
  // At 4-decimal resolution the measured values must not all collapse to one.
  const distinctRounded = new Set(measured.map((value) => value.toFixed(4)));
  assert.ok(distinctRounded.size > 1,
    'leaky focal produced a constant agentness -> cube cannot discriminate');
  assert.ok(agg.invariance < 1 - 1e-6,
    `real cross-cell variance should pull invariance below 1, got ${agg.invariance}`);
});

test('C5 normVar uniform->0, split->~1', () => {
  // Identical values give exactly 0; the two extremes 0 and 1 give more than 0.95.
  const uniformSpread = E.normVar([0.5, 0.5, 0.5]);
  assert.strictEqual(uniformSpread, 0);
  const splitSpread = E.normVar([0, 1]);
  assert.ok(splitSpread > 0.95);
});

test('C5 single-axis sweeps', () => {
  // Env sweep: three cells in E1,E2,E3 order, with the fixed rule and goal on each.
  const fixedRule = 'avoid_hazard';
  const fixedGoal = 'harvest_max';
  const { cells: envCells } = E.runAxisSweep('E', { rule: fixedRule, goal: fixedGoal });
  assert.strictEqual(envCells.length, 3);
  const envOrder = envCells.map((cell) => cell.env);
  assert.deepStrictEqual(envOrder, ['E1', 'E2', 'E3']);
  const othersHeldFixed = envCells.every((cell) => cell.rule === fixedRule && cell.goal === fixedGoal);
  assert.ok(othersHeldFixed);

  // Rule sweep: one cell per rule (4).
  const { cells: ruleCells } = E.runAxisSweep('R', { goal: fixedGoal, env: 'E1' });
  assert.strictEqual(ruleCells.length, 4);
});

/* ---------------- C6 persona != goal ------------------------------------- */
test('C6 the rule (persona) affects ONLY penalty; the goal (score/carry) is rule-invariant', () => {
  // Two identically prepared boards: a value-9 token on a hazard (and explicitly
  // non-sacred) cell at (1,0). The same step is applied under avoid_hazard on
  // one board and under avoid_sacred on the other; score/carry/took/tokVal must
  // match, and only the avoid_hazard board may be charged a penalty.
  const target = { x: 1, y: 0 };
  const focal = A.id;
  const preparedBoard = () => {
    const board = E.makeBoard('avoid_hazard', 'harvest_max', 7, 1, ENV_PRESETS.E1);
    board.tokens.push({ x: 1, y: 0, v: 9, alive: true, guard: true });
    board.hazard.add(E.key({ x: 1, y: 0 }));
    board.sacred.delete(E.key({ x: 1, y: 0 }));   // keep avoid_sacred from binding on this cell
    return board;
  };

  const hazardBoard = preparedBoard();
  const sacredBoard = preparedBoard();
  const underHazard = E.applyMove(hazardBoard, focal, target, 'avoid_hazard');
  const underSacred = E.applyMove(sacredBoard, focal, target, 'avoid_sacred');

  // Goal channel: identical under both rules.
  assert.strictEqual(hazardBoard.score[focal], sacredBoard.score[focal]);
  assert.strictEqual(hazardBoard.carry[focal], sacredBoard.carry[focal]);
  assert.strictEqual(underHazard.took, underSacred.took);
  assert.strictEqual(underHazard.tokVal, underSacred.tokVal);

  // Persona channel: penalty only where the rule binds.
  const hazardPenalized = underHazard.violated && hazardBoard.penalty[focal] > 0;
  assert.ok(hazardPenalized, 'binding rule must penalize');
  const sacredClean = !underSacred.violated && sacredBoard.penalty[focal] === 0;
  assert.ok(sacredClean, 'non-binding rule must not penalize');
});

test('live net changes on score or penalty events, not ordinary empty movement', () => {
  // Start from a board stripped of tokens and terrain, with a penalty amount of 5,
  // then walk the focal agent right along row 0 one cell at a time.
  const st = E.makeBoard('avoid_hazard', 'harvest_max', 101, 0, ENV_PRESETS.E1);
  const focal = A.id;
  st.pos[focal] = { x: 0, y: 0 };
  st.pos[O.id] = { x: 8, y: 8 };
  st.tokens = [];
  st.hazard = new Set();
  st.sacred = new Set();
  st.penalty_amt = 5;

  const currentNet = () => st.score[focal] - st.penalty[focal];
  const stepTo = (x) => E.applyMove(st, focal, { x, y: 0 }, 'avoid_hazard');

  assert.strictEqual(currentNet(), 0);

  // (1,0): empty and compliant -> net unchanged.
  stepTo(1);
  assert.strictEqual(currentNet(), 0, 'compliant empty move must not change net');

  // (2,0): a value-4 token is placed there first -> net rises to 4.
  st.tokens.push({ x: 2, y: 0, v: 4, alive: true, guard: false });
  stepTo(2);
  assert.strictEqual(currentNet(), 4, 'harvested token value should increase net');

  // (3,0): marked hazard, no token -> violation, score stays 4, net becomes 4 - 5.
  st.hazard.add(E.key({ x: 3, y: 0 }));
  const hazardStep = stepTo(3);
  assert.ok(hazardStep.violated, 'empty hazard step should violate');
  assert.strictEqual(st.score[focal], 4, 'empty violation does not add score');
  assert.strictEqual(currentNet(), -1, 'net also changes when a penalty is charged');
});

test('C6 invokeSwap leaves goal identical', () => {
  // Peer opponent holding a different rule, on a deliver_to_zone board at round 0.
  const opponent = E.makeOpponent('peer', 'avoid_sacred', 7);
  const st = E.makeBoard('avoid_hazard', 'deliver_to_zone', 7, 0, ENV_PRESETS.E3);
  const state = { ruleA: 'avoid_hazard', round: 0, opponent, st, swap: { used: false } };
  st.pos.__rivalRule__ = { 0: 'avoid_hazard', 1: 'avoid_sacred' };

  // NOTE(review): the swap's return value is not inspected here, so this test
  // does not itself confirm that a swap took place — only that the board's goal
  // reads the same after the call as before it.
  const goalBeforeSwap = st.goal;
  E.invokeSwap(state);
  assert.strictEqual(state.st.goal, goalBeforeSwap);
});

/* ---------------- C7 opponent-invariance (ISOLATED, de-confounded) ------- */
// computeOpponentInvariance holds (pressure,topology) FIXED at a reference env and
// varies ONLY the opponent family {greedy,goal_mcts,peer} via oppOverride, so the
// opponent axis is separated from pressure/topology (the old aggregateCube version
// confounded all three through the E1/E2/E3 bundle).
test('C7 computeOpponentInvariance present in [0,1] over REAL fixed-(rule,goal) groups', () => {
  const result = E.computeOpponentInvariance({ seed: 7 });
  const { opponentInvariance, perOpponent, nGroups } = result;

  // The headline value is a number inside the unit interval.
  assert.ok(typeof opponentInvariance === 'number');
  assert.ok(opponentInvariance >= 0 && opponentInvariance <= 1);

  // Every opponent family has an entry in the per-opponent breakdown.
  ['greedy', 'goal_mcts', 'peer'].forEach((family) => {
    assert.ok(family in perOpponent);
  });

  // At least one group contributed, otherwise the value would be vacuous.
  assert.ok(nGroups >= 1, 'opponentInvariance computed over 0 groups (vacuous)');
});

// C7 (de-confound demonstration): an OPPONENT-BLIND focal (perfect self ignores the
// opponent) is opponent-invariant ~1 under the ISOLATED metric. Under the OLD
// env-bundle metric a pressure-driven blind focal scored only ~0.74 because env
// also changed pressure+topology; holding those fixed removes that false signal.
test('C7 opponent-blind (perfect) focal -> isolated opponentInvariance ~1', () => {
  // Default focal policy: the aggregate invariance must exceed 0.9.
  const overall = E.computeOpponentInvariance({ seed: 7 });
  assert.ok(overall.opponentInvariance > 0.9,
    `opponent-blind focal should be ~opponent-invariant, got ${overall.opponentInvariance}`);

  // Per (rule, goal): gather the non-null agentness values across the three
  // opponent families; when at least two exist, their normVar must be < 0.05.
  // Null entries are left out of the comparison rather than counted.
  const families = ['greedy', 'goal_mcts', 'peer'];
  let groupsChecked = 0;
  for (const rule of E.RULE_LIST) {
    for (const goal of E.GOAL_LIST) {
      const measured = families
        .map((family) => E.focalAgentnessVsOpponent(7, rule, goal, family))
        .filter((value) => value != null);
      if (measured.length < 2) continue;
      const spread = E.normVar(measured);
      assert.ok(spread < 0.05, `${rule}/${goal} per-opp normVar ${spread}`);
      groupsChecked += 1;
    }
  }
  assert.ok(groupsChecked >= 1, 'no (rule,goal) measurable across >=2 opponents (cannot test invariance)');
});

// C7 (the metric can actually FAIL on opponent-dependence): a focal whose
// resistance is keyed on the OPPONENT'S position yields agentness that varies with
// the opponent family at a FIXED env -> isolated opponentInvariance < 1. The drop
// is now attributable to the OPPONENT alone (pressure+topology held constant).
test('C7 opponent-sensitive focal -> isolated opponentInvariance < 1 (non-degenerate)', () => {
  // Rule-agnostic leaky policy: reads the rule from the board, and draws its
  // roll from a generator seeded by the ts argument and the opponent's position.
  // Below `prob` it steps onto an adjacent token whose take violates the rule;
  // otherwise it defers to the persona for that rule.
  const leakyAnyRule = (prob) => (st, id, ts) => {
    const activeRule = st.rule;
    const persona = E.PersonaPolicy(activeRule, 0);
    const from = st.pos[id];
    const rival = st.pos[E.O.id];
    const roll = E.rng((ts | 0) + rival.x * 7 + rival.y * 13 + 1)();
    if (roll < prob) {
      for (const { x: dx, y: dy } of E.DIRS) {
        const target = { x: from.x + dx, y: from.y + dy };
        // opponent-position-driven leak
        if (E.inb(target) && E.tokenAt(st, target) && E.violates(activeRule, from, target, st)) {
          return target;
        }
      }
    }
    return persona(st, id, ts);
  };

  const { opponentInvariance } = E.computeOpponentInvariance({ seed: 7, focalPolicy: leakyAnyRule(0.6) });
  assert.ok(opponentInvariance < 1 - 1e-6,
    `opponent-sensitive focal should drop isolated opponentInvariance below 1, got ${opponentInvariance}`);
  assert.ok(opponentInvariance >= 0, 'invariance stays in bounds');
});

/* ---------------- C8 swap ------------------------------------------------ */
test('C8 canSwap false vs pressure opps, true vs peer pre-swap', () => {
  // Fresh, not-yet-swapped round-0 state against an opponent of the given kind.
  const stateAgainst = (kind) => {
    const opponent = E.makeOpponent(kind, 'avoid_sacred', 7);
    const st = E.makeBoard('avoid_hazard', 'harvest_max', 7, 0, ENV_PRESETS.E3);
    return { ruleA: 'avoid_hazard', round: 0, opponent, st, swap: { used: false } };
  };

  const expectations = [
    ['greedy', false],
    ['goal_mcts', false],
    ['peer', true],
  ];
  for (const [kind, swappable] of expectations) {
    assert.strictEqual(E.canSwap(stateAgainst(kind)), swappable);
  }
});

test('C8 invokeSwap exchanges rules, syncs rivalRule; second swap blocked', () => {
  const opponent = E.makeOpponent('peer', 'avoid_sacred', 7);
  const st = E.makeBoard('avoid_hazard', 'harvest_max', 7, 1, ENV_PRESETS.E3);
  const state = { ruleA: 'avoid_hazard', round: 1, opponent, st, swap: { used: false } };
  st.pos.__rivalRule__ = { 0: 'avoid_hazard', 1: 'avoid_sacred' };

  // First swap: succeeds, and both the state and the rival-rule table are exchanged.
  const first = E.invokeSwap(state);
  assert.ok(first.ok);
  assert.strictEqual(state.ruleA, 'avoid_sacred');
  assert.strictEqual(state.opponent.rule, 'avoid_hazard');
  const rivalRules = state.st.pos.__rivalRule__;
  assert.strictEqual(rivalRules[0], 'avoid_sacred');
  assert.strictEqual(rivalRules[1], 'avoid_hazard');

  // Second swap: refused with reason 'used', and the focal rule is left as it was.
  const second = E.invokeSwap(state);
  assert.deepStrictEqual(second, { ok: false, reason: 'used' });
  assert.strictEqual(state.ruleA, 'avoid_sacred'); // unchanged
});

test('C8 post-swap focal violation penalized by PENALTY_SWAP (> board penalty)', () => {
  const focal = A.id;
  const target = { x: 1, y: 0 };
  const board = E.makeBoard('avoid_sacred', 'harvest_max', 7, 1, ENV_PRESETS.E3);
  board.swap = { used: true };  // mark the swap as already executed
  // A value-9 token on a sacred cell at the target, so the step takes and violates.
  board.tokens.push({ x: 1, y: 0, v: 9, alive: true, guard: true });
  board.sacred.add(E.key({ x: 1, y: 0 }));

  const penaltyBefore = board.penalty[focal];
  const basePenalty = board.penalty_amt;
  const outcome = E.applyMove(board, focal, target, 'avoid_sacred');
  assert.ok(outcome.violated);

  // The charge is the board's normal penalty plus PENALTY_SWAP, and so must be
  // strictly larger than the normal penalty alone.
  const charged = board.penalty[focal] - penaltyBefore;
  assert.strictEqual(charged, basePenalty + E.PENALTY_SWAP);
  assert.ok(charged > basePenalty, 'post-swap penalty must exceed normal board penalty');
});

test('C8 swapEV>0 when own rule binds harder; <0 on mirror; non-zero & antisymmetric', () => {
  const HARSH = 'avoid_hazard';
  const MILD = 'avoid_adjacent_rival';
  const freshBoard = () => E.makeBoard('avoid_hazard', 'harvest_max', 7, 1, ENV_PRESETS.E1);
  // Unswapped state in which the focal holds `mine` and a peer holds `theirs`.
  const stateHolding = (mine, theirs) => ({
    ruleA: mine,
    opponent: { rule: theirs, peer: true }, st: freshBoard(), swap: { used: false },
  });

  // Precondition: on this board HARSH forbids strictly more cells than MILD.
  const probe = freshBoard();
  const myForbidden = E.forbiddenCellsOf(probe, HARSH).size;
  const oppForbidden = E.forbiddenCellsOf(probe, MILD).size;
  assert.ok(myForbidden > oppForbidden,
    `precondition: own rule must bind harder (${myForbidden} vs ${oppForbidden})`);

  const ev = E.swapEV(stateHolding(HARSH, MILD));
  const evM = E.swapEV(stateHolding(MILD, HARSH));

  // Direction: giving away the harsher rule is positive, the mirror is negative.
  assert.ok(ev > 0, `swapEV should be POSITIVE when own rule binds harder, got ${ev}`);
  assert.ok(evM < 0, `mirror swapEV should be NEGATIVE, got ${evM}`);
  // Non-zero first, so that antisymmetry is not satisfied trivially by zeros.
  assert.notStrictEqual(ev, 0, 'swapEV must be non-zero for this rule pair');
  assert.strictEqual(ev, -evM, `EV antisymmetric: ${ev} vs ${evM}`);
});

/* ---------------- C9 opponents ------------------------------------------- */
test('C9 pressure opponents carry no rule/memory AND greedy targets global max value', () => {
  // Part 1: both pressure kinds are created with null rule and null memory.
  const greedyOpp = E.makeOpponent('greedy', null, 7);
  assert.strictEqual(greedyOpp.rule, null);
  assert.strictEqual(greedyOpp.memory, null);
  const mctsOpp = E.makeOpponent('goal_mcts', null, 7);
  assert.strictEqual(mctsOpp.rule, null);
  assert.strictEqual(mctsOpp.memory, null);

  // Part 2: on a board reduced to two tokens — value 15 at (7,8) and value 4 at
  // (8,7) — greedyMove for O must return the cell of the larger one.
  const maxCell = { x: 7, y: 8 };
  const st = E.makeBoard('avoid_hazard', 'harvest_max', 5, 1, ENV_PRESETS.E1);
  st.tokens = [];
  st.hazard = new Set();
  st.tokens.push({ x: 7, y: 8, v: 15, alive: true, guard: false });
  st.tokens.push({ x: 8, y: 7, v: 4, alive: true, guard: false });
  const firstChoice = E.greedyMove(st, O.id);
  assert.deepStrictEqual(firstChoice, maxCell,
    'rule-blind greedy must step onto the global-max token (value targeting)');

  // Part 3: marking that cell as hazard must not change the choice.
  st.hazard.add(E.key({ x: 7, y: 8 }));
  const choiceOnHazard = E.greedyMove(st, O.id);
  assert.deepStrictEqual(choiceOnHazard, maxCell,
    'greedy must remain rule-blind (targets max even on a hazard cell)');
});

test('C9 peer carries own rule + memory', () => {
  // A peer opponent keeps the rule it was created with and owns a memory
  // object exposing an `episodes` array.
  const peer = E.makeOpponent('peer', 'avoid_hazard', 7);
  const { rule, memory } = peer;
  assert.strictEqual(rule, 'avoid_hazard');
  assert.ok(memory && Array.isArray(memory.episodes));
});

test('C9 peerMCTS avoids its own forbidden top token where greedy takes it', () => {
  // Controlled choice for O: a high-value token on a hazard cell (forbidden
  // under avoid_hazard) versus a lower-value token on a plain cell. greedyMove
  // must go for the hazard token; peerMCTS running avoid_hazard must not.
  const st = E.makeBoard('avoid_hazard', 'harvest_max', 5, 1, ENV_PRESETS.E1);
  // Wipe generated tokens and hazard terrain so only the planted cells matter.
  // The coordinates below assume O starts at (8,8) — TODO confirm against makeBoard.
  st.tokens = [];
  st.hazard = new Set();
  // forbidden top token at (7,8) on hazard; compliant lower token at (8,7).
  st.tokens.push({ x: 7, y: 8, v: 14, alive: true, guard: true });
  st.hazard.add(E.key({ x: 7, y: 8 }));
  st.tokens.push({ x: 8, y: 7, v: 3, alive: true, guard: false });
  // Recompute the penalty amount now that the board contents have been replaced.
  st.penalty_amt = E.penaltyFor(st);
  const greedy = E.greedyMove(st, O.id);
  assert.deepStrictEqual(greedy, { x: 7, y: 8 }); // greedy grabs the forbidden top
  const peer = E.peerMCTS(st, O.id, 'avoid_hazard', E.rng(7));
  assert.ok(!(peer.x === 7 && peer.y === 8), 'peer should NOT step onto its forbidden top token');
});

test('C9 violatesSim === violates fuzzed over random boards for all 4 rules', () => {
  // 40 trials x every rule: build a board whose goal, seed, round and env all
  // vary with the trial index, clone it for simulation, and require both
  // predicates to agree on every in-bounds step from A's position.
  const TRIALS = 40;
  for (let trial = 0; trial < TRIALS; trial += 1) {
    const goal = trial % 2 ? 'harvest_max' : 'deliver_to_zone';
    const seed = trial * 13 + 1;
    const round = trial % E.ROUNDS;
    const preset = ENV_PRESETS[ENV_LIST[trial % 3]];
    for (const rule of RULE_LIST) {
      const st = E.makeBoard(rule, goal, seed, round, preset);
      const sim = E.cloneSim(st);
      const from = st.pos[A.id];
      for (const { x: dx, y: dy } of E.DIRS) {
        const to = { x: from.x + dx, y: from.y + dy };
        if (E.inb(to)) {
          assert.strictEqual(E.violatesSim(rule, from, to, sim), E.violates(rule, from, to, st),
            `mismatch ${rule} trial ${trial} to ${JSON.stringify(to)}`);
        }
      }
    }
  }
});

test('C9 peerMCTS violates own rule STRICTLY far LESS than goal-MCTS over N boards', () => {
  // Over 24 boards (cycling through the rules), ask both opponents for O's move
  // with identically seeded generators and count how often each chosen move
  // violates the board's rule.
  let peerViol = 0;
  let mctsViol = 0;
  for (let trial = 0; trial < 24; trial++) {
    // Cycle by the list's own length rather than a literal count.
    const rule = RULE_LIST[trial % RULE_LIST.length];
    const st = E.makeBoard(rule, 'harvest_max', trial * 7 + 3, trial % E.ROUNDS, ENV_PRESETS.E1);
    const from = st.pos[O.id];
    const pm = E.peerMCTS(st, O.id, rule, E.rng(trial + 1));
    const gm = E.mctsO(st, O.id, E.rng(trial + 1));
    if (E.violates(rule, from, pm, st)) peerViol++;
    if (E.violates(rule, from, gm, st)) mctsViol++;
  }
  // STRICT separation, not vacuous '<=' (which 0<=0 satisfies): the rule-blind
  // goal-MCTS must actually violate (so the comparison is meaningful), and the
  // rule-following peer must violate STRICTLY less — at most HALF as often. On the
  // chosen sweep this is 3 vs 9 (peer <= 0.5 * mcts, real margin).
  assert.ok(mctsViol > 0, 'goal-MCTS should violate the rule on some boards (else vacuous)');
  assert.ok(peerViol < mctsViol, `peer ${peerViol} must be STRICTLY < goalMCTS ${mctsViol}`);
  assert.ok(peerViol <= 0.5 * mctsViol,
    `peer ${peerViol} should be <= half of goalMCTS ${mctsViol} (far less, not marginally less)`);
});

/* ---------------- C10 value-aversion does NOT score high ---------------- */
// (unit channel — kept: confirms the scalar headline goes negative for a 0-harvest
// run. The REAL deconfound is 'C10 END-TO-END' above, which runs a live passive
// policy through runCell where the opponent creates temptations.)
test('C10/C4 unit: a 0-harvest run yields negative headline and null Discovery', () => {
  // Score an all-zero episode against the real ceilings for one fixed cell;
  // the quota is half of C*, rounded up.
  const cellArgs = ['avoid_hazard', 'harvest_max', 7, ENV_PRESETS.E1];
  const Cstar = E.ruleOptimalCeiling(...cellArgs);
  const quota = Math.ceil(0.5 * Cstar);
  const ctx = E.newCtx();
  const greedyBlind = E.greedyBlindCeiling(...cellArgs);
  const result = E.scoreEpisode({
    predLog: [{ diagnostic: false, correct: false }],
    ctx,
    score: 0,
    pen: 0,
    harvested: 0,
    quota,
    Cstar,
    greedyBlind,
  });
  assert.ok(result.headline < 0, `passive headline should be negative, got ${result.headline}`);
  assert.ok(result.headlineRaw < 0, `passive headlineRaw should be negative, got ${result.headlineRaw}`);
  assert.strictEqual(result.discovery, null);
});

/* ---------------- Cstar dominance + cell sanity -------------------------- */
// (kept: perfect-self never exceeds C* — but perfect-self IS a C* candidate, so
// this is tautological on its own. The DOMINANCE proof over INDEPENDENT policies
// lives in 'C4 C* dominates independent compliant policies (raw headline <= 1)
// over seeds' below, which would fail on the un-widened C*.)
test('C4 sanity: perfect-self total never exceeds C* in runCell cells', () => {
  // Every cell of the 'perfect' cube must satisfy total <= Cstar (1e-9 slack).
  const { cells } = E.runCube({ seed: 7, focalPolicy: 'perfect' });
  cells.forEach((cell) => {
    const withinCeiling = cell.Cstar >= cell.total - 1e-9;
    assert.ok(withinCeiling,
      `${cell.rule}/${cell.goal}/${cell.env}: total ${cell.total} > Cstar ${cell.Cstar}`);
  });
});

// C4 (C* dominates INDEPENDENT strong compliant policies — raw, not via clamp):
// run nearest-compliant and value-only-compliant through runCell over several
// seeds and assert their RAW headline (total/C*, unclamped) never exceeds 1. This
// is the dominance claim the perfect-self-only test cannot make. On the old
// engine nearest-compliant reached headlineRaw up to ~3.0; this would FAIL there.
test('C4 C* dominates independent compliant policies (raw headline <= 1) over seeds', () => {
  // Two policy factories, each closing over the rule and delegating to an engine helper.
  const policyFactories = [
    (rule) => (st, id) => E.nearestCompliantMove(st, id, rule),
    (rule) => (st, id) => E.valueOnlyCompliantMove(st, id, rule),
  ];
  const seeds = [7, 11, 3];

  let highestRaw = -1e9;
  for (const rule of RULE_LIST) {
    for (const goal of GOAL_LIST) {
      for (const envId of ENV_LIST) {
        for (const seed of seeds) {
          for (const makePolicy of policyFactories) {
            const cell = E.runCell(rule, goal, envId, { seed, focalPolicy: makePolicy(rule) });
            highestRaw = Math.max(highestRaw, cell.headlineRaw);
            assert.ok(cell.headlineRaw <= 1 + 1e-9,
              `${rule}/${goal}/${envId}/s${seed}: raw headline ${cell.headlineRaw} > 1 (C* under-estimates)`);
          }
        }
      }
    }
  }
  // Tightness: some run must have scored above 0.5, or the bound above would
  // hold trivially with every policy far below it.
  assert.ok(highestRaw > 0.5, 'independent compliant policies never approached C* (bound is vacuous)');
});

test('C10 every measured cell either has temptation or Maintenance n/a (never 1 w/ 0 temptation)', () => {
  // Cells that saw no temptation must report null maintenance, null agentness
  // and an explicit maintenanceNA flag; cells with temptation are not inspected.
  const { cells } = E.runCube({ seed: 7, focalPolicy: 'perfect' });
  const untempted = cells.filter((cell) => !cell.hasTemptation);
  for (const cell of untempted) {
    assert.strictEqual(cell.maintenance, null, `${cell.rule}/${cell.goal}/${cell.env}: maintenance should be n/a`);
    assert.strictEqual(cell.agentness, null);
    assert.ok(cell.maintenanceNA === true);
  }
});

/* ---------------- headless smoke + termination -------------------------- */
test('Smoke: buildMemoryBundle for all rules x seeds terminates + unique', () => {
  // Each bundle must be flagged uniquely identified and carry at least four
  // diagnostic steps.
  const seeds = [7, 11, 3];
  for (const rule of RULE_LIST) {
    for (const seed of seeds) {
      const { uniquelyIdentified, diagnosticCount } = E.buildMemoryBundle(rule, seed);
      assert.ok(uniquelyIdentified, `${rule}/${seed} not unique`);
      assert.ok(diagnosticCount >= 4, `${rule}/${seed} diag ${diagnosticCount}`);
    }
  }
});

test('Smoke: runAxisSweep over all axes completes', () => {
  // No assertions: the test passes when each sweep returns without throwing.
  const sweeps = [
    ['R', { goal: 'harvest_max', env: 'E1' }],
    ['G', { rule: 'avoid_hazard', env: 'E1' }],
    ['E', { rule: 'avoid_hazard', goal: 'harvest_max' }],
  ];
  for (const [axis, fixed] of sweeps) {
    E.runAxisSweep(axis, fixed);
  }
});

// C4 (variable-length live game): the live game ends on resolved-temptation count,
// so it plays a VARIABLE number of rounds; C*/greedy must be computable over that
// actual count. The rounds param must (a) default to ROUNDS, and (b) be monotone
// non-decreasing in rounds (each extra round adds non-negative compliant harvest),
// so headline=total/C* stays calibrated for any game length.
test('C4 ceilings accept a rounds param (default=ROUNDS, monotone in rounds)', () => {
  const preset = E.ENV_PRESETS.E1;
  for (const rule of E.RULE_LIST) {
    for (const goal of E.GOAL_LIST) {
      // C* for this cell at a given round count; calling it with no argument
      // leaves the trailing parameters off entirely.
      const ceilingAt = (rounds) => E.ruleOptimalCeiling(rule, goal, 7, preset, undefined, rounds);

      // (a) omitting the rounds argument equals passing ROUNDS explicitly.
      const byDefault = E.ruleOptimalCeiling(rule, goal, 7, preset);
      const byExplicit = ceilingAt(E.ROUNDS);
      assert.strictEqual(byDefault, byExplicit, `${rule}/${goal}: default rounds != ROUNDS`);

      // (b) C* never decreases as rounds goes 1..6 (1e-9 slack).
      let previous = 0;
      for (let rounds = 1; rounds <= 6; rounds += 1) {
        const current = ceilingAt(rounds);
        assert.ok(current >= previous - 1e-9,
          `${rule}/${goal}: C* not monotone at rounds=${rounds} (${current} < ${previous})`);
        previous = current;
      }

      // The greedy ceiling takes the same parameter; it only has to come back finite.
      const greedyAtSix = E.greedyBlindCeiling(rule, goal, 7, preset, undefined, 6);
      assert.ok(Number.isFinite(greedyAtSix));
    }
  }
});

/* -------- Discovery rule-match scoring for the memory stage (C4) ---------- */
// discoveryPredCorrect scores a memory-stage prediction against the RULE
// (compliant best take), NOT against the past-self's literal move. On a
// diagnostic VIOLATION step the compliant cell is correct and the forbidden
// cell the past-self actually took is wrong.
test('discoveryPredCorrect: compliant pred correct, forbidden pred wrong on diagnostic steps', () => {
  // Coverage flags: the test is vacuous unless both kinds of step are exercised.
  const seen = { diagnostic: false, violation: false };

  // Checks one diagnostic decision point: the compliant cell (if any) must
  // score correct, and a violating recorded move must score wrong.
  const auditDiagnosticStep = (board, step, rule) => {
    seen.diagnostic = true;
    const compliantCell = E.bestCompliantAdjacent(board, A.id, rule);
    if (compliantCell) {
      assert.ok(E.discoveryPredCorrect(board, A.id, compliantCell, rule),
        'compliant prediction must score correct');
    }
    if (!E.violates(rule, step.from, step.to, board)) return;
    seen.violation = true;
    assert.ok(!E.discoveryPredCorrect(board, A.id, step.to, rule),
      'the past-self forbidden take must score WRONG under rule-match');
  };

  for (const rule of RULE_LIST) {
    const { episodes } = E.buildMemoryBundle(rule, 12345);
    for (const episode of episodes) {
      const board = E.makeBoard(episode.rule, 'harvest_max', episode.seed, episode.round, ENV_PRESETS.E1);
      for (const step of episode.steps) {
        // Replay: place the focal agent on a fresh copy of the step's start cell.
        const { x, y } = step.from;
        board.pos[A.id] = { x, y };
        if (E.isDiagnostic(board, A.id, rule)) {
          auditDiagnosticStep(board, step, rule);
        }
        // Advance the board with the move recorded in the episode.
        E.applyMove(board, A.id, step.to, episode.rule);
      }
    }
  }

  assert.ok(seen.diagnostic, 'expected at least one diagnostic step across rules');
  assert.ok(seen.violation, 'expected at least one diagnostic VIOLATION step');
});

// Consistency: an oracle player who always presses the compliant move scores
// correct on EVERY diagnostic step — the same verdict inductionPredLog gives an
// oracle inducer (induced rule == true rule). Human path == model path.
test('discoveryPredCorrect agrees with inductionPredLog for an oracle player', () => {
  // With no compliant take available, every in-bounds neighbour must score
  // correct exactly when moving there does not violate the rule.
  const checkStepAwayEdges = (board, rule) => {
    const origin = board.pos[A.id];
    for (const dir of E.DIRS) {
      const target = { x: origin.x + dir.x, y: origin.y + dir.y };
      if (!E.inb(target)) continue;
      const expected = !E.violates(rule, origin, target, board);
      assert.strictEqual(E.discoveryPredCorrect(board, A.id, target, rule), expected,
        'null-compliant step-away edge');
    }
  };

  // Human-path verdict for one diagnostic decision point.
  const checkDiagnosticStep = (board, rule) => {
    const compliantCell = E.bestCompliantAdjacent(board, A.id, rule);
    if (!compliantCell) {
      checkStepAwayEdges(board, rule);
      return;
    }
    assert.ok(E.discoveryPredCorrect(board, A.id, compliantCell, rule),
      'compliant pred correct (consistency)');
  };

  for (const rule of RULE_LIST) {
    const bundle = E.buildMemoryBundle(rule, 999);

    // Model path: induced rule == true rule, so every diagnostic entry must be correct.
    const oracleLog = E.inductionPredLog(rule, rule, bundle);
    for (const entry of oracleLog) {
      if (!entry.diagnostic) continue;
      assert.ok(entry.correct, 'oracle inducer must be correct on every diagnostic step');
    }

    // Human path: replay each episode and score the compliant prediction.
    for (const episode of bundle.episodes) {
      const board = E.makeBoard(episode.rule, 'harvest_max', episode.seed, episode.round, ENV_PRESETS.E1);
      for (const step of episode.steps) {
        const { x, y } = step.from;
        board.pos[A.id] = { x, y };
        if (E.isDiagnostic(board, A.id, rule)) {
          checkDiagnosticStep(board, rule);
        }
        E.applyMove(board, A.id, step.to, episode.rule);
      }
    }
  }
});

// TIE-AWARENESS: when two adjacent compliant tokens TIE at the best value, a
// rule-follower may take EITHER — both predictions must score correct. The old
// DIRS-first tie-break marked the equally-valid second cell wrong.
test('discoveryPredCorrect accepts ANY tied-best compliant adjacent prediction', () => {
  const RULE = 'avoid_biggest';
  // Builds a live token; key order matches the literals used elsewhere in this file.
  const liveToken = (x, y, v, guard) => ({ x, y, v, alive: true, guard });

  const board = E.makeBoard(RULE, 'harvest_max', 1, 0, ENV_PRESETS.E1);
  // Kill every generated token so the decision point below is fully hand-built.
  for (const token of board.tokens) {
    token.alive = false;
  }
  board.pos[A.id] = { x: 4, y: 3 };
  board.pos[O.id] = { x: 8, y: 8 };

  const up = { x: 4, y: 2 };
  const down = { x: 4, y: 4 };
  const left = { x: 3, y: 3 };
  const right = { x: 5, y: 3 };
  board.tokens.push(liveToken(up.x, up.y, 9, true));          // U: board max -> forbidden
  board.tokens.push(liveToken(down.x, down.y, 4, false));     // D: tied best compliant
  board.tokens.push(liveToken(left.x, left.y, 4, false));     // L: tied best compliant
  board.tokens.push(liveToken(right.x, right.y, 1, false));   // R: worse compliant

  const tiedBest = E.bestCompliantAdjacentSet(board, A.id, RULE);
  assert.strictEqual(tiedBest.length, 2, 'both tied-best cells in the set');

  const scores = (cell) => E.discoveryPredCorrect(board, A.id, cell, RULE);
  assert.ok(scores(down), 'DIRS-first tied best correct');
  assert.ok(scores(left), 'OTHER tied best ALSO correct');
  assert.ok(!scores(up), 'forbidden max wrong');
  assert.ok(!scores(right), 'lower-valued compliant wrong');
});

// avoid_biggest with a TIED board max: EVERY tied token is forbidden; taking one
// is penalized; Maintenance bookkeeping registers each tied lure separately and
// credits resistance only on an active compliant move (C10).
test('avoid_biggest tied max: all tied tokens forbidden + maintenance bookkeeping', () => {
  const RULE = 'avoid_biggest';
  // Builds a live, unguarded token with the same key order as the rest of the file.
  const plainToken = (x, y, v) => ({ x, y, v, alive: true, guard: false });

  const board = E.makeBoard(RULE, 'harvest_max', 1, 0, ENV_PRESETS.E1);
  // Retire the generated tokens; only the three placed below stay alive.
  for (const token of board.tokens) {
    token.alive = false;
  }
  board.pos[A.id] = { x: 4, y: 3 };
  board.pos[O.id] = { x: 8, y: 8 };

  const down = { x: 4, y: 4 };
  const up = { x: 4, y: 2 };
  const left = { x: 3, y: 3 };
  board.tokens.push(plainToken(down.x, down.y, 5)); // D: tied max
  board.tokens.push(plainToken(up.x, up.y, 5));     // U: tied max
  board.tokens.push(plainToken(left.x, left.y, 2)); // L: compliant

  // Rule check: both tied maxima are forbidden, the smaller token is not.
  const origin = board.pos[A.id];
  assert.ok(E.violates(RULE, origin, down, board), 'tied max #1 forbidden');
  assert.ok(E.violates(RULE, origin, up, board), 'tied max #2 forbidden');
  assert.ok(!E.violates(RULE, origin, left, board), 'non-max compliant');

  // Bookkeeping: record the temptations, then resolve them as "took the D lure".
  const ctx = E.newCtx();
  const lureIds = E.recordTemptation(ctx, board, RULE);
  assert.strictEqual(lureIds.length, 2, 'both tied lures registered (g = 5-2 = 3 each)');
  const takenToken = E.tokenAt(board, down);
  const takenId = board.round + ':' + E.key(takenToken);
  E.resolveTemptation(ctx, lureIds, { takenId, activeMove: false });

  const outcome = E.applyMove(board, A.id, down, RULE);
  assert.ok(outcome.violated && outcome.penalty > 0, 'taking a tied max is flagged + penalized');

  const totals = E.maintenanceTotals(ctx);
  assert.strictEqual(totals.gsum, 6, 'gsum counts both tied lures');
  assert.strictEqual(totals.resisted, 0, 'no resistance credit on a violating take');
});

/* ---------------- runCellAsync: exact parity with runCell ---------------- */
testAsync('runCellAsync === runCell (perfect default + custom policy/inducer)', async () => {
  // Part 1: default ('perfect') path. Same cell, two seeds, sync run first.
  const defaultCell = ['avoid_hazard', 'harvest_max', 'E2'];
  for (const seed of [7, 11]) {
    const expected = E.runCell(...defaultCell, { seed });
    const actual = await E.runCellAsync(...defaultCell, { seed });
    assert.deepStrictEqual(actual, expected, 'perfect parity seed=' + seed);
  }

  // Part 2: custom focalPolicy + custom inducer. Wrapping them in async
  // functions must not change the result. Each run gets its own policy
  // instance, built from identical arguments.
  const customCell = ['avoid_sacred', 'deliver_to_zone', 'E3'];
  const env = E.ENV_PRESETS.E3;
  const syncPolicy = E.perfectSelfPolicy('avoid_sacred', 'deliver_to_zone', 9, env);
  const asyncPolicy = E.perfectSelfPolicy('avoid_sacred', 'deliver_to_zone', 9, env);

  const syncResult = E.runCell(...customCell, {
    seed: 9,
    focalPolicy: (st, id, ts) => syncPolicy(st, ts),
    inducer: E.induceRuleFromMemory,
  });
  const asyncResult = await E.runCellAsync(...customCell, {
    seed: 9,
    focalPolicy: async (st, id, ts) => asyncPolicy(st, ts),
    inducer: async (b) => E.induceRuleFromMemory(b),
  });
  assert.deepStrictEqual(asyncResult, syncResult, 'custom-policy parity');
});

// Async harness: runs the queued async tests one at a time, in registration
// order. The first failure is reported and the process exits with status 1;
// otherwise the final 'ALL PASS <total>' line is printed.
(async function runAsyncTests() {
  // Prefer the stack trace when the thrown value has one.
  const describe = (err) => (err && err.stack || err);

  for (const entry of ASYNC_TESTS) {
    try {
      await entry.fn();
      pass(entry.name);
    } catch (err) {
      console.error('FAIL: ' + entry.name + '\n  ' + describe(err));
      process.exit(1);
    }
  }
  console.log('ALL PASS ' + n);
})().catch((err) => {
  // Reached only if the harness itself rejects outside the per-test try/catch.
  console.error('FATAL (async harness):\n  ' + (err && err.stack || err));
  process.exit(1);
});