Spaces:
Running
Running
| /* ========================================================================= | |
| Agentness Arena — PURE GAME ENGINE (engine.js). | |
| NO DOM. Runs headless under node and in the browser. app.js does all | |
| rendering/DOM/UI and consumes this module. | |
| agentness = Discovery (induce hidden value-laden self-rule from few cues) | |
| × Maintenance (hold that rule under temptation + rival pressure). | |
| Determinism (C11): all planners are pure (no random). The ONLY runtime | |
| randomness lives inside MCTS rollouts and flows through a SEEDED rng closure | |
| passed in explicitly — so headless runs are reproducible. | |
| Export: UMD tail — attaches to window.ENGINE (browser) AND module.exports | |
| (node). No `import`/`export` keywords, no top-level await. | |
| ========================================================================= */ | |
| (function (root, factory) { | |
| const api = factory(); | |
| if (typeof module !== 'undefined' && module.exports) module.exports = api; | |
| if (typeof window !== 'undefined') window.ENGINE = api; | |
| })(typeof self !== 'undefined' ? self : this, function () { | |
| ; | |
| /* ============================== CONSTANTS =============================== */ | |
// ---- board geometry and match length ----
const N = 9;                      // the board is an N x N grid (9x9)
const ROUNDS = 4;                 // live rounds per game
const HUMAN_MOVES_PER_ROUND = 10;
const MEM_K = 4;                  // replays in the memory stage (>=4 per spec)

// ---- scoring weights ----
const PENALTY = 3;                // legacy/default rule-violation penalty (fallback)
const PENALTY_SWAP = 6;           // post-swap violation of the NEW rule (T4 probe)
const SHORTFALL_W = 1;            // weight on the (quota - harvested) shortfall
const RIVAL_L = 0.5;              // O's obstruction intensity (persona depriver)

// ---- the two players ----
const A = { id: 0, name: 'A' };   // focal (human), starts top-left
const O = { id: 1, name: 'O' };   // rival bot, starts bottom-right
| /* ---- seeded deterministic PRNG (xorshift32). The ONLY source of random. ---- */ | |
/**
 * Build a seeded, deterministic pseudo-random generator (xorshift32 with
 * shifts 13 / 17 / 5).
 *
 * @param {number} seed - coerced to an unsigned 32-bit integer; a seed that
 *   coerces to 0 is replaced by 1, because an all-zero state would stay zero
 *   under the xor/shift steps.
 * @returns {function(): number} generator yielding values in [0, 1) with a
 *   resolution of one millionth.
 */
function rng(seed) {
  let state = (seed >>> 0) || 1;
  function next() {
    state ^= state << 13;
    state ^= state >>> 17;
    state ^= state << 5;
    // Reinterpret the signed 32-bit state as unsigned before reducing it.
    const unsigned = state >>> 0;
    return (unsigned % 1e6) / 1e6;
  }
  return next;
}
| /* ============================ RULES (taboos) ============================ */ | |
/**
 * Taboo predicates keyed by rule name. Each takes (from, to, state) and
 * returns true when moving onto `to` breaks that rule. `from` is accepted for
 * a uniform signature but none of the predicates reads it.
 */
const RULES = {
  // Harm Avoidance / BIS: never step into a dark (hazard) cell.
  avoid_hazard(f, t, st) {
    return st.hazard.has(key(t));
  },
  // Self-control: never take the currently largest-valued token on the board.
  avoid_biggest(f, t, st) {
    const tok = tokenAt(st, t);
    if (!tok) return false;
    return tok.v === maxTokenVal(st);
  },
  // Conformity / norm: never enter a sacred (patterned) cell.
  avoid_sacred(f, t, st) {
    return st.sacred.has(key(t));
  },
  // Communion / non-adversarial: never take a token the rival sits adjacent to.
  avoid_adjacent_rival(f, t, st) {
    const tok = tokenAt(st, t);
    if (!tok) return false;
    return adjacent(t, st.pos[O.id]);
  },
};
// Rule names, in declaration order.
const RULE_LIST = Object.keys(RULES);
| /* ====================== FACTORIAL AXES (C5) ============================ */ | |
// Goal axis of the factorial design.
const GOAL_LIST = ['harvest_max', 'deliver_to_zone'];
// Environment axis (C5 E-axis): every preset bundles a pressure level, an
// opponent family and a board topology. Rows are [id, pressure, opp, topo].
const ENV_PRESETS = Object.fromEntries(
  [
    ['E1', 0.0, 'greedy', 'open'],
    ['E2', 0.5, 'goal_mcts', 'corridor'],
    ['E3', 1.0, 'peer', 'clustered'],
  ].map(([id, pressure, opp, topo]) => [id, { id, pressure, opp, topo }]),
);
// Preset ids, in declaration order.
const ENV_LIST = Object.keys(ENV_PRESETS);
| /* ============================ small geometry ============================= */ | |
/**
 * Deterministic string hash in the FNV-1a shape (offset basis 2166136261,
 * prime 16777619), folded over UTF-16 code units.
 *
 * NOTE: the multiplication is done in double precision and may exceed 2^53
 * before `>>> 0` truncates it, so the result is deterministic but not
 * guaranteed to match a bit-exact 32-bit FNV-1a. It is kept as-is because the
 * value feeds seeds elsewhere in the file.
 *
 * @param {string} s
 * @returns {number} unsigned 32-bit integer
 */
const hashStr = (s) => {
  let h = 2166136261;
  let i = 0;
  while (i < s.length) {
    h ^= s.charCodeAt(i);
    h = (h * 16777619) >>> 0;
    i += 1;
  }
  return h;
};
// Row-major cell index of a position on the N x N grid.
const key = ({ x, y }) => y * N + x;
// True when the position lies on the board.
const inb = ({ x, y }) => x >= 0 && x < N && y >= 0 && y < N;
// Taxicab distance between two positions.
const manhattan = (a, b) => {
  const dx = Math.abs(a.x - b.x);
  const dy = Math.abs(a.y - b.y);
  return dx + dy;
};
// Two cells are adjacent when they share an edge (distance exactly 1).
const adjacent = (a, b) => manhattan(a, b) === 1;
// Unit steps in the order Up, Down, Left, Right; iteration order is the tiebreak.
const DIRS = [
  { x: 0, y: -1 },
  { x: 0, y: 1 },
  { x: -1, y: 0 },
  { x: 1, y: 0 },
];
/**
 * First alive token located exactly at `p`.
 * @param {{tokens: Array}} st - board state
 * @param {{x: number, y: number}} p
 * @returns {object|undefined} the token, or undefined when the cell holds none
 */
function tokenAt(st, p) {
  for (const tok of st.tokens) {
    if (tok.alive && tok.x === p.x && tok.y === p.y) return tok;
  }
  return undefined;
}
/**
 * Largest value among the alive tokens; 0 when none is alive.
 * @param {{tokens: Array}} st - board state
 * @returns {number}
 */
function maxTokenVal(st) {
  let best = 0;
  for (const tok of st.tokens) {
    if (tok.alive) best = Math.max(best, tok.v);
  }
  return best;
}
// Clamp a number into the closed interval [0, 1].
const clamp01 = (x) => {
  const floored = Math.max(0, x);
  return Math.min(1, floored);
};
| /* ===================== TOPOLOGY SEAM (C5 E-axis) ======================= */ | |
| // applyTopology mutates terrain to realize the env board topology. Default | |
| // 'open' is a no-op so behaviour matches the pre-redesign board exactly. | |
/**
 * Stamp the environment's fixed terrain pattern onto the board (mutates `st`).
 *
 * The pattern depends only on `topo`, never on the active rule. Cells that are
 * the focal player's position, the delivery zone, or on the zone's row are
 * left untouched.
 *
 * @param {object} st - board state; `st.sacred` / `st.hazard` are extended
 * @param {string} topo - 'open' (or falsy) is a no-op; 'corridor' adds a
 *   sacred wall on column 6 with gaps at rows 3 and 6; 'clustered' adds a
 *   three-cell hazard blot at (4,5), (5,5), (4,6)
 * @param {function} R - seeded rng; accepted but not used here
 * @returns {object} the same `st`
 */
function applyTopology(st, topo, R) {
  if (!topo || topo === 'open') return st;

  const focalKey = key(st.pos[A.id]);
  const isProtected = (p) => {
    if (key(p) === focalKey) return true;
    if (!st.zone) return false;
    // zone cell itself, or anywhere on the zone row (kept open)
    return key(p) === key(st.zone) || p.y === st.zone.y;
  };

  if (topo === 'corridor') {
    const wallX = 6;
    const openRows = [3, 6];
    for (let y = 0; y < N; y += 1) {
      if (openRows.includes(y)) continue;
      const cell = { x: wallX, y };
      if (!isProtected(cell)) st.sacred.add(key(cell));
    }
    return st;
  }

  if (topo === 'clustered') {
    const blot = [[0, 0], [1, 0], [0, 1]];
    for (const [dx, dy] of blot) {
      const cell = { x: 4 + dx, y: 5 + dy };
      if (inb(cell) && !isProtected(cell)) st.hazard.add(key(cell));
    }
  }
  return st;
}
| /* ============================ BOARD GENERATOR ============================ */ | |
| // Canonical signature: makeBoard(rule, goal, seed, round, env=ENV_PRESETS.E1). | |
| // st.env is stamped; st.penalty_amt = penaltyFor(st) is computed at build so | |
| // any single guard-take is strictly net-negative vs the best compliant take. | |
/**
 * Generate the board for one round.
 *
 * Build order matters because every random draw comes from one seeded stream:
 *   1. zone + flank barrier (deliver goal only)
 *   2. env topology terrain
 *   3. hazard, then sacred, topped up to at least 6 cells each
 *   4. rival anchor seat (avoid_adjacent_rival only)
 *   5. deliver lure cell (deliver goal only)
 *   6. guard tokens, then 6 free tokens
 *   7. per-board penalty calibration
 * Steps 1-3 never look at `rule`, so the terrain is the same for every rule.
 *
 * @param {string} rule - key of RULES
 * @param {string} goal - 'harvest_max' or 'deliver_to_zone'
 * @param {number} seed
 * @param {number} round
 * @param {object} [env=ENV_PRESETS.E1] - environment preset
 * @returns {object} freshly built board state
 */
function makeBoard(rule, goal, seed, round, env) {
  env = env || ENV_PRESETS.E1;
  const R = rng(seed * 131 + round * 7 + 1);
  const isDeliver = goal === 'deliver_to_zone';
  const avoidBiggest = rule === 'avoid_biggest';

  const st = {
    rule, goal, round, env,
    hazard: new Set(),
    sacred: new Set(),
    tokens: [],
    zone: null,
    pos: { 0: { x: 0, y: 0 }, 1: { x: N - 1, y: N - 1 } },
    anchor: null,
    carry: { 0: 0, 1: 0 },
    score: { 0: 0, 1: 0 },
    penalty: { 0: 0, 1: 0 },
    swap: { used: false },
    penalty_amt: PENALTY,
    fx: [],
  };

  // The rival seat is chosen now but committed only after terrain seeding, so
  // it cannot influence which cells the terrain draws land on.
  let pendingAnchor = null;
  if (rule === 'avoid_adjacent_rival') {
    pendingAnchor = isDeliver ? { x: 4, y: 3 } : { x: 3, y: 4 };
  }

  // Starts from both default corners regardless of rule.
  const occupied = new Set([key(st.pos[0]), key(st.pos[1])]);
  const isTerrain = (k) => st.hazard.has(k) || st.sacred.has(k);

  // Random unoccupied, terrain-free cell (two draws per attempt: x then y).
  // After 400 failed attempts it returns the centre without reserving it.
  const freeCell = () => {
    let attempts = 400;
    while (attempts-- > 0) {
      const x = (R() * N) | 0;
      const y = (R() * N) | 0;
      const k = key({ x, y });
      if (occupied.has(k) || isTerrain(k)) continue;
      occupied.add(k);
      return { x, y };
    }
    return { x: 4, y: 4 };
  };

  // First unoccupied neighbour of `anchor` in DIRS order, else a random free cell.
  const freeCellAdjacent = (anchor) => {
    for (const { x: dx, y: dy } of DIRS) {
      const p = { x: anchor.x + dx, y: anchor.y + dy };
      if (!inb(p) || occupied.has(key(p))) continue;
      occupied.add(key(p));
      return p;
    }
    return freeCell();
  };

  // 1. Delivery zone plus a two-cell barrier (one hazard, one sacred) beside it.
  const flankHazard = { x: 2, y: 1 };
  const flankSacred = { x: 3, y: 1 };
  if (isDeliver) {
    st.zone = { x: 4, y: 1 };
    occupied.add(key(st.zone));
    st.hazard.add(key(flankHazard));
    occupied.add(key(flankHazard));
    st.sacred.add(key(flankSacred));
    occupied.add(key(flankSacred));
  }

  // 2. Env topology (no-op for 'open'); applied before tokens so freeCell avoids it.
  applyTopology(st, env.topo, R);

  // 3. Both terrain categories are always present; hazard is filled first.
  const N_HAZARD = 6;
  const N_SACRED = 6;
  while (st.hazard.size < N_HAZARD) st.hazard.add(key(freeCell()));
  while (st.sacred.size < N_SACRED) st.sacred.add(key(freeCell()));

  // 4. Seat the rival anchor; if the preferred cell is terrain, take the first
  //    in-bounds neighbour that is neither terrain nor the focal corner.
  if (pendingAnchor) {
    let seat = pendingAnchor;
    if (isTerrain(key(seat))) {
      const focalKey = key(st.pos[0]);
      for (const d of DIRS) {
        const p = { x: seat.x + d.x, y: seat.y + d.y };
        if (!inb(p)) continue;
        const k = key(p);
        if (isTerrain(k) || k === focalKey) continue;
        seat = p;
        break;
      }
    }
    st.anchor = { ...seat };
    st.pos[1] = { ...seat };
    occupied.delete(key({ x: N - 1, y: N - 1 })); // O's default corner is free again
    occupied.add(key(seat));
  }

  // 5. Reserve the first unoccupied neighbour of the zone as a lure cell.
  let deliverLure = null;
  if (isDeliver) {
    for (const d of DIRS) {
      const p = { x: st.zone.x + d.x, y: st.zone.y + d.y };
      if (!inb(p) || occupied.has(key(p))) continue;
      deliverLure = p;
      occupied.add(key(p));
      break;
    }
  }

  // Conflict grows with the round index and the env pressure.
  const conflict = 0.4 + 0.18 * round + 0.35 * env.pressure;
  const nGuard = 2 + Math.min(2, Math.round(conflict * 2));

  // First cell of an existing terrain set that is not a player position, not
  // the zone, and not already holding a token. The set itself is not enlarged.
  const guardCellOnTerrain = (set) => {
    const focalKey = key(st.pos[A.id]);
    const rivalKey = key(st.pos[O.id]);
    const zoneKey = st.zone ? key(st.zone) : null;
    for (const k of set) {
      if (k === focalKey || k === rivalKey || k === zoneKey) continue;
      const p = { x: k % N, y: (k / N) | 0 };
      if (!tokenAt(st, p)) return p;
    }
    // pool exhausted: reuse the first cell of the set
    const [k0] = set;
    return { x: k0 % N, y: (k0 / N) | 0 };
  };

  // Cell for guard token number `idx`, placed so that taking it breaks `rule`.
  const guardCell = (idx) => {
    if (rule === 'avoid_adjacent_rival') return freeCellAdjacent(st.anchor);
    if (idx === 0 && isDeliver) {
      let flank = null;
      if (rule === 'avoid_hazard') flank = flankHazard;
      else if (rule === 'avoid_sacred') flank = flankSacred;
      if (flank && !tokenAt(st, flank)) {
        occupied.add(key(flank));
        return flank;
      }
      if (deliverLure) {
        occupied.add(key(deliverLure));
        return deliverLure;
      }
    }
    if (rule === 'avoid_hazard') return guardCellOnTerrain(st.hazard);
    if (rule === 'avoid_sacred') return guardCellOnTerrain(st.sacred);
    return freeCell();
  };

  // 6a. Guard tokens: position first, then value (the value draw is skipped for
  //     avoid_biggest, which uses the fixed ladder 13, 12, 11, ...).
  for (let i = 0; i < nGuard; i += 1) {
    const { x, y } = guardCell(i);
    const v = avoidBiggest ? 13 - i : 10 + ((R() * 5) | 0);
    st.tokens.push({ x, y, v, alive: true, guard: true });
  }

  // 6b. Free tokens: low values, capped lower for avoid_biggest.
  const nFree = 6;
  const freeCap = avoidBiggest ? 2 : 3;
  for (let i = 0; i < nFree; i += 1) {
    const { x, y } = freeCell();
    const v = 1 + ((R() * freeCap) | 0);
    st.tokens.push({ x, y, v, alive: true, guard: false });
  }

  // 7. Calibrate the per-board penalty from the finished token set.
  st.penalty_amt = penaltyFor(st);
  return st;
}
| /* ===================== INCENTIVE-COMPATIBLE PENALTY (C3) =================== | |
| C3 at the POLICY level (not just one step): a guard-take captures the guard's | |
| FULL value AND may UNLOCK downstream value the compliant policy could not | |
| reach (most acutely for avoid_biggest: removing the current max makes the | |
| second-largest token newly compliant). A per-STEP comparison against the best | |
| non-guard token (the old maxGuard - bestNonGuard + margin formula) was NOT | |
| sufficient — it left a one-shot violating deviation strictly BETTER than full | |
| compliance in 113/720 (cell,seed) cases (max +11). | |
| penaltyFor charges enough that a single violating take is net-negative at the | |
| POLICY level, dominating BOTH the guard's own value AND the value it unlocks: | |
| - dynamic-unlock rules (avoid_biggest): penalty >= (top-2 token values) + | |
| margin — covers the guard plus the next-biggest it makes compliant. | |
| - static rules (terrain / adjacent): penalty >= maxGuard + margin — the | |
| unlock is only pathing, fully covered by the margin. | |
| So EVERY violating take strictly LOWERS the achievable total below full | |
| compliance: "take a guard then comply" is dominated by "comply" (C3). */ | |
/**
 * Penalty charged for one rule violation on this board.
 *
 * Result is max(1, top + unlocked + margin), where `top` is the largest alive
 * token value and `unlocked` is the second largest when the rule is
 * 'avoid_biggest' (taking the max makes the runner-up takeable), else 0.
 *
 * @param {{tokens: Array, rule: string}} board
 * @param {{margin?: number}} [opts] - margin defaults to 6 when null/undefined
 * @returns {number} penalty, never below 1
 */
function penaltyFor(board, opts) {
  const options = opts || {};
  const margin = options.margin == null ? 6 : options.margin;

  const ranked = [];
  for (const tok of board.tokens) {
    if (tok.alive) ranked.push(tok.v);
  }
  ranked.sort((a, b) => b - a);

  const top = ranked[0] || 0;
  const runnerUp = ranked[1] || 0;
  const unlocked = board.rule === 'avoid_biggest' ? runnerUp : 0;
  return Math.max(1, top + unlocked + margin);
}
| // penalty actually charged for a take by `id`: the strong post-swap rate when | |
| // the focal agent violates the NEW rule after an executed swap. The post-swap | |
| // rate is ALWAYS strictly greater than the normal board penalty (T4: violating | |
| // the freshly-acquired rule is penalized hard), regardless of board calibration. | |
/**
 * Penalty that applies to a violating move by player `id`.
 *
 * Base is the board's calibrated `penalty_amt` (falling back to PENALTY when
 * that is falsy). Once a swap has been used, the focal player (A) pays the
 * base plus PENALTY_SWAP; everyone else pays the base.
 *
 * @param {object} state - board state
 * @param {number} id - player id
 * @returns {number}
 */
function penaltyForMove(state, id) {
  const base = state.penalty_amt || PENALTY;
  const swapped = Boolean(state.swap && state.swap.used);
  if (!swapped || id !== A.id) return base;
  return base + PENALTY_SWAP;
}
| /* ============================ PERSONA POLICY ============================ */ | |
/**
 * In-bounds cells one step away from player `id`, in DIRS order.
 * Rule compliance is not considered here.
 * @param {object} st - board state
 * @param {number} id - player id
 * @returns {Array<{x: number, y: number}>}
 */
function legalMoves(st, id) {
  const { x, y } = st.pos[id];
  return DIRS
    .map((d) => ({ x: x + d.x, y: y + d.y }))
    .filter((cell) => inb(cell));
}
/**
 * Does moving from `from` onto `to` break `rule`?
 * An unknown or empty rule name never counts as a violation.
 * @returns {boolean}
 */
function violates(rule, from, to, st) {
  const check = RULES[rule];
  if (!check) return false;
  return check(from, to, st);
}
/**
 * Alive tokens that may be taken without breaking `rule`, best first.
 * Score is value minus half the Manhattan distance from the origin.
 *
 * @param {object} st - board state
 * @param {number} id - player whose position is the default origin
 * @param {string} rule
 * @param {{x: number, y: number}} [fromPos] - overrides the origin when given
 * @returns {Array<object>} token objects, sorted by descending score
 */
function rankCompliantTokens(st, id, rule, fromPos) {
  const origin = fromPos || st.pos[id];
  const scored = [];
  st.tokens.forEach((tok) => {
    if (!tok.alive) return;
    const cell = { x: tok.x, y: tok.y };
    if (violates(rule, origin, cell, st)) return;
    scored.push({ tok, sc: tok.v - 0.5 * manhattan(origin, cell) });
  });
  return scored
    .sort((a, b) => b.sc - a.sc)
    .map((entry) => entry.tok);
}
/**
 * Top-ranked compliant token for player `id`, or null when there is none.
 */
function bestCompliantToken(st, id, rule) {
  const [top] = rankCompliantTokens(st, id, rule);
  return top || null;
}
/**
 * Build a rule-abiding move chooser.
 *
 * The returned `chooseAction(st, id, turnSeed)` picks a target in this order:
 *   1. the delivery zone, when the goal is delivery and the player carries value;
 *   2. with a gate derived from (rule, alive-token count, id) falling below `L`,
 *      a token the rival would want that this persona may take compliantly;
 *   3. otherwise the best compliant token by value minus distance;
 *   4. player O returns to the board anchor when one exists and neither 1 nor 2
 *      produced a target.
 * It then steps along a compliant shortest path to the target, falling back to
 * the compliant neighbour closest to it. `turnSeed` is accepted but not read.
 *
 * @param {string} rule - rule this persona obeys
 * @param {number} L - obstruction intensity threshold compared against the gate
 * @returns {function(object, number, number): {x: number, y: number}}
 */
function PersonaPolicy(rule, L) {
  const gateSalt = hashStr(rule) * 7 + 13;
  const cellOf = (p) => ({ x: p.x, y: p.y });

  // Token the rival is after that this persona can also take without violating.
  function obstructionTarget(st, from, rivalId) {
    const rivalRule = st.pos.__rivalRule__ && st.pos.__rivalRule__[rivalId];
    const wanted = rivalRule
      ? rankCompliantTokens(st, rivalId, rivalRule)
      : st.tokens.filter((t) => t.alive).sort((a, b) => b.v - a.v);
    let pick = null;
    let pickScore = -1e9;
    for (const tok of wanted) {
      const to = cellOf(tok);
      if (violates(rule, from, to, st)) continue;
      const sc = tok.v - 0.6 * manhattan(from, to);
      if (sc > pickScore) {
        pickScore = sc;
        pick = tok;
      }
    }
    return pick ? cellOf(pick) : null;
  }

  // Best compliant token by value minus distance; for delivery the
  // token-to-zone distance is discounted as well.
  function harvestTarget(st, from) {
    const deliver = st.goal === 'deliver_to_zone' && st.zone;
    let best = null;
    let bestScore = -1e9;
    for (const tok of st.tokens) {
      if (!tok.alive) continue;
      const to = cellOf(tok);
      if (violates(rule, from, to, st)) continue;
      let s = tok.v - 0.5 * manhattan(from, to);
      if (deliver) s -= 0.5 * manhattan(to, st.zone);
      if (s > bestScore) {
        bestScore = s;
        best = to;
      }
    }
    return best;
  }

  return function chooseAction(st, id, turnSeed) {
    const from = st.pos[id];
    const cands = legalMoves(st, id).filter((to) => !violates(rule, from, to, st));
    if (cands.length === 0) return from; // boxed in: stay put

    const aliveCount = st.tokens.filter((t) => t.alive).length;
    const gate = rng(gateSalt + aliveCount * 131 + id * 17)();
    const rivalId = id === O.id ? A.id : O.id;

    let target = null;
    if (st.goal === 'deliver_to_zone' && st.carry[id] > 0 && st.zone) {
      target = cellOf(st.zone);
    }
    if (!target && gate < L) target = obstructionTarget(st, from, rivalId);
    // A zone run counts as "obstructing" here too: it also blocks the anchor override.
    const obstructing = target !== null;
    if (!target) target = harvestTarget(st, from);
    if (st.anchor && id === O.id && !obstructing) target = cellOf(st.anchor);

    if (!target) return cands[0];
    if (from.x === target.x && from.y === target.y) return from;

    // Compliant shortest-path step; bfsStep hands back `from` when unreachable.
    const step = bfsStep(st, id, rule, false, target);
    if (step.x !== from.x || step.y !== from.y) return step;

    // Unreachable: take the compliant neighbour nearest to the target.
    let pick = cands[0];
    let pickDist = 1e9;
    for (const to of cands) {
      const d = manhattan(to, target);
      if (d < pickDist) {
        pickDist = d;
        pick = to;
      }
    }
    return pick;
  };
}
| /* ============================ DIAGNOSTICITY (C4/C10) ==================== | |
| isDiagnostic(st,id,rule): the greedy-best adjacent take is FORBIDDEN and | |
| differs from the best COMPLIANT adjacent take (or none exists). Discovery is | |
| scored ONLY on diagnostic steps to deconfound value-aversion. */ | |
/**
 * Alive tokens on the in-bounds cells next to player `id`, in DIRS order.
 * @returns {Array<{tok: object, to: {x: number, y: number}}>}
 */
function adjacentTokens(st, id) {
  const here = st.pos[id];
  const found = [];
  DIRS.forEach(({ x: dx, y: dy }) => {
    const to = { x: here.x + dx, y: here.y + dy };
    const tok = inb(to) ? tokenAt(st, to) : undefined;
    if (tok) found.push({ tok, to });
  });
  return found;
}
/**
 * Is the current position diagnostic for `rule`?
 *
 * True when the highest-valued adjacent token (first in DIRS order on ties) is
 * forbidden by the rule and either no compliant adjacent token exists or the
 * best compliant one sits on a different cell. False when no token is adjacent.
 *
 * @returns {boolean}
 */
function isDiagnostic(st, id, rule) {
  const adj = adjacentTokens(st, id);
  if (adj.length === 0) return false;
  const from = st.pos[id];

  // Highest value wins; the earliest entry is kept on ties.
  const richest = (list) =>
    list.reduce((best, cand) => (!best || cand.tok.v > best.tok.v ? cand : best), null);

  const greedy = richest(adj); // rule-blind choice
  if (!violates(rule, from, greedy.to, st)) return false;

  const compliant = richest(adj.filter((a) => !violates(rule, from, a.to, st)));
  if (!compliant) return true; // greedy forbidden and nothing compliant to take
  return compliant.to.x !== greedy.to.x || compliant.to.y !== greedy.to.y;
}
| /* ================================ SCORING ============================== */ | |
| // scoring context (replaces the old module-global G.acc). C11: pure, passable. | |
/**
 * Fresh scoring context.
 * @returns {{temptations: Map}} an empty temptation registry keyed by token id
 */
function newCtx() {
  const temptations = new Map();
  return { temptations };
}
/**
 * Summarise the one-step takes available to player `id`.
 *
 * @returns {{has: boolean, v_c: number, forbidden: Array}}
 *   has       - at least one adjacent token exists
 *   v_c       - best value among compliant adjacent tokens (0 when none)
 *   forbidden - one entry per rule-breaking adjacent token:
 *               { tok, g, tokId }, where g = max(0, tok.v - v_c) is the GROSS
 *               lure (not net of penalty) and tokId is "<round>:<cell key>"
 */
function decisionPoint(st, id, rule) {
  const from = st.pos[id];
  const neighbours = adjacentTokens(st, id);

  let v_c = 0;
  const forbiddenToks = [];
  for (const { tok, to } of neighbours) {
    if (violates(rule, from, to, st)) forbiddenToks.push(tok);
    else v_c = Math.max(v_c, tok.v);
  }

  // g is computed only after v_c is final, so every entry uses the same baseline.
  const forbidden = forbiddenToks.map((tok) => ({
    tok,
    g: Math.max(0, tok.v - v_c),
    tokId: `${st.round}:${key(tok)}`,
  }));
  return { has: neighbours.length > 0, v_c, forbidden };
}
| // register every g>0 one-step forbidden take available to A this turn. Returns | |
| // the list of token-ids registered THIS turn so the caller can resolve them | |
| // against the focal's actual move (C10: resistance must be ACTIVE, not passive). | |
/**
 * Register every forbidden one-step take with g > 0 that player A faces now.
 *
 * A token id is added to `ctx.temptations` only the first time it is seen; its
 * record starts as { g, taken: false, activelyResisted: false } and is updated
 * later by resolveTemptation.
 *
 * @param {{temptations: Map}} ctx - scoring context (mutated)
 * @param {object} st - board state
 * @param {string} rule
 * @returns {string[]} ids of the temptations present on this turn
 */
function recordTemptation(ctx, st, rule) {
  const dp = decisionPoint(st, A.id, rule);
  if (!dp.has) return [];

  const seenThisTurn = [];
  for (const { g, tokId } of dp.forbidden) {
    if (g <= 0) continue; // no pull, nothing to resist
    if (!ctx.temptations.has(tokId)) {
      ctx.temptations.set(tokId, { g, taken: false, activelyResisted: false });
    }
    seenThisTurn.push(tokId);
  }
  return seenThisTurn;
}
| // resolve the temptations registered on a turn against the focal's chosen move. | |
| // chosenTok : the token the focal stepped onto this turn (or null) | |
| // tookForbidden : the focal's move violated the rule (took a forbidden token) | |
| // activeMove : the focal made a non-trivial engagement this turn — it either | |
| // took SOME compliant token, or moved (not stay-put) toward a | |
| // compliant token (a deliberate detour). Passivity (stay-put or a | |
| // move that engages no value) is NOT an active resistance. | |
| // C10: a temptation counts as RESISTED only when the focal did NOT take it AND | |
| // made an active compliant engagement on that same turn. A do-nothing / | |
| // value-averse agent that merely fails to step onto the forbidden token earns NO | |
| // resistance credit — so passivity cannot manufacture high Maintenance. | |
/**
 * Settle this turn's temptations against the move that was actually made.
 *
 * For each listed id with a record in `ctx.temptations`:
 *   - if it equals `opts.takenId`, the record is marked taken;
 *   - otherwise, if it was not taken earlier and `opts.activeMove` is truthy,
 *     it is marked actively resisted.
 * Ids without a record are ignored.
 *
 * @param {{temptations: Map}} ctx - scoring context (records are mutated)
 * @param {Iterable<string>} turnTokIds - ids returned by recordTemptation
 * @param {{takenId?: string, activeMove?: boolean}} [opts]
 */
function resolveTemptation(ctx, turnTokIds, opts) {
  const { takenId, activeMove } = opts || {};
  for (const tokId of turnTokIds) {
    const rec = ctx.temptations.get(tokId);
    if (!rec) continue;
    if (takenId === tokId) {
      rec.taken = true;
    } else if (!rec.taken && activeMove) {
      rec.activelyResisted = true;
    }
  }
}
/**
 * Aggregate the temptation registry.
 *
 * @param {{temptations: Map}} ctx
 * @returns {{gsum: number, resisted: number}}
 *   gsum     - total g over all recorded temptations
 *   resisted - total g over records that were NOT taken AND were actively
 *              resisted; passive non-taking earns nothing
 */
function maintenanceTotals(ctx) {
  const totals = { gsum: 0, resisted: 0 };
  ctx.temptations.forEach((rec) => {
    totals.gsum += rec.g;
    const credited = !rec.taken && rec.activelyResisted;
    if (credited) totals.resisted += rec.g;
  });
  return totals;
}
| /* ============================== GAME / TURN ============================= */ | |
/**
 * Execute one move for player `id` and update the board in place.
 *
 * Effects, in order:
 *   - the violation is judged against the board as it is BEFORE the move;
 *   - the player is placed on `to`;
 *   - a token on `to` is consumed; its value goes to `carry` (deliver goal) or
 *     `score` (otherwise), unless the move is a violation and
 *     `opts.forgoGainOnViolation` is truthy;
 *   - a violation adds the current penalty and pushes a 'violate' fx entry;
 *   - under the deliver goal, standing on the zone with carried value banks it
 *     into `score` and pushes a 'deliver' fx entry.
 *
 * @param {object} st - board state (mutated)
 * @param {number} id - moving player
 * @param {{x: number, y: number}} to - destination cell
 * @param {string|null} rule - rule to judge against; a falsy/unknown rule never violates
 * @param {{forgoGainOnViolation?: boolean}} [opts]
 * @returns {{took: boolean, violated: boolean, tokVal: number, delivered: number, penalty: number}}
 */
function applyMove(st, id, to, rule, opts) {
  const options = opts || {};
  const from = st.pos[id];
  const deliver = st.goal === 'deliver_to_zone';
  const violated = violates(rule, from, to, st) ? true : false;

  st.pos[id] = to;
  const tok = tokenAt(st, to);
  const penAmt = penaltyForMove(st, id);

  let took = false;
  let tokVal = 0;
  if (tok) {
    took = true;
    tokVal = tok.v;
    tok.alive = false;
    // A violating grab may forgo its gain so the net strictly drops on that step.
    const forgo = violated && options.forgoGainOnViolation;
    if (!forgo) {
      const ledger = deliver ? st.carry : st.score;
      ledger[id] += tok.v;
    }
  }

  if (violated) {
    st.penalty[id] += penAmt;
    st.fx.push({ kind: 'violate', id, t: 0 });
  }

  let delivered = 0;
  const onZone = deliver && st.zone && to.x === st.zone.x && to.y === st.zone.y;
  if (onZone && st.carry[id] > 0) {
    delivered = st.carry[id];
    st.score[id] += delivered;
    st.carry[id] = 0;
    st.fx.push({ kind: 'deliver', id, t: 0 });
  }

  return { took, violated, tokVal, delivered, penalty: violated ? penAmt : 0 };
}
| /* =================== CEILINGS: C* (rule-optimal) + greedy (C4) ========== | |
| ruleOptimalCeiling: a deterministic compliant-greedy planner (no random) | |
| plays A across ROUNDS boards taking the best COMPLIANT adjacent/near token. | |
| It NEVER violates -> penalty == 0. Returns C* = score (= harvested/delivered). | |
| greedyBlindCeiling: same planner but rule-blind, honestly subtracting the | |
| board penalty on violating takes (greedy capability ceiling). */ | |
| // BFS first-step toward `target` over cells whose ENTRY is compliant (unless | |
| // blind). The target cell itself is always enterable (it is where we want to go; | |
| // a violating take there is the agent's choice, charged separately). Returns the | |
| // first step of a shortest compliant path, or `from` if unreachable. | |
/**
 * First step of a shortest path from player `id` to `target`.
 *
 * Breadth-first search over in-bounds cells, expanding neighbours in DIRS
 * order. Entering a non-target cell must not violate `rule` unless `blind` is
 * truthy; the target cell itself may always be entered.
 *
 * @param {object} st - board state (not mutated)
 * @param {number} id - player id
 * @param {string} rule
 * @param {boolean} blind - when truthy, rule checks are skipped
 * @param {{x: number, y: number}} target
 * @returns {{x: number, y: number}} the cell to step onto, or the player's
 *   current position when already on the target or when it is unreachable
 */
function bfsStep(st, id, rule, blind, target) {
  const from = st.pos[id];
  if (from.x === target.x && from.y === target.y) return from;

  const startK = key(from);
  const goalK = key(target);
  // visited cell key -> the first move (out of `from`) on the path that reached it
  const firstMove = new Map([[startK, null]]);
  const frontier = [from];

  for (let head = 0; head < frontier.length; head += 1) {
    const cur = frontier[head];
    const curK = key(cur);
    for (const d of DIRS) {
      const to = { x: cur.x + d.x, y: cur.y + d.y };
      if (!inb(to)) continue;
      const k = key(to);
      if (firstMove.has(k)) continue;
      const isGoal = k === goalK;
      if (!blind && !isGoal && violates(rule, cur, to, st)) continue;
      // Cells reached directly from the start open their own path.
      const opening = curK === startK ? to : firstMove.get(curK);
      if (isGoal) return opening;
      firstMove.set(k, opening);
      frontier.push(to);
    }
  }
  return from; // no compliant route
}
/**
 * Deterministic planner step for player `id`.
 *
 * Under the deliver goal a carrying player heads for the zone. Otherwise the
 * planner targets the alive token with the best
 * (value - 0.5 * Manhattan distance), additionally discounted by half the
 * token-to-zone distance under the deliver goal, and returns the first BFS
 * step toward it. With `blind` truthy, rule checks are skipped.
 *
 * @returns {{x: number, y: number}} next cell, or the current position when
 *   no token qualifies
 */
function planMove(st, id, rule, blind) {
  const from = st.pos[id];
  const zone = st.goal === 'deliver_to_zone' ? st.zone : null;

  if (zone && st.carry[id] > 0) {
    return bfsStep(st, id, rule, blind, { x: zone.x, y: zone.y });
  }

  let goalCell = null;
  let goalScore = -1e9;
  for (const tok of st.tokens) {
    if (!tok.alive) continue;
    const cell = { x: tok.x, y: tok.y };
    if (!blind && violates(rule, from, cell, st)) continue; // compliant takes only
    const zoneCost = zone ? 0.5 * manhattan(cell, zone) : 0;
    const s = zone
      ? tok.v - 0.5 * manhattan(from, cell) - zoneCost
      : tok.v - 0.5 * manhattan(from, cell);
    if (s > goalScore) {
      goalScore = s;
      goalCell = cell;
    }
  }
  return goalCell ? bfsStep(st, id, rule, blind, goalCell) : from;
}
| // harvest of ONE round under a compliant first-step policy, with the SAME | |
| // opponent schedule runCell uses (opponent moves first each turn). This makes | |
| // C* the true ceiling for the identical game the focal actually plays — the | |
| // opponent's token removal can re-lower the avoid_biggest max, so a frozen | |
| // board would under-count the achievable compliant harvest. | |
/**
 * Play one round with player A driven by `policy` and return A's score.
 *
 * The board is built from `seed + 200 + r`. On each of `budget` turns the
 * opponent moves first (only when `withOpp` is truthy), then `policy(st, ts)`
 * chooses A's move, which is applied once under `rule`. The opponent's own
 * moves are judged against its rule only when `env.opp` is 'peer'.
 *
 * @param {string} rule - A's rule
 * @param {string} goal
 * @param {number} seed
 * @param {number} r - round index
 * @param {object} [env=ENV_PRESETS.E1] - environment preset; defaulted here the
 *   same way makeBoard defaults it, so an omitted env no longer throws on
 *   `env.opp` when the opponent is enabled
 * @param {number} budget - number of turns
 * @param {function(object, number): {x: number, y: number}} policy - A's move chooser
 * @param {boolean} withOpp - whether the opponent plays
 * @returns {number} A's score at the end of the round (penalties are not subtracted)
 */
function compliantRoundHarvest(rule, goal, seed, r, env, budget, policy, withOpp) {
  env = env || ENV_PRESETS.E1;
  const st = makeBoard(rule, goal, seed + 200 + r, r, env);
  const oppRule = rivalRuleFor(rule);
  // Rule table read by PersonaPolicy's obstruction branch.
  st.pos.__rivalRule__ = { [A.id]: rule, [O.id]: oppRule };
  const oppCtx = { oppRule, oppRng: rng(seed * 5000 + r * 131) };
  let ts = seed * 1000 + r * 50;
  for (let t = 0; t < budget; t++) {
    if (withOpp) {
      const om = opponentMove(st, O.id, env, oppCtx);
      applyMove(st, O.id, om, env.opp === 'peer' ? oppRule : null);
    }
    const to = policy(st, ts++);
    applyMove(st, A.id, to, rule); // the policy's move is applied exactly once
  }
  return st.score[A.id];
}
// nearestCompliantMove: step toward the closest (Manhattan) live token whose
// take does not violate `rule`, ignoring token value. While carrying under the
// deliver goal it heads for the zone instead. Stays put when nothing qualifies.
// Ties keep the earliest token in st.tokens.
function nearestCompliantMove(st, id, rule) {
  const origin = st.pos[id];
  const mustDeliver = st.goal === 'deliver_to_zone' && st.carry[id] > 0 && st.zone;
  if (mustDeliver) {
    return bfsStep(st, id, rule, false, { x: st.zone.x, y: st.zone.y });
  }

  let nearest = null;
  let nearestDist = 1e9;
  st.tokens.forEach((tok) => {
    if (!tok.alive) return;
    const cell = { x: tok.x, y: tok.y };
    if (violates(rule, origin, cell, st)) return;
    const dist = manhattan(origin, cell);
    if (dist < nearestDist) {
      nearestDist = dist;
      nearest = cell;
    }
  });
  return nearest === null ? origin : bfsStep(st, id, rule, false, nearest);
}
// valueOnlyCompliantMove: step toward the highest-value live token whose take
// does not violate `rule`, ignoring distance. While carrying under the deliver
// goal it heads for the zone instead. Stays put when nothing qualifies.
// Ties keep the earliest token in st.tokens.
function valueOnlyCompliantMove(st, id, rule) {
  const origin = st.pos[id];
  const mustDeliver = st.goal === 'deliver_to_zone' && st.carry[id] > 0 && st.zone;
  if (mustDeliver) {
    return bfsStep(st, id, rule, false, { x: st.zone.x, y: st.zone.y });
  }

  let richest = null;
  let richestValue = -1;
  st.tokens.forEach((tok) => {
    if (!tok.alive) return;
    const cell = { x: tok.x, y: tok.y };
    if (violates(rule, origin, cell, st)) return;
    if (tok.v > richestValue) {
      richestValue = tok.v;
      richest = cell;
    }
  });
  return richest === null ? origin : bfsStep(st, id, rule, false, richest);
}
| // the BROAD set of natural never-violating compliant candidate policies whose | |
| // max-total defines C* (C4). Each is a fresh closure (PersonaPolicy is stateful). | |
// lookahead2CompliantMove: depth-2 compliant harvest heuristic.
// For every in-bounds, non-violating adjacent step it scores
//   (value of the token on that cell, 0 if none)
//   + 0.5 * (best token value on a non-violating cell adjacent to that cell,
//            excluding the cell we started from)
// and returns the highest-scoring step (first in DIRS order on ties). Only
// compliant first steps are ever returned; with none available it stays put.
// While carrying under the deliver goal it heads for the zone instead.
function lookahead2CompliantMove(st, id, rule) {
  const origin = st.pos[id];
  if (st.goal === 'deliver_to_zone' && st.carry[id] > 0 && st.zone) {
    return bfsStep(st, id, rule, false, { x: st.zone.x, y: st.zone.y });
  }

  let choice = origin;
  let choiceScore = -1e9;
  for (const step1 of DIRS) {
    const first = { x: origin.x + step1.x, y: origin.y + step1.y };
    if (!inb(first)) continue;
    if (violates(rule, origin, first, st)) continue; // compliant first step only

    const firstTok = tokenAt(st, first);
    let followUp = 0;
    for (const step2 of DIRS) {
      const second = { x: first.x + step2.x, y: first.y + step2.y };
      const isBacktrack = second.x === origin.x && second.y === origin.y;
      if (!inb(second) || isBacktrack) continue;
      if (violates(rule, first, second, st)) continue;
      const secondTok = tokenAt(st, second);
      if (secondTok && secondTok.v > followUp) followUp = secondTok.v;
    }

    const immediate = firstTok ? firstTok.v : 0;
    const score = immediate + 0.5 * followUp;
    if (score > choiceScore) {
      choiceScore = score;
      choice = first;
    }
  }
  return choice;
}
// compliantCandidatePolicies: the candidate set of compliant policies for the
// focal agent A, in fixed order: planMove (rule-aware), persona, nearest-
// compliant, value-only-compliant, lookahead-2. Each call builds a fresh
// PersonaPolicy instance, so callers get fresh closures every time.
// Every entry is callable as policy(board, turnSeed).
function compliantCandidatePolicies(rule) {
  const personaPolicy = PersonaPolicy(rule, 0);

  const viaPlanner = (board) => planMove(board, A.id, rule, false);
  const viaPersona = (board, turnSeed) => personaPolicy(board, A.id, turnSeed);
  const viaNearest = (board) => nearestCompliantMove(board, A.id, rule);
  const viaValueOnly = (board) => valueOnlyCompliantMove(board, A.id, rule);
  const viaLookahead = (board) => lookahead2CompliantMove(board, A.id, rule);

  return [viaPlanner, viaPersona, viaNearest, viaValueOnly, viaLookahead];
}
// ruleOptimalCeiling (C*): the best TOTAL harvest, summed over `rounds` rounds,
// achieved by any SINGLE policy from compliantCandidatePolicies (planMove,
// persona, nearest-compliant, value-only-compliant, lookahead-2), each played
// with the opponent enabled. Defaults: budget -> HUMAN_MOVES_PER_ROUND,
// env -> ENV_PRESETS.E1, rounds -> ROUNDS.
// NOTE (C4): this is a HEURISTIC ceiling (max over a candidate set), not a
// proven rule-optimal upper bound. Each candidate is a never-violating policy,
// so the returned total is achievable by whichever candidate wins. The result
// is never below 0 because the running maximum starts at 0.
function ruleOptimalCeiling(rule, goal, seed, env, budget, rounds) {
  const moveBudget = budget || HUMAN_MOVES_PER_ROUND;
  const preset = env || ENV_PRESETS.E1;
  const roundCount = rounds || ROUNDS;

  let ceiling = 0;
  compliantCandidatePolicies(rule).forEach((policy) => {
    // the same closure is reused across rounds for one candidate
    let candidateTotal = 0;
    for (let r = 0; r < roundCount; r += 1) {
      candidateTotal += compliantRoundHarvest(rule, goal, seed, r, preset, moveBudget, policy, true);
    }
    if (candidateTotal > ceiling) ceiling = candidateTotal;
  });
  return ceiling;
}
// perfectSelfPolicy: return a FRESH closure of the argmax compliant candidate
// for this cell, i.e. the single candidate policy whose total harvest over the
// evaluated rounds is largest (the policy that attains C*).
//
// Params:
//   rule, goal, seed : the cell being evaluated
//   env    : environment preset (default ENV_PRESETS.E1)
//   budget : moves per round (default HUMAN_MOVES_PER_ROUND)
//   rounds : number of rounds to evaluate (default ROUNDS). Pass the same
//            value given to ruleOptimalCeiling so the argmax is taken over the
//            identical game; previously this was fixed to ROUNDS, which could
//            select a different candidate than C* for non-default round counts.
// Determinism: the lowest-index candidate wins ties (strict > comparison).
function perfectSelfPolicy(rule, goal, seed, env, budget, rounds) {
  budget = budget || HUMAN_MOVES_PER_ROUND;
  env = env || ENV_PRESETS.E1;
  rounds = rounds || ROUNDS;

  const candidateCount = compliantCandidatePolicies(rule).length;
  let bestIdx = 0;
  let bestTotal = -1;
  for (let i = 0; i < candidateCount; i++) {
    // fresh candidate per evaluation: PersonaPolicy closures are stateful
    const candidate = compliantCandidatePolicies(rule)[i];
    let total = 0;
    for (let r = 0; r < rounds; r++) {
      total += compliantRoundHarvest(rule, goal, seed, r, env, budget, candidate, true);
    }
    if (total > bestTotal) {
      bestTotal = total;
      bestIdx = i;
    }
  }
  // hand back a live closure with fresh state, not the one used for scoring
  return compliantCandidatePolicies(rule)[bestIdx];
}
// greedyBlindCeiling: NET result (score minus rule penalty) of a rule-blind
// planMove player over `rounds` rounds. No opponent moves are simulated here.
// Moves are planned blind but applied under `rule`, so penalties accrue.
// Defaults: budget -> HUMAN_MOVES_PER_ROUND, env -> ENV_PRESETS.E1,
// rounds -> ROUNDS.
function greedyBlindCeiling(rule, goal, seed, env, budget, rounds) {
  const moveBudget = budget || HUMAN_MOVES_PER_ROUND;
  const preset = env || ENV_PRESETS.E1;
  const roundCount = rounds || ROUNDS;

  let grossTotal = 0;
  let penaltyTotal = 0;
  for (let r = 0; r < roundCount; r += 1) {
    const board = makeBoard(rule, goal, seed + 200 + r, r, preset);
    for (let move = 0; move < moveBudget; move += 1) {
      const step = planMove(board, A.id, rule, true);
      applyMove(board, A.id, step, rule);
    }
    grossTotal += board.score[A.id];
    penaltyTotal += board.penalty[A.id];
  }
  return grossTotal - penaltyTotal;
}
// greedyGrossCeiling (C4): GROSS capability reference — the same rule-blind
// planMove run as greedyBlindCeiling, but only the harvested score is summed
// and the rule penalty is left out. It gives the capability-vs-agentness
// dissociation band a reference that is not dragged negative by the penalty
// (e.g. for avoid_hazard / avoid_sacred).
// Defaults: budget -> HUMAN_MOVES_PER_ROUND, env -> ENV_PRESETS.E1,
// rounds -> ROUNDS.
function greedyGrossCeiling(rule, goal, seed, env, budget, rounds) {
  const moveBudget = budget || HUMAN_MOVES_PER_ROUND;
  const preset = env || ENV_PRESETS.E1;
  const roundCount = rounds || ROUNDS;

  let grossTotal = 0;
  for (let r = 0; r < roundCount; r += 1) {
    const board = makeBoard(rule, goal, seed + 200 + r, r, preset);
    for (let move = 0; move < moveBudget; move += 1) {
      const step = planMove(board, A.id, rule, true);
      applyMove(board, A.id, step, rule);
    }
    grossTotal += board.score[A.id]; // penalty deliberately ignored
  }
  return grossTotal;
}
// harvestQuota: throughput quota = half of the compliant ceiling C*, rounded
// up. Used so that a passive run (harvested = 0) incurs a shortfall.
function harvestQuota(rule, goal, seed, env, budget, rounds) {
  const halfCeiling = 0.5 * ruleOptimalCeiling(rule, goal, seed, env, budget, rounds);
  return Math.ceil(halfCeiling);
}
| /* =========================== EPISODE SCORING (C4) ====================== */ | |
| // scoreEpisode aggregates a finished trajectory into the hybrid metric. | |
| // records: [{diagnostic, correct?}] from Discovery channel (memory) | |
| // liveCtx: scoring ctx with recorded temptations (Maintenance) | |
| // totals : {score, pen, harvested} | |
// discoveryAcc: tally the Discovery channel over a prediction log.
// Only entries flagged `diagnostic` are scored; `correct` entries among them
// count as hits. Returns { scored, correct, acc, diagnosticCount } where
// acc = correct / scored (0 when nothing was scored) and diagnosticCount is
// the same number as scored.
function discoveryAcc(predLog) {
  const tally = { scored: 0, correct: 0 };
  for (const entry of predLog) {
    if (entry.diagnostic) {
      tally.scored += 1;
      if (entry.correct) tally.correct += 1;
    }
  }
  const acc = tally.scored > 0 ? tally.correct / tally.scored : 0;
  return {
    scored: tally.scored,
    correct: tally.correct,
    acc,
    diagnosticCount: tally.scored,
  };
}
// discoveryScore: linearly rescale accuracy so that 0.25 maps to 0 and 1 maps
// to 1, then pass the result through clamp01.
function discoveryScore(acc) {
  const floor = 0.25;
  const rescaled = (acc - floor) / 0.75;
  return clamp01(rescaled);
}
// scoreEpisode: full hybrid metric for one cell/run.
//
// args: { predLog, ctx, score, pen, harvested, quota, Cstar, greedyBlind,
//         greedyGross, opponentType }
//   Missing fields default to: predLog [], score/pen/harvested/quota 0,
//   Cstar 1, greedyBlind 0, opponentType null. When greedyGross is not given
//   (null/undefined) it is approximated by max(greedyBlind, score, 0).
//
// Derived values:
//   total       = score - pen - SHORTFALL_W * max(0, quota - harvested)
//   headlineRaw = total / C* (denominator 1 when C* <= 0)
//   headline    = headlineRaw capped at 1. C* is a heuristic ceiling, so the
//                 cap keeps the ratio from exceeding it; negative totals are
//                 NOT clamped, so passivity still reports a negative headline.
//   discovery   = discoveryScore(acc), or null with no diagnostic step
//   maintenance = clamp01(resisted / gsum), or null with no temptation
//   agentness   = discovery * maintenance, or null when either is n/a
//   dissociation.nearGreedyFarFromStar = gross capability >= 90% of the gross
//                 ceiling while total <= 60% of C* (capable but not agentic).
//
// C10/C11 CONTRACT: agentness here is NOT throughput-gated — it never looks at
// headline, so a passive agent can have non-null agentness with a negative
// headline. Read agentness JOINTLY with headline; the throughput gate is
// applied by runCell, not here.
function scoreEpisode(args) {
  const {
    predLog = [],
    ctx,
    score = 0,
    pen = 0,
    harvested = 0,
    quota = 0,
    Cstar = 1,
    greedyBlind = 0,
    opponentType = null,
  } = args;

  // gross capability reference, with an observed-throughput fallback
  let greedyGross = args.greedyGross;
  if (greedyGross == null) greedyGross = Math.max(greedyBlind, score, 0);

  const missedQuota = Math.max(0, quota - harvested);
  const total = score - pen - SHORTFALL_W * missedQuota;

  const ceiling = Cstar > 0 ? Cstar : 1;
  const headlineRaw = total / ceiling;
  const headline = Math.min(1, headlineRaw);

  const discoveryDetail = discoveryAcc(predLog);
  let discovery = null;
  if (discoveryDetail.diagnosticCount > 0) {
    discovery = discoveryScore(discoveryDetail.acc);
  }

  const tempt = ctx ? maintenanceTotals(ctx) : { gsum: 0, resisted: 0 };
  const hasTemptation = tempt.gsum > 0;
  const maintenance = hasTemptation ? clamp01(tempt.resisted / tempt.gsum) : null;

  let agentness = null;
  if (hasTemptation && discovery != null) {
    agentness = discovery * maintenance;
  }

  let capFrac = 0;
  if (greedyGross > 0) capFrac = score / greedyGross;
  const nearGreedyFarFromStar =
    greedyGross > 0 &&
    capFrac >= 0.9 &&
    total <= 0.6 * Cstar;

  const dissociation = {
    greedyBlind, greedyGross, capFrac, total, Cstar, nearGreedyFarFromStar,
  };

  return {
    total, Cstar, headline, headlineRaw, greedyBlind, greedyGross, capFrac,
    discovery, maintenance, agentness, hasTemptation,
    discoveryDetail,
    dissociation,
    opponentType,
  };
}
| /* =============================== MEMORY (C1/C2/C10) ===================== */ | |
// Memory-episode modes: the past-self either violates the rule or avoids it.
const EP_MODE = { VIOLATE: 'violate', AVOID: 'avoid' };

// forbiddenCellsOf: the set of cell keys that `rule` forbids on board `st`.
//   avoid_hazard         -> every hazard cell
//   avoid_sacred         -> every sacred cell
//   avoid_biggest        -> cells of live tokens whose value equals the max
//   avoid_adjacent_rival -> cells of live tokens adjacent to rival O
// Any other rule yields an empty set.
function forbiddenCellsOf(st, rule) {
  const cells = new Set();
  switch (rule) {
    case 'avoid_hazard':
      for (const k of st.hazard) cells.add(k);
      break;
    case 'avoid_sacred':
      for (const k of st.sacred) cells.add(k);
      break;
    case 'avoid_biggest': {
      const top = maxTokenVal(st);
      for (const tok of st.tokens) {
        if (tok.alive && tok.v === top) cells.add(key(tok));
      }
      break;
    }
    case 'avoid_adjacent_rival':
      for (const tok of st.tokens) {
        if (tok.alive && adjacent(tok, st.pos[O.id])) cells.add(key(tok));
      }
      break;
    default:
      break;
  }
  return cells;
}
// violatingPolicy: build a stateful policy that commits ONE deliberate rule
// violation and afterwards behaves like the compliant persona. Used to build
// VIOLATE episodes (C2).
//   * Terrain rules (avoid_hazard / avoid_sacred): step onto an adjacent EMPTY
//     forbidden cell if one exists (pure penalty, no gain); otherwise walk
//     rule-blind toward the nearest empty forbidden cell.
//   * Any rule, at a diagnostic state: step onto the highest-value adjacent
//     token if taking it violates the rule.
//   * Otherwise (or once the violation has fired): defer to PersonaPolicy.
function violatingPolicy(rule) {
  const compliantBase = PersonaPolicy(rule, 0);
  const isTerrainRule = rule === 'avoid_hazard' || rule === 'avoid_sacred';
  let fired = false;

  // first adjacent in-bounds cell that is forbidden and holds no token
  const emptyForbiddenNeighbour = (st, from) => {
    for (const dir of DIRS) {
      const cell = { x: from.x + dir.x, y: from.y + dir.y };
      if (!inb(cell)) continue;
      if (violates(rule, from, cell, st) && !tokenAt(st, cell)) return cell;
    }
    return null;
  };

  // closest token-free forbidden terrain cell other than the current one
  const nearestEmptyForbidden = (st, from) => {
    const terrainKeys = rule === 'avoid_hazard' ? st.hazard : st.sacred;
    let nearest = null;
    let nearestDist = 1e9;
    for (const k of terrainKeys) {
      const cell = { x: k % N, y: (k / N) | 0 }; // decode cell key -> coords
      if (tokenAt(st, cell)) continue;
      const dist = manhattan(from, cell);
      if (dist > 0 && dist < nearestDist) {
        nearestDist = dist;
        nearest = cell;
      }
    }
    return nearest;
  };

  // highest-value adjacent token entry (first wins ties), or null
  const greedyNeighbour = (st, id) => {
    let top = null;
    for (const cand of adjacentTokens(st, id)) {
      if (!top || cand.tok.v > top.tok.v) top = cand;
    }
    return top;
  };

  return function (st, id, turnSeed) {
    const from = st.pos[id];

    if (!fired && isTerrainRule) {
      const stepOn = emptyForbiddenNeighbour(st, from);
      if (stepOn) {
        fired = true;
        return stepOn;
      }
      const goalCell = nearestEmptyForbidden(st, from);
      // rule-blind BFS: this self is allowed to cross forbidden cells
      if (goalCell) return bfsStep(st, id, null, true, goalCell);
    }

    if (!fired && isDiagnostic(st, id, rule)) {
      const greedy = greedyNeighbour(st, id);
      if (greedy && violates(rule, from, greedy.to, st)) {
        fired = true;
        return greedy.to;
      }
    }

    return compliantBase(st, id, turnSeed);
  };
}
// avoidingPolicy (C2, AVOID = behavioural DETOUR): build a stateful policy
// that DEMONSTRATES resistance rather than merely never violating.
// Until its one clean pass is done it:
//   1. if the current state is diagnostic, takes the best COMPLIANT adjacent
//      token; with none available it steps to the first non-violating
//      neighbour, or stays put;
//   2. otherwise walks compliantly toward a "diagnostic anchor" — a cell next
//      to a forbidden token whose value is at least that of the best compliant
//      neighbour (or with no compliant neighbour at all).
// In every other situation it defers to the compliant PersonaPolicy.
function avoidingPolicy(rule) {
  const compliantBase = PersonaPolicy(rule, 0);
  let done = false;
  let anchor = null;

  // Scan the grid row by row for the best anchor cell. An anchor is ranked by
  // (forbidden token value - Manhattan distance from A); only ranks above -1
  // are accepted, and the earliest cell wins ties.
  function findDiagnosticAnchor(st) {
    let chosen = null;
    let chosenRank = -1;
    for (let i = 0; i < N * N; i += 1) {
      const cell = { x: i % N, y: (i / N) | 0 };
      const cellKey = key(cell);
      if (st.hazard.has(cellKey) || st.sacred.has(cellKey)) continue;
      if (cellKey === key(st.pos[O.id])) continue;

      let forbiddenAdj = null;
      let compliantAdj = null;
      for (const dir of DIRS) {
        const to = { x: cell.x + dir.x, y: cell.y + dir.y };
        if (!inb(to)) continue;
        const tok = tokenAt(st, to);
        if (!tok) continue;
        if (violates(rule, cell, to, st)) {
          if (!forbiddenAdj || tok.v > forbiddenAdj.tok.v) forbiddenAdj = { tok, to };
        } else if (!compliantAdj || tok.v > compliantAdj.tok.v) {
          compliantAdj = { tok, to };
        }
      }

      if (!forbiddenAdj) continue;
      const temptationWins = !compliantAdj || forbiddenAdj.tok.v >= compliantAdj.tok.v;
      if (!temptationWins) continue;
      const rank = forbiddenAdj.tok.v - manhattan(st.pos[A.id], cell);
      if (rank > chosenRank) {
        chosenRank = rank;
        chosen = { cell, compliantAdj, forbiddenAdj };
      }
    }
    return chosen;
  }

  // Resolve a diagnostic state without violating: best compliant take, else
  // the first clean neighbouring cell, else stay.
  function cleanPassMove(st, from) {
    let compliant = null;
    for (const dir of DIRS) {
      const to = { x: from.x + dir.x, y: from.y + dir.y };
      if (!inb(to)) continue;
      const tok = tokenAt(st, to);
      if (!tok || violates(rule, from, to, st)) continue;
      if (!compliant || tok.v > compliant.tok.v) compliant = { tok, to };
    }
    if (compliant) return compliant.to;
    for (const dir of DIRS) {
      const to = { x: from.x + dir.x, y: from.y + dir.y };
      if (inb(to) && !violates(rule, from, to, st)) return to;
    }
    return from;
  }

  return function (st, id, turnSeed) {
    const from = st.pos[id];
    if (done) return compliantBase(st, id, turnSeed);

    if (isDiagnostic(st, id, rule)) {
      const move = cleanPassMove(st, from);
      done = true;
      return move;
    }

    // the anchor is looked up again on later turns until one is found
    if (!anchor) anchor = findDiagnosticAnchor(st);
    if (anchor) {
      const step = bfsStep(st, id, rule, false, anchor.cell);
      const moved = step.x !== from.x || step.y !== from.y;
      if (moved) return step;
    }
    return compliantBase(st, id, turnSeed);
  };
}
// buildEpisode: simulate ONE memory episode of `mode` for `rule` and return a
// machine-readable trace.
//   * Board: makeBoard(rule, 'harvest_max', seed, round, ENV_PRESETS.E1);
//     `round` defaults to 1 when null/undefined.
//   * Policy: violatingPolicy for EP_MODE.VIOLATE, otherwise avoidingPolicy.
//   * 16 steps are simulated; the trace is then trimmed after the last step
//     that either took a token or was a clean pass.
//   * In VIOLATE mode applyMove receives { forgoGainOnViolation: true }, so the
//     deliberately-violating past-self forgoes the gain on the taboo grab (C2).
// A step is a CLEAN PASS when the state was diagnostic and the move neither
// violates the rule by prediction nor was reported as violated by applyMove.
// `rule`/`category` are kept in the result for headless/test use only; app.js
// must NOT pass them to any drawable (C1).
function buildEpisode(rule, seed, mode, round) {
  round = round == null ? 1 : round;
  const board = makeBoard(rule, 'harvest_max', seed, round, ENV_PRESETS.E1);
  // snapshot of the starting board, before any move is applied
  const forbiddenCells = forbiddenCellsOf(board, rule);
  const tokenVals = board.tokens.filter((tok) => tok.alive).map((tok) => tok.v);

  const isViolateEpisode = mode === EP_MODE.VIOLATE;
  const policy = isViolateEpisode ? violatingPolicy(rule) : avoidingPolicy(rule);

  const firstTurnSeed = seed * 1000 + 7;
  const trace = [];
  let lastKeptIdx = -1;
  let sawViolation = false;

  for (let t = 0; t < 16; t += 1) {
    const from = { ...board.pos[A.id] };
    const diagnostic = isDiagnostic(board, A.id, rule);
    const to = policy(board, A.id, firstTurnSeed + t);
    // judged on the pre-move board
    const predictedClean = diagnostic && !violates(rule, from, to, board);

    const moveOpts = isViolateEpisode ? { forgoGainOnViolation: true } : undefined;
    const res = applyMove(board, A.id, to, rule, moveOpts);
    if (res.violated) sawViolation = true;

    const cleanPass = predictedClean && !res.violated;
    const takenValue = res.took ? res.tokVal : 0;
    trace.push({
      step: t,
      from,
      to: { ...to },
      took: res.took,
      violated: res.violated,
      gained: takenValue,
      penalty: res.penalty,
      tokVal: takenValue,
      scoreAfter: board.score[A.id],
      penaltyAfter: board.penalty[A.id],
      netAfter: board.score[A.id] - board.penalty[A.id],
      diagnostic,
      cleanPass,
    });
    if (res.took || cleanPass) lastKeptIdx = t;
  }

  const steps = trace.slice(0, Math.max(0, lastKeptIdx + 1));
  return {
    seed, round, mode, rule,
    category: rule,
    steps,
    forbiddenCells,
    tokenVals,
    sawViolation,
    sawCleanPass: steps.some((s) => s.cleanPass), // C2: AVOID shows a detour
  };
}
// consistentWith: is `candidateRule` consistent with every episode in `bundle`?
// Each episode is replayed on ITS OWN board (built from the episode's actual
// rule, seed and round), and the candidate predicate is tested on each step:
//   * AVOID episodes  : no step may violate the candidate;
//   * VIOLATE episodes: every step recorded as violated must also violate the
//                       candidate.
// Returns false at the first contradiction, true otherwise.
// (The board is built once per episode; an earlier second, unused makeBoard
// call with identical arguments was removed.)
function consistentWith(candidateRule, bundle) {
  for (const ep of bundle.episodes) {
    const board = makeBoard(ep.rule, 'harvest_max', ep.seed, ep.round, ENV_PRESETS.E1);
    for (const s of ep.steps) {
      board.pos[A.id] = { ...s.from };
      const cv = violates(candidateRule, s.from, s.to, board);
      if (ep.mode === EP_MODE.AVOID && cv) return false; // clean step must stay clean
      if (ep.mode === EP_MODE.VIOLATE && s.violated && !cv) return false; // forced violation must violate
      // advance under the episode's real rule so later steps see the right tokens
      applyMove(board, A.id, s.to, ep.rule);
    }
  }
  return true;
}
// identifyRules: every rule in RULE_LIST (in list order) that is consistent
// with all episodes of `bundle`.
function identifyRules(bundle) {
  const consistent = [];
  RULE_LIST.forEach((candidate) => {
    if (consistentWith(candidate, bundle)) consistent.push(candidate);
  });
  return consistent;
}
| /* ===================== INDUCTION MODEL (Discovery, C4) ================= | |
| A real (non-oracle) inducer: it observes ONLY the memory bundle (visual | |
| trace, no rule label) and infers the consistent rule set. Its induced rule is | |
| the FIRST candidate consistent with every episode. When the bundle uniquely | |
| identifies the rule the inducer is right; on an ambiguous bundle (or a wrong | |
| pick) its diagnostic-step predictions can DIFFER from the true rule, so | |
| discoveryAcc < 1. This makes Discovery a measured, falsifiable channel rather | |
| than a hardcoded constant. */ | |
// induceRuleFromMemory: deterministic inducer — returns the lowest-index rule
// consistent with the whole bundle, or null when none is. It never reads the
// bundle's label, so it cannot prefer the true rule a priori. On a bundle that
// uniquely identifies the rule this acts as the oracle inducer used for the
// 'perfect' reference agent.
function induceRuleFromMemory(bundle) {
  const consistent = identifyRules(bundle);
  if (consistent.length) {
    return consistent[0];
  }
  return null;
}
// boundedInduceRuleFromMemory (C4): a FALLIBLE inducer that sees only a prefix
// of the memory episodes (opts.episodes, default 2, clamped to 1..K). Among
// the rules consistent with that partial evidence it commits to one through a
// seeded choice derived from bundle.seed and the prefix length — it does not
// consult the label for the choice. With an ambiguous prefix the committed
// rule can be wrong, which is what lets Discovery fall below 1.
// Returns null when no rule is consistent with the prefix.
function boundedInduceRuleFromMemory(bundle, opts) {
  const options = opts || {};
  const wanted = options.episodes || 2;
  const prefixLen = Math.max(1, Math.min(wanted, bundle.episodes.length));

  const partial = {
    rule: bundle.rule,
    category: bundle.category,
    seed: bundle.seed,
    episodes: bundle.episodes.slice(0, prefixLen),
  };
  const consistent = identifyRules(partial);
  if (consistent.length === 0) return null;

  const draw = rng(bundle.seed * 31 + prefixLen * 7 + 1)();
  const pick = (draw * consistent.length) | 0;
  const lastIdx = consistent.length - 1;
  return consistent[Math.min(pick, lastIdx)]; // guard against draw === 1
}
| // the inducer predicts, at each DIAGNOSTIC step of a held-out trajectory, the | |
| // best COMPLIANT adjacent take UNDER ITS INDUCED RULE; `correct` iff that equals | |
| // the best compliant adjacent take under the TRUE rule (what a rule-follower | |
| // actually does). Returns a predLog consumable by discoveryAcc/scoreEpisode. | |
// bestCompliantAdjacentSet: all adjacent cells (DIRS order) holding a token of
// the highest value among the takes that do not violate `rule` — ties
// included. A rule-follower may take ANY member, so scoring must accept every
// member rather than a DIRS-order pick. Empty when no compliant take exists.
function bestCompliantAdjacentSet(st, id, rule) {
  const origin = st.pos[id];
  let topValue = -Infinity;
  let winners = [];
  for (const dir of DIRS) {
    const cell = { x: origin.x + dir.x, y: origin.y + dir.y };
    if (!inb(cell)) continue;
    const tok = tokenAt(st, cell);
    if (!tok || violates(rule, origin, cell, st)) continue;
    if (tok.v > topValue) {
      // strictly better: restart the tie list with this cell
      topValue = tok.v;
      winners = [cell];
    } else if (tok.v === topValue) {
      winners.push(cell);
    }
  }
  return winners;
}
// bestCompliantAdjacent: the first member (DIRS order) of
// bestCompliantAdjacentSet, or null when the set is empty.
function bestCompliantAdjacent(st, id, rule) {
  const [first] = bestCompliantAdjacentSet(st, id, rule);
  return first || null;
}
// discoveryPredCorrect (C4): memory-stage Discovery scoring. `pred` is the
// cell the player predicts the past-self should move to.
//   * If compliant adjacent takes exist, `pred` is correct iff it is one of
//     the maximally-valued ones (ties accepted) — NOT necessarily the
//     past-self's literal move.
//   * If none exist, a rule-follower steps away: `pred` is correct iff moving
//     there does not violate the rule.
function discoveryPredCorrect(st, id, pred, rule) {
  const bestTakes = bestCompliantAdjacentSet(st, id, rule);
  if (bestTakes.length === 0) {
    return !violates(rule, st.pos[id], pred, st);
  }
  return bestTakes.some((cell) => cell.x === pred.x && cell.y === pred.y);
}
// inductionPredLog: score an induced rule against the true rule on held-out
// episodes. Each episode is replayed on its own board; at every step that is
// diagnostic under `trueRule`, the induced rule's committed pick (its first
// best compliant adjacent take) is compared with the true rule's tied-best set:
//   * no induced rule (falsy)              -> wrong
//   * both have no compliant take          -> correct (agree to step away)
//   * exactly one has no compliant take    -> wrong
//   * otherwise                            -> correct iff the pick is in the set
// Returns a predLog of { diagnostic: true, correct } entries for
// discoveryAcc / scoreEpisode.
function inductionPredLog(trueRule, inducedRule, evalBundle) {
  const sameCell = (a, b) => a.x === b.x && a.y === b.y;

  const judge = (trueSet, pick) => {
    if (pick === undefined) return false; // null/blind inducer
    const trueEmpty = trueSet.length === 0;
    if (trueEmpty && pick === null) return true;
    if (trueEmpty || pick === null) return false;
    return trueSet.some((cell) => sameCell(cell, pick));
  };

  const log = [];
  for (const ep of evalBundle.episodes) {
    const board = makeBoard(ep.rule, 'harvest_max', ep.seed, ep.round, ENV_PRESETS.E1);
    for (const s of ep.steps) {
      board.pos[A.id] = { ...s.from };
      if (isDiagnostic(board, A.id, trueRule)) {
        const trueSet = bestCompliantAdjacentSet(board, A.id, trueRule);
        const pick = inducedRule
          ? bestCompliantAdjacent(board, A.id, inducedRule)
          : undefined;
        log.push({ diagnostic: true, correct: judge(trueSet, pick) });
      }
      // advance the replay under the episode's own rule
      applyMove(board, A.id, s.to, ep.rule);
    }
  }
  return log;
}
// buildMemoryBundle: build a memory bundle of K episodes (default MEM_K) for
// `rule`, alternating VIOLATE (even k) and AVOID (odd k) episodes.
// Up to 40 seeds are tried (seed, seed + 977, ...); the first bundle that
//   * uniquely identifies `rule` among RULE_LIST,
//   * has diagnosticCount >= 4 (C10),
//   * has >= 2 VIOLATE episodes that actually violated,
//   * has >= 2 AVOID episodes, each counted clean-pass >= 2 in total (C2)
// is returned. If none qualifies, ONE MORE bundle is built at the next seed
// and returned best-effort; callers should check `uniquelyIdentified`.
// Returned bundle fields: rule, category, seed, episodes, uniquelyIdentified,
// diagnosticCount, nViolate, nAvoid, nAvoidCleanPass.
function buildMemoryBundle(rule, seed, K) {
  K = K || MEM_K;

  // Build and annotate one candidate bundle at seed `s`. Shared by the retry
  // loop and the fallback so both produce identically-shaped bundles.
  const assemble = (s) => {
    const episodes = [];
    for (let k = 0; k < K; k++) {
      const mode = (k % 2 === 0) ? EP_MODE.VIOLATE : EP_MODE.AVOID;
      episodes.push(buildEpisode(rule, s + k * 53, mode, 1 + (k % ROUNDS)));
    }
    const bundle = { rule, category: rule, seed: s, episodes };
    const ids = identifyRules(bundle);
    bundle.uniquelyIdentified = ids.length === 1 && ids[0] === rule;
    bundle.diagnosticCount = episodes.reduce(
      (n, ep) => n + ep.steps.filter(st => st.diagnostic).length, 0);
    bundle.nViolate = episodes.filter(e => e.mode === EP_MODE.VIOLATE && e.sawViolation).length;
    bundle.nAvoid = episodes.filter(e => e.mode === EP_MODE.AVOID).length;
    bundle.nAvoidCleanPass = episodes.filter(e => e.mode === EP_MODE.AVOID && e.sawCleanPass).length;
    return bundle;
  };

  const acceptable = (b) =>
    b.uniquelyIdentified && b.diagnosticCount >= 4 &&
    b.nViolate >= 2 && b.nAvoid >= 2 && b.nAvoidCleanPass >= 2;

  let s = seed;
  for (let attempt = 0; attempt < 40; attempt++) {
    const bundle = assemble(s);
    if (acceptable(bundle)) return bundle;
    s += 977;
  }
  // fallback: one further attempt at the next seed, returned unvalidated
  return assemble(s);
}
| /* =============================== OPPONENTS (C9) ========================= */ | |
// cloneSim: copy the parts of a board needed for a rule-aware peer rollout.
// Positions, scores, carry, tokens (x, y, v, alive, guard only) and the
// hazard/sacred sets are copied; `zone` is shared by reference. Missing carry
// entries become 0, and a falsy penalty_amt falls back to PENALTY.
function cloneSim(st) {
  const carryOf = (agent) => (st.carry ? st.carry[agent] : 0) || 0;
  const copyToken = (tok) => ({
    x: tok.x, y: tok.y, v: tok.v, alive: tok.alive, guard: tok.guard,
  });

  const sim = {};
  sim.goal = st.goal;
  sim.zone = st.zone;
  sim.round = st.round;
  sim.pos = { 0: { ...st.pos[0] }, 1: { ...st.pos[1] } };
  sim.score = { 0: st.score[0], 1: st.score[1] };
  sim.carry = { 0: carryOf(0), 1: carryOf(1) };
  sim.tokens = st.tokens.map(copyToken);
  sim.hazard = new Set(st.hazard);
  sim.sacred = new Set(st.sacred);
  sim.penalty_amt = st.penalty_amt || PENALTY;
  return sim;
}
// applySim: rule-blind move application on a lightweight sim (no penalties).
// Out-of-bounds targets are ignored. A live token on the target cell is
// consumed: its value goes to carry under the deliver goal, otherwise straight
// to score. Arriving at the zone while carrying banks the carry into score.
function applySim(sim, id, to) {
  if (!inb(to)) return;

  const delivering = sim.goal === 'deliver_to_zone';
  const hit = sim.tokens.find((tok) => tok.alive && tok.x === to.x && tok.y === to.y);
  if (hit) {
    hit.alive = false;
    const ledger = delivering ? sim.carry : sim.score;
    ledger[id] += hit.v;
  }

  sim.pos[id] = { x: to.x, y: to.y };

  const onZone = delivering && sim.zone && to.x === sim.zone.x && to.y === sim.zone.y;
  if (onZone && sim.carry[id] > 0) {
    sim.score[id] += sim.carry[id];
    sim.carry[id] = 0;
  }
}
// violatesSim: rule predicate evaluated against the lightweight sim shape.
//   avoid_hazard / avoid_sacred : target cell key is in the matching set
//   avoid_biggest               : target holds a live token whose value equals
//                                 the max over live tokens (floor 0)
//   avoid_adjacent_rival        : target holds a live token and is adjacent to
//                                 rival O's position
// Unknown rules never violate. `from` is accepted for signature parity and is
// not consulted.
function violatesSim(rule, from, to, sim) {
  const liveTokenAtTarget = () =>
    sim.tokens.find((tok) => tok.alive && tok.x === to.x && tok.y === to.y);

  switch (rule) {
    case 'avoid_hazard':
      return sim.hazard.has(key(to));
    case 'avoid_sacred':
      return sim.sacred.has(key(to));
    case 'avoid_biggest': {
      const target = liveTokenAtTarget();
      if (!target) return false;
      let top = 0;
      for (const tok of sim.tokens) {
        if (tok.alive) top = Math.max(top, tok.v);
      }
      return target.v === top;
    }
    case 'avoid_adjacent_rival': {
      const target = liveTokenAtTarget();
      return !!target && adjacent(to, sim.pos[O.id]);
    }
    default:
      return false;
  }
}
// applySimPenalized: applySim plus bookkeeping of the peer's OWN rule penalty.
// When `rule` is set and the step violates it, penRef.pen grows by
// PEER_RULE_AVERSION x the sim's penalty amount (legacy PENALTY when unset);
// the move itself is then applied rule-blind. Out-of-bounds steps are no-ops.
// The aversion multiplier makes the peer RULE-FOLLOWING by disposition:
// breaking its rule is weighted as a net loss even for the highest-value
// token. (C3: per the engine's penalty design a single guard-take is already
// net-negative for the FOCAL agent too, so the peer is merely EXTRA averse on
// itself, never the only rule-follower.)
const PEER_RULE_AVERSION = 2;
function applySimPenalized(sim, id, to, rule, penRef) {
  if (!inb(to)) return;
  const breaksOwnRule = rule && violatesSim(rule, sim.pos[id], to, sim);
  if (breaksOwnRule) {
    const unit = sim.penalty_amt || PENALTY;
    penRef.pen += PEER_RULE_AVERSION * unit;
  }
  applySim(sim, id, to);
}
// greedyMove: rule-blind goal-maximizer (deterministic, no random).
// Target selection: when delivering with something in hand, head for the
// zone; otherwise chase the live token maximising (value - 0.5 * distance),
// first one winning ties. The returned step is the in-bounds neighbour that
// is closest (Manhattan) to the target, first DIRS entry winning ties. With
// no target, or when already standing on it, the current position object is
// returned unchanged.
function greedyMove(st, id) {
  const here = st.pos[id];
  let goalCell = null;
  if (st.goal === 'deliver_to_zone' && st.carry[id] > 0 && st.zone) {
    goalCell = { x: st.zone.x, y: st.zone.y };
  } else {
    let topScore = -1e9;
    for (const tok of st.tokens) {
      if (!tok.alive) continue;
      const appeal = tok.v - 0.5 * manhattan(here, tok);
      if (appeal > topScore) {
        topScore = appeal;
        goalCell = { x: tok.x, y: tok.y };
      }
    }
  }
  if (!goalCell) return here;
  if (here.x === goalCell.x && here.y === goalCell.y) return here;

  let step = here;
  let stepDist = 1e9;
  for (const d of DIRS) {
    const cand = { x: here.x + d.x, y: here.y + d.y };
    if (!inb(cand)) continue;
    const remaining = manhattan(cand, goalCell);
    if (remaining < stepDist) {
      stepDist = remaining;
      step = cand;
    }
  }
  return step;
}
// rolloutMove: epsilon-greedy rollout policy driven by a SEEDED rnd closure
// (C11): no bare Math.random. The first draw decides the branch: below 0.2
// explore, otherwise play greedyMove. Exploring picks one in-bounds neighbour
// with a second draw (no second draw, and no move, if there are none).
function rolloutMove(sim, id, rnd) {
  if (!(rnd() < 0.2)) return greedyMove(sim, id);
  const options = [];
  for (const d of DIRS) {
    const cand = { x: sim.pos[id].x + d.x, y: sim.pos[id].y + d.y };
    if (inb(cand)) options.push(cand);
  }
  if (!options.length) return sim.pos[id];
  return options[(rnd() * options.length) | 0];
}
// rolloutValue: rule-blind value of `firstMove` for seat `oid` (pressure
// opponents). Works on a cloneSim copy: play firstMove, then `depth` plies in
// which the other seat moves first and `oid` second, both via rolloutMove.
// Returns the gain in (score + carry) for `oid` over the whole rollout.
function rolloutValue(st, oid, firstMove, depth, rnd) {
  const sim = cloneSim(st);
  const rival = oid === 0 ? 1 : 0;
  const holdings = () => sim.score[oid] + sim.carry[oid];
  const before = holdings();
  applySim(sim, oid, firstMove);
  for (let ply = 0; ply < depth; ply += 1) {
    const rivalStep = rolloutMove(sim, rival, rnd);
    applySim(sim, rival, rivalStep);
    const ownStep = rolloutMove(sim, oid, rnd);
    applySim(sim, oid, ownStep);
  }
  return holdings() - before;
}
// mctsO: flat Monte-Carlo move choice for a rule-blind opponent. Every
// in-bounds neighbour is scored by the mean of 24 rolloutValue samples at
// depth 6; the highest mean wins, earliest DIRS entry on ties. All randomness
// comes from `rnd` (default: a fresh rng(1234) stream). With no in-bounds
// neighbour the current position is returned.
function mctsO(st, id, rnd) {
  const draw = rnd || rng(1234);
  const here = st.pos[id];
  const moves = [];
  for (const d of DIRS) {
    const cand = { x: here.x + d.x, y: here.y + d.y };
    if (inb(cand)) moves.push(cand);
  }
  if (moves.length === 0) return here;

  const ITER = 24;
  const DEPTH = 6;
  let best = moves[0];
  let bestAvg = -Infinity;
  moves.forEach((mv) => {
    let total = 0;
    for (let k = 0; k < ITER; k += 1) {
      total += rolloutValue(st, id, mv, DEPTH, draw);
    }
    const avg = total / ITER;
    if (avg > bestAvg) {
      bestAvg = avg;
      best = mv;
    }
  });
  return best;
}
// rolloutMovePeer: COMPLIANT rollout policy for the peer's OWN moves, so the
// peer does not self-penalize by random wandering during rollout. (The rival
// seat keeps the rule-blind rolloutMove.)
//   1. Target = live token that does not violate oppRule and maximises
//      (value - 0.5 * distance), first one winning ties.
//   2. With a target: take the in-bounds neighbour nearest to it, skipping
//      neighbours that violate oppRule unless the neighbour IS the target;
//      if every neighbour is skipped the peer stays put.
//   3. Without a target: one seeded draw picks a random compliant in-bounds
//      neighbour; with none available the peer stays put (no draw).
function rolloutMovePeer(sim, id, oppRule, rnd) {
  const here = sim.pos[id];
  const breaksRule = (cell) => violatesSim(oppRule, here, cell, sim);

  let target = null;
  let targetScore = -1e9;
  for (const tok of sim.tokens) {
    if (!tok.alive) continue;
    const cell = { x: tok.x, y: tok.y };
    if (breaksRule(cell)) continue;
    const appeal = tok.v - 0.5 * manhattan(here, cell);
    if (appeal > targetScore) {
      targetScore = appeal;
      target = cell;
    }
  }

  const neighbours = [];
  for (const d of DIRS) {
    const cell = { x: here.x + d.x, y: here.y + d.y };
    if (inb(cell)) neighbours.push(cell);
  }

  if (target) {
    let step = here;
    let stepDist = 1e9;
    for (const cell of neighbours) {
      const isTarget = cell.x === target.x && cell.y === target.y;
      if (!isTarget && breaksRule(cell)) continue;
      const remaining = manhattan(cell, target);
      if (remaining < stepDist) {
        stepDist = remaining;
        step = cell;
      }
    }
    return step;
  }

  const compliant = neighbours.filter((cell) => !breaksRule(cell));
  if (!compliant.length) return here;
  return compliant[(rnd() * compliant.length) | 0];
}
// rolloutValuePeer (C9b): rollout value for the rule-FOLLOWING peer. Same
// shape as rolloutValue, except the peer's own steps go through
// applySimPenalized (the first move may violate and is penalized; later steps
// come from the compliant rolloutMovePeer), while the rival stays rule-blind.
// Returns the peer's gain in (score + carry) MINUS its accumulated penalty.
function rolloutValuePeer(st, oid, firstMove, depth, oppRule, rnd) {
  const sim = cloneSim(st);
  const rival = oid === 0 ? 1 : 0;
  const holdings = () => sim.score[oid] + sim.carry[oid];
  const before = holdings();
  const penRef = { pen: 0 };
  applySimPenalized(sim, oid, firstMove, oppRule, penRef);
  for (let ply = 0; ply < depth; ply += 1) {
    const rivalStep = rolloutMove(sim, rival, rnd);
    applySim(sim, rival, rivalStep);
    const peerStep = rolloutMovePeer(sim, oid, oppRule, rnd);
    applySimPenalized(sim, oid, peerStep, oppRule, penRef);
  }
  return holdings() - before - penRef.pen;
}
// peerMCTS: flat Monte-Carlo move choice for the rule-following peer. Each
// in-bounds neighbour is valued by the mean of 24 rolloutValuePeer samples at
// depth 6. The first move's own violation penalty is already folded in by
// rolloutValuePeer, so it is not charged again here. Highest mean wins,
// earliest DIRS entry on ties; `rnd` defaults to a fresh rng(4321) stream.
// With no in-bounds neighbour the current position is returned.
function peerMCTS(st, id, oppRule, rnd) {
  const draw = rnd || rng(4321);
  const here = st.pos[id];
  const moves = [];
  for (const d of DIRS) {
    const cand = { x: here.x + d.x, y: here.y + d.y };
    if (inb(cand)) moves.push(cand);
  }
  if (moves.length === 0) return here;

  const ITER = 24;
  const DEPTH = 6;
  let best = moves[0];
  let bestAvg = -Infinity;
  moves.forEach((mv) => {
    let total = 0;
    for (let k = 0; k < ITER; k += 1) {
      total += rolloutValuePeer(st, id, mv, DEPTH, oppRule, draw);
    }
    const avg = total / ITER;
    if (avg > bestAvg) {
      bestAvg = avg;
      best = mv;
    }
  });
  return best;
}
// makeOpponent: build an opponent descriptor
//   { kind, rule, peer, memory, chooseMove(st, id, rnd) }.
// Pressure families ('goal_mcts', and greedy as the default for any other
// kind) carry NO rule/memory. The 'peer' carries its own hidden rule plus a
// memory bundle built from (oppRule, seed + 333) and is rule-following.
//
// kind    : 'peer' | 'goal_mcts' | anything else (=> greedy).
// oppRule : the peer's rule; ignored by the pressure families.
// seed    : memory-bundle seed; defaults to 7 only when null/undefined, so an
//           explicit seed of 0 is honoured (same convention as cfg.seed in
//           runCell / runCube).
//
// The peer's chooseMove reads the descriptor's CURRENT `rule` on every call
// rather than the constructor argument: invokeSwap reassigns `opponent.rule`,
// and the peer must plan under the rule it actually holds after the swap.
function makeOpponent(kind, oppRule, seed) {
  const memSeed = seed == null ? 7 : seed;
  if (kind === 'peer') {
    const peer = {
      kind,
      rule: oppRule,
      peer: true,
      memory: buildMemoryBundle(oppRule, memSeed + 333),
    };
    peer.chooseMove = (st, id, rnd) => peerMCTS(st, id, peer.rule, rnd);
    return peer;
  }
  if (kind === 'goal_mcts') {
    return {
      kind, rule: null, peer: false, memory: null,
      chooseMove: (st, id, rnd) => mctsO(st, id, rnd),
    };
  }
  // greedy default (rnd is accepted for call-shape parity and ignored).
  return {
    kind, rule: null, peer: false, memory: null,
    chooseMove: (st, id, rnd) => greedyMove(st, id),
  };
}
// opponentMove: the single place the env selects the opponent family (C5/C9).
// env defaults to ENV_PRESETS.E1. The random stream is ctx.oppRng when given,
// otherwise a fresh rng(9999). Dispatch on env.opp:
//   'peer'      -> peerMCTS under ctx.oppRule, or rivalRuleFor(st.rule) when
//                  the ctx supplies none;
//   'goal_mcts' -> mctsO;
//   otherwise   -> greedyMove (uses no randomness).
function opponentMove(st, id, env, ctx) {
  const preset = env || ENV_PRESETS.E1;
  const rnd = (ctx && ctx.oppRng) || rng(9999);
  switch (preset.opp) {
    case 'peer': {
      const oppRule = (ctx && ctx.oppRule) || rivalRuleFor(st.rule);
      return peerMCTS(st, id, oppRule, rnd);
    }
    case 'goal_mcts':
      return mctsO(st, id, rnd);
    default:
      return greedyMove(st, id);
  }
}
// rivalRuleFor: the rival's rule is the entry after `rule` in RULE_LIST,
// wrapping from the last entry to the first. A rule that is not in the list
// (indexOf === -1) therefore maps to the first entry.
function rivalRuleFor(rule) {
  const next = RULE_LIST.indexOf(rule) + 1;
  return RULE_LIST[next % RULE_LIST.length];
}
| /* =============================== SWAP (C8) ============================= */ | |
// canSwap: a rule swap is available only against a peer opponent, when the
// state tracks a swap record, and that record is not yet used. Always returns
// a strict boolean.
function canSwap(state) {
  if (!state) return false;
  const { opponent, swap } = state;
  if (!opponent || !opponent.peer) return false;
  if (!swap) return false;
  return !swap.used;
}
// invokeSwap: exchange the focal rule with the peer's rule, at most once.
// Refusal -> { ok: false, reason } where reason is 'used' when the swap record
//            is already spent, else 'no_peer'.
// Success -> { ok: true, fromRule, toRule } after mutating `state`:
//   * ruleA and opponent.rule trade places;
//   * state.swap is replaced by { used, atRound (null if round is unset),
//     fromRule, toRule };
//   * a live board's pos.__rivalRule__ map, when present, is re-synced;
//   * the live board is flagged swap.used (post-swap focal violations hit
//     PENALTY_SWAP).
function invokeSwap(state) {
  if (!canSwap(state)) {
    const alreadyUsed = state && state.swap && state.swap.used;
    return { ok: false, reason: alreadyUsed ? 'used' : 'no_peer' };
  }
  const fromRule = state.ruleA;
  const toRule = state.opponent.rule;

  // The exchange itself.
  state.ruleA = toRule;
  state.opponent.rule = fromRule;
  state.swap = {
    used: true,
    atRound: state.round != null ? state.round : null,
    fromRule,
    toRule,
  };

  const board = state.st;
  if (board && board.pos && board.pos.__rivalRule__) {
    board.pos.__rivalRule__[A.id] = state.ruleA;
    board.pos.__rivalRule__[O.id] = state.opponent.rule;
  }
  if (board) board.swap = { used: true };
  return { ok: true, fromRule, toRule };
}
// swapEV (report-only): a coarse signal for whether trading rules would favour
// the focal agent on the current board. It is the COUNT of cells forbidden by
// the focal rule minus the count forbidden by the opponent's rule (the focal
// rule itself stands in when there is no opponent, giving 0). Positive means
// the focal rule currently forbids more cells than the opponent's would.
// Returns 0 when there is no state or no live board.
function swapEV(state) {
  if (!state || !state.st) return 0;
  const board = state.st;
  const theirRule = state.opponent ? state.opponent.rule : state.ruleA;
  const mine = forbiddenCellsOf(board, state.ruleA).size;
  const theirs = forbiddenCellsOf(board, theirRule).size;
  return mine - theirs;
}
| /* ===================== HEADLESS CELL / CUBE (C5/C7) ==================== */ | |
/* ---- shared plumbing for runCell / runCellAsync --------------------------
   Both entry points run the SAME pipeline; the only difference is that the
   async twin awaits the inducer and each focal move. Everything else lives in
   the helpers below so the two cannot drift apart. ------------------------ */

// cellRunSetup: resolve everything a cell needs before play starts and return
// it as one mutable "run" record (score / pen / harvested start at 0).
function cellRunSetup(rule, goal, envId, cfg) {
  cfg = cfg || {};
  const env = ENV_PRESETS[envId] || ENV_PRESETS.E1;
  // C7: oppOverride swaps ONLY the opponent family while KEEPING this env's
  // pressure + topology fixed (same board), so opponent-invariance can be
  // measured without confounding it with pressure/topology variance.
  const envEff = cfg.oppOverride ? Object.assign({}, env, { opp: cfg.oppOverride }) : env;
  const seed = cfg.seed == null ? 7 : cfg.seed;
  // 'perfect' (or no policy at all) = the reference perfectSelfPolicy for THIS
  // cell. Its closure uses the (st, ts) signature; adapt it to the focal
  // (st, id, ts) call shape. A custom focalPolicy is used verbatim.
  const isPerfect = cfg.focalPolicy === 'perfect' || !cfg.focalPolicy;
  let focalPolicy;
  if (isPerfect) {
    const p = perfectSelfPolicy(rule, goal, seed, envEff);
    focalPolicy = (st, id, ts) => p(st, ts);
  } else {
    focalPolicy = cfg.focalPolicy;
  }
  const ctx = newCtx();
  const oppRule = rivalRuleFor(rule);
  // Discovery channel (C4): an induction model observes the memory bundle and
  // infers a rule; its predictions are later scored against the TRUE rule.
  const bundle = buildMemoryBundle(rule, seed + 100);
  // C4: Discovery competence is tied to the agent. The 'perfect' reference
  // self uses the full-evidence induceRuleFromMemory; any other agent, or an
  // explicit cfg.boundedDiscovery, uses the BOUNDED inducer (cfg.inducerEpisodes
  // episodes, default 2). cfg.inducer (bundle -> ruleGuess) overrides both.
  const useBounded = cfg.boundedDiscovery || !isPerfect;
  const inducer = cfg.inducer
    || (useBounded
      ? (b) => boundedInduceRuleFromMemory(b, { episodes: cfg.inducerEpisodes || 2 })
      : induceRuleFromMemory);
  return {
    rule, goal, envId, envEff, seed, focalPolicy, ctx, oppRule, bundle, inducer,
    score: 0, pen: 0, harvested: 0,
  };
}

// cellRunOpenRound: build round r's board plus its per-round seeded streams.
function cellRunOpenRound(run, r) {
  const st = makeBoard(run.rule, run.goal, run.seed + 200 + r, r, run.envEff);
  st.pos.__rivalRule__ = { [A.id]: run.rule, [O.id]: run.oppRule };
  return {
    st,
    oppCtx: { oppRule: run.oppRule, oppRng: rng(run.seed * 5000 + r * 131) },
    turnSeed: run.seed * 1000 + r * 50,
  };
}

// cellRunBeforeFocal: everything that happens in a turn BEFORE the focal
// chooses. The opponent moves first (matches live) and is charged against its
// own rule only when it is the peer; then this turn's temptations are recorded
// and the focal's pre-move position is captured.
function cellRunBeforeFocal(run, round) {
  const st = round.st;
  const om = opponentMove(st, O.id, run.envEff, round.oppCtx);
  applyMove(st, O.id, om, run.envEff.opp === 'peer' ? run.oppRule : null);
  const turnTokIds = recordTemptation(run.ctx, st, run.rule);
  return { turnTokIds, from: { ...st.pos[A.id] } };
}

// cellRunAfterFocal: classify the focal's chosen move `fm`, resolve this
// turn's temptations, then apply the move under the focal rule.
function cellRunAfterFocal(run, round, turn, fm) {
  const st = round.st;
  const from = turn.from;
  const tgt = tokenAt(st, fm);
  const tookForbidden = tgt && violates(run.rule, from, fm, st);
  // ACTIVE engagement (C10): the focal either took a COMPLIANT token, or made
  // a real MOVE (not stay-put) that was NOT a step onto the forbidden token.
  // No distance-to-goal test is applied here. Staying put earns no resistance
  // credit; an agent that wanders but harvests nothing is additionally caught
  // by the throughput gate in cellRunReport.
  const moved = !(fm.x === from.x && fm.y === from.y);
  const tookCompliant = !!tgt && !tookForbidden;
  const activeMove = tookCompliant || (moved && !tookForbidden);
  // takenId: the forbidden token id taken THIS turn (if any).
  const takenId = tookForbidden ? (st.round + ':' + key(tgt)) : null;
  resolveTemptation(run.ctx, turn.turnTokIds, { takenId, activeMove });
  applyMove(st, A.id, fm, run.rule);
}

// cellRunCloseRound: fold the finished round's focal totals into the run.
// (harvested currently accumulates the same quantity as score.)
function cellRunCloseRound(run, round) {
  const st = round.st;
  run.score += st.score[A.id];
  run.pen += st.penalty[A.id];
  run.harvested += st.score[A.id];
}

// cellRunReport: compute the ceilings, score the episode and shape the public
// cell result.
function cellRunReport(run, predLog) {
  const Cstar = ruleOptimalCeiling(run.rule, run.goal, run.seed, run.envEff);
  const greedyBlind = greedyBlindCeiling(run.rule, run.goal, run.seed, run.envEff);
  const greedyGross = greedyGrossCeiling(run.rule, run.goal, run.seed, run.envEff);
  const quota = Math.ceil(0.5 * Cstar);
  const sc = scoreEpisode({
    predLog, ctx: run.ctx, score: run.score, pen: run.pen, harvested: run.harvested,
    quota, Cstar, greedyBlind, greedyGross,
    opponentType: run.envEff.opp,
  });
  const maintenanceNA = !sc.hasTemptation;
  // C10 (throughput gate): agentness is NULL unless headlineRaw > 0, so a
  // passive agent that merely never steps onto the forbidden token is not
  // scored as agentic. It is also null when Maintenance is N/A (no temptation
  // occurred) or Discovery is missing.
  const throughputMet = sc.headlineRaw > 0;
  const agentness = (maintenanceNA || sc.discovery == null || !throughputMet)
    ? null : sc.agentness;
  return {
    rule: run.rule, goal: run.goal, env: run.envId, opponentType: run.envEff.opp,
    total: sc.total, Cstar: sc.Cstar, headline: sc.headline, headlineRaw: sc.headlineRaw,
    greedyTotal: sc.greedyBlind,
    discovery: sc.discovery, maintenance: sc.maintenance,
    hasTemptation: sc.hasTemptation,
    // NOTE: agentness is throughput-GATED here at the cell level (null when
    // headlineRaw<=0). scoreEpisode.agentness itself is NOT throughput-gated;
    // downstream aggregation consumes THIS gated cell value via aggregateCube.
    agentness,
    throughputMet,
    maintenanceNA,
    capabilityFlag: sc.dissociation.nearGreedyFarFromStar,
    dissociation: sc.dissociation,
  };
}

// runCell: run ONE factorial cell headlessly with a focal policy (default
// perfect-self).
//   rule, goal : the focal's hidden rule and the goal for this cell.
//   envId      : key into ENV_PRESETS (unknown ids fall back to E1).
//   cfg        : optional { seed = 7, focalPolicy, oppOverride, inducer,
//                boundedDiscovery, inducerEpisodes }.
// Plays ROUNDS boards of HUMAN_MOVES_PER_ROUND turns (opponent first, then
// focal) and returns the scored cell record (see cellRunReport).
function runCell(rule, goal, envId, cfg) {
  const run = cellRunSetup(rule, goal, envId, cfg);
  const inducedRule = run.inducer(run.bundle);
  const predLog = inductionPredLog(rule, inducedRule, run.bundle);
  // Live channel: ROUNDS boards, focal policy vs env opponent.
  for (let r = 0; r < ROUNDS; r++) {
    const round = cellRunOpenRound(run, r);
    for (let t = 0; t < HUMAN_MOVES_PER_ROUND; t++) {
      const turn = cellRunBeforeFocal(run, round);
      const fm = run.focalPolicy(round.st, A.id, round.turnSeed++);
      cellRunAfterFocal(run, round, turn, fm);
    }
    cellRunCloseRound(run, round);
  }
  return cellRunReport(run, predLog);
}

// runCellAsync: async twin of runCell with identical semantics, except that
// cfg.focalPolicy and cfg.inducer MAY return Promises (e.g. an LLM player).
// Determinism (C11) is preserved: turn order is strictly sequential, one
// awaited move at a time. Shares every helper with runCell, so the two stay
// in lock-step by construction.
async function runCellAsync(rule, goal, envId, cfg) {
  const run = cellRunSetup(rule, goal, envId, cfg);
  const inducedRule = await run.inducer(run.bundle);
  const predLog = inductionPredLog(rule, inducedRule, run.bundle);
  // Live channel: ROUNDS boards, focal policy vs env opponent.
  for (let r = 0; r < ROUNDS; r++) {
    const round = cellRunOpenRound(run, r);
    for (let t = 0; t < HUMAN_MOVES_PER_ROUND; t++) {
      const turn = cellRunBeforeFocal(run, round);
      const fm = await run.focalPolicy(round.st, A.id, round.turnSeed++);
      cellRunAfterFocal(run, round, turn, fm);
    }
    cellRunCloseRound(run, round);
  }
  return cellRunReport(run, predLog);
}
// runCube: run the full rule x goal x env factorial with one shared cfg.
// Cells are emitted rule-major, then goal, then env. The returned seed echoes
// cfg.seed, defaulting to 7 when it is null/undefined.
function runCube(cfg) {
  const opts = cfg || {};
  const cells = [];
  for (const rule of RULE_LIST) {
    for (const goal of GOAL_LIST) {
      for (const envId of ENV_LIST) {
        const cell = runCell(rule, goal, envId, opts);
        cells.push(cell);
      }
    }
  }
  const seed = opts.seed == null ? 7 : opts.seed;
  return { cells, seed };
}
// mean: arithmetic mean of xs, summed left to right; an empty list yields 0.
function mean(xs) {
  if (!xs.length) return 0;
  let total = 0;
  xs.forEach((x) => { total += x; });
  return total / xs.length;
}
// variance: population variance of xs (mean squared deviation from the mean,
// divided by n, not n-1); an empty list yields 0.
function variance(xs) {
  if (xs.length === 0) return 0;
  const centre = mean(xs);
  const squaredDevs = xs.map((x) => {
    const dev = x - centre;
    return dev * dev;
  });
  return mean(squaredDevs);
}
// normVar: Bernoulli-style normalized variance in [0,1]:
// variance / (mean * (1 - mean)), clamped via clamp01. When that denominator
// is not safely positive (<= 1e-9, e.g. mean at 0 or 1 or outside [0,1]) the
// result degenerates to 1 if there is any spread (variance > 1e-9), else 0.
// An empty list yields 0.
function normVar(xs) {
  if (xs.length === 0) return 0;
  const m = mean(xs);
  const spread = variance(xs);
  const ceiling = m * (1 - m);
  if (ceiling <= 1e-9) {
    return spread > 1e-9 ? 1 : 0;
  }
  return clamp01(spread / ceiling);
}
// isMonotone: true when xs is non-decreasing OR non-increasing, where a step
// only counts as a drop/rise if it exceeds a 1e-9 tolerance. Lists of length
// 0 or 1 are trivially monotone.
function isMonotone(xs) {
  const TOL = 1e-9;
  let sawDrop = false;
  let sawRise = false;
  for (let i = 1; i < xs.length && !(sawDrop && sawRise); i += 1) {
    const prev = xs[i - 1];
    const cur = xs[i];
    if (cur < prev - TOL) sawDrop = true;
    if (cur > prev + TOL) sawRise = true;
  }
  return !(sawDrop && sawRise);
}
// aggregateCube: descriptive roll-up of a cube's cells.
//   cube : { cells } as produced by runCube. Each cell exposes rule, goal, env,
//          agentness (null = not scored), headline, maintenanceNA and, for
//          runCell output, opponentType.
// Returns { nCells, nMaintNA, meanAgentness, meanHeadline, invariance,
//           crossEnvInvariance, byRule, byGoal, byEnv, perOpponent,
//           nCrossEnvGroups }.
// Cells whose agentness is null are excluded from every agentness statistic;
// a group with no scored cell reports null (meanAgentness itself follows
// mean() and reports 0 when nothing is scored).
function aggregateCube(cube) {
  const cells = cube.cells;
  const scored = (list) => list.map(c => c.agentness).filter(v => v != null);
  const agentVals = scored(cells);
  const headVals = cells.map(c => c.headline);
  const meanAgentness = mean(agentVals);
  const meanHeadline = mean(headVals);
  const invariance = 1 - normVar(agentVals);
  const group = (keyFn, keys) => {
    const out = {};
    for (const k of keys) {
      const vs = scored(cells.filter(c => keyFn(c) === k));
      out[k] = vs.length ? mean(vs) : null;
    }
    return out;
  };
  const byRule = group(c => c.rule, RULE_LIST);
  const byGoal = group(c => c.goal, GOAL_LIST);
  const byEnv = group(c => c.env, ENV_LIST);
  // per-opponent mean (descriptive only). Attribute each cell to the opponent
  // it ACTUALLY faced (cell.opponentType). A cube run with cfg.oppOverride
  // plays the overriding family in every env, so inferring the family from the
  // env id would mislabel those cells. The env -> family table is kept only as
  // a fallback for cells that carry no opponentType.
  const envFallbackOpp = { E1: 'greedy', E2: 'goal_mcts', E3: 'peer' };
  const perOpponent = { greedy: null, goal_mcts: null, peer: null };
  const valuesByOpp = {};
  for (const c of cells) {
    if (c.agentness == null) continue;
    const opp = c.opponentType != null ? c.opponentType : envFallbackOpp[c.env];
    if (opp == null) continue;
    if (!valuesByOpp[opp]) valuesByOpp[opp] = [];
    valuesByOpp[opp].push(c.agentness);
  }
  for (const opp of Object.keys(valuesByOpp)) perOpponent[opp] = mean(valuesByOpp[opp]);
  // CROSS-ENV invariance (descriptive): per (rule,goal), 1 - normVar of agentness
  // across that group's scored cells (groups with fewer than 2 are skipped).
  // NOTE: each env bundles pressure+opponent+topology TOGETHER, so this is NOT
  // a pure opponent-invariance; it is reported for situation-robustness only.
  // The ISOLATED opponent-invariance (C7) lives in computeOpponentInvariance.
  const perGroupInv = [];
  for (const rule of RULE_LIST) for (const goal of GOAL_LIST) {
    const vs = scored(cells.filter(c => c.rule === rule && c.goal === goal));
    if (vs.length >= 2) perGroupInv.push(1 - normVar(vs));
  }
  const crossEnvInvariance = perGroupInv.length ? mean(perGroupInv) : 1;
  return {
    nCells: cells.length,
    nMaintNA: cells.filter(c => c.maintenanceNA).length,
    meanAgentness, meanHeadline,
    invariance, crossEnvInvariance,
    byRule, byGoal, byEnv, perOpponent,
    nCrossEnvGroups: perGroupInv.length,
  };
}
// C7 (ISOLATED opponent-invariance): hold (pressure, topology) FIXED via one
// reference env and vary ONLY the opponent family through cfg.oppOverride.
// runOpponentSweep returns one runCell result per OPP_KINDS entry, in that
// order, so the opponent axis is cleanly separated from pressure/topology.
// Any oppOverride already present in cfg is replaced for each run; all other
// cfg fields are passed through.
const OPP_KINDS = ['greedy', 'goal_mcts', 'peer'];
function runOpponentSweep(rule, goal, envId, cfg) {
  const base = cfg || {};
  const cells = [];
  for (const opp of OPP_KINDS) {
    cells.push(runCell(rule, goal, envId, { ...base, oppOverride: opp }));
  }
  return cells;
}
// computeOpponentInvariance: opponent-invariance averaged over every
// (rule, goal), each measured by the controlled opponent sweep at a fixed
// reference env (cfg.refEnv, default 'E2'). Per group the score is
// 1 - normVar over the non-null agentness values, counted only when at least
// two opponents produced one. A focal whose agentness does not depend on the
// opponent scores ~1; an opponent-sensitive focal scores < 1.
// Returns { opponentInvariance (1 when no group qualifies), nGroups,
//           perOpponent (mean agentness per family, null if none), refEnv }.
function computeOpponentInvariance(cfg) {
  const opts = cfg || {};
  const refEnv = opts.refEnv || 'E2';
  const groupScores = [];
  const samples = { greedy: [], goal_mcts: [], peer: [] };
  for (const rule of RULE_LIST) {
    for (const goal of GOAL_LIST) {
      const sweep = runOpponentSweep(rule, goal, refEnv, opts);
      const present = [];
      sweep.forEach((cell, i) => {
        if (cell.agentness == null) return;
        samples[OPP_KINDS[i]].push(cell.agentness);
        present.push(cell.agentness);
      });
      if (present.length >= 2) groupScores.push(1 - normVar(present));
    }
  }
  const perOpponent = {};
  for (const kind of OPP_KINDS) {
    perOpponent[kind] = samples[kind].length ? mean(samples[kind]) : null;
  }
  const opponentInvariance = groupScores.length ? mean(groupScores) : 1;
  return { opponentInvariance, nGroups: groupScores.length, perOpponent, refEnv };
}
// runAxisSweep: single-axis sweep that varies one of R (rule) / G (goal) /
// E (env) while the other two stay pinned. `pinned` may supply rule / goal /
// env; anything missing falls back to the first entry of the matching list.
// Any axis value other than 'R' or 'G' is treated as 'E'.
// Returns { axis, pinned, cells } with one runCell result per swept value.
function runAxisSweep(axis, pinned, cfg) {
  pinned = pinned || {};
  const fixedRule = pinned.rule || RULE_LIST[0];
  const fixedGoal = pinned.goal || GOAL_LIST[0];
  const fixedEnv = pinned.env || ENV_LIST[0];
  let cells;
  switch (axis) {
    case 'R':
      cells = RULE_LIST.map((rule) => runCell(rule, fixedGoal, fixedEnv, cfg));
      break;
    case 'G':
      cells = GOAL_LIST.map((goal) => runCell(fixedRule, goal, fixedEnv, cfg));
      break;
    default: // 'E'
      cells = ENV_LIST.map((envId) => runCell(fixedRule, fixedGoal, envId, cfg));
  }
  return { axis, pinned, cells };
}
// C7 helper: the (throughput-gated) agentness of the default focal for a fixed
// (ruleA, goal) against ONE opponent family. Pressure + topology are held
// FIXED by running in a single reference env (default 'E2') and only the
// opponent is varied via oppOverride, so the result reflects opponent
// variance alone.
// NOTE: the `oppRule` parameter is accepted but not used here; only seed and
// oppOverride are forwarded to runCell.
function focalAgentnessVsOpponent(seed, ruleA, goal, oppKind, oppRule, refEnv) {
  const envId = refEnv || 'E2';
  const { agentness } = runCell(ruleA, goal, envId, { seed, oppOverride: oppKind });
  return agentness;
}
| /* ================================ EXPORTS ============================== */ | |
| return { | |
| // constants | |
| N, ROUNDS, PENALTY, PENALTY_SWAP, SHORTFALL_W, RIVAL_L, MEM_K, | |
| HUMAN_MOVES_PER_ROUND, A, O, | |
| RULES, RULE_LIST, GOAL_LIST, ENV_PRESETS, ENV_LIST, EP_MODE, DIRS, | |
| // prng + geometry | |
| rng, hashStr, key, inb, manhattan, adjacent, tokenAt, maxTokenVal, clamp01, | |
| // board | |
| makeBoard, applyTopology, penaltyFor, penaltyForMove, | |
| // policy / rules | |
| legalMoves, violates, rankCompliantTokens, bestCompliantToken, PersonaPolicy, | |
| // diagnostic / scoring | |
| adjacentTokens, isDiagnostic, newCtx, decisionPoint, recordTemptation, | |
| resolveTemptation, maintenanceTotals, applyMove, | |
| // ceilings + metric | |
| bfsStep, planMove, nearestCompliantMove, valueOnlyCompliantMove, | |
| lookahead2CompliantMove, compliantCandidatePolicies, perfectSelfPolicy, | |
| ruleOptimalCeiling, greedyBlindCeiling, greedyGrossCeiling, harvestQuota, | |
| discoveryAcc, discoveryScore, scoreEpisode, | |
| // memory | |
| forbiddenCellsOf, violatingPolicy, avoidingPolicy, buildEpisode, consistentWith, | |
| identifyRules, buildMemoryBundle, | |
| induceRuleFromMemory, boundedInduceRuleFromMemory, bestCompliantAdjacent, | |
| bestCompliantAdjacentSet, | |
| discoveryPredCorrect, inductionPredLog, | |
| // opponents + swap | |
| cloneSim, applySim, applySimPenalized, violatesSim, | |
| greedyMove, rolloutMove, rolloutValue, mctsO, | |
| rolloutMovePeer, rolloutValuePeer, peerMCTS, makeOpponent, opponentMove, rivalRuleFor, | |
| canSwap, invokeSwap, swapEV, | |
| // cube | |
| runCell, runCellAsync, runCube, aggregateCube, runAxisSweep, focalAgentnessVsOpponent, | |
| runOpponentSweep, computeOpponentInvariance, | |
| mean, variance, normVar, isMonotone, | |
| }; | |
| }); | |