// Pure unit tests for the dynamic shadow budget logic. These import
// the standalone helpers in `evolution/shadow-core.ts`, which have no
// database dependencies, so the test runs without DATABASE_URL.
//
//   node --test artifacts/api-server/src/lib/__tests__/shadow-budget.unit.test.mjs

import { test } from "node:test";
import assert from "node:assert/strict";
import {
  decideShadowBudget,
  percentile,
  shouldEmitThresholdEvent,
  MIN_SAMPLES_FOR_DYNAMIC,
  SHADOW_BUDGET_MULTIPLIER,
  SHADOW_BUDGET_SAFETY_FACTOR,
  SHADOW_BUDGET_THRESHOLD_EPSILON,
} from "../evolution/shadow-core.ts";

// percentile(): empty input, single element, unsorted input that needs
// interpolation, and a constant series.
test("percentile linear interpolation matches numpy default", () => {
  assert.equal(percentile([], 0.75), 0);
  assert.equal(percentile([42], 0.75), 42);
  // [1,2,3,4]; pos = 3 * 0.75 = 2.25; lo=2, hi=3 → 3 + 0.25*(4-3) = 3.25
  assert.equal(percentile([4, 1, 3, 2], 0.75), 3.25);
  // Constant series → P75 = the value, no interpolation drift.
  assert.equal(percentile(new Array(20).fill(1000), 0.75), 1000);
});

// One sample short of MIN_SAMPLES_FOR_DYNAMIC: the decision must come
// from the caller-supplied fallback cost, not from the history.
test("decideShadowBudget falls back when history < MIN_SAMPLES_FOR_DYNAMIC", () => {
  const fewCosts = new Array(MIN_SAMPLES_FOR_DYNAMIC - 1).fill(800);
  const out = decideShadowBudget({
    recentActiveCostsMs: fewCosts,
    recentRowCount: fewCosts.length,
    recentSkippedCount: 0,
    fallbackActiveCostMs: 1000,
  });
  assert.equal(out.mode, "fallback");
  assert.equal(out.thresholdMs, 1000 * SHADOW_BUDGET_MULTIPLIER);
  assert.equal(out.p75Ms, null);
  assert.equal(out.safetyFactor, SHADOW_BUDGET_MULTIPLIER);
  assert.equal(out.sampleCount, fewCosts.length);
});

// No history and no fallback cost: there is nothing to derive a cap from.
test("decideShadowBudget returns 'unknown' when neither history nor fallback", () => {
  const out = decideShadowBudget({
    recentActiveCostsMs: [],
    recentRowCount: 0,
    recentSkippedCount: 0,
  });
  assert.equal(out.mode, "unknown");
  assert.equal(out.thresholdMs, null);
  assert.equal(out.recentSkipRatio, 0);
});

test("decideShadowBudget switches to dynamic P75 once we cross MIN_SAMPLES_FOR_DYNAMIC", () => {
  // 12 samples — well above MIN. Distribution: ten ~1000ms runs and two
  // ~3000ms outliers. P75 of [1000…1000, 3000, 3000] = 1000 (still on
  // the steady-state side); outliers should not blow the cap.
  const costs = [
    1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000,
    3000, 3000,
  ];
  const out = decideShadowBudget({
    recentActiveCostsMs: costs,
    recentRowCount: costs.length,
    recentSkippedCount: 0,
    // Pass a wildly different fallback to prove the dynamic path wins.
    fallbackActiveCostMs: 50,
  });
  assert.equal(out.mode, "dynamic");
  assert.equal(out.sampleCount, costs.length);
  assert.ok(out.p75Ms !== null);
  // P75 of this sample = 1000ms; threshold = 1000 * SAFETY_FACTOR
  assert.equal(out.thresholdMs, 1000 * SHADOW_BUDGET_SAFETY_FACTOR);
});

// 5 skipped rows out of 20 recent rows → skip ratio 0.25.
test("decideShadowBudget reports recent skip ratio and sample count", () => {
  const costs = new Array(20).fill(900);
  const out = decideShadowBudget({
    recentActiveCostsMs: costs,
    recentRowCount: 20,
    recentSkippedCount: 5,
  });
  assert.equal(out.recentSkipRatio, 0.25);
  assert.equal(out.recentSampleCount, 20);
  assert.equal(out.mode, "dynamic");
});

test("decideShadowBudget tolerates a high-variance network without starving CI", () => {
  // Bursty network: most runs ~500ms, occasional 5000ms spikes. With
  // the legacy "shadow > 1.5 * active per row" rule, ANY shadow run
  // observed during a 500ms active turn would skip if it happened to
  // exceed 750ms — easy to do for a candidate variant doing extra
  // work. The dynamic P75 cap stays anchored on the high side of the
  // typical population, so reasonable shadow runs survive.
  const costs = [
    500, 510, 480, 520, 490, 530, 5000, 510, 470, 4900,
    500, 520, 540, 480, 510,
  ];
  const out = decideShadowBudget({
    recentActiveCostsMs: costs,
    recentRowCount: costs.length,
    recentSkippedCount: 0,
  });
  assert.equal(out.mode, "dynamic");
  // P75 of this sample is 525ms: with 15 values pos = 14 * 0.75 = 10.5,
  // and the sorted values at indices 10 and 11 are 520 and 530. The
  // threshold is derived from that P75 rather than from any single row.
  //
  // A 700ms shadow run during a 500ms active turn passes under both
  // rules (700 < 1.5 × 500 = 750). The difference shows during a
  // *small* active turn: against a 400ms active the legacy per-row
  // rule caps at 1.5 × 400 = 600ms and would have skipped the same
  // 700ms candidate, while the dynamic cap still admits it.
  assert.ok(out.thresholdMs !== null);
  assert.ok(out.thresholdMs > 700, "threshold should comfortably allow normal candidate runs");
  assert.ok(out.thresholdMs < 5000, "threshold should still reject the 5000ms spike");
});

// A previous threshold of null or 0 means there is no baseline to
// compare against, so the first real value must always be reported.
test("shouldEmitThresholdEvent always emits the first dynamic threshold", () => {
  assert.equal(shouldEmitThresholdEvent(1500, null), true);
  assert.equal(shouldEmitThresholdEvent(1500, 0), true);
});

test("shouldEmitThresholdEvent suppresses sub-epsilon noise", () => {
  // 5% drift, below the default 10% epsilon → suppressed.
  assert.equal(shouldEmitThresholdEvent(1050, 1000), false);
  assert.equal(shouldEmitThresholdEvent(950, 1000), false);
});

test("shouldEmitThresholdEvent emits on >= epsilon drift", () => {
  // Exactly the epsilon threshold should trip (>=).
  assert.equal(
    shouldEmitThresholdEvent(1000 * (1 + SHADOW_BUDGET_THRESHOLD_EPSILON), 1000),
    true,
  );
  // 25% drop → emit.
  assert.equal(shouldEmitThresholdEvent(750, 1000), true);
});