// doatlas-2/artifacts/api-server/src/lib/__tests__/shadow-budget.unit.test.mjs
// Iostream-Li's picture
// Add files using upload-large-folder tool
// ff78003 verified
// Pure unit tests for the dynamic shadow budget logic. These import
// the standalone helpers in `evolution/shadow-core.ts`, which have no
// database dependencies, so the test runs without DATABASE_URL.
//
// node --test artifacts/api-server/src/lib/__tests__/shadow-budget.unit.test.mjs
import { test } from "node:test";
import assert from "node:assert/strict";
import {
decideShadowBudget,
percentile,
shouldEmitThresholdEvent,
MIN_SAMPLES_FOR_DYNAMIC,
SHADOW_BUDGET_MULTIPLIER,
SHADOW_BUDGET_SAFETY_FACTOR,
SHADOW_BUDGET_THRESHOLD_EPSILON,
} from "../evolution/shadow-core.ts";
test("percentile linear interpolation matches numpy default", () => {
  const P75 = 0.75;
  const cases = [
    // Degenerate inputs: nothing to interpolate between.
    { samples: [], expected: 0 },
    { samples: [42], expected: 42 },
    // Sorted → [1,2,3,4]; rank = (4 - 1) * 0.75 = 2.25, i.e. a quarter of
    // the way from index 2 (value 3) to index 3 (value 4) → 3.25.
    { samples: [4, 1, 3, 2], expected: 3.25 },
    // A flat series must come back unchanged — no interpolation drift.
    { samples: new Array(20).fill(1000), expected: 1000 },
  ];

  for (const { samples, expected } of cases) {
    assert.equal(percentile(samples, P75), expected);
  }
});
test("decideShadowBudget falls back when history < MIN_SAMPLES_FOR_DYNAMIC", () => {
  // One sample short of the dynamic cutoff, so the fallback cost is expected
  // to drive the budget instead of the (too short) history.
  const history = new Array(MIN_SAMPLES_FOR_DYNAMIC - 1).fill(800);
  const fallbackActiveCostMs = 1000;

  const budget = decideShadowBudget({
    recentActiveCostsMs: history,
    recentRowCount: history.length,
    recentSkippedCount: 0,
    fallbackActiveCostMs,
  });

  assert.equal(budget.mode, "fallback");
  // Fallback mode scales the supplied cost by the multiplier and reports
  // that same multiplier as its safety factor; no percentile is computed.
  assert.equal(budget.thresholdMs, fallbackActiveCostMs * SHADOW_BUDGET_MULTIPLIER);
  assert.equal(budget.p75Ms, null);
  assert.equal(budget.safetyFactor, SHADOW_BUDGET_MULTIPLIER);
  assert.equal(budget.sampleCount, history.length);
});
test("decideShadowBudget returns 'unknown' when neither history nor fallback", () => {
  // No samples and no fallbackActiveCostMs: nothing to derive a cap from.
  const emptyInput = {
    recentActiveCostsMs: [],
    recentRowCount: 0,
    recentSkippedCount: 0,
  };

  const { mode, thresholdMs, recentSkipRatio } = decideShadowBudget(emptyInput);

  assert.equal(mode, "unknown");
  assert.equal(thresholdMs, null);
  // Zero skips over zero rows must be reported as a ratio of 0.
  assert.equal(recentSkipRatio, 0);
});
test("decideShadowBudget switches to dynamic P75 once we cross MIN_SAMPLES_FOR_DYNAMIC", () => {
  // 12 samples — intended to be well above MIN_SAMPLES_FOR_DYNAMIC.
  // Distribution: ten 1000ms runs and two 3000ms outliers. With linear
  // interpolation the P75 rank is (12 - 1) * 0.75 = 8.25, which falls
  // between sorted indices 8 and 9 — both 1000 — so P75 = 1000 and the
  // outliers do not blow the cap.
  const costs = [
    1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 3000, 3000,
  ];
  const out = decideShadowBudget({
    recentActiveCostsMs: costs,
    recentRowCount: costs.length,
    recentSkippedCount: 0,
    // Pass a wildly different fallback to prove the dynamic path wins.
    fallbackActiveCostMs: 50,
  });
  assert.equal(out.mode, "dynamic");
  assert.equal(out.sampleCount, costs.length);
  // Pin the exact percentile: a bare `!== null` check also passes for
  // `undefined`, so it would not notice the field going missing.
  assert.equal(out.p75Ms, 1000);
  // Threshold = P75 * SAFETY_FACTOR.
  assert.equal(out.thresholdMs, 1000 * SHADOW_BUDGET_SAFETY_FACTOR);
});
test("decideShadowBudget reports recent skip ratio and sample count", () => {
  const rowCount = 20;
  const skippedCount = 5;
  const steadyCosts = new Array(rowCount).fill(900);

  const budget = decideShadowBudget({
    recentActiveCostsMs: steadyCosts,
    recentRowCount: rowCount,
    recentSkippedCount: skippedCount,
  });

  // 5 skipped out of 20 recent rows → 0.25.
  assert.equal(budget.recentSkipRatio, 0.25);
  assert.equal(budget.recentSampleCount, 20);
  assert.equal(budget.mode, "dynamic");
});
test("decideShadowBudget tolerates a high-variance network without starving CI", () => {
  // Bursty network: most active runs take ~500ms, with occasional ~5000ms
  // spikes. The legacy rule compared each shadow run against its own row
  // ("skip when shadow > 1.5 * active"), so a 700ms candidate passed next
  // to a 500ms active turn (cap 750ms) but was skipped next to a 400ms one
  // (cap 600ms). A cap derived from the P75 of the recent population does
  // not depend on how fast the neighbouring active turn happened to be.
  const burstyCosts = [
    500, 510, 480, 520, 490, 530, 5000, 510, 470, 4900, 500, 520, 540, 480, 510,
  ];

  const { mode, thresholdMs } = decideShadowBudget({
    recentActiveCostsMs: burstyCosts,
    recentRowCount: burstyCosts.length,
    recentSkippedCount: 0,
  });

  assert.equal(mode, "dynamic");
  // Sorted, the 15 samples put the P75 rank at 14 * 0.75 = 10.5, between
  // 520 and 530, so a linearly interpolated P75 is 525ms; both spikes sit
  // above that rank and do not move it. Assuming the safety factor is 1.5
  // (TODO confirm against shadow-core), the cap lands near 788ms.
  assert.ok(thresholdMs !== null);
  assert.ok(thresholdMs > 700, "threshold should comfortably allow normal candidate runs");
  assert.ok(thresholdMs < 5000, "threshold should still reject the 5000ms spike");
});
test("shouldEmitThresholdEvent always emits the first dynamic threshold", () => {
  // With no usable previous threshold (`null` or `0`) there is nothing to
  // compare against, so the new value must always be emitted.
  for (const previousThreshold of [null, 0]) {
    assert.equal(shouldEmitThresholdEvent(1500, previousThreshold), true);
  }
});
test("shouldEmitThresholdEvent suppresses sub-epsilon noise", () => {
  const previousMs = 1000;
  // Drift by half of epsilon in each direction (5% at the 10% default).
  // Deriving the drift from the constant keeps it "sub-epsilon" even if
  // SHADOW_BUDGET_THRESHOLD_EPSILON is retuned, instead of silently
  // turning this into a test of an above-threshold change.
  const noise = SHADOW_BUDGET_THRESHOLD_EPSILON / 2;
  assert.equal(shouldEmitThresholdEvent(previousMs * (1 + noise), previousMs), false);
  assert.equal(shouldEmitThresholdEvent(previousMs * (1 - noise), previousMs), false);
});
test("shouldEmitThresholdEvent emits on >= epsilon drift", () => {
  const baselineMs = 1000;

  // The boundary is inclusive: a drift of exactly epsilon must already emit.
  const atEpsilonMs = baselineMs * (1 + SHADOW_BUDGET_THRESHOLD_EPSILON);
  assert.equal(shouldEmitThresholdEvent(atEpsilonMs, baselineMs), true);

  // Downward moves count as well: a 25% drop must emit.
  assert.equal(shouldEmitThresholdEvent(750, baselineMs), true);
});