Iostream-Li
/

doatlas-2

Model card Files Files and versions

doatlas-2 / artifacts /api-server /src /lib /__tests__ /shadow-budget.unit.test.mjs

Iostream-Li's picture

Add files using upload-large-folder tool

ff78003 verified 18 days ago

history blame contribute delete

5.37 kB

	// Pure unit tests for the dynamic shadow budget logic. These import
	// the standalone helpers in `evolution/shadow-core.ts`, which have no
	// database dependencies, so the test runs without DATABASE_URL.
	//
	// node --test artifacts/api-server/src/lib/__tests__/shadow-budget.unit.test.mjs
	import { test } from "node:test";
	import assert from "node:assert/strict";

	import {
	decideShadowBudget,
	percentile,
	shouldEmitThresholdEvent,
	MIN_SAMPLES_FOR_DYNAMIC,
	SHADOW_BUDGET_MULTIPLIER,
	SHADOW_BUDGET_SAFETY_FACTOR,
	SHADOW_BUDGET_THRESHOLD_EPSILON,
	} from "../evolution/shadow-core.ts";

	test("percentile linear interpolation matches numpy default", () => {
	  const q = 0.75;
	  // Each row pairs a sample set with the 75th percentile we expect back.
	  const cases = [
	    // Degenerate inputs: no samples → 0, a lone sample → that sample.
	    [[], 0],
	    [[42], 42],
	    // Sorted this is [1,2,3,4]; rank = 3 * 0.75 = 2.25, so the answer sits a
	    // quarter of the way from index 2 (value 3) to index 3 (value 4) → 3.25.
	    [[4, 1, 3, 2], 3.25],
	    // A flat series must come back exactly, with no interpolation drift.
	    [Array.from({ length: 20 }, () => 1000), 1000],
	  ];
	  for (const [samples, expected] of cases) {
	    assert.strictEqual(percentile(samples, q), expected);
	  }
	});

	test("decideShadowBudget falls back when history < MIN_SAMPLES_FOR_DYNAMIC", () => {
	  // One sample short of the dynamic cutoff, so the caller-supplied fallback
	  // cost has to drive the decision.
	  const fallbackMs = 1000;
	  const history = Array(MIN_SAMPLES_FOR_DYNAMIC - 1).fill(800);

	  const decision = decideShadowBudget({
	    recentActiveCostsMs: history,
	    recentRowCount: history.length,
	    recentSkippedCount: 0,
	    fallbackActiveCostMs: fallbackMs,
	  });

	  assert.strictEqual(decision.mode, "fallback");
	  // In fallback mode the cap is the fallback cost scaled by the multiplier,
	  // no percentile is reported, and the multiplier is echoed as safetyFactor.
	  assert.strictEqual(decision.thresholdMs, fallbackMs * SHADOW_BUDGET_MULTIPLIER);
	  assert.strictEqual(decision.p75Ms, null);
	  assert.strictEqual(decision.safetyFactor, SHADOW_BUDGET_MULTIPLIER);
	  assert.strictEqual(decision.sampleCount, history.length);
	});

	test("decideShadowBudget returns 'unknown' when neither history nor fallback", () => {
	  // No cost history and no fallbackActiveCostMs: nothing to base a cap on.
	  const emptyInput = {
	    recentActiveCostsMs: [],
	    recentRowCount: 0,
	    recentSkippedCount: 0,
	  };
	  const decision = decideShadowBudget(emptyInput);

	  assert.strictEqual(decision.mode, "unknown");
	  assert.strictEqual(decision.thresholdMs, null);
	  // With zero rows the skip ratio is expected to be 0.
	  assert.strictEqual(decision.recentSkipRatio, 0);
	});

	test("decideShadowBudget switches to dynamic P75 once we cross MIN_SAMPLES_FOR_DYNAMIC", () => {
	  // Twelve samples: ten steady 1000ms runs followed by two 3000ms outliers.
	  // The 75th percentile still falls inside the run of 1000s, so the outliers
	  // must not inflate the cap.
	  const steadyMs = 1000;
	  const outlierMs = 3000;
	  const history = [...Array(10).fill(steadyMs), outlierMs, outlierMs];

	  const decision = decideShadowBudget({
	    recentActiveCostsMs: history,
	    recentRowCount: history.length,
	    recentSkippedCount: 0,
	    // Deliberately far from the history so a fallback-derived threshold
	    // would be obvious in the assertion below.
	    fallbackActiveCostMs: 50,
	  });

	  assert.strictEqual(decision.mode, "dynamic");
	  assert.strictEqual(decision.sampleCount, history.length);
	  assert.notStrictEqual(decision.p75Ms, null);
	  // Expected cap: P75 (1000ms) scaled by the safety factor.
	  assert.strictEqual(decision.thresholdMs, steadyMs * SHADOW_BUDGET_SAFETY_FACTOR);
	});

	test("decideShadowBudget reports recent skip ratio and sample count", () => {
	  // 20 recent rows, 5 of them skipped → ratio 5 / 20 = 0.25.
	  const rowCount = 20;
	  const history = Array.from({ length: 20 }, () => 900);

	  const decision = decideShadowBudget({
	    recentActiveCostsMs: history,
	    recentRowCount: rowCount,
	    recentSkippedCount: 5,
	  });

	  assert.strictEqual(decision.recentSkipRatio, 0.25);
	  assert.strictEqual(decision.recentSampleCount, rowCount);
	  assert.strictEqual(decision.mode, "dynamic");
	});

	test("decideShadowBudget tolerates a high-variance network without starving CI", () => {
	  // Bursty network: the bulk of active turns cost ~500ms, with two spikes
	  // near 5000ms. The legacy rule compared each shadow run against 1.5× its
	  // own active turn, so a candidate doing a little extra work was skipped
	  // whenever the paired active turn happened to be fast (e.g. a 700ms
	  // candidate against a 400ms active turn, cap 600ms). The dynamic cap is
	  // instead anchored on the upper side of the typical population.
	  const history = [
	    500, 510, 480, 520, 490,
	    530, 5000, 510, 470, 4900,
	    500, 520, 540, 480, 510,
	  ];

	  const { mode, thresholdMs } = decideShadowBudget({
	    recentActiveCostsMs: history,
	    recentRowCount: history.length,
	    recentSkippedCount: 0,
	  });

	  assert.strictEqual(mode, "dynamic");
	  // P75 of this sample lands in the 520–530ms band, so after the safety
	  // factor the cap should clear an ordinary 700ms candidate run while
	  // remaining far below the 5000ms spike.
	  assert.notStrictEqual(thresholdMs, null);
	  assert.ok(thresholdMs > 700, "threshold should comfortably allow normal candidate runs");
	  assert.ok(thresholdMs < 5000, "threshold should still reject the 5000ms spike");
	});

	test("shouldEmitThresholdEvent always emits the first dynamic threshold", () => {
	  // A previous value of null or 0 is treated as "no prior threshold", so the
	  // first real value must always be emitted.
	  for (const previous of [null, 0]) {
	    assert.strictEqual(shouldEmitThresholdEvent(1500, previous), true);
	  }
	});

	test("shouldEmitThresholdEvent suppresses sub-epsilon noise", () => {
	  const previous = 1000;
	  // ±5% around the previous value: below the default 10% epsilon, so neither
	  // the upward nor the downward wobble should produce an event.
	  for (const next of [1050, 950]) {
	    assert.strictEqual(shouldEmitThresholdEvent(next, previous), false);
	  }
	});

	test("shouldEmitThresholdEvent emits on >= epsilon drift", () => {
	  const previous = 1000;

	  // Boundary case: a drift of exactly epsilon must count as a change (>=).
	  // NOTE(review): this relies on floating-point rounding not nudging the
	  // computed drift just under epsilon — confirm against the implementation.
	  const atEpsilon = 1000 * (1 + SHADOW_BUDGET_THRESHOLD_EPSILON);
	  assert.strictEqual(shouldEmitThresholdEvent(atEpsilon, previous), true);

	  // A 25% drop is well outside the band and must emit too.
	  assert.strictEqual(shouldEmitThresholdEvent(750, previous), true);
	});