// Pure unit tests for the dynamic shadow budget logic. These import
// the standalone helpers in `evolution/shadow-core.ts`, which have no
// database dependencies, so the test runs without DATABASE_URL.
//
//   node --test artifacts/api-server/src/lib/__tests__/shadow-budget.unit.test.mjs
import { test } from "node:test";
import assert from "node:assert/strict";

import {
  decideShadowBudget,
  percentile,
  shouldEmitThresholdEvent,
  MIN_SAMPLES_FOR_DYNAMIC,
  SHADOW_BUDGET_MULTIPLIER,
  SHADOW_BUDGET_SAFETY_FACTOR,
  SHADOW_BUDGET_THRESHOLD_EPSILON,
} from "../evolution/shadow-core.ts";

test("percentile linear interpolation matches numpy default", () => {
  // Every row pairs a sample set with the 75th percentile it must produce.
  const q = 0.75;
  const table = [
    // No samples at all → 0.
    { samples: [], want: 0 },
    // A lone sample is its own percentile.
    { samples: [42], want: 42 },
    // Deliberately out of order; ranked it reads [1,2,3,4]. The fractional
    // rank (4 - 1) * 0.75 = 2.25 sits a quarter of the way from 3 up to 4,
    // giving 3.25.
    { samples: [4, 1, 3, 2], want: 3.25 },
    // Twenty identical values: the result must be exactly that value, with
    // no drift introduced by interpolation.
    { samples: Array.from({ length: 20 }, () => 1000), want: 1000 },
  ];

  for (const { samples, want } of table) {
    assert.equal(percentile(samples, q), want);
  }
});

test("decideShadowBudget falls back when history < MIN_SAMPLES_FOR_DYNAMIC", () => {
  // The history holds exactly one sample fewer than MIN_SAMPLES_FOR_DYNAMIC.
  const sparseHistory = Array(MIN_SAMPLES_FOR_DYNAMIC - 1).fill(800);
  const fallbackCostMs = 1000;

  const { mode, thresholdMs, p75Ms, safetyFactor, sampleCount } = decideShadowBudget({
    recentActiveCostsMs: sparseHistory,
    recentRowCount: sparseHistory.length,
    recentSkippedCount: 0,
    fallbackActiveCostMs: fallbackCostMs,
  });

  assert.equal(mode, "fallback");
  // The cap is expected to come from the supplied fallback cost scaled by
  // SHADOW_BUDGET_MULTIPLIER, not from the 800ms history.
  assert.equal(thresholdMs, fallbackCostMs * SHADOW_BUDGET_MULTIPLIER);
  // No percentile is reported in this mode.
  assert.equal(p75Ms, null);
  // The reported factor is the fallback multiplier.
  assert.equal(safetyFactor, SHADOW_BUDGET_MULTIPLIER);
  assert.equal(sampleCount, sparseHistory.length);
});

test("decideShadowBudget returns 'unknown' when neither history nor fallback", () => {
  // Empty cost history and no fallbackActiveCostMs supplied.
  const emptyInput = {
    recentActiveCostsMs: [],
    recentRowCount: 0,
    recentSkippedCount: 0,
  };

  const verdict = decideShadowBudget(emptyInput);

  assert.equal(verdict.mode, "unknown");
  assert.equal(verdict.thresholdMs, null);
  // With zero recent rows the skip ratio is expected to be reported as 0.
  assert.equal(verdict.recentSkipRatio, 0);
});

test("decideShadowBudget switches to dynamic P75 once we cross MIN_SAMPLES_FOR_DYNAMIC", () => {
  // 12 samples — this test expects that to be enough for the dynamic path.
  // Distribution: ten 1000ms runs and two 3000ms outliers. Sorted, the
  // fractional rank is (12 - 1) * 0.75 = 8.25, which lands between two
  // 1000ms entries, so P75 = 1000: the outliers must not inflate the cap.
  const costs = [
    1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 3000, 3000,
  ];
  const out = decideShadowBudget({
    recentActiveCostsMs: costs,
    recentRowCount: costs.length,
    recentSkippedCount: 0,
    // Pass a wildly different fallback to prove the dynamic path wins.
    fallbackActiveCostMs: 50,
  });
  assert.equal(out.mode, "dynamic");
  assert.equal(out.sampleCount, costs.length);
  // Pin the reported percentile itself rather than only checking that it is
  // non-null; otherwise a wrong p75Ms could slip through whenever the
  // threshold still happens to be computed correctly.
  assert.equal(out.p75Ms, 1000);
  // Threshold = P75 (1000ms) * SHADOW_BUDGET_SAFETY_FACTOR.
  assert.equal(out.thresholdMs, 1000 * SHADOW_BUDGET_SAFETY_FACTOR);
});

test("decideShadowBudget reports recent skip ratio and sample count", () => {
  // 20 recent rows, 5 of them skipped, each with a flat 900ms active cost.
  const rowCount = 20;
  const flatCosts = Array.from({ length: rowCount }, () => 900);

  const report = decideShadowBudget({
    recentActiveCostsMs: flatCosts,
    recentRowCount: rowCount,
    recentSkippedCount: 5,
  });

  // 5 skipped out of 20 rows.
  assert.equal(report.recentSkipRatio, 0.25);
  assert.equal(report.recentSampleCount, rowCount);
  // No fallback was supplied; the 20-sample history is expected to select
  // the dynamic path.
  assert.equal(report.mode, "dynamic");
});

test("decideShadowBudget tolerates a high-variance network without starving CI", () => {
  // Bursty network: the bulk of runs cluster around 500ms, with two spikes
  // near 5000ms. The legacy rule compared each shadow run against
  // 1.5x the active cost of the same row, so a candidate doing a little
  // extra work was skipped whenever its active turn happened to be fast.
  // A cap derived from the P75 of recent history sits at the upper edge of
  // the typical population instead, so reasonable shadow runs survive.
  const burstyCosts = [
    500, 510, 480, 520, 490, 530, 5000, 510, 470, 4900, 500, 520, 540, 480, 510,
  ];

  const budget = decideShadowBudget({
    recentActiveCostsMs: burstyCosts,
    recentRowCount: burstyCosts.length,
    recentSkippedCount: 0,
  });

  assert.equal(budget.mode, "dynamic");

  // With linear interpolation the P75 of these 15 samples is 525ms (rank
  // 10.5, halfway between 520 and 530). The cap is that value times
  // SHADOW_BUDGET_SAFETY_FACTOR — roughly 790ms if the factor is 1.5
  // (NOTE(review): the constant's value is not visible here; confirm).
  //
  // A 700ms shadow run against a 500ms active turn passes under either
  // rule (700 < 1.5 * 500 = 750). The difference shows on a smaller active
  // turn: against a 400ms active, the per-row rule caps at 600ms and would
  // skip the same 700ms candidate, while the history-based cap admits it.
  const capMs = budget.thresholdMs;
  assert.ok(capMs !== null);
  assert.ok(capMs > 700, "threshold should comfortably allow normal candidate runs");
  assert.ok(capMs < 5000, "threshold should still reject the 5000ms spike");
});

test("shouldEmitThresholdEvent always emits the first dynamic threshold", () => {
  // A previous threshold of either null or 0 must result in an emit for a
  // new 1500ms threshold.
  const nextThresholdMs = 1500;
  for (const previousThresholdMs of [null, 0]) {
    assert.equal(shouldEmitThresholdEvent(nextThresholdMs, previousThresholdMs), true);
  }
});

test("shouldEmitThresholdEvent suppresses sub-epsilon noise", () => {
  // Both values are 5% away from the 1000ms baseline — one above, one
  // below. That is under the default 10% epsilon, so neither should emit.
  const baselineMs = 1000;
  for (const driftedMs of [1050, 950]) {
    assert.equal(shouldEmitThresholdEvent(driftedMs, baselineMs), false);
  }
});

test("shouldEmitThresholdEvent emits on >= epsilon drift", () => {
  const baselineMs = 1000;

  // Boundary case: a rise of exactly SHADOW_BUDGET_THRESHOLD_EPSILON must
  // already count as drift (the comparison is inclusive).
  const atEpsilonMs = 1000 * (1 + SHADOW_BUDGET_THRESHOLD_EPSILON);
  assert.equal(shouldEmitThresholdEvent(atEpsilonMs, baselineMs), true);

  // A 25% drop is well beyond the boundary and must emit as well.
  const droppedMs = 750;
  assert.equal(shouldEmitThresholdEvent(droppedMs, baselineMs), true);
});