| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| import { test } from "node:test"; |
| import assert from "node:assert/strict"; |
| import { randomBytes } from "node:crypto"; |
| import type { RequestHandler } from "express"; |
|
|
// Connection string captured once at module load, before any test rewrites
// process.env.DATABASE_URL; an unset variable is normalised to "".
const ORIGINAL_DSN = process.env.DATABASE_URL ?? "";
// True when there is no usable connection string (unset or empty).
const SKIP = ORIGINAL_DSN.length === 0;
|
|
/**
 * Appends an `options=-c search_path=<schema>` query parameter (already
 * percent-encoded) to a connection string.
 *
 * @param dsn    Connection string; may or may not already carry a query string.
 * @param schema Schema name; it is passed through `encodeURIComponent`.
 * @returns `dsn` followed by `?` (or `&` when `dsn` already contains a `?`)
 *          and the encoded option.
 */
function dsnWithSearchPath(dsn: string, schema: string): string {
  const separator = dsn.includes("?") ? "&" : "?";
  const searchPathOption =
    "options=-c%20search_path%3D" + encodeURIComponent(schema);
  return dsn + separator + searchPathOption;
}
|
|
/** What the stub response object recorded while a handler ran. */
interface CapturedResponse {
  /** Last value passed to `res.status()`; 200 if the handler never called it. */
  statusCode: number;
  /** Last value passed to `res.json()`; `null` if the handler never called it. */
  body: unknown;
}

/**
 * Invokes an Express-style handler directly, without an HTTP server, against
 * a minimal stub response that only implements chainable `status()` and
 * `json()`.
 *
 * @param handler The route handler under test.
 * @param req     Object handed to the handler as its request.
 * @returns The status code and JSON body the handler set on the stub.
 * @throws Whatever the handler throws or rejects with, and also whatever it
 *         forwards through `next(err)`. Previously `next` was a no-op, so a
 *         forwarded error resolved as `{ statusCode: 200, body: null }` and
 *         the real failure was lost.
 */
async function callHandler(
  handler: RequestHandler,
  req: Record<string, unknown>,
): Promise<CapturedResponse> {
  const captured: CapturedResponse = { statusCode: 200, body: null };
  const res = {
    status(code: number) {
      captured.statusCode = code;
      return this;
    },
    json(payload: unknown) {
      captured.body = payload;
      return this;
    },
  };

  let forwarded: unknown;
  let errorForwarded = false;
  const next = (err?: unknown): void => {
    // Express treats any argument to next() as an error except the routing
    // control tokens "route" and "router"; a bare next() is a normal pass-through.
    if (err == null || err === "route" || err === "router") return;
    forwarded = err;
    errorForwarded = true;
  };

  await handler(req as never, res as never, next as never);

  if (errorForwarded) {
    throw forwarded instanceof Error
      ? forwarded
      : new Error(`handler forwarded a non-Error value to next(): ${String(forwarded)}`);
  }
  return captured;
}
|
|
/**
 * Integration test for the shadow-budget flow against a real Postgres
 * database. Skipped when DATABASE_URL is not set.
 *
 * The test points DATABASE_URL at a randomly named throw-away schema (via
 * search_path) *before* importing `@workspace/db`, creates the four tables it
 * needs inside that schema, and then runs its subtests in order. The subtests
 * share database state, so their order matters.
 */
test("shadow budget DB integration", {
  skip: SKIP && "DATABASE_URL not set",
}, async (t) => {
  const schema = `shadow_budget_test_${randomBytes(6).toString("hex")}`;
  process.env.DATABASE_URL = dsnWithSearchPath(ORIGINAL_DSN, schema);

  // Cleanup is registered BEFORE any fallible setup (module import, CREATE
  // SCHEMA, table DDL). Registering it afterwards meant a failure during setup
  // leaked the schema and left DATABASE_URL pointing at it. The schema drop is
  // armed below as soon as the pool is available.
  let dropSchema: (() => Promise<unknown>) | undefined;
  t.after(async () => {
    try {
      await dropSchema?.();
    } finally {
      process.env.DATABASE_URL = ORIGINAL_DSN;
    }
  });

  // Imported dynamically so the module is first loaded after DATABASE_URL has
  // been rewritten above.
  const { pool } = await import("@workspace/db");
  // DROP ... IF EXISTS makes this safe even when CREATE SCHEMA itself fails.
  dropSchema = () => pool.query(`DROP SCHEMA IF EXISTS "${schema}" CASCADE`);

  await pool.query(`CREATE SCHEMA "${schema}"`);

  // Hand-written DDL for the tables this test touches.
  // NOTE(review): presumably mirrors the project's real table definitions —
  // keep in sync with them.
  await pool.query(`
    CREATE TABLE "${schema}".tool_networks (
      id text PRIMARY KEY,
      name text NOT NULL UNIQUE,
      problem_class_path text NOT NULL,
      description text NOT NULL DEFAULT '',
      input_contract jsonb NOT NULL,
      output_contract jsonb NOT NULL,
      internal_graph jsonb NOT NULL,
      active_variant_id text,
      builder_model_tier text NOT NULL DEFAULT 'strong',
      release_tier_floor text NOT NULL DEFAULT 'strong',
      config jsonb NOT NULL DEFAULT '{}'::jsonb,
      status text NOT NULL DEFAULT 'active',
      cost_hint double precision,
      latency_hint_ms integer,
      capability_tags jsonb NOT NULL DEFAULT '[]'::jsonb,
      legacy_alias_node_id text,
      created_at timestamptz NOT NULL DEFAULT now(),
      updated_at timestamptz NOT NULL DEFAULT now()
    );
    CREATE TABLE "${schema}".network_shadow_samples (
      id text PRIMARY KEY,
      network_id text NOT NULL,
      active_variant_id text NOT NULL,
      shadow_variant_id text NOT NULL,
      problem_class_path text NOT NULL,
      active_score double precision NOT NULL,
      shadow_score double precision NOT NULL,
      critical_signal boolean NOT NULL DEFAULT false,
      active_cost_ms integer,
      shadow_cost_ms integer,
      budget_skipped boolean NOT NULL DEFAULT false,
      conversation_id text,
      message_id text,
      created_at timestamptz NOT NULL DEFAULT now()
    );
    CREATE INDEX "${schema}_nss_net" ON "${schema}".network_shadow_samples(network_id);
    CREATE INDEX "${schema}_nss_created" ON "${schema}".network_shadow_samples(created_at);
    CREATE TABLE "${schema}".network_evolution_events (
      id text PRIMARY KEY,
      network_id text NOT NULL,
      kind text NOT NULL,
      variant_id text,
      payload jsonb NOT NULL DEFAULT '{}'::jsonb,
      related_event_id text,
      promotion_id text,
      created_at timestamptz NOT NULL DEFAULT now()
    );
    CREATE INDEX "${schema}_nee_net" ON "${schema}".network_evolution_events(network_id);
    CREATE INDEX "${schema}_nee_kind" ON "${schema}".network_evolution_events(kind);
    CREATE INDEX "${schema}_nee_created" ON "${schema}".network_evolution_events(created_at);
    CREATE TABLE "${schema}".network_promotions (
      id text PRIMARY KEY,
      network_id text NOT NULL,
      from_variant_id text,
      to_variant_id text NOT NULL,
      reason text NOT NULL,
      metrics_snapshot jsonb NOT NULL DEFAULT '{}'::jsonb,
      decided_by text NOT NULL DEFAULT 'system',
      created_at timestamptz NOT NULL DEFAULT now()
    );
  `);

  // Modules under test, also loaded only after DATABASE_URL was rewritten.
  const shadow = await import("../evolution/shadow");
  const events = await import("../evolution/events");
  const flywheel = await import("../../routes/evolutionFlywheel");

  const NETWORK_ID = "tnet_test_shadow_1";
  const ACTIVE_VARIANT = "ver_active_1";
  const SHADOW_VARIANT = "ver_shadow_1";
  const PROBLEM_CLASS = "research/test";

  // Records one shadow sample for the test network. Identifiers and the
  // shadow score (0.7) are the same for every sample in this file; only the
  // active score and the two cost figures vary.
  const recordSample = (fields: {
    activeScore: number;
    activeCostMs: number;
    shadowCostMs: number;
  }) =>
    shadow.recordShadowSample({
      networkId: NETWORK_ID,
      activeVariantId: ACTIVE_VARIANT,
      shadowVariantId: SHADOW_VARIANT,
      problemClassPath: PROBLEM_CLASS,
      shadowScore: 0.7,
      ...fields,
    });

  // Up to 50 "shadow_budget_threshold" events for the test network.
  const listThresholdEvents = () =>
    events.listEvents({
      networkId: NETWORK_ID,
      kind: "shadow_budget_threshold",
      limit: 50,
    });

  // Views an event payload as a plain key/value record for field assertions.
  const payloadOf = (ev: { payload: unknown }) =>
    ev.payload as Record<string, unknown>;

  await t.test("seed: create one tool network row", async () => {
    // The three contract/graph columns all receive the same empty JSON object
    // ($4). active_variant_id is not set, so it stays NULL.
    await pool.query(
      `INSERT INTO "${schema}".tool_networks
         (id, name, problem_class_path, input_contract, output_contract, internal_graph)
       VALUES ($1, $2, $3, $4::jsonb, $4::jsonb, $4::jsonb)`,
      [NETWORK_ID, "test_shadow_network", PROBLEM_CLASS, "{}"],
    );
  });

  await t.test(
    "seed N>=10 cost samples → switches from fallback to dynamic mode",
    async () => {
      // Nine samples with an active cost of 500 ms ...
      for (let i = 0; i < 9; i += 1) {
        await recordSample({ activeScore: 0.7, activeCostMs: 500, shadowCostMs: 300 });
      }
      // ... plus a single 3000 ms outlier. With ten values the outlier lies
      // above the 75th percentile, so the expected P75 below is still 500.
      await recordSample({ activeScore: 0.7, activeCostMs: 3000, shadowCostMs: 300 });

      const rows = await pool.query<{ count: string }>(
        `SELECT COUNT(*)::text AS count FROM "${schema}".network_shadow_samples WHERE network_id = $1`,
        [NETWORK_ID],
      );
      assert.equal(rows.rows[0]?.count, "10");

      // NOTE(review): the meaning of the second argument (500) is defined in
      // ../evolution/shadow and is not visible from this file.
      const budget = await shadow.computeShadowBudget(NETWORK_ID, 500);
      assert.equal(budget.mode, "dynamic");
      assert.equal(budget.p75Ms, 500);
      // Expected threshold is P75 × 1.5; 1.5 is presumably the module's
      // safety factor — confirm against ../evolution/shadow.
      assert.equal(budget.thresholdMs, 500 * 1.5);
      assert.equal(budget.sampleCount, 10);
    },
  );

  await t.test(
    "shadow_budget_threshold event fires when dynamic threshold drifts >10%",
    async () => {
      // Precondition left behind by the seed subtest: threshold events exist,
      // and every one of them was emitted in fallback mode.
      // NOTE(review): this implies recordShadowSample evaluates the budget
      // from samples stored *before* the one being inserted — confirm.
      const fallbackOnly = await listThresholdEvents();
      assert.ok(
        fallbackOnly.length >= 1,
        "fallback-mode threshold events must already exist from seed",
      );
      assert.ok(
        fallbackOnly.every((ev) => payloadOf(ev).mode === "fallback"),
        "no dynamic-mode events should exist yet",
      );

      // The 11th sample is expected to be the first one evaluated in dynamic mode.
      await recordSample({ activeScore: 0.72, activeCostMs: 500, shadowCostMs: 300 });

      const afterDynamic = await listThresholdEvents();
      const dynamicEvents = afterDynamic.filter(
        (ev) => payloadOf(ev).mode === "dynamic",
      );
      assert.ok(
        dynamicEvents.length >= 1,
        `crossing into dynamic mode with >10% drift must emit at least one dynamic event (got ${dynamicEvents.length})`,
      );
      const firstDynamic = payloadOf(dynamicEvents[0]!);
      assert.equal(firstDynamic.mode, "dynamic");
      assert.equal(typeof firstDynamic.thresholdMs, "number");
      assert.equal(typeof firstDynamic.p75Ms, "number");
      assert.equal(typeof firstDynamic.safetyFactor, "number");
      assert.equal(typeof firstDynamic.previousThresholdMs, "number");

      // Thirty much slower samples (4000 ms active cost) to push P75 upwards.
      const before = dynamicEvents.length;
      for (let i = 0; i < 30; i += 1) {
        await recordSample({ activeScore: 0.72, activeCostMs: 4000, shadowCostMs: 1200 });
      }
      const afterDrift = await listThresholdEvents();
      const driftDynamic = afterDrift.filter(
        (ev) => payloadOf(ev).mode === "dynamic",
      );
      assert.ok(
        driftDynamic.length > before,
        `>10% drift in P75 must emit additional dynamic events (before=${before} after=${driftDynamic.length})`,
      );
      // NOTE(review): index 0 is treated as the most recent event, which
      // assumes listEvents returns newest first — confirm.
      const latest = payloadOf(driftDynamic[0]!);
      assert.ok(
        (latest.thresholdMs as number) > (firstDynamic.thresholdMs as number),
        "the post-drift threshold should reflect the higher P75",
      );
    },
  );

  await t.test(
    "shadow_budget_skipped row + event payload include thresholdMs/mode/p75Ms",
    async () => {
      // A shadow cost of 99000 ms is expected to exceed the current dynamic
      // threshold, so this sample must be flagged as budget-skipped.
      const result = await recordSample({
        activeScore: 0.72,
        activeCostMs: 4000,
        shadowCostMs: 99000,
      });
      assert.equal(result.budgetSkipped, true, "row must be flagged as budget_skipped");
      assert.equal(result.budget.mode, "dynamic");
      assert.equal(typeof result.budget.thresholdMs, "number");

      // The flag must also be persisted on the stored row.
      const dbRow = await pool.query<{ budget_skipped: boolean }>(
        `SELECT budget_skipped FROM "${schema}".network_shadow_samples WHERE id = $1`,
        [result.row.id],
      );
      assert.equal(dbRow.rows[0]?.budget_skipped, true);

      const skipEvents = await events.listEvents({
        networkId: NETWORK_ID,
        kind: "shadow_budget_skipped",
        limit: 5,
      });
      assert.ok(skipEvents.length >= 1, "shadow_budget_skipped event must be emitted");
      const payload = payloadOf(skipEvents[0]!);
      // Budget context; note the key here is `budgetMode`, whereas threshold
      // events use `mode`.
      assert.equal(typeof payload.thresholdMs, "number");
      assert.equal(payload.budgetMode, "dynamic");
      assert.equal(typeof payload.p75Ms, "number");
      assert.equal(typeof payload.safetyFactor, "number");
      // The costs of the offending sample are echoed back.
      assert.equal(payload.activeCostMs, 4000);
      assert.equal(payload.shadowCostMs, 99000);
      // `multiplier` must equal `safetyFactor` — presumably a legacy alias; confirm.
      assert.equal(payload.multiplier, payload.safetyFactor);
    },
  );

  await t.test(
    "GET /admin/evolution/networks surfaces shadowBudget per network",
    async () => {
      // The route handler is called directly with an empty request object.
      const handler = flywheel._handlers.listNetworks;
      const { statusCode, body } = await callHandler(handler, {});
      assert.equal(statusCode, 200);
      assert.ok(
        body && typeof body === "object" && "networks" in (body as object),
        "response must have a networks array",
      );
      const networks = (body as { networks: unknown[] }).networks;
      // Exactly one network was seeded into the isolated schema.
      assert.ok(Array.isArray(networks) && networks.length === 1);
      const net = networks[0] as Record<string, unknown>;
      assert.equal(net.id, NETWORK_ID);
      assert.equal(net.fitness, null, "fitness short-circuits when activeVariantId is null");
      assert.ok(
        net.shadowBudget && typeof net.shadowBudget === "object",
        "shadowBudget field must be present on the response",
      );
      const sb = net.shadowBudget as Record<string, unknown>;
      assert.equal(sb.mode, "dynamic");
      assert.equal(typeof sb.thresholdMs, "number");
      assert.equal(typeof sb.p75Ms, "number");
      assert.equal(typeof sb.safetyFactor, "number");
      assert.equal(typeof sb.recentSkipRatio, "number");
      assert.equal(typeof sb.sampleCount, "number");
      assert.equal(typeof sb.recentSampleCount, "number");
    },
  );
});
|
|