| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| import { test } from "node:test"; |
| import assert from "node:assert/strict"; |
| import { randomBytes } from "node:crypto"; |
|
|
// Connection string captured at module load; the test below overwrites
// process.env.DATABASE_URL and restores this value afterwards.
const ORIGINAL_DSN: string = process.env.DATABASE_URL ? process.env.DATABASE_URL : "";
// The whole suite is skipped when no connection string is configured.
const SKIP: boolean = ORIGINAL_DSN.length === 0;
|
|
/**
 * Returns `dsn` with an extra `options=-c search_path=<schema>` query
 * parameter (percent-encoded) appended.
 *
 * @param dsn    Connection string; may or may not already contain a query string.
 * @param schema Schema name; it is URI-component-encoded before being appended.
 * @returns The DSN joined to the new parameter with `&` when `dsn` already
 *          contains a `?`, and with `?` otherwise.
 */
function dsnWithSearchPath(dsn: string, schema: string): string {
  const separator = dsn.indexOf("?") === -1 ? "?" : "&";
  const encodedSchema = encodeURIComponent(schema);
  return dsn + separator + "options=-c%20search_path%3D" + encodedSchema;
}
|
|
| test( |
| "shadow A/B sampling against live PG ephemeral schema", |
| { skip: SKIP && "DATABASE_URL not set" }, |
| async (t) => { |
// Unique schema name per run (12 random hex chars).
const schema = `shadow_sampling_test_${randomBytes(6).toString("hex")}`;
// Rewrite DATABASE_URL before the db module is dynamically imported below,
// presumably so the pool it creates resolves unqualified table names inside
// the ephemeral schema (confirm against @workspace/db).
process.env.DATABASE_URL = dsnWithSearchPath(ORIGINAL_DSN, schema);

const { pool } = await import("@workspace/db");

// Register cleanup BEFORE creating anything. If CREATE SCHEMA, the DDL or one
// of the later imports throws, the schema is still dropped and DATABASE_URL
// is still restored. DROP ... IF EXISTS makes this safe even when the schema
// was never created.
t.after(async () => {
  try {
    await pool.query(`DROP SCHEMA IF EXISTS "${schema}" CASCADE`);
  } finally {
    process.env.DATABASE_URL = ORIGINAL_DSN;
  }
});

await pool.query(`CREATE SCHEMA "${schema}"`);

// Hand-written tables used by the modules under test.
// NOTE(review): these definitions presumably mirror the production schema;
// keep them in sync with the real migrations.
await pool.query(`
  CREATE TABLE "${schema}".problem_classes (
    id text PRIMARY KEY,
    path text NOT NULL UNIQUE,
    parent_path text,
    label text NOT NULL,
    description text NOT NULL DEFAULT '',
    capability_tags jsonb NOT NULL DEFAULT '[]'::jsonb,
    reviewer_weights jsonb,
    status text NOT NULL DEFAULT 'active',
    created_at timestamptz NOT NULL DEFAULT now(),
    updated_at timestamptz NOT NULL DEFAULT now()
  );
  CREATE TABLE "${schema}".network_promotions (
    id text PRIMARY KEY,
    network_id text NOT NULL,
    from_variant_id text,
    to_variant_id text NOT NULL,
    reason text NOT NULL,
    metrics_snapshot jsonb NOT NULL DEFAULT '{}'::jsonb,
    decided_by text NOT NULL DEFAULT 'system',
    created_at timestamptz NOT NULL DEFAULT now()
  );
  CREATE TABLE "${schema}".tool_networks (
    id text PRIMARY KEY,
    name text NOT NULL UNIQUE,
    problem_class_path text NOT NULL,
    description text NOT NULL DEFAULT '',
    input_contract jsonb NOT NULL,
    output_contract jsonb NOT NULL,
    internal_graph jsonb NOT NULL,
    active_variant_id text,
    builder_model_tier text NOT NULL DEFAULT 'strong',
    release_tier_floor text NOT NULL DEFAULT 'strong',
    config jsonb NOT NULL DEFAULT '{}'::jsonb,
    status text NOT NULL DEFAULT 'active',
    cost_hint double precision,
    latency_hint_ms integer,
    capability_tags jsonb NOT NULL DEFAULT '[]'::jsonb,
    legacy_alias_node_id text,
    created_at timestamptz NOT NULL DEFAULT now(),
    updated_at timestamptz NOT NULL DEFAULT now()
  );
  CREATE TABLE "${schema}".network_versions (
    id text PRIMARY KEY,
    network_id text NOT NULL,
    version_label text NOT NULL,
    internal_graph jsonb NOT NULL,
    config jsonb NOT NULL DEFAULT '{}'::jsonb,
    status text NOT NULL DEFAULT 'draft',
    built_by text NOT NULL DEFAULT 'system',
    builder_model_tier text NOT NULL DEFAULT 'strong',
    private_namespace text,
    created_at timestamptz NOT NULL DEFAULT now()
  );
  CREATE TABLE "${schema}".network_shadow_samples (
    id text PRIMARY KEY,
    network_id text NOT NULL,
    active_variant_id text NOT NULL,
    shadow_variant_id text NOT NULL,
    problem_class_path text NOT NULL,
    active_score double precision NOT NULL,
    shadow_score double precision NOT NULL,
    critical_signal boolean NOT NULL DEFAULT false,
    active_cost_ms integer,
    shadow_cost_ms integer,
    budget_skipped boolean NOT NULL DEFAULT false,
    conversation_id text,
    message_id text,
    created_at timestamptz NOT NULL DEFAULT now()
  );
  CREATE TABLE "${schema}".network_evolution_events (
    id text PRIMARY KEY,
    network_id text NOT NULL,
    kind text NOT NULL,
    variant_id text,
    payload jsonb NOT NULL DEFAULT '{}'::jsonb,
    related_event_id text,
    promotion_id text,
    created_at timestamptz NOT NULL DEFAULT now()
  );
`);

// Modules under test are imported only after DATABASE_URL was rewritten and
// the tables exist.
const tn = await import("../tool-network");
const sh = await import("../evolution/shadow");
|
|
| |
| |
| |
| |
| |
| |
/**
 * Polls `probe` until it yields a value or the timeout elapses.
 *
 * Only `null` (or `undefined`) means "not ready yet"; falsy-but-valid values
 * such as `0`, `""` or `false` are returned as results. The probe always runs
 * at least once, even for a zero or negative timeout, and the last sleep is
 * clamped to the remaining time so a final probe happens at the deadline.
 *
 * @param probe     Async check returning the awaited value, or `null` while
 *                  it is not available yet.
 * @param timeoutMs Maximum time to keep polling, in milliseconds (default 4000).
 * @returns The first non-null probe result, or `null` on timeout.
 */
async function waitFor<T>(
  probe: () => Promise<T | null>,
  timeoutMs = 4000,
): Promise<T | null> {
  const pollIntervalMs = 50;
  const deadline = Date.now() + timeoutMs;
  for (;;) {
    const value = await probe();
    if (value !== null && value !== undefined) return value;
    const remainingMs = deadline - Date.now();
    if (remainingMs <= 0) return null;
    await new Promise((resolve) =>
      setTimeout(resolve, Math.min(pollIntervalMs, remainingMs)),
    );
  }
}
|
|
| |
// Randomised network name (6 hex chars) for this run.
const networkName = `shadow_t_${randomBytes(3).toString("hex")}`;
const problemClassPath = "shadow_sampling/test";

// Seed the problem class the network is attached to.
await tn.upsertProblemClass({
  label: "shadow sampling test class",
  path: problemClassPath,
});

// Seed the network together with its initial variant "v1"; the assertion
// below requires that this produced an active variant id.
const network = await tn.upsertNetwork({
  name: networkName,
  problemClassPath,
  inputContract: { type: "object", properties: {} },
  outputContract: { type: "object", properties: {} },
  internalGraph: { nodes: [], edges: [] },
  initialVariant: { versionLabel: "v1", config: { tag: "active" } },
});
assert.ok(network.activeVariantId, "active variant created");
const activeVariantId = network.activeVariantId!;

// Second, non-promoted variant "v2"; the subtests use it as the shadow
// candidate.
const shadowVariant = await tn.createVariant({
  networkId: network.id,
  versionLabel: "v2",
  internalGraph: { nodes: [], edges: [] },
  config: { tag: "shadow" },
  builderModelTier: "strong",
  reason: "test_seed",
  promote: false,
});
|
|
| |
| |
| |
// Result shape produced by the scripted runner bodies in this file.
type ScriptedRunnerResult = {
  output: Record<string, unknown>;
  steps: never[];
  durationMs: number;
  metrics: { reviewerScore: number };
};

// Signature of the swappable runner body.
type ScriptedRunner = (
  _input: Record<string, unknown>,
  variant: { id: string },
  ctx: { signal?: AbortSignal },
) => Promise<ScriptedRunnerResult>;

// Swappable runner body. Subtests reassign it to script what each variant
// returns. Default: the active variant scores 0.6, any other variant 0.9.
let runnerImpl: ScriptedRunner = async (_input, variant) => {
  const reviewerScore = variant.id === activeVariantId ? 0.6 : 0.9;
  return {
    output: { ok: true, variantId: variant.id },
    steps: [],
    durationMs: 50,
    metrics: { reviewerScore },
  };
};

// Register a trampoline rather than `runnerImpl` itself, so that later
// reassignments take effect without re-registering the runner.
tn.registerNetworkRunner(networkName, async (input, variant, ctx) => {
  return runnerImpl(input, variant, ctx);
});
|
|
| |
await t.test("forced sample rate writes one paired sample row", async () => {
  type PairedSampleRow = {
    active_variant_id: string;
    shadow_variant_id: string;
    active_score: string;
    shadow_score: string;
    budget_skipped: boolean;
  };

  // Start from an empty samples table so the probe only sees this run.
  await pool.query(`DELETE FROM "${schema}".network_shadow_samples`);

  const outcome = await tn.runNetwork({
    networkName,
    input: { q: "happy" },
    enableShadow: true,
    shadowSampleRate: 1,
  });
  const visibleOutput = outcome.output as { variantId: string };
  assert.equal(
    visibleOutput.variantId,
    activeVariantId,
    "active variant produced the user-visible result",
  );

  // Poll for the sample row instead of assuming it exists the moment
  // runNetwork resolves.
  const fetchSample = async (): Promise<PairedSampleRow | null> => {
    const { rows } = await pool.query<PairedSampleRow>(
      `SELECT active_variant_id, shadow_variant_id, active_score::text, shadow_score::text, budget_skipped
       FROM "${schema}".network_shadow_samples
       WHERE network_id = $1`,
      [network.id],
    );
    return rows[0] ?? null;
  };
  const row = await waitFor(fetchSample);
  assert.ok(row, "a sample row was persisted");

  const sample = row!;
  assert.equal(sample.active_variant_id, activeVariantId);
  assert.equal(sample.shadow_variant_id, shadowVariant.id);
  // Scores are selected as text and converted back to numbers here.
  assert.equal(Number(sample.active_score), 0.6);
  assert.equal(Number(sample.shadow_score), 0.9);
  assert.equal(sample.budget_skipped, false);
});
|
|
| |
await t.test(
  "missing shadow candidate produces zero rows and no error",
  async () => {
    await pool.query(
      `DELETE FROM "${schema}".network_shadow_samples`,
    );
    // Take the only shadow variant out of rotation for this subtest.
    await pool.query(
      `UPDATE "${schema}".network_versions SET status = 'demoted' WHERE id = $1`,
      [shadowVariant.id],
    );
    try {
      const result = await tn.runNetwork({
        networkName,
        input: { q: "no-candidate" },
        enableShadow: true,
        shadowSampleRate: 1,
      });
      assert.ok(result, "active result still returned");
      // Absence cannot be polled for, so give any background shadow work a
      // fixed grace period before counting rows.
      await new Promise((r) => setTimeout(r, 200));
      const cnt = await pool.query<{ c: string }>(
        `SELECT COUNT(*)::text AS c FROM "${schema}".network_shadow_samples WHERE network_id = $1`,
        [network.id],
      );
      assert.equal(Number(cnt.rows[0]!.c), 0);
    } finally {
      // Restore the variant even when an assertion above fails; otherwise
      // every later subtest that needs a shadow candidate would fail too.
      await pool.query(
        `UPDATE "${schema}".network_versions SET status = 'shadow' WHERE id = $1`,
        [shadowVariant.id],
      );
    }
  },
);
|
|
| |
await t.test(
  "shadow runner failure records an event and leaves the active result intact",
  async () => {
    // Clear both tables so the checks below only see this run.
    await pool.query(`DELETE FROM "${schema}".network_shadow_samples`);
    await pool.query(`DELETE FROM "${schema}".network_evolution_events`);

    // The shadow variant throws; every other variant succeeds with 0.5.
    runnerImpl = async (_input, variant) => {
      const isShadow = variant.id === shadowVariant.id;
      if (isShadow) {
        throw new Error("synthetic shadow failure");
      }
      return {
        output: { ok: true, variantId: variant.id },
        steps: [],
        durationMs: 40,
        metrics: { reviewerScore: 0.5 },
      };
    };

    const outcome = await tn.runNetwork({
      networkName,
      input: { q: "boom" },
      enableShadow: true,
      shadowSampleRate: 1,
    });
    const visibleOutput = outcome.output as { variantId: string };
    assert.equal(
      visibleOutput.variantId,
      activeVariantId,
      "active path is unaffected by shadow failure",
    );

    // Poll until the error event shows up.
    const fetchErrorEvent = async () => {
      const { rows } = await pool.query<{ kind: string }>(
        `SELECT kind FROM "${schema}".network_evolution_events
         WHERE network_id = $1 AND kind = 'shadow_runner_error'`,
        [network.id],
      );
      return rows[0] ?? null;
    };
    const errorEvent = await waitFor(fetchErrorEvent);
    assert.ok(errorEvent, "shadow_runner_error event was recorded");

    const { rows: countRows } = await pool.query<{ c: string }>(
      `SELECT COUNT(*)::text AS c FROM "${schema}".network_shadow_samples WHERE network_id = $1`,
      [network.id],
    );
    const sampleCount = Number(countRows[0]!.c);
    assert.equal(
      sampleCount,
      0,
      "no shadow sample row was written when the runner threw",
    );
  },
);
|
|
| |
| |
| |
| |
| |
| |
| |
| |
await t.test(
  "active durationMs<=0 emits a one-shot shadow_no_active_cost event",
  async () => {
    await pool.query(`DELETE FROM "${schema}".network_shadow_samples`);
    await pool.query(`DELETE FROM "${schema}".network_evolution_events`);

    // Presumably resets the module's "already emitted" bookkeeping so this
    // subtest sees the first emission (confirm in ../evolution/shadow).
    sh.__shadowNoActiveCostEmitted.clear();

    // The active variant reports a 0 ms duration; any other variant 30 ms.
    runnerImpl = async (_input, variant) => {
      const reportedDurationMs = variant.id === activeVariantId ? 0 : 30;
      return {
        output: { ok: true, variantId: variant.id },
        steps: [],
        durationMs: reportedDurationMs,
        metrics: { reviewerScore: 0.5 },
      };
    };

    const runWithShadow = (q: string) =>
      tn.runNetwork({
        networkName,
        input: { q },
        enableShadow: true,
        shadowSampleRate: 1,
      });

    await runWithShadow("no-cost-1");

    // The first run must produce the event, identifying the active variant.
    const firstEvent = await waitFor(async () => {
      const { rows } = await pool.query<{ payload: Record<string, unknown> }>(
        `SELECT payload FROM "${schema}".network_evolution_events
         WHERE network_id = $1 AND kind = 'shadow_no_active_cost'`,
        [network.id],
      );
      return rows[0] ?? null;
    });
    assert.ok(firstEvent, "shadow_no_active_cost event was recorded");
    const payload = firstEvent!.payload as { activeVariantId: string };
    assert.equal(
      payload.activeVariantId,
      activeVariantId,
      "payload identifies the active variant that produced 0ms",
    );

    // No sample row is expected for this run.
    const samples = await pool.query<{ c: string }>(
      `SELECT COUNT(*)::text AS c
       FROM "${schema}".network_shadow_samples WHERE network_id = $1`,
      [network.id],
    );
    assert.equal(
      Number(samples.rows[0]!.c),
      0,
      "no sample row written when active cost is missing",
    );

    // Two more runs, one after the other, must not add further events.
    for (const q of ["no-cost-2", "no-cost-3"]) {
      await runWithShadow(q);
    }
    // Fixed grace period for any background work before counting.
    await new Promise((resolve) => setTimeout(resolve, 200));

    const events = await pool.query<{ c: string }>(
      `SELECT COUNT(*)::text AS c
       FROM "${schema}".network_evolution_events
       WHERE network_id = $1 AND kind = 'shadow_no_active_cost'`,
      [network.id],
    );
    assert.equal(
      Number(events.rows[0]!.c),
      1,
      "shadow_no_active_cost is emitted at most once per process per network",
    );

    // Leave a well-behaved runner in place for the following subtests.
    runnerImpl = async (_input, variant) => {
      return {
        output: { ok: true, variantId: variant.id },
        steps: [],
        durationMs: 30,
        metrics: { reviewerScore: 0.5 },
      };
    };
  },
);
|
|
| |
| |
| |
| |
| |
| |
await t.test(
  "shadow run exceeding 1.5× active budget aborts the runner via ctx.signal",
  async () => {
    await pool.query(`DELETE FROM "${schema}".network_shadow_samples`);
    await pool.query(`DELETE FROM "${schema}".network_evolution_events`);

    // Timeline of the shadow invocation, filled in by the runner below.
    const shadowRun = { aborted: false, startedAt: 0, finishedAt: 0 };
    const sleep = (ms: number) =>
      new Promise((resolve) => setTimeout(resolve, ms));

    runnerImpl = async (_input, variant, ctx) => {
      if (variant.id !== shadowVariant.id) {
        // Fast path for the active variant.
        return {
          output: { ok: true, variantId: variant.id },
          steps: [],
          durationMs: 30,
          metrics: { reviewerScore: 0.6 },
        };
      }

      // Cooperative slow shadow: check ctx.signal every 50 ms for up to 10 s
      // and bail out with an AbortError as soon as it is aborted.
      shadowRun.startedAt = Date.now();
      const ceiling = Date.now() + 10_000;
      while (Date.now() < ceiling) {
        if (ctx.signal?.aborted) {
          shadowRun.aborted = true;
          shadowRun.finishedAt = Date.now();
          throw Object.assign(new Error("aborted"), {
            name: "AbortError",
          });
        }
        await sleep(50);
      }
      shadowRun.finishedAt = Date.now();
      return {
        output: { ok: true, variantId: variant.id },
        steps: [],
        durationMs: 10_000,
        metrics: { reviewerScore: 0.95 },
      };
    };

    const outcome = await tn.runNetwork({
      networkName,
      input: { q: "slow-shadow" },
      enableShadow: true,
      shadowSampleRate: 1,
    });
    assert.ok(outcome, "active result returned without waiting for shadow");

    // Wait up to 5 s for the newest sample row; it must be flagged as
    // budget_skipped.
    const fetchLatestSample = async () => {
      const { rows } = await pool.query<{ budget_skipped: boolean }>(
        `SELECT budget_skipped
         FROM "${schema}".network_shadow_samples
         WHERE network_id = $1
         ORDER BY created_at DESC LIMIT 1`,
        [network.id],
      );
      return rows[0] ?? null;
    };
    const latest = await waitFor(fetchLatestSample, 5_000);
    assert.ok(latest, "budget_skipped row was persisted after timeout");
    assert.equal(latest!.budget_skipped, true);

    // The runner only notices the abort on its next 50 ms tick, so allow up
    // to 1 s for the flag to flip.
    const cancelDeadline = Date.now() + 1_000;
    while (Date.now() < cancelDeadline && !shadowRun.aborted) {
      await sleep(50);
    }
    assert.equal(
      shadowRun.aborted,
      true,
      "shadow runner observed ctx.signal.aborted and stopped",
    );

    // The shadow run must have stopped well before its own 10 s ceiling.
    const elapsed = shadowRun.finishedAt - shadowRun.startedAt;
    assert.ok(
      elapsed < 3_000,
      `shadow runner stopped within budget+slack (elapsed=${elapsed}ms, expected <3000ms)`,
    );

    // Leave a well-behaved runner in place for the following subtests.
    runnerImpl = async (_input, variant) => {
      return {
        output: { ok: true, variantId: variant.id },
        steps: [],
        durationMs: 30,
        metrics: { reviewerScore: 0.5 },
      };
    };
  },
);
|
|
| |
await t.test(
  "non-cooperative slow shadow runner is marked budget_skipped",
  async () => {
    await pool.query(`DELETE FROM "${schema}".network_shadow_samples`);

    // The shadow variant sleeps 2.5 s and never looks at ctx.signal; the
    // active variant answers immediately.
    runnerImpl = async (_input, variant) => {
      const isShadow = variant.id === shadowVariant.id;
      if (isShadow) {
        await new Promise((resolve) => setTimeout(resolve, 2_500));
      }
      return {
        output: { ok: true, variantId: variant.id },
        steps: [],
        durationMs: isShadow ? 2_500 : 30,
        metrics: { reviewerScore: isShadow ? 0.95 : 0.6 },
      };
    };

    await tn.runNetwork({
      networkName,
      input: { q: "slow-shadow-noncoop" },
      enableShadow: true,
      shadowSampleRate: 1,
    });

    // Wait up to 5 s for the newest sample row.
    const fetchLatestSample = async () => {
      const { rows } = await pool.query<{ budget_skipped: boolean }>(
        `SELECT budget_skipped
         FROM "${schema}".network_shadow_samples
         WHERE network_id = $1
         ORDER BY created_at DESC LIMIT 1`,
        [network.id],
      );
      return rows[0] ?? null;
    };
    const latest = await waitFor(fetchLatestSample, 5_000);
    assert.ok(latest, "a row was persisted for the legacy runner");
    assert.equal(
      latest!.budget_skipped,
      true,
      "row marked budget_skipped even though runner ignored signal",
    );

    // Leave a well-behaved runner in place for the following subtests.
    runnerImpl = async (_input, variant) => {
      return {
        output: { ok: true, variantId: variant.id },
        steps: [],
        durationMs: 30,
        metrics: { reviewerScore: 0.5 },
      };
    };
  },
);
|
|
| |
await t.test(
  "runNetworkForChat opts in to shadow sampling automatically",
  async () => {
    await pool.query(`DELETE FROM "${schema}".network_shadow_samples`);

    // Same result for every variant; only the existence of a row matters.
    runnerImpl = async (_input, variant) => {
      return {
        output: { ok: true, variantId: variant.id },
        steps: [],
        durationMs: 30,
        metrics: { reviewerScore: 0.7 },
      };
    };

    // Note: no `enableShadow` flag is passed here, unlike the runNetwork calls.
    await tn.runNetworkForChat({
      networkName,
      input: { q: "chat" },
      shadowSampleRate: 1,
    });

    const fetchSample = async () => {
      const { rows } = await pool.query<{ shadow_variant_id: string }>(
        `SELECT shadow_variant_id FROM "${schema}".network_shadow_samples WHERE network_id = $1`,
        [network.id],
      );
      return rows[0] ?? null;
    };
    const sample = await waitFor(fetchSample);
    assert.ok(sample, "runNetworkForChat triggered the shadow path");
  },
);
|
|
| |
await t.test(
  "dispatchNetworkTool routes run_<network> calls and triggers shadow",
  async () => {
    type DispatchedOutput = {
      ok: boolean;
      dispatched: boolean;
      variantId: string;
    };

    await pool.query(`DELETE FROM "${schema}".network_shadow_samples`);

    // Active variant scores 0.4, any other variant 0.8; the output carries a
    // `dispatched` marker so the assertions can recognise this runner.
    runnerImpl = async (_input, variant) => {
      const reviewerScore = variant.id === activeVariantId ? 0.4 : 0.8;
      return {
        output: { ok: true, dispatched: true, variantId: variant.id },
        steps: [],
        durationMs: 30,
        metrics: { reviewerScore },
      };
    };

    // Call the dispatcher with the tool name `run_<networkName>` and a chat
    // context; the trailing `1` is passed positionally as in the other
    // dispatchNetworkTool call in this file.
    const chatCtx = {
      conversationId: "conv_chat_test",
      messageId: null,
      ownerUserId: "user_chat_test",
      meta: { actor: "chat" },
    };
    const dispatch = await tn.dispatchNetworkTool(
      `run_${networkName}`,
      { q: "via-chat" },
      chatCtx,
      1,
    );
    assert.equal(dispatch.matched, true, "run_<network> name was matched");
    assert.ok(dispatch.result, "active result returned to caller");

    const out = dispatch.result!.output as DispatchedOutput;
    assert.equal(out.ok, true);
    assert.equal(out.dispatched, true);
    assert.equal(
      out.variantId,
      activeVariantId,
      "the user-visible result is from the ACTIVE variant",
    );

    // The newest sample row must carry the conversation id from the context.
    const fetchLatestSample = async () => {
      const { rows } = await pool.query<{
        conversation_id: string | null;
        shadow_variant_id: string;
      }>(
        `SELECT conversation_id, shadow_variant_id
         FROM "${schema}".network_shadow_samples
         WHERE network_id = $1
         ORDER BY created_at DESC LIMIT 1`,
        [network.id],
      );
      return rows[0] ?? null;
    };
    const row = await waitFor(fetchLatestSample);
    assert.ok(row, "chat dispatch wrote a shadow sample row");

    const sample = row!;
    assert.equal(
      sample.conversation_id,
      "conv_chat_test",
      "the chat ctx (conversationId) was propagated through the runner",
    );
    assert.equal(sample.shadow_variant_id, shadowVariant.id);
  },
);
|
|
| |
| |
| |
| |
await t.test(
  "routes/messages.ts gates allowedToolNames before dispatchNetworkTool",
  async () => {
    // Static source check: read routes/messages.ts relative to this file and
    // compare where the allowlist gate and the dispatcher call first appear.
    const fs = await import("node:fs/promises");
    const path = await import("node:path");
    const url = await import("node:url");
    const here = path.dirname(url.fileURLToPath(import.meta.url));
    const messagesPath = path.resolve(here, "../../routes/messages.ts");
    const src = await fs.readFile(messagesPath, "utf-8");
    const allowIdx = src.indexOf("!allowedToolNames.has(tc.name)");
    const dispatchIdx = src.indexOf("dispatchNetworkTool(tc.name");
    // indexOf reports "not found" as -1; index 0 is a valid match, so the
    // presence checks compare against -1 rather than using `> 0`.
    assert.ok(allowIdx !== -1, "allowedToolNames gate present");
    assert.ok(dispatchIdx !== -1, "dispatchNetworkTool call present");
    assert.ok(
      allowIdx < dispatchIdx,
      "allowedToolNames gate must come BEFORE dispatchNetworkTool " +
        "(otherwise out-of-subgraph run_<network> tools bypass the allowlist)",
    );
  },
);
|
|
| |
| |
| |
| |
await t.test(
  "chat dispatcher gate rejects run_<network> outside allowedToolNames",
  async () => {
    await pool.query(`DELETE FROM "${schema}".network_shadow_samples`);

    // Inline re-enactment of an allowlist gate: the allowlist holds only
    // "web_search", so the run_<network> tool must be rejected and the
    // dispatcher must never be called.
    const allowed = new Set<string>(["web_search"]);
    const toolName = `run_${networkName}`;
    let dispatched = false;
    let result: unknown;
    if (!allowed || allowed.has(toolName)) {
      dispatched = true;
      await tn.dispatchNetworkTool(toolName, { q: "blocked" }, undefined, 1);
    } else {
      result = {
        error: `Tool ${toolName} is not in the resolved capability subgraph for this turn.`,
        error_code: "tool_not_in_subgraph",
      };
    }
    assert.equal(dispatched, false, "dispatcher was NOT consulted");
    const expectedRejection = {
      error: `Tool ${toolName} is not in the resolved capability subgraph for this turn.`,
      error_code: "tool_not_in_subgraph",
    };
    assert.deepEqual(result, expectedRejection);

    // Fixed grace period, then confirm nothing was sampled.
    await new Promise((resolve) => setTimeout(resolve, 200));
    const { rows } = await pool.query<{ c: string }>(
      `SELECT COUNT(*)::text AS c FROM "${schema}".network_shadow_samples WHERE network_id = $1`,
      [network.id],
    );
    const sampleCount = Number(rows[0]!.c);
    assert.equal(
      sampleCount,
      0,
      "no shadow sample row written for blocked network call",
    );
  },
);
|
|
| |
await t.test(
  "dispatchNetworkTool falls through for non-network and unknown names",
  async () => {
    // A name without the run_ prefix must not match.
    const plainTool = await tn.dispatchNetworkTool("web_search", { q: "x" });
    assert.equal(plainTool.matched, false, "non run_-prefixed name skipped");

    // A run_-prefixed name that was never registered must not match either.
    const unknownNetwork = await tn.dispatchNetworkTool(
      "run_does_not_exist_zzz",
      { q: "x" },
    );
    assert.equal(
      unknownNetwork.matched,
      false,
      "run_-prefixed name with no registered network skipped",
    );
  },
);
|
|
| |
| |
| |
| |
await t.test(
  "missing reviewer metric routes through fallback and emits counter event",
  async () => {
    type FallbackEventRow = { payload: { side: string; source: string } };

    await pool.query(`DELETE FROM "${schema}".network_shadow_samples`);
    await pool.query(`DELETE FROM "${schema}".network_evolution_events`);

    // Runner that deliberately omits `metrics`. The double cast is required
    // because the declared runner type makes `metrics` mandatory.
    const metriclessRunner = async (
      _input: Record<string, unknown>,
      variant: { id: string },
    ) => {
      return {
        output: { ok: true, variantId: variant.id },
        steps: [],
        durationMs: 30,
      };
    };
    runnerImpl = metriclessRunner as unknown as typeof runnerImpl;

    await tn.runNetwork({
      networkName,
      input: { q: "no-metrics" },
      enableShadow: true,
      shadowSampleRate: 1,
    });

    const fetchLatestSample = async () => {
      const { rows } = await pool.query<{
        active_score: string;
        shadow_score: string;
      }>(
        `SELECT active_score::text, shadow_score::text
         FROM "${schema}".network_shadow_samples
         WHERE network_id = $1
         ORDER BY created_at DESC LIMIT 1`,
        [network.id],
      );
      return rows[0] ?? null;
    };
    const row = await waitFor(fetchLatestSample);
    assert.ok(row, "fallback-graded sample row was persisted");

    // Both sides are expected to be graded 1 when the metric is absent.
    const sample = row!;
    assert.equal(Number(sample.active_score), 1);
    assert.equal(Number(sample.shadow_score), 1);

    // Wait until exactly two fallback events exist.
    const fetchFallbackEvents = async () => {
      const { rows } = await pool.query<FallbackEventRow>(
        `SELECT payload
         FROM "${schema}".network_evolution_events
         WHERE network_id = $1 AND kind = 'shadow_reviewer_fallback'
         ORDER BY created_at ASC`,
        [network.id],
      );
      return rows.length === 2 ? rows : null;
    };
    const events = await waitFor(fetchFallbackEvents);
    assert.ok(events, "two shadow_reviewer_fallback events written");

    const fallbackEvents = events!;
    const distinctSides = Array.from(
      new Set(fallbackEvents.map((event) => event.payload.side)),
    ).sort();
    assert.deepEqual(
      distinctSides,
      ["active", "shadow"],
      "fallback counter fires once per side",
    );
    fallbackEvents.forEach((event) => {
      assert.equal(
        event.payload.source,
        "fallback_binary",
        "non-empty output ⇒ binary fallback source",
      );
    });

    // Leave a well-behaved runner in place for the following subtests.
    runnerImpl = async (_input, variant) => {
      return {
        output: { ok: true, variantId: variant.id },
        steps: [],
        durationMs: 30,
        metrics: { reviewerScore: 0.5 },
      };
    };
  },
);
|
|
| |
await t.test("pickShadowCandidate returns only matching shadows", async () => {
  // Called with the active variant id, the seeded shadow variant is returned.
  const candidate = await sh.pickShadowCandidate(network.id, activeVariantId);
  assert.ok(candidate, "candidate found");
  const picked = candidate!;
  assert.equal(picked.id, shadowVariant.id);

  // Called with the shadow variant's own id, nothing is returned.
  const remaining = await sh.pickShadowCandidate(network.id, shadowVariant.id);
  assert.equal(
    remaining,
    null,
    "no other shadow candidate exists once we exclude the only one",
  );
});
| }, |
| ); |
|
|