/**
 * Tool capability graph — Node-side tests.
 *
 * Covers:
 *  - tagsForIntent against the shared fixture (parity with Python via
 *    artifacts/research-engine/tests/test_graph_client.py)
 *  - resolveSubgraph: 1-hop expansion, empty intent, shadow nodes
 *  - describeSubgraph string shape
 *  - recordPlannerGap accumulator (one row per tag, count grows)
 *  - autoExtendIfNeeded creates exactly one provisional node + edge per
 *    crossed-threshold gap, marks the gap as extended
 *  - approveNode / rejectNode round-trip
 *  - validateContract, composition_alias exclusion and edge contract
 *    round-trip
 *  - seed structural invariants and seed cleanup of uncontracted edges
 *  - seam-health detector, captureSeamHealth / recomputeEdgeHealth
 *  - deprecation classifier (cold / redundant / superseded)
 *
 * Skipped when DATABASE_URL is unset.
 */
import { test } from "node:test";
import assert from "node:assert/strict";
import { randomBytes } from "node:crypto";
import { readFileSync } from "node:fs";
import { dirname, resolve } from "node:path";
import { fileURLToPath } from "node:url";

const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// `||` (not `??`) on purpose: an empty DATABASE_URL counts as "unset".
const ORIGINAL_DSN = process.env.DATABASE_URL || "";
const SKIP = !ORIGINAL_DSN;

/**
 * Returns `dsn` with an extra `options=-c search_path=<schema>` query
 * parameter (URL-encoded) appended.
 *
 * @param dsn - Connection string; may or may not already carry a query string.
 * @param schema - Schema name to place in `search_path`; it is URI-encoded.
 * @returns The DSN with the parameter joined by `&` when `dsn` already
 *   contains a `?`, otherwise by `?`.
 */
function dsnWithSearchPath(dsn: string, schema: string): string {
  const separator = dsn.includes("?") ? "&" : "?";
  const searchPathOption = `options=-c%20search_path%3D${encodeURIComponent(schema)}`;
  return `${dsn}${separator}${searchPathOption}`;
}

/** One intent-tagging case from `fixtures/intent-cases.json`. */
interface IntentCase {
  /** Intent text handed to `tagsForIntent`. */
  text: string;
  /** Tags the intent is expected to resolve to (compared order-insensitively). */
  expected: string[];
}

/**
 * Reads `fixtures/intent-cases.json` (resolved relative to this file) and
 * returns its `cases` array.
 *
 * @returns The validated intent cases.
 * @throws Error when the file has no top-level `cases` array or an entry is
 *   not of the shape `{ text: string, expected: string[] }`. Failing here gives
 *   a pointed message instead of an opaque TypeError inside a subtest.
 */
function loadFixture(): IntentCase[] {
  const fixturePath = resolve(__dirname, "fixtures/intent-cases.json");
  const parsed: unknown = JSON.parse(readFileSync(fixturePath, "utf8"));

  const isIntentCase = (value: unknown): value is IntentCase => {
    if (typeof value !== "object" || value === null) return false;
    const candidate = value as { text?: unknown; expected?: unknown };
    return (
      typeof candidate.text === "string" &&
      Array.isArray(candidate.expected) &&
      candidate.expected.every((tag) => typeof tag === "string")
    );
  };

  const cases: unknown =
    typeof parsed === "object" && parsed !== null
      ? (parsed as { cases?: unknown }).cases
      : undefined;
  if (!Array.isArray(cases)) {
    throw new Error(`${fixturePath}: expected a top-level "cases" array`);
  }

  const badIndex = cases.findIndex((entry) => !isIntentCase(entry));
  if (badIndex !== -1) {
    throw new Error(
      `${fixturePath}: cases[${badIndex}] must be { text: string, expected: string[] }`,
    );
  }
  return cases.filter(isIntentCase);
}

test("tool-graph against live PG ephemeral schema", {
  skip: SKIP && "DATABASE_URL not set",
}, async (t) => {
  // Fresh, randomly named schema per run so concurrent runs do not collide.
  const schema = `tool_graph_test_${randomBytes(6).toString("hex")}`;
  // Rewrite the DSN *before* importing the db module so the pool it exports
  // is configured with the ephemeral schema's search_path.
  // NOTE(review): the t.after cleanup is registered later in this callback, so
  // if the import or CREATE SCHEMA below throws, DATABASE_URL stays rewritten
  // for the rest of the process.
  process.env.DATABASE_URL = dsnWithSearchPath(ORIGINAL_DSN, schema);
  const { pool } = await import("@workspace/db");
  await pool.query(`CREATE SCHEMA "${schema}"`);
  // Mirror the tool-graph tables from lib/db/src/schema/toolGraph.ts inside the
  // ephemeral schema (the DDL that follows covers more than the original four
  // tables — TODO confirm every one of them is declared in that file).
// Register cleanup BEFORE running the table DDL: the hook is then already in
// place if the DDL or the module import below throws, so the ephemeral schema
// is dropped and DATABASE_URL restored instead of being leaked.
t.after(async () => {
  try {
    await pool.query(`DROP SCHEMA IF EXISTS "${schema}" CASCADE`);
  } finally {
    // Restore the env var even when the DROP itself fails.
    process.env.DATABASE_URL = ORIGINAL_DSN;
  }
});

// Create every table and index the tool-graph code touches, all
// schema-qualified so they land in the ephemeral schema. Index names are
// prefixed with the schema name. The statements are sent as one
// multi-statement query.
await pool.query(`
  CREATE TABLE "${schema}".tool_nodes (
    id text PRIMARY KEY,
    name text NOT NULL UNIQUE,
    description text NOT NULL DEFAULT '',
    capability_tags jsonb NOT NULL DEFAULT '[]'::jsonb,
    input_kind text NOT NULL DEFAULT 'json',
    output_kind text NOT NULL DEFAULT 'json',
    status text NOT NULL DEFAULT 'verified',
    owner_process text NOT NULL,
    spec_json jsonb NOT NULL,
    created_by text NOT NULL DEFAULT 'system',
    handler_ref text,
    handler_stub text,
    cost_hint double precision,
    latency_hint_ms integer,
    version integer NOT NULL DEFAULT 1,
    created_at timestamptz NOT NULL DEFAULT now(),
    updated_at timestamptz NOT NULL DEFAULT now()
  );
  CREATE TABLE "${schema}".tool_edges (
    id text PRIMARY KEY,
    from_node text NOT NULL REFERENCES "${schema}".tool_nodes(id) ON DELETE CASCADE,
    to_node text NOT NULL REFERENCES "${schema}".tool_nodes(id) ON DELETE CASCADE,
    relation text NOT NULL,
    weight double precision NOT NULL DEFAULT 1.0,
    contract jsonb,
    created_at timestamptz NOT NULL DEFAULT now(),
    CONSTRAINT tool_edges_uniq UNIQUE (from_node, to_node, relation)
  );
  CREATE TABLE "${schema}".tool_node_evidence (
    id text PRIMARY KEY,
    node_id text NOT NULL REFERENCES "${schema}".tool_nodes(id) ON DELETE CASCADE,
    kind text NOT NULL,
    payload jsonb NOT NULL,
    success integer NOT NULL DEFAULT 0,
    failure integer NOT NULL DEFAULT 0,
    shadow_user_id text,
    created_at timestamptz NOT NULL DEFAULT now(),
    seam_folded_at timestamptz
  );
  CREATE INDEX "${schema}_tne_seam_unfolded"
    ON "${schema}".tool_node_evidence(kind, seam_folded_at);
  CREATE TABLE "${schema}".tool_edge_health (
    id text PRIMARY KEY,
    edge_id text NOT NULL REFERENCES "${schema}".tool_edges(id) ON DELETE CASCADE,
    traversal_count integer NOT NULL DEFAULT 0,
    contract_issue_count integer NOT NULL DEFAULT 0,
    missing_field_count integer NOT NULL DEFAULT 0,
    ema_coverage double precision NOT NULL DEFAULT 1.0,
    ema_health_score double precision NOT NULL DEFAULT 1.0,
    top_missing_fields jsonb NOT NULL DEFAULT '{}'::jsonb,
    top_contract_issues jsonb NOT NULL DEFAULT '{}'::jsonb,
    formula_version integer NOT NULL DEFAULT 1,
    last_sample_at timestamptz,
    last_folded_evidence_id text,
    computed_at timestamptz NOT NULL DEFAULT now()
  );
  CREATE UNIQUE INDEX "${schema}_teh_edge"
    ON "${schema}".tool_edge_health(edge_id);
  CREATE TABLE "${schema}".tool_gap_signals (
    id text PRIMARY KEY,
    capability_tag text NOT NULL UNIQUE,
    invocation_count integer NOT NULL DEFAULT 0,
    status text NOT NULL DEFAULT 'open',
    last_context jsonb,
    extended_node_id text,
    first_seen_at timestamptz NOT NULL DEFAULT now(),
    last_seen_at timestamptz NOT NULL DEFAULT now()
  );
  CREATE TABLE "${schema}".tool_deprecation_candidates (
    id text PRIMARY KEY,
    node_id text NOT NULL REFERENCES "${schema}".tool_nodes(id) ON DELETE CASCADE,
    classification text NOT NULL,
    status text NOT NULL DEFAULT 'open',
    proposal_context jsonb NOT NULL DEFAULT '{}'::jsonb,
    defer_until timestamptz,
    re_arm_until timestamptz,
    decided_by text,
    decided_at timestamptz,
    created_at timestamptz NOT NULL DEFAULT now(),
    updated_at timestamptz NOT NULL DEFAULT now()
  );
  CREATE UNIQUE INDEX "${schema}_tdc_node"
    ON "${schema}".tool_deprecation_candidates(node_id);
  CREATE TABLE "${schema}".tool_nodes_archive (
    id text PRIMARY KEY,
    name text NOT NULL,
    description text NOT NULL DEFAULT '',
    capability_tags jsonb NOT NULL DEFAULT '[]'::jsonb,
    input_kind text NOT NULL DEFAULT 'json',
    output_kind text NOT NULL DEFAULT 'json',
    status text NOT NULL,
    owner_process text NOT NULL,
    spec_json jsonb NOT NULL,
    created_by text NOT NULL DEFAULT 'system',
    handler_ref text,
    handler_stub text,
    cost_hint double precision,
    latency_hint_ms integer,
    version integer NOT NULL DEFAULT 1,
    created_at timestamptz NOT NULL,
    updated_at timestamptz NOT NULL,
    archived_at timestamptz NOT NULL DEFAULT now()
  );
  CREATE TABLE "${schema}".tool_edges_archive (
    id text PRIMARY KEY,
    from_node text NOT NULL,
    to_node text NOT NULL,
    relation text NOT NULL,
    weight double precision NOT NULL DEFAULT 1.0,
    contract jsonb,
    created_at timestamptz NOT NULL,
    archived_at timestamptz NOT NULL DEFAULT now()
  );
  CREATE TABLE "${schema}".tool_node_evidence_archive (
    id text PRIMARY KEY,
    node_id text NOT NULL,
    kind text NOT NULL,
    payload jsonb NOT NULL,
    success integer NOT NULL DEFAULT 0,
    failure integer NOT NULL DEFAULT 0,
    shadow_user_id text,
    created_at timestamptz NOT NULL,
    seam_folded_at timestamptz,
    archived_at timestamptz NOT NULL DEFAULT now()
  );
  CREATE TABLE "${schema}".tool_summary_paths (
    id text PRIMARY KEY,
    name text NOT NULL UNIQUE,
    alias_node_id text,
    description_derived text NOT NULL DEFAULT '',
    description_override text,
    expansion_node_names jsonb NOT NULL DEFAULT '[]'::jsonb,
    head_atom_name text,
    tail_atom_name text,
    capability_tags jsonb NOT NULL DEFAULT '[]'::jsonb,
    est_cost_hint double precision,
    est_latency_ms_hint integer,
    status text NOT NULL DEFAULT 'active',
    version integer NOT NULL DEFAULT 1,
    traversal_count integer NOT NULL DEFAULT 0,
    created_at timestamptz NOT NULL DEFAULT now(),
    updated_at timestamptz NOT NULL DEFAULT now()
  );
  CREATE INDEX "${schema}_tsp_status"
    ON "${schema}".tool_summary_paths(status);
  CREATE TABLE "${schema}".tool_spawn_templates (
    id text PRIMARY KEY,
    fingerprint_hash text NOT NULL,
    fingerprint_algo_version integer NOT NULL DEFAULT 1,
    failure_mode text NOT NULL,
    missing_field_types jsonb NOT NULL DEFAULT '[]'::jsonb,
    unused_field_types jsonb NOT NULL DEFAULT '[]'::jsonb,
    downstream_input_schema_fingerprint text NOT NULL,
    promoted_input_schema_fingerprint text NOT NULL,
    promoted_output_schema_fingerprint text NOT NULL,
    handler_skeleton text NOT NULL DEFAULT '',
    spec_skeleton jsonb NOT NULL DEFAULT '{}'::jsonb,
    capability_tag text NOT NULL DEFAULT '',
    source_node_name text NOT NULL,
    offered_count integer NOT NULL DEFAULT 0,
    reuse_count integer NOT NULL DEFAULT 0,
    success_count integer NOT NULL DEFAULT 0,
    reject_count integer NOT NULL DEFAULT 0,
    status text NOT NULL DEFAULT 'active',
    demoted_at timestamptz,
    demoted_reason text,
    version integer NOT NULL DEFAULT 1,
    created_at timestamptz NOT NULL DEFAULT now(),
    updated_at timestamptz NOT NULL DEFAULT now()
  );
  CREATE INDEX "${schema}_tspt_fp"
    ON "${schema}".tool_spawn_templates(fingerprint_hash);
  CREATE INDEX "${schema}_tspt_status"
    ON "${schema}".tool_spawn_templates(status);
  CREATE INDEX "${schema}_tspt_failure_mode"
    ON "${schema}".tool_spawn_templates(failure_mode);
  CREATE UNIQUE INDEX "${schema}_tspt_uniq"
    ON "${schema}".tool_spawn_templates(fingerprint_hash, fingerprint_algo_version);
`);

// Imported only after DATABASE_URL was rewritten and the tables exist.
const tg = await import("../tool-graph");

// -------------------------------------------------- intent fixture parity
await t.test("tagsForIntent matches the shared fixture", () => {
  const cases = loadFixture();
  // Guard against a vacuous pass: an empty fixture would skip the loop below.
  assert.ok(cases.length > 0, "intent fixture must contain at least one case");
  for (const c of cases) {
    // Compare as sorted copies so tag order does not matter and neither the
    // fixture nor the returned array is mutated.
    const got = tg.tagsForIntent(c.text).slice().sort();
    const want = c.expected.slice().sort();
    assert.deepEqual(
      got,
      want,
      `case "${c.text}" → got ${JSON.stringify(got)} want ${JSON.stringify(want)}`,
    );
  }
});

// -------------------------------------------------- subgraph behaviour
// Seed a tiny graph: search_pubmed (literature_search) feeds
// summarize_bucket (summarization). Plus an unrelated lookup_uniprot
// (protein_lookup) that should be excluded from a literature query.
let pubmedId = ""; let summaryId = ""; let uniprotId = ""; await t.test("seed minimal verified graph", async () => { const a = await tg.upsertNode({ id: "tnode_pubmed_1", name: "search_pubmed", description: "Search PubMed for papers.", capabilityTags: ["literature_search"], inputKind: "json", outputKind: "json", status: "verified", ownerProcess: "node", specJson: { name: "search_pubmed" }, createdBy: "system", }); const b = await tg.upsertNode({ id: "tnode_summary_1", name: "summarize_bucket", description: "Summarize a literature bucket.", capabilityTags: ["summarization"], inputKind: "json", outputKind: "json", status: "verified", ownerProcess: "node", specJson: { name: "summarize_bucket" }, createdBy: "system", }); const c = await tg.upsertNode({ id: "tnode_uniprot_1", name: "lookup_uniprot", description: "Lookup a UniProt protein.", capabilityTags: ["protein_lookup"], inputKind: "json", outputKind: "json", status: "verified", ownerProcess: "node", specJson: { name: "lookup_uniprot" }, createdBy: "system", }); pubmedId = a.id; summaryId = b.id; uniprotId = c.id; await tg.upsertEdge({ id: "tedge_1", fromNode: a.id, toNode: b.id, relation: "feeds", weight: 1.0, }); }); await t.test("resolveSubgraph: literature intent pulls in 1-hop summary", async () => { const sg = await tg.resolveSubgraph({ intentText: "find pubmed papers" }); const names = sg.nodes.map((n) => n.name).sort(); assert.deepEqual(names, ["search_pubmed", "summarize_bucket"]); assert.equal(sg.matchedTags[0], "literature_search"); assert.ok(sg.edges.some((e) => e.relation === "feeds")); assert.ok(!names.includes("lookup_uniprot")); }); await t.test("resolveSubgraph: empty intent returns empty subgraph", async () => { const sg = await tg.resolveSubgraph({ intentText: "你好啊" }); assert.deepEqual(sg.nodes, []); assert.deepEqual(sg.edges, []); assert.deepEqual(sg.matchedTags, []); }); await t.test("resolveSubgraph: includeShadowNodeIds appends provisional", async () => { // Add a provisional node not 
connected to anything literature-y. const prov = await tg.upsertNode({ id: "tnode_shadow_1", name: "shadow_candidate", description: "Shadow candidate.", capabilityTags: ["literature_search"], inputKind: "json", outputKind: "json", status: "provisional", ownerProcess: "node", specJson: { name: "shadow_candidate" }, createdBy: "auto", }); const sg = await tg.resolveSubgraph({ intentText: "find pubmed papers", includeShadowNodeIds: [prov.id], }); const names = sg.nodes.map((n) => n.name).sort(); assert.ok(names.includes("shadow_candidate")); // Without the shadow id, the provisional node must not appear. const sgPlain = await tg.resolveSubgraph({ intentText: "find pubmed papers" }); assert.ok(!sgPlain.nodes.some((n) => n.name === "shadow_candidate")); }); // -------------------------------------------------- describe await t.test("describeSubgraph renders names + connections", async () => { const sg = await tg.resolveSubgraph({ intentText: "find pubmed papers" }); const text = tg.describeSubgraph(sg); assert.ok(text.includes("search_pubmed")); assert.ok(text.includes("summarize_bucket")); assert.ok(text.includes("Connections:")); assert.ok(text.includes("—[feeds]→")); }); // -------------------------------------------------- planner gap accumulator await t.test("recordPlannerGap accumulates per tag", async () => { await tg.recordPlannerGap("orphan_capability", { ctx: 1 }); await tg.recordPlannerGap("orphan_capability", { ctx: 2 }); const rows = await tg.listGapSignals(); const orphan = rows.find((r) => r.capabilityTag === "orphan_capability"); assert.ok(orphan, "gap signal row must exist"); assert.equal(orphan!.invocationCount, 2); assert.equal(orphan!.status, "open"); }); // -------------------------------------------------- auto-extend await t.test("autoExtendIfNeeded creates provisional + edge once threshold crossed", async () => { // Push the orphan_capability gap signal up to threshold (3 by default). 
await tg.recordPlannerGap("orphan_capability", { ctx: 3 }); const before = await tg.listNodes({ status: "provisional" }); const created = await tg.autoExtendIfNeeded(); assert.equal(created, 1, "exactly one provisional node should be created"); const after = await tg.listNodes({ status: "provisional" }); assert.equal( after.length - before.length, 1, "provisional node count must grow by 1", ); const gaps = await tg.listGapSignals(); const ext = gaps.find((g) => g.capabilityTag === "orphan_capability"); assert.equal(ext?.status, "extended"); assert.ok(ext?.extendedNodeId, "gap row must point at the new node"); // Running auto-extend again should not duplicate. const created2 = await tg.autoExtendIfNeeded(); assert.equal(created2, 0); }); // -------------------------------------------------- approve / reject await t.test("approveNode promotes provisional to verified", async () => { const provs = await tg.listNodes({ status: "provisional" }); const target = provs[0]!; const result = await tg.approveNode(target.id); assert.ok(result, "approveNode should return a node"); assert.equal(result!.status, "verified"); const verifiedNow = await tg.listNodes({ status: "verified" }); assert.ok(verifiedNow.some((n) => n.id === target.id)); }); await t.test("rejectNode deletes node and dismisses linked gap", async () => { // Force a fresh provisional via another auto-extend cycle. 
await tg.recordPlannerGap("another_gap", { ctx: 1 }); await tg.recordPlannerGap("another_gap", { ctx: 2 }); await tg.recordPlannerGap("another_gap", { ctx: 3 }); await tg.autoExtendIfNeeded(); const provs = await tg.listNodes({ status: "provisional" }); assert.ok(provs.length > 0); const target = provs[0]!; const ok = await tg.rejectNode(target.id); assert.equal(ok, true); const stillThere = await tg.getNode(target.id); assert.equal(stillThere, null); const gaps = await tg.listGapSignals(); const linked = gaps.find((g) => g.extendedNodeId === target.id); assert.equal(linked?.status, "dismissed"); }); // -------------------------------------------------- contract validator (#156) await t.test("validateContract: matching shape passes", () => { const r = tg.validateContract({ produces: { type: "object", properties: { ensembl_id: { type: "string" }, name: { type: "string" }, }, }, consumes: { type: "object", properties: { ensembl_id: { type: "string" } }, required: ["ensembl_id"], }, }); assert.equal(r.ok, true); assert.deepEqual(r.issues, []); }); await t.test("validateContract: missing required field flagged", () => { const r = tg.validateContract({ produces: { type: "object", properties: { ensembl_id: { type: "string" } }, }, consumes: { type: "object", properties: { ensembl_id: { type: "string" }, diseases: { type: "array" }, }, required: ["ensembl_id", "diseases"], }, }); assert.equal(r.ok, false); assert.equal(r.issues.length, 1); assert.equal(r.issues[0]!.field, "diseases"); assert.equal(r.issues[0]!.reason, "missing_in_produces"); }); await t.test("validateContract: type mismatch flagged", () => { const r = tg.validateContract({ produces: { type: "object", properties: { count: { type: "string" } }, }, consumes: { type: "object", properties: { count: { type: "integer" } }, required: ["count"], }, }); assert.equal(r.ok, false); assert.equal(r.issues[0]!.reason, "type_mismatch"); assert.equal(r.issues[0]!.expectedType, "integer"); 
assert.equal(r.issues[0]!.actualType, "string"); }); await t.test("validateContract: optional missing field ignored", () => { const r = tg.validateContract({ produces: { type: "object", properties: { a: { type: "string" } }, }, consumes: { type: "object", properties: { a: { type: "string" }, b: { type: "string" }, }, required: ["a"], }, }); assert.equal(r.ok, true); }); // -------------------------------------------------- composition_alias exclusion (#156) await t.test("composition_alias is excluded from resolveSubgraph", async () => { // Insert an alias node tagged "literature_search" — it would otherwise // be picked up by a literature intent. The resolver must exclude it // because its status is composition_alias, not verified. await tg.upsertNode({ id: "tnode_alias_1", name: "summarize_literature_bucket_alias", description: "Decomposed alias.", capabilityTags: ["literature_search"], inputKind: "json", outputKind: "json", status: "composition_alias", ownerProcess: "node", specJson: { name: "summarize_literature_bucket_alias" }, createdBy: "system", }); const sg = await tg.resolveSubgraph({ intentText: "find pubmed papers" }); const names = sg.nodes.map((n) => n.name); assert.ok( !names.includes("summarize_literature_bucket_alias"), `alias must not appear in subgraph; got ${JSON.stringify(names)}`, ); // listNodes(verified) must also exclude it. const verified = await tg.listNodes({ status: "verified" }); assert.ok(!verified.some((n) => n.name === "summarize_literature_bucket_alias")); // listNodes(any) must still see it (admin/debug surface). const any = await tg.listNodes({ status: "any" }); assert.ok(any.some((n) => n.name === "summarize_literature_bucket_alias")); }); // -------------------------------------------------- edge contract round-trip (#156) await t.test("upsertEdge persists contract jsonb and surfaces it on listEdges", async () => { // Two scratch nodes for the contract round-trip. 
const a = await tg.upsertNode({ id: "tnode_ot_find_1", name: "opentargets_find_target_id_test", description: "Test atomic node A.", capabilityTags: ["target_disease"], inputKind: "json", outputKind: "json", status: "verified", ownerProcess: "node", specJson: { name: "opentargets_find_target_id_test" }, createdBy: "system", }); const b = await tg.upsertNode({ id: "tnode_ot_assoc_1", name: "opentargets_get_target_associations_test", description: "Test atomic node B.", capabilityTags: ["target_disease"], inputKind: "json", outputKind: "json", status: "verified", ownerProcess: "node", specJson: { name: "opentargets_get_target_associations_test" }, createdBy: "system", }); const contract = { produces: { type: "object" as const, properties: { ensembl_id: { type: "string" as const } }, required: ["ensembl_id"], }, consumes: { type: "object" as const, properties: { ensembl_id: { type: "string" as const } }, required: ["ensembl_id"], }, }; await tg.upsertEdge({ id: "tedge_contract_1", fromNode: a.id, toNode: b.id, relation: "feeds", weight: 1.0, contract, }); const edges = await tg.listEdges([a.id, b.id]); const ours = edges.find( (e) => e.fromNode === a.id && e.toNode === b.id && e.relation === "feeds", ); assert.ok(ours, "edge must be persisted"); assert.ok(ours!.contract, "contract must round-trip"); assert.equal( ours!.contract!.produces.properties!["ensembl_id"]!.type, "string", ); }); // -------------------------------------------------- validateContract pure (#156) await t.test("validateContract flags missing fields and type mismatches", async () => { const okRes = tg.validateContract({ produces: { type: "object", properties: { x: { type: "string" }, y: { type: "number" } }, }, consumes: { type: "object", properties: { x: { type: "string" } }, required: ["x"], }, }); assert.equal(okRes.ok, true); const missing = tg.validateContract({ produces: { type: "object", properties: { other: { type: "string" } } }, consumes: { type: "object", properties: { x: { type: "string" 
} }, required: ["x"], }, }); assert.equal(missing.ok, false); assert.ok( missing.issues.some((i) => i.field === "x" && i.reason === "missing_in_produces"), ); const mismatch = tg.validateContract({ produces: { type: "object", properties: { x: { type: "number" } } }, consumes: { type: "object", properties: { x: { type: "string" } }, required: ["x"], }, }); assert.equal(mismatch.ok, false); assert.ok( mismatch.issues.some((i) => i.field === "x" && i.reason === "type_mismatch"), ); }); // -------------------------------------------------- seed structural invariants (#156) await t.test("seed: every persisted edge carries a contract; aliases compose ≥2 atoms", async () => { const seedMod = await import("../tool-graph-seed"); const { ATOMIC_NODES, ATOMIC_EDGES, TOOL_EDGES, ALIAS_TOOLS, DATA_SOURCES, deriveContract, aliasMembersOf } = seedMod.__SEED_INTERNALS__; // Universe of valid endpoint names: atoms ∪ aliases ∪ data sources. const atomNames = new Set(ATOMIC_NODES.map((n) => n.name)); const aliasNames: Set = ALIAS_TOOLS; const dataNames = new Set(DATA_SOURCES.map((d) => d.name)); const known = new Set([...atomNames, ...aliasNames, ...dataNames]); for (const edge of ATOMIC_EDGES) { assert.ok( known.has(edge.from), `ATOMIC_EDGES from "${edge.from}" must reference a known node`, ); assert.ok( known.has(edge.to), `ATOMIC_EDGES to "${edge.to}" must reference a known node`, ); } // Every alias must compose ≥2 atomic members (i.e. real decomposition, // not a degenerate one-step "alias"). ALIAS_TOOLS is a Set of // alias names. for (const aliasName of ALIAS_TOOLS) { const members = aliasMembersOf(aliasName); assert.ok( members.length >= 2, `alias "${aliasName}" must compose ≥2 atoms, got ${members.length}`, ); } // deriveContract on a known atomic feeds-edge must validate. // Pick the first feeds edge between two known atoms. 
const feeds = ATOMIC_EDGES.find( (e) => e.relation === "feeds" && atomNames.has(e.from) && atomNames.has(e.to), ); assert.ok(feeds, "expected at least one atom→atom feeds edge in ATOMIC_EDGES"); const c = deriveContract(feeds!); assert.ok(c, "deriveContract must produce a contract for known feeds edge"); const v = tg.validateContract(c); assert.equal( v.ok, true, `derived contract for ${feeds!.from}→${feeds!.to} must validate; issues=${JSON.stringify(v.issues)}`, ); // Heuristic TOOL_EDGES are coarser (cross-domain adjacency) and per // #156 must NOT carry hand-authored contracts — they declare only // (from,to,relation) and the seed mechanically derives the contract // from each endpoint's declared schema. Assert the entries are // shape-only and that the persisted edges they produce all carry // a contract (see SEED_INTERNALS-backed structural test below). for (const e of TOOL_EDGES) { assert.ok( !("contract" in e), `TOOL_EDGES entry ${e.from}→${e.to} (${e.relation}) must NOT carry a hand-authored contract — derive from endpoint schemas instead`, ); } // Substrate-drift regression (per #156 spec line 32). For every // backed_by edge whose upstream declares an explicit // `consumesSubstrateSchema`, the derived contract's `consumes` // side must match that declaration (not the substrate's // outputSchema). And the contract must validate as long as the // substrate produces every required substrate-row field. 
const backedBy = ATOMIC_EDGES.filter((e) => e.relation === "backed_by"); let substrateChecks = 0; for (const e of backedBy) { const upNode = ATOMIC_NODES.find((n: { name: string }) => n.name === e.from); const dsNode = DATA_SOURCES.find((n: { name: string }) => n.name === e.to); if (!upNode || !dsNode || !upNode.consumesSubstrateSchema) continue; substrateChecks += 1; const derived = deriveContract(e); assert.deepEqual( derived.consumes, upNode.consumesSubstrateSchema, `backed_by ${e.from}→${e.to} consumes side must equal upstream's consumesSubstrateSchema`, ); assert.deepEqual( derived.produces, dsNode.outputSchema, `backed_by ${e.from}→${e.to} produces side must equal substrate's outputSchema`, ); } assert.ok( substrateChecks > 0, "expected at least one backed_by edge whose upstream declares consumesSubstrateSchema", ); // Drift-injection: synthesize a fake atomic op whose // consumesSubstrateSchema requires a field the substrate does NOT // produce. The derived contract must FAIL structural validation — // proving substrate-shape drift is detectable at the seam. 
const fakeOp = { name: "__drift_test_op", capabilityTags: ["test"], sideEffectKind: "read" as const, ownerProcess: "node" as const, inputSchema: { type: "object" }, outputSchema: { type: "object" }, consumesSubstrateSchema: { type: "object", properties: { field_that_substrate_does_not_emit: { type: "string" }, }, required: ["field_that_substrate_does_not_emit"], }, }; const fakeContract: tg.ContractSpec = { produces: DATA_SOURCES[0].outputSchema as Record, consumes: fakeOp.consumesSubstrateSchema as Record, }; const driftV = tg.validateContract(fakeContract); assert.equal( driftV.ok, false, "substrate-drift contract must fail structural validation", ); assert.ok( driftV.issues.some( (i: { reason: string }) => i.reason === "missing_in_produces", ), "drift validation must report missing_in_produces", ); }); // -------------------------------------------------- planner provisional preservation (#156) await t.test("seed cleanup preserves provisional uncontracted edges", async () => { // Insert a provisional node + an uncontracted edge between two // provisional nodes. The seed's stale-cleanup must NOT delete it, // because both endpoints are outside the curated seed (TOOLS / // DATA_SOURCES / ATOMIC_NODES). This is the regression test for // the safety scope of the cleanup query. 
const provA = await tg.upsertNode({ id: "tnode_prov_a", name: "provisional_atom_alpha", description: "Planner-extended provisional node A.", capabilityTags: ["custom_capability"], inputKind: "json", outputKind: "json", status: "provisional", ownerProcess: "node", specJson: { name: "provisional_atom_alpha" }, createdBy: "planner", }); const provB = await tg.upsertNode({ id: "tnode_prov_b", name: "provisional_atom_beta", description: "Planner-extended provisional node B.", capabilityTags: ["custom_capability"], inputKind: "json", outputKind: "json", status: "provisional", ownerProcess: "node", specJson: { name: "provisional_atom_beta" }, createdBy: "planner", }); await tg.upsertEdge({ id: "tedge_prov_uncontracted", fromNode: provA.id, toNode: provB.id, relation: "feeds", weight: 1.0, // contract intentionally omitted — provisional edges are free of // contracts until an admin verifies them. }); // Run seed (full driver — touches the same DB). const seedMod = await import("../tool-graph-seed"); await seedMod.seedToolGraph(); // Per #156 spec line 82 ("Every edge ships with a contract or it // does not exist"), the seed cleanup must REMOVE this uncontracted // edge — even though both endpoints are provisional planner nodes. // The invariant is absolute; planner auto-extension code must // synthesize a contract before upserting any edge. 
const edges = await tg.listEdges([provA.id, provB.id]); const survived = edges.find( (e) => e.fromNode === provA.id && e.toNode === provB.id && e.relation === "feeds", ); assert.equal( survived, undefined, "uncontracted edges must be removed by seed cleanup (spec invariant)", ); }); // -------------------------------------------------- seam health (#157) await t.test("seam-health detector: clean payload yields perfect score", async () => { const { evaluateSeam } = await import("../seam-health-detector"); const r = evaluateSeam( { produces: { type: "object" }, consumes: { type: "object", properties: { id: { type: "string" }, score: { type: "number" } }, required: ["id"], }, }, { id: "abc", score: 0.5 }, ); assert.equal(r.missingFields.length, 0); assert.equal(r.contractIssues.length, 0); assert.equal(r.coverage, 1); assert.equal(r.healthScore, 1); }); await t.test("seam-health detector: missing required field is flagged and scored down", async () => { const { evaluateSeam } = await import("../seam-health-detector"); const r = evaluateSeam( { produces: { type: "object" }, consumes: { type: "object", properties: { id: { type: "string" }, score: { type: "number" } }, required: ["id", "score"], }, }, { id: "abc" }, ); assert.deepEqual(r.missingFields, ["score"]); assert.equal(r.coverage, 0.5); assert.ok(r.healthScore < 0.5, `expected healthScore < 0.5, got ${r.healthScore}`); }); await t.test("seam-health detector: type mismatch raises contract issue", async () => { const { evaluateSeam } = await import("../seam-health-detector"); const r = evaluateSeam( { produces: { type: "object" }, consumes: { type: "object", properties: { items: { type: "array" }, name: { type: "string" } }, required: ["items"], }, }, { items: "not-an-array", name: 42 }, ); assert.ok(r.contractIssues.includes("expected_array:items")); assert.ok(r.contractIssues.includes("type_mismatch:name")); }); await t.test("seam-health detector: advisory contract suppresses contract issues", async () => { const 
{ evaluateSeam } = await import("../seam-health-detector"); const r = evaluateSeam( { produces: { type: "object" }, consumes: { type: "object", properties: { items: { type: "array" } }, required: ["items"], }, mappingHints: { advisory: true }, }, { items: "broken" }, ); assert.deepEqual(r.contractIssues, []); }); await t.test("seam-health detector: malformed contract degrades to empty sample (no throw)", async () => { const { evaluateSeam } = await import("../seam-health-detector"); const r = evaluateSeam(null, { foo: 1 }); assert.equal(r.healthScore, 1); assert.equal(r.coverage, 1); }); await t.test("captureSeamHealth + recomputeEdgeHealth round-trip: missing field drops score, clean recovery raises EMA", async () => { // Set up a tiny seam: producer → consumer with a contract requiring // `target.id`. Then write 3 broken samples followed by 3 clean // samples through the public capture API and watch the EMA move. const prod = await tg.upsertNode({ id: "tnode_seam_prod", name: "seam_test_producer", description: "producer", capabilityTags: ["test_producer"], inputKind: "json", outputKind: "json", status: "verified", ownerProcess: "node", specJson: { name: "seam_test_producer" }, createdBy: "system", }); const cons = await tg.upsertNode({ id: "tnode_seam_cons", name: "seam_test_consumer", description: "consumer", capabilityTags: ["test_consumer"], inputKind: "json", outputKind: "json", status: "verified", ownerProcess: "node", specJson: { name: "seam_test_consumer" }, createdBy: "system", }); await tg.upsertEdge({ id: "tedge_seam_test", fromNode: prod.id, toNode: cons.id, relation: "feeds", weight: 1.0, contract: { produces: { type: "object" }, consumes: { type: "object", properties: { id: { type: "string" }, score: { type: "number" } }, required: ["id", "score"], }, }, }); // 3 broken samples (missing score) for (let i = 0; i < 3; i++) { const r = await tg.captureSeamHealth({ upstreamName: "seam_test_producer", downstreamName: "seam_test_consumer", payload: { id: 
`x${i}` },
      });
      assert.ok(r, "captureSeamHealth must return a result for declared edge");
      assert.deepEqual(r!.sample.missingFields, ["score"]);
    }
    let agg = await tg.recomputeEdgeHealth();
    assert.ok(agg.samplesFolded >= 3);
    let listed = await tg.listEdgeHealth();
    let mine = listed.find((h) => h.edgeId === "tedge_seam_test");
    assert.ok(mine, "edge health row must exist after recompute");
    assert.equal(mine!.traversalCount, 3);
    assert.ok(mine!.missingFieldCount >= 3);
    assert.ok(mine!.emaHealthScore < 0.7, `expected unhealthy EMA after misses, got ${mine!.emaHealthScore}`);
    assert.deepEqual(mine!.topMissingFields, { score: 3 });
    // listUnhealthyEdges surfaces it
    const bad = await tg.listUnhealthyEdges({ minTraversals: 1, maxHealthScore: 0.85 });
    assert.ok(bad.some((e) => e.edgeId === "tedge_seam_test"), "unhealthy listing must include the broken seam");
    // 3 clean samples — EMA should recover upward
    for (let i = 0; i < 3; i++) {
      await tg.captureSeamHealth({
        upstreamName: "seam_test_producer",
        downstreamName: "seam_test_consumer",
        payload: { id: `y${i}`, score: i * 0.1 },
      });
    }
    agg = await tg.recomputeEdgeHealth();
    listed = await tg.listEdgeHealth();
    mine = listed.find((h) => h.edgeId === "tedge_seam_test")!;
    assert.equal(mine.traversalCount, 6);
    assert.ok(
      mine.emaHealthScore > 0.4,
      `expected EMA to climb after clean samples, got ${mine.emaHealthScore}`,
    );
    // getEdgeHealth returns recent samples
    const detail = await tg.getEdgeHealth("tedge_seam_test", 10);
    assert.ok(detail.edge);
    assert.ok(detail.recent.length >= 1);
    assert.ok(
      typeof (detail.recent[0]!.payload as { health_score?: number }).health_score === "number",
    );
  });

  // A capture against a downstream name that is not in the graph must
  // resolve to null instead of throwing.
  await t.test("captureSeamHealth is non-blocking when downstream is unknown", async () => {
    const r = await tg.captureSeamHealth({
      downstreamName: "definitely_does_not_exist",
      payload: { foo: 1 },
    });
    assert.equal(r, null);
  });

  // -------------------------------------------------- deprecation (#159)

  // A verified node whose only invocation is 90 days old must be proposed
  // as a "cold" candidate when the detector runs with coldDays=30.
  await t.test("classifier: cold node flagged after coldDays of inactivity", async () => {
    // Insert a verified node old enough to bypass the age cutoff and an
    // ancient invocation row. coldDays=30 → 90 days old qualifies.
    const old = await tg.upsertNode({
      id: "tnode_cold_1",
      name: "cold_widget_search",
      description: "An unused capability.",
      capabilityTags: ["cold_widget"],
      inputKind: "json",
      outputKind: "json",
      status: "verified",
      ownerProcess: "node",
      specJson: { name: "cold_widget_search" },
      createdBy: "system",
    });
    // Backdate created_at so the age filter inside runDeprecationDetector
    // accepts it.
    const ninetyDaysAgo = new Date(Date.now() - 90 * 86_400_000);
    await pool.query(
      `UPDATE "${schema}".tool_nodes SET created_at = $1, updated_at = $1 WHERE id = $2`,
      [ninetyDaysAgo, old.id],
    );
    await pool.query(
      `INSERT INTO "${schema}".tool_node_evidence (id, node_id, kind, payload, success, failure, created_at) VALUES ($1, $2, 'invocation', '{}'::jsonb, 1, 0, $3)`,
      ["tev_cold_old", old.id, ninetyDaysAgo],
    );
    const summary = await tg.runDeprecationDetector({ coldDays: 30 });
    assert.ok(summary.proposalsConsidered >= 1);
    const open = await tg.listDeprecationCandidates({ status: "open" });
    const mine = open.find((c) => c.nodeId === old.id);
    assert.ok(mine, "cold node must produce an open candidate");
    assert.equal(mine!.classification, "cold");
    const supportingMetrics = (mine!.proposalContext as { supportingMetrics: { invocationCount: number; daysSinceLastInvocation: number } }).supportingMetrics;
    assert.equal(supportingMetrics.invocationCount, 1);
    // The cast above already types this field as `number`, so no non-null
    // assertion is needed.
    assert.ok(supportingMetrics.daysSinceLastInvocation >= 30);
  });

  // Negative case: two same-tag nodes with identical invocation counts
  // must not yield a "redundant" candidate for either of them.
  await t.test("classifier: redundant requires sibling AND health gap (false-positive guard)", async () => {
    // Two nodes with the same tag and similar invocation counts must NOT
    // be flagged as redundant — only when the sibling dominates by ≥10×
    // AND has a healthy edge AND target has an unhealthy edge.
    const sibA = await tg.upsertNode({
      id: "tnode_redA",
      name: "popular_search",
      description: "Popular implementation.",
      capabilityTags: ["sibling_capability"],
      inputKind: "json",
      outputKind: "json",
      status: "verified",
      ownerProcess: "node",
      specJson: { name: "popular_search" },
      createdBy: "system",
    });
    const sibB = await tg.upsertNode({
      id: "tnode_redB",
      name: "rival_search",
      description: "Equal-popularity rival.",
      capabilityTags: ["sibling_capability"],
      inputKind: "json",
      outputKind: "json",
      status: "verified",
      ownerProcess: "node",
      specJson: { name: "rival_search" },
      createdBy: "system",
    });
    const ageOk = new Date(Date.now() - 90 * 86_400_000);
    await pool.query(
      `UPDATE "${schema}".tool_nodes SET created_at = $1 WHERE id IN ($2,$3)`,
      [ageOk, sibA.id, sibB.id],
    );
    // Equal traversals → must not be flagged redundant for either.
    for (let i = 0; i < 5; i++) {
      await pool.query(
        `INSERT INTO "${schema}".tool_node_evidence (id, node_id, kind, payload, success, failure) VALUES ($1, $2, 'invocation', '{}'::jsonb, 1, 0)`,
        [`tev_eq_a_${i}`, sibA.id],
      );
      await pool.query(
        `INSERT INTO "${schema}".tool_node_evidence (id, node_id, kind, payload, success, failure) VALUES ($1, $2, 'invocation', '{}'::jsonb, 1, 0)`,
        [`tev_eq_b_${i}`, sibB.id],
      );
    }
    await tg.runDeprecationDetector({ coldDays: 30 });
    const cands = await tg.listDeprecationCandidates({ status: "any" });
    assert.ok(
      !cands.some((c) => c.classification === "redundant" && (c.nodeId === sibA.id || c.nodeId === sibB.id)),
      "evenly used siblings must not be flagged redundant",
    );
  });

  // A node whose spec carries `replacedBy` must be classified "superseded"
  // and the replacement id must be echoed in the supporting metrics.
  await t.test("classifier: superseded triggered by spec_json.replacedBy", async () => {
    const sup = await tg.upsertNode({
      id: "tnode_sup_1",
      name: "ancient_widget",
      description: "A node that has been spliced.",
      capabilityTags: ["splice_test"],
      inputKind: "json",
      outputKind: "json",
      status: "verified",
      ownerProcess: "node",
      specJson: { name: "ancient_widget", replacedBy: "tnode_modern" },
      createdBy: "system",
    });
    const ageOk = new Date(Date.now() - 90 * 86_400_000);
    await pool.query(
      `UPDATE "${schema}".tool_nodes SET created_at = $1 WHERE id = $2`,
      [ageOk, sup.id],
    );
    await tg.runDeprecationDetector({ coldDays: 30 });
    const cands = await tg.listDeprecationCandidates({ status: "any" });
    const mine = cands.find((c) => c.nodeId === sup.id);
    assert.ok(mine, "spec_json.replacedBy must produce a candidate");
    assert.equal(mine!.classification, "superseded");
    const sm = (mine!.proposalContext as { supportingMetrics: { replacedBy?: string } }).supportingMetrics;
    assert.equal(sm.replacedBy, "tnode_modern");
  });

  // Once a candidate is approved via deprecateNode, the node must vanish
  // from resolveSubgraph while a direct lookup reports it as deprecated.
  await t.test("resolveSubgraph excludes deprecated nodes", async () => {
    // Promote one of our seeded literature nodes through the deprecation
    // flow and confirm it disappears from resolveSubgraph.
    const ageOk = new Date(Date.now() - 200 * 86_400_000);
    await pool.query(
      `UPDATE "${schema}".tool_nodes SET created_at = $1, updated_at = $1 WHERE id = $2`,
      [ageOk, pubmedId],
    );
    await tg.runDeprecationDetector({ coldDays: 30 });
    const cands = await tg.listDeprecationCandidates({ status: "open" });
    const mine = cands.find((c) => c.nodeId === pubmedId);
    assert.ok(mine, "pubmed seed must now be a candidate");
    await tg.deprecateNode(mine!.id, "tester");
    const sg = await tg.resolveSubgraph({ intentText: "find pubmed papers" });
    assert.ok(!sg.nodes.some((n) => n.id === pubmedId), "deprecated node must not appear in subgraph");
    // A direct by-name lookup must still resolve the node and flag it as
    // deprecated (replacedBy is not asserted here).
    const direct = await tg.resolveNodeForDirectReference("search_pubmed");
    assert.ok(direct && direct.deprecated === true, "direct reference must flag deprecation");
  });

  // Deferring and then rejecting a candidate must each make the next
  // detector pass report the node as skipped.
  await t.test("defer + re-arm windows are respected by the detector", async () => {
    const node = await tg.upsertNode({
      id: "tnode_defer_1",
      name: "defer_target",
      description: "Defer flow target.",
      capabilityTags: ["defer_capability"],
      inputKind: "json",
      outputKind: "json",
      status: "verified",
      ownerProcess: "node",
      specJson: { name: "defer_target" },
      createdBy: "system",
    });
    const ageOk = new Date(Date.now() - 200 * 86_400_000);
    await pool.query(
      `UPDATE "${schema}".tool_nodes SET created_at = $1, updated_at = $1 WHERE id = $2`,
      [ageOk, node.id],
    );
    await tg.runDeprecationDetector({ coldDays: 30 });
    const opened = (await tg.listDeprecationCandidates({ status: "open" })).find(
      (c) => c.nodeId === node.id,
    );
    assert.ok(opened, "first detector pass must open the candidate");
    await tg.deferDeprecationCandidate(opened!.id, "tester", 14);
    const r1 = await tg.runDeprecationDetector({ coldDays: 30 });
    assert.ok(r1.skippedDeferred >= 1, "deferred candidate must be skipped");
    const stillDeferred = (
      await tg.listDeprecationCandidates({ status: "deferred" })
    ).find((c) => c.nodeId === node.id);
    assert.ok(stillDeferred, "row stays deferred when defer_until is in the future");
    // Reject path: re-arm window also blocks re-proposal.
    await tg.rejectDeprecationCandidate(opened!.id, "tester", 30);
    const r2 = await tg.runDeprecationDetector({ coldDays: 30 });
    assert.ok(r2.skippedReArm >= 1, "rejected candidate must be skipped");
  });

  // A deprecated node whose updated_at is 200 days old must be moved, with
  // its edge and evidence, out of the hot tables by runArchiveJob.
  await t.test("archive job moves rows transactionally and leaves nothing in hot tables", async () => {
    const node = await tg.upsertNode({
      id: "tnode_arch_1",
      name: "archive_target",
      description: "Archive flow target.",
      capabilityTags: ["arch_capability"],
      inputKind: "json",
      outputKind: "json",
      status: "verified",
      ownerProcess: "node",
      specJson: { name: "archive_target" },
      createdBy: "system",
    });
    // Add an edge + an evidence row so all three archive tables receive
    // contributions.
    const peer = await tg.upsertNode({
      id: "tnode_arch_peer",
      name: "archive_peer",
      description: "peer",
      capabilityTags: ["arch_capability"],
      inputKind: "json",
      outputKind: "json",
      status: "verified",
      ownerProcess: "node",
      specJson: { name: "archive_peer" },
      createdBy: "system",
    });
    await tg.upsertEdge({
      id: "tedge_arch_1",
      fromNode: node.id,
      toNode: peer.id,
      relation: "feeds",
      weight: 1.0,
    });
    await pool.query(
      `INSERT INTO "${schema}".tool_node_evidence (id, node_id, kind, payload, success, failure) VALUES ($1, $2, 'invocation', '{}'::jsonb, 1, 0)`,
      ["tev_arch_1", node.id],
    );
    // Force-deprecate by creating a candidate row directly + approving.
    await pool.query(
      `INSERT INTO "${schema}".tool_deprecation_candidates (id, node_id, classification, status, proposal_context, created_at, updated_at) VALUES ($1, $2, 'cold', 'open', '{}'::jsonb, now(), now())`,
      ["tdep_arch_1", node.id],
    );
    await tg.deprecateNode("tdep_arch_1", "tester");
    // Backdate updated_at past the archive cutoff (default 180 days).
    await pool.query(
      `UPDATE "${schema}".tool_nodes SET updated_at = $1 WHERE id = $2`,
      [new Date(Date.now() - 200 * 86_400_000), node.id],
    );
    const r = await tg.runArchiveJob();
    assert.ok(r.nodesArchived >= 1, "archive job must move at least one node");
    assert.ok(r.edgesArchived >= 1);
    assert.ok(r.evidenceArchived >= 1);
    // Hot tables must be clean.
    const hotNode = await pool.query(
      `SELECT 1 FROM "${schema}".tool_nodes WHERE id = $1`,
      [node.id],
    );
    assert.equal(hotNode.rowCount, 0);
    const hotEdge = await pool.query(
      `SELECT 1 FROM "${schema}".tool_edges WHERE id = $1`,
      ["tedge_arch_1"],
    );
    assert.equal(hotEdge.rowCount, 0);
    // The evidence row must be gone from the hot table as well; the test
    // schema declares node_id with ON DELETE CASCADE, so this follows from
    // the node's removal and completes the "nothing in hot tables" claim.
    const hotEvidence = await pool.query(
      `SELECT 1 FROM "${schema}".tool_node_evidence WHERE id = $1`,
      ["tev_arch_1"],
    );
    assert.equal(hotEvidence.rowCount, 0);
    const archNode = await pool.query(
      `SELECT 1 FROM "${schema}".tool_nodes_archive WHERE id = $1`,
      [node.id],
    );
    assert.equal(archNode.rowCount, 1);
    const archEdge = await pool.query(
      `SELECT 1 FROM "${schema}".tool_edges_archive WHERE id = $1`,
      ["tedge_arch_1"],
    );
    assert.equal(archEdge.rowCount, 1);
  });

  // -------------------------------------------------- spawn templates (#161)

  // Renaming properties must leave the fingerprint unchanged; changing a
  // property type must change it.
  await t.test("schemaFingerprint strips field names but preserves structure", async () => {
    const st = await import("../spawn-templates");
    const a = st.schemaFingerprint({
      type: "object",
      properties: {
        target_id: { type: "string" },
        score: { type: "number" },
      },
      required: ["target_id"],
    });
    const b = st.schemaFingerprint({
      type: "object",
      properties: {
        // Different names, same structure → same fingerprint.
        ensembl_id: { type: "string" },
        confidence: { type: "number" },
      },
      required: ["ensembl_id"],
    });
    assert.equal(a, b, "field names must not influence the fingerprint");
    // Type change must change the fingerprint.
    const c = st.schemaFingerprint({
      type: "object",
      properties: {
        target_id: { type: "integer" },
        score: { type: "number" },
      },
      required: ["target_id"],
    });
    assert.notEqual(a, c, "type changes must alter the fingerprint");
  });

  // Two calls with the same input must agree on hash and canonical string,
  // and the breakdown must report the types of the named fields.
  await t.test("computeSeamFingerprint is deterministic and salted", async () => {
    const st = await import("../spawn-templates");
    const input: import("../spawn-templates").SeamFingerprintInput = {
      failureMode: "missing_required_field",
      downstreamInputSchema: {
        type: "object",
        properties: { id: { type: "string" }, count: { type: "integer" } },
        required: ["id", "count"],
      },
      upstreamOutputSchema: {
        type: "object",
        properties: { id: { type: "string" }, extra: { type: "boolean" } },
      },
      missingRequiredFieldNames: ["count"],
      unusedFieldNames: ["extra"],
      capabilityTag: "demo_tag",
    };
    const fp1 = st.computeSeamFingerprint(input);
    const fp2 = st.computeSeamFingerprint(input);
    assert.equal(fp1.hash, fp2.hash, "same input → same hash");
    assert.deepEqual(fp1.breakdown.missingFieldTypes, ["integer"]);
    assert.deepEqual(fp1.breakdown.unusedFieldTypes, ["boolean"]);
    // The salt's effect on the hash is NOT exercised here (we cannot mutate
    // env safely). Instead, check that the canonical string is stable and
    // that the hash is 64 lowercase hex characters.
    assert.equal(fp1.canonical, fp2.canonical);
    assert.match(fp1.hash, /^[0-9a-f]{64}$/);
  });

  // A query for a tag/shape that no earlier test persisted must return null.
  await t.test("searchTemplates returns null when no rows exist", async () => {
    const st = await import("../spawn-templates");
    const r = await st.searchTemplates({
      failureMode: "capability_gap",
      downstreamInputSchema: { type: "object", properties: {} },
      capabilityTag: "no_match_tag_xyzzy",
    });
    assert.equal(r, null);
  });

  // Persist a template, then search with the very same fingerprint input:
  // the result must be an exact match (score 1) pointing at that template.
  await t.test("persistTemplateOnPromote → exact match found by searchTemplates", async () => {
    const st = await import("../spawn-templates");
    const fpInput: import("../spawn-templates").SeamFingerprintInput = {
      failureMode: "capability_gap",
      downstreamInputSchema: {
        type: "object",
        properties: { query: { type: "string" } },
        required: ["query"],
      },
      capabilityTag: "exact_match_test",
    };
    const persisted = await st.persistTemplateOnPromote({
      fingerprintInput: fpInput,
      promotedNodeName: "auto_exact_match_test_aaa111",
      promotedInputSchema: {
        type: "object",
        properties: { query: { type: "string" } },
      },
      promotedOutputSchema: { type: "object" },
      handlerSkeleton: "export async function invoke(args) { return { ok: true }; }",
      specSkeleton: {
        name: "auto_exact_match_test_aaa111",
        description: "templated handler",
        parameters: {
          type: "object",
          properties: { query: { type: "string" } },
          required: ["query"],
        },
      },
    });
    assert.ok(persisted, "template must persist");
    const match = await st.searchTemplates(fpInput);
    assert.ok(match, "exact match must be found");
    assert.equal(match!.strength, "exact");
    assert.equal(match!.score, 1);
    assert.equal(match!.template.id, persisted!.id);
  });

  // Same seam shape persisted under one capability tag and queried under
  // another must still come back as an exact match.
  await t.test("structurally identical seam across different tags is EXACT (tag is not in fingerprint)", async () => {
    const st = await import("../spawn-templates");
    const baseSchema = {
      type: "object" as const,
      properties: { id: { type: "string" as const } },
      required: ["id"],
    };
    await st.persistTemplateOnPromote({
      fingerprintInput: {
        failureMode: "missing_required_field",
        downstreamInputSchema: baseSchema,
        upstreamOutputSchema: { type: "object", properties: {} },
        missingRequiredFieldNames: ["id"],
        capabilityTag: "tag_alpha_for_exact_test",
      },
      promotedNodeName: "auto_tag_alpha_xxx",
      promotedInputSchema: baseSchema,
      promotedOutputSchema: { type: "object" },
      handlerSkeleton: "export async function invoke(a) { return { id: a.id }; }",
      specSkeleton: { name: "auto_tag_alpha_xxx", parameters: baseSchema },
    });
    const r = await st.searchTemplates({
      failureMode: "missing_required_field",
      downstreamInputSchema: baseSchema,
      upstreamOutputSchema: { type: "object", properties: {} },
      missingRequiredFieldNames: ["id"],
      capabilityTag: "tag_beta_for_exact_test",
    });
    assert.ok(r, "match must surface");
    assert.equal(r!.strength, "exact", "tag-only differences must collapse to exact");
    assert.equal(r!.score, 1);
  });

  // Seed with four missing fields, query with five over the same downstream
  // schema: the result must be a "near" match with score in [0.8, 1).
  await t.test("genuine NEAR match: same failure_mode + same downstream fp + Jaccard ≥ 0.8 over slot bag", async () => {
    const st = await import("../spawn-templates");
    const baseSchema = {
      type: "object" as const,
      properties: {
        a: { type: "string" as const },
        b: { type: "string" as const },
        c: { type: "string" as const },
        d: { type: "string" as const },
        e: { type: "string" as const },
      },
      required: ["a", "b", "c", "d", "e"],
    };
    // Seed a template with 4 missing fields.
    await st.persistTemplateOnPromote({
      fingerprintInput: {
        failureMode: "missing_required_field",
        downstreamInputSchema: baseSchema,
        upstreamOutputSchema: { type: "object", properties: {} },
        missingRequiredFieldNames: ["a", "b", "c", "d"],
        capabilityTag: "near_real_seed",
      },
      promotedNodeName: "auto_near_real_seed_xxx",
      promotedInputSchema: baseSchema,
      promotedOutputSchema: { type: "object" },
      handlerSkeleton: "export async function invoke(args) { return args; }",
      specSkeleton: { name: "auto_near_real_seed_xxx", parameters: baseSchema },
    });
    // Query with 5 missing fields (slot bag is ⊋ of seed) → Jaccard
    // 4/5 = 0.8, exact key MISSES (different miss list → different
    // canonical → different hash) → must hit near with score = 0.8.
    const r = await st.searchTemplates({
      failureMode: "missing_required_field",
      downstreamInputSchema: baseSchema,
      upstreamOutputSchema: { type: "object", properties: {} },
      missingRequiredFieldNames: ["a", "b", "c", "d", "e"],
      capabilityTag: "near_real_query",
    });
    assert.ok(r, "near match must surface");
    assert.equal(r!.strength, "near");
    assert.ok(r!.score >= 0.8 && r!.score < 1, `score must be in [0.8, 1) (got ${r!.score})`);
    assert.ok(
      r!.parameterizationGaps.includes("missing_field_types"),
      "missing_field_types must be flagged as a parameterization gap",
    );
  });

  // A downstream schema that no persisted template shares must return null.
  await t.test("NO near match when downstream fingerprint differs (hard precondition)", async () => {
    const st = await import("../spawn-templates");
    const r = await st.searchTemplates({
      failureMode: "missing_required_field",
      downstreamInputSchema: {
        type: "object",
        properties: { totally_unique_field: { type: "boolean" } },
        required: ["totally_unique_field"],
      },
      upstreamOutputSchema: { type: "object", properties: {} },
      missingRequiredFieldNames: ["totally_unique_field"],
      capabilityTag: "downstream_fp_mismatch",
    });
    assert.equal(r, null, "downstream fingerprint mismatch must short-circuit near search");
  });

  // Strings planted in the handler skeleton and in non-standard spec fields
  // must not survive into the persisted template.
  await t.test("privacy: persisted template never contains user payload strings", async () => {
    const st = await import("../spawn-templates");
    // Inject "user data"-looking strings in the spec the reviewer
    // edited and into the handler skeleton. Ingestion must strip them.
    const userSecret = "user_secret_value_aaaaaaaaaaaaaaaaaaaa";
    const userExample = "patient-12345-PHI-record-DO-NOT-LEAK";
    const persisted = await st.persistTemplateOnPromote({
      fingerprintInput: {
        failureMode: "capability_gap",
        downstreamInputSchema: { type: "object", properties: {} },
        capabilityTag: "privacy_test_tag",
      },
      promotedNodeName: "auto_privacy_test_tag_zzz999",
      promotedInputSchema: { type: "object" },
      promotedOutputSchema: { type: "object" },
      handlerSkeleton: `export async function invoke(args) { const example = "${userExample}"; return { example, secret: "${userSecret}" }; }`,
      specSkeleton: {
        name: "auto_privacy_test_tag_zzz999",
        description: "OK desc",
        parameters: { type: "object", properties: {} },
        // Reviewer left a comment field — must be dropped by allowlist.
        leaked_comment: userSecret,
        examples: [userExample],
      },
    });
    assert.ok(persisted);
    st.__assertNoUserDataLeaked(persisted!, [userSecret, userExample]);
  });

  // Six offers with one recorded promote result must cause maybeAutoDemote
  // to demote the template, after which searchTemplates no longer returns it.
  await t.test("auto-demotion: low success rate after threshold offers triggers silent demote", async () => {
    const st = await import("../spawn-templates");
    const persisted = await st.persistTemplateOnPromote({
      fingerprintInput: {
        failureMode: "type_mismatch",
        downstreamInputSchema: {
          type: "object",
          properties: { x: { type: "string" } },
          required: ["x"],
        },
        capabilityTag: "demote_test_tag",
      },
      promotedNodeName: "auto_demote_test_xxx",
      promotedInputSchema: { type: "object" },
      promotedOutputSchema: { type: "object" },
      handlerSkeleton: "stub",
      specSkeleton: {},
    });
    assert.ok(persisted);
    // Set a low threshold via env then exercise.
    const prevMin = process.env["TEMPLATE_DEMOTE_MIN_OFFERS"];
    const prevRate = process.env["TEMPLATE_DEMOTE_SUCCESS_RATE"];
    process.env["TEMPLATE_DEMOTE_MIN_OFFERS"] = "5";
    process.env["TEMPLATE_DEMOTE_SUCCESS_RATE"] = "0.3";
    try {
      // NOTE: a repeated `import("../spawn-templates")` resolves to the
      // already-loaded module instance, so it cannot re-run import-time
      // initialisation; the earlier no-op "reload" import was dropped and
      // `st` is used directly. The overrides above therefore only matter
      // if the module reads these variables when maybeAutoDemote is
      // called — TODO confirm against spawn-templates.
      //
      // Bump offered_count to 6 (above threshold) and success_count to 1
      // (rate ~ 0.17, below floor).
      for (let i = 0; i < 6; i += 1) {
        await st.recordTemplateOffered(persisted!.id);
      }
      await st.recordTemplatePromoteResult(persisted!.id);
      const r = await st.maybeAutoDemote(persisted!.id);
      assert.equal(r.demoted, true, "must demote when below floor");
      // Demoted templates excluded from search.
      const search = await st.searchTemplates({
        failureMode: "type_mismatch",
        downstreamInputSchema: {
          type: "object",
          properties: { x: { type: "string" } },
          required: ["x"],
        },
        capabilityTag: "demote_test_tag",
      });
      assert.equal(search, null, "demoted template must not be returned by searchTemplates");
    } finally {
      // Restore the caller's environment exactly, including "unset".
      if (prevMin === undefined) delete process.env["TEMPLATE_DEMOTE_MIN_OFFERS"];
      else process.env["TEMPLATE_DEMOTE_MIN_OFFERS"] = prevMin;
      if (prevRate === undefined) delete process.env["TEMPLATE_DEMOTE_SUCCESS_RATE"];
      else process.env["TEMPLATE_DEMOTE_SUCCESS_RATE"] = prevRate;
    }
  });

  // Full loop: auto-spawn a provisional node, promote it, auto-spawn a
  // second one under another tag, and check that a template is offered and
  // that approving with "use_edit" bumps the template's counters.
  await t.test(
    "end-to-end: spawn → promote → next spawn for identical seam offers the template",
    async () => {
      // Use unique capability tags to avoid touching prior tests.
      const tag1 = "e2e_template_a";
      const tag2 = "e2e_template_b";
      // First spawn
      for (let i = 0; i < tg.GAP_AUTO_EXTEND_THRESHOLD; i += 1) {
        await tg.recordPlannerGap(tag1, { ctx: i });
      }
      const created1 = await tg.autoExtendIfNeeded();
      assert.ok(created1 >= 1);
      const provs1 = await tg.listNodes({ status: "provisional" });
      const target1 = provs1.find((n) => n.name.startsWith(`auto_${tag1}_`));
      assert.ok(target1, "first provisional must exist");
      // proposalContext must always be present (template_match may be
      // null or a match — earlier tests may have seeded templates).
      const ctx1 = (target1!.spec as { proposalContext?: { template_match?: unknown } })
        .proposalContext;
      assert.ok(ctx1, "proposalContext must be present on auto-spawned node");
      // Promote it. This persists a template into the library.
      const promoted = await tg.approveNode(target1!.id, {
        templateChoice: "fresh",
        reviewer: "tester",
      });
      assert.ok(promoted, "promotion must succeed");
      // Second spawn for an identical seam shape (different tag, same
      // structure) → a template match must be offered; either "exact" or
      // "near" strength is accepted below.
      for (let i = 0; i < tg.GAP_AUTO_EXTEND_THRESHOLD; i += 1) {
        await tg.recordPlannerGap(tag2, { ctx: i });
      }
      await tg.autoExtendIfNeeded();
      const provs2 = await tg.listNodes({ status: "provisional" });
      const target2 = provs2.find((n) => n.name.startsWith(`auto_${tag2}_`));
      assert.ok(target2, "second provisional must exist");
      const ctx2 = (
        target2!.spec as {
          proposalContext?: {
            template_match?: { template_id: string; strength: string } | null;
          };
        }
      ).proposalContext;
      assert.ok(
        ctx2 && ctx2.template_match,
        `second spawn must surface a template match (got ${JSON.stringify(ctx2)})`,
      );
      assert.ok(
        ctx2!.template_match!.strength === "exact" || ctx2!.template_match!.strength === "near",
        "strength must be exact or near",
      );
      // Reviewer chooses Use+edit; template counters update.
      const tplId = ctx2!.template_match!.template_id;
      const before = await pool.query(
        `SELECT offered_count, reuse_count, success_count FROM "${schema}".tool_spawn_templates WHERE id = $1`,
        [tplId],
      );
      assert.ok(before.rows[0]);
      await tg.approveNode(target2!.id, {
        templateChoice: "use_edit",
        sourceTemplateId: tplId,
        reviewer: "tester",
      });
      const after = await pool.query(
        `SELECT offered_count, reuse_count, success_count FROM "${schema}".tool_spawn_templates WHERE id = $1`,
        [tplId],
      );
      // Fail with a readable assertion instead of a TypeError if the
      // template row disappeared during approval.
      assert.ok(after.rows[0], "template row must still exist after use_edit approval");
      assert.ok(
        after.rows[0].reuse_count > before.rows[0].reuse_count,
        "reuse_count must increment on use_edit",
      );
      assert.ok(
        after.rows[0].success_count > before.rows[0].success_count,
        "success_count must increment on promote-after-reuse",
      );
    },
  );

  // unused vars suppression
  void uniprotId;
  void summaryId;
  void pubmedId;
});