doatlas-2 / artifacts /api-server /src /lib /__tests__ /tool-graph.test.ts
Iostream-Li's picture
Add files using upload-large-folder tool
ff78003 verified
/**
* Tool capability graph — Node-side tests.
*
* Covers:
* - tagsForIntent against the shared fixture (parity with Python via
* artifacts/research-engine/tests/test_graph_client.py)
* - resolveSubgraph: 1-hop expansion, empty intent, shadow nodes
* - describeSubgraph string shape
* - recordPlannerGap accumulator (one row per tag, count grows)
* - autoExtendIfNeeded creates exactly one provisional node + edge per
* crossed-threshold gap, marks the gap as extended
* - approveNode / rejectNode round-trip
*
* Skipped when DATABASE_URL is unset.
*/
import { test } from "node:test";
import assert from "node:assert/strict";
import { randomBytes } from "node:crypto";
import { readFileSync } from "node:fs";
import { dirname, resolve } from "node:path";
import { fileURLToPath } from "node:url";
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const ORIGINAL_DSN = process.env.DATABASE_URL || "";
const SKIP = !ORIGINAL_DSN;
/**
 * Build a copy of `dsn` carrying an extra `options` query parameter whose
 * (percent-encoded) value is `-c search_path=<schema>`.
 *
 * The parameter is joined with `&` when `dsn` already contains a `?`
 * anywhere, and with `?` otherwise. The schema name is passed through
 * `encodeURIComponent`; the input DSN itself is not parsed or modified.
 *
 * @param dsn    Connection string to extend.
 * @param schema Schema name to place in `search_path`.
 * @returns The DSN with the `options` parameter appended.
 */
function dsnWithSearchPath(dsn: string, schema: string): string {
  const separator = dsn.indexOf("?") === -1 ? "?" : "&";
  const searchPathOption = "options=-c%20search_path%3D" + encodeURIComponent(schema);
  return dsn + separator + searchPathOption;
}
/** One entry of the `cases` array in the intent fixture read by `loadFixture`. */
interface IntentCase {
/** Intent text; the fixture test passes it to `tagsForIntent`. */
text: string;
/** Tags expected back for `text`; the test sorts both sides before comparing, so order is irrelevant. */
expected: string[];
}
/**
 * Read and validate the intent fixture at `fixtures/intent-cases.json`
 * (resolved relative to this test file's directory).
 *
 * The file must be a JSON object with a top-level `cases` array whose
 * entries each have a string `text` and a string-array `expected`. The
 * shape is checked here so a malformed fixture fails with a message naming
 * the file and the offending entry, instead of an opaque error later in
 * the test loop.
 *
 * @returns The fixture's `cases` array (may be empty).
 * @throws Error if the file cannot be read or parsed, or the shape is wrong.
 */
function loadFixture(): IntentCase[] {
  const fixturePath = resolve(__dirname, "fixtures/intent-cases.json");
  const parsed: unknown = JSON.parse(readFileSync(fixturePath, "utf8"));

  const cases =
    typeof parsed === "object" && parsed !== null
      ? (parsed as { cases?: unknown }).cases
      : undefined;
  if (!Array.isArray(cases)) {
    throw new Error(`${fixturePath}: expected a top-level "cases" array`);
  }

  cases.forEach((entry: unknown, index: number) => {
    if (typeof entry !== "object" || entry === null) {
      throw new Error(`${fixturePath}: cases[${index}] must be an object`);
    }
    const { text, expected } = entry as { text?: unknown; expected?: unknown };
    const wellFormed =
      typeof text === "string" &&
      Array.isArray(expected) &&
      expected.every((tag: unknown) => typeof tag === "string");
    if (!wellFormed) {
      throw new Error(
        `${fixturePath}: cases[${index}] must have a string "text" and a string[] "expected"`,
      );
    }
  });

  // Shape verified above, so the cast no longer hides anything.
  return cases as IntentCase[];
}
test("tool-graph against live PG ephemeral schema", {
skip: SKIP && "DATABASE_URL not set",
}, async (t) => {
const schema = `tool_graph_test_${randomBytes(6).toString("hex")}`;
process.env.DATABASE_URL = dsnWithSearchPath(ORIGINAL_DSN, schema);
const { pool } = await import("@workspace/db");
await pool.query(`CREATE SCHEMA "${schema}"`);
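// Mirror the tool-graph tables (see lib/db/src/schema/toolGraph.ts). More than the original four are created below, including the archive, summary-path and spawn-template tables.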
await pool.query(`
CREATE TABLE "${schema}".tool_nodes (
id text PRIMARY KEY,
name text NOT NULL UNIQUE,
description text NOT NULL DEFAULT '',
capability_tags jsonb NOT NULL DEFAULT '[]'::jsonb,
input_kind text NOT NULL DEFAULT 'json',
output_kind text NOT NULL DEFAULT 'json',
status text NOT NULL DEFAULT 'verified',
owner_process text NOT NULL,
spec_json jsonb NOT NULL,
created_by text NOT NULL DEFAULT 'system',
handler_ref text,
handler_stub text,
cost_hint double precision,
latency_hint_ms integer,
version integer NOT NULL DEFAULT 1,
created_at timestamptz NOT NULL DEFAULT now(),
updated_at timestamptz NOT NULL DEFAULT now()
);
CREATE TABLE "${schema}".tool_edges (
id text PRIMARY KEY,
from_node text NOT NULL REFERENCES "${schema}".tool_nodes(id) ON DELETE CASCADE,
to_node text NOT NULL REFERENCES "${schema}".tool_nodes(id) ON DELETE CASCADE,
relation text NOT NULL,
weight double precision NOT NULL DEFAULT 1.0,
contract jsonb,
created_at timestamptz NOT NULL DEFAULT now(),
CONSTRAINT tool_edges_uniq UNIQUE (from_node, to_node, relation)
);
CREATE TABLE "${schema}".tool_node_evidence (
id text PRIMARY KEY,
node_id text NOT NULL REFERENCES "${schema}".tool_nodes(id) ON DELETE CASCADE,
kind text NOT NULL,
payload jsonb NOT NULL,
success integer NOT NULL DEFAULT 0,
failure integer NOT NULL DEFAULT 0,
shadow_user_id text,
created_at timestamptz NOT NULL DEFAULT now(),
seam_folded_at timestamptz
);
CREATE INDEX "${schema}_tne_seam_unfolded" ON "${schema}".tool_node_evidence(kind, seam_folded_at);
CREATE TABLE "${schema}".tool_edge_health (
id text PRIMARY KEY,
edge_id text NOT NULL REFERENCES "${schema}".tool_edges(id) ON DELETE CASCADE,
traversal_count integer NOT NULL DEFAULT 0,
contract_issue_count integer NOT NULL DEFAULT 0,
missing_field_count integer NOT NULL DEFAULT 0,
ema_coverage double precision NOT NULL DEFAULT 1.0,
ema_health_score double precision NOT NULL DEFAULT 1.0,
top_missing_fields jsonb NOT NULL DEFAULT '{}'::jsonb,
top_contract_issues jsonb NOT NULL DEFAULT '{}'::jsonb,
formula_version integer NOT NULL DEFAULT 1,
last_sample_at timestamptz,
last_folded_evidence_id text,
computed_at timestamptz NOT NULL DEFAULT now()
);
CREATE UNIQUE INDEX "${schema}_teh_edge" ON "${schema}".tool_edge_health(edge_id);
CREATE TABLE "${schema}".tool_gap_signals (
id text PRIMARY KEY,
capability_tag text NOT NULL UNIQUE,
invocation_count integer NOT NULL DEFAULT 0,
status text NOT NULL DEFAULT 'open',
last_context jsonb,
extended_node_id text,
first_seen_at timestamptz NOT NULL DEFAULT now(),
last_seen_at timestamptz NOT NULL DEFAULT now()
);
CREATE TABLE "${schema}".tool_deprecation_candidates (
id text PRIMARY KEY,
node_id text NOT NULL REFERENCES "${schema}".tool_nodes(id) ON DELETE CASCADE,
classification text NOT NULL,
status text NOT NULL DEFAULT 'open',
proposal_context jsonb NOT NULL DEFAULT '{}'::jsonb,
defer_until timestamptz,
re_arm_until timestamptz,
decided_by text,
decided_at timestamptz,
created_at timestamptz NOT NULL DEFAULT now(),
updated_at timestamptz NOT NULL DEFAULT now()
);
CREATE UNIQUE INDEX "${schema}_tdc_node" ON "${schema}".tool_deprecation_candidates(node_id);
CREATE TABLE "${schema}".tool_nodes_archive (
id text PRIMARY KEY,
name text NOT NULL,
description text NOT NULL DEFAULT '',
capability_tags jsonb NOT NULL DEFAULT '[]'::jsonb,
input_kind text NOT NULL DEFAULT 'json',
output_kind text NOT NULL DEFAULT 'json',
status text NOT NULL,
owner_process text NOT NULL,
spec_json jsonb NOT NULL,
created_by text NOT NULL DEFAULT 'system',
handler_ref text,
handler_stub text,
cost_hint double precision,
latency_hint_ms integer,
version integer NOT NULL DEFAULT 1,
created_at timestamptz NOT NULL,
updated_at timestamptz NOT NULL,
archived_at timestamptz NOT NULL DEFAULT now()
);
CREATE TABLE "${schema}".tool_edges_archive (
id text PRIMARY KEY,
from_node text NOT NULL,
to_node text NOT NULL,
relation text NOT NULL,
weight double precision NOT NULL DEFAULT 1.0,
contract jsonb,
created_at timestamptz NOT NULL,
archived_at timestamptz NOT NULL DEFAULT now()
);
CREATE TABLE "${schema}".tool_node_evidence_archive (
id text PRIMARY KEY,
node_id text NOT NULL,
kind text NOT NULL,
payload jsonb NOT NULL,
success integer NOT NULL DEFAULT 0,
failure integer NOT NULL DEFAULT 0,
shadow_user_id text,
created_at timestamptz NOT NULL,
seam_folded_at timestamptz,
archived_at timestamptz NOT NULL DEFAULT now()
);
CREATE TABLE "${schema}".tool_summary_paths (
id text PRIMARY KEY,
name text NOT NULL UNIQUE,
alias_node_id text,
description_derived text NOT NULL DEFAULT '',
description_override text,
expansion_node_names jsonb NOT NULL DEFAULT '[]'::jsonb,
head_atom_name text,
tail_atom_name text,
capability_tags jsonb NOT NULL DEFAULT '[]'::jsonb,
est_cost_hint double precision,
est_latency_ms_hint integer,
status text NOT NULL DEFAULT 'active',
version integer NOT NULL DEFAULT 1,
traversal_count integer NOT NULL DEFAULT 0,
created_at timestamptz NOT NULL DEFAULT now(),
updated_at timestamptz NOT NULL DEFAULT now()
);
CREATE INDEX "${schema}_tsp_status" ON "${schema}".tool_summary_paths(status);
CREATE TABLE "${schema}".tool_spawn_templates (
id text PRIMARY KEY,
fingerprint_hash text NOT NULL,
fingerprint_algo_version integer NOT NULL DEFAULT 1,
failure_mode text NOT NULL,
missing_field_types jsonb NOT NULL DEFAULT '[]'::jsonb,
unused_field_types jsonb NOT NULL DEFAULT '[]'::jsonb,
downstream_input_schema_fingerprint text NOT NULL,
promoted_input_schema_fingerprint text NOT NULL,
promoted_output_schema_fingerprint text NOT NULL,
handler_skeleton text NOT NULL DEFAULT '',
spec_skeleton jsonb NOT NULL DEFAULT '{}'::jsonb,
capability_tag text NOT NULL DEFAULT '',
source_node_name text NOT NULL,
offered_count integer NOT NULL DEFAULT 0,
reuse_count integer NOT NULL DEFAULT 0,
success_count integer NOT NULL DEFAULT 0,
reject_count integer NOT NULL DEFAULT 0,
status text NOT NULL DEFAULT 'active',
demoted_at timestamptz,
demoted_reason text,
version integer NOT NULL DEFAULT 1,
created_at timestamptz NOT NULL DEFAULT now(),
updated_at timestamptz NOT NULL DEFAULT now()
);
CREATE INDEX "${schema}_tspt_fp" ON "${schema}".tool_spawn_templates(fingerprint_hash);
CREATE INDEX "${schema}_tspt_status" ON "${schema}".tool_spawn_templates(status);
CREATE INDEX "${schema}_tspt_failure_mode" ON "${schema}".tool_spawn_templates(failure_mode);
CREATE UNIQUE INDEX "${schema}_tspt_uniq" ON "${schema}".tool_spawn_templates(fingerprint_hash, fingerprint_algo_version);
`);
const tg = await import("../tool-graph");
t.after(async () => {
try {
await pool.query(`DROP SCHEMA IF EXISTS "${schema}" CASCADE`);
} finally {
process.env.DATABASE_URL = ORIGINAL_DSN;
}
});
// -------------------------------------------------- intent fixture parity
await t.test("tagsForIntent matches the shared fixture", () => {
const cases = loadFixture();
for (const c of cases) {
const got = tg.tagsForIntent(c.text).slice().sort();
const want = c.expected.slice().sort();
assert.deepEqual(
got,
want,
`case "${c.text}" → got ${JSON.stringify(got)} want ${JSON.stringify(want)}`,
);
}
});
// -------------------------------------------------- subgraph behaviour
// Seed a tiny graph: search_pubmed (literature_search) feeds
// summarize_bucket (summarization). Plus an unrelated lookup_uniprot
// (protein_lookup) that should be excluded from a literature query.
let pubmedId = "";
let summaryId = "";
let uniprotId = "";
await t.test("seed minimal verified graph", async () => {
const a = await tg.upsertNode({
id: "tnode_pubmed_1",
name: "search_pubmed",
description: "Search PubMed for papers.",
capabilityTags: ["literature_search"],
inputKind: "json",
outputKind: "json",
status: "verified",
ownerProcess: "node",
specJson: { name: "search_pubmed" },
createdBy: "system",
});
const b = await tg.upsertNode({
id: "tnode_summary_1",
name: "summarize_bucket",
description: "Summarize a literature bucket.",
capabilityTags: ["summarization"],
inputKind: "json",
outputKind: "json",
status: "verified",
ownerProcess: "node",
specJson: { name: "summarize_bucket" },
createdBy: "system",
});
const c = await tg.upsertNode({
id: "tnode_uniprot_1",
name: "lookup_uniprot",
description: "Lookup a UniProt protein.",
capabilityTags: ["protein_lookup"],
inputKind: "json",
outputKind: "json",
status: "verified",
ownerProcess: "node",
specJson: { name: "lookup_uniprot" },
createdBy: "system",
});
pubmedId = a.id;
summaryId = b.id;
uniprotId = c.id;
await tg.upsertEdge({
id: "tedge_1",
fromNode: a.id,
toNode: b.id,
relation: "feeds",
weight: 1.0,
});
});
await t.test("resolveSubgraph: literature intent pulls in 1-hop summary", async () => {
const sg = await tg.resolveSubgraph({ intentText: "find pubmed papers" });
const names = sg.nodes.map((n) => n.name).sort();
assert.deepEqual(names, ["search_pubmed", "summarize_bucket"]);
assert.equal(sg.matchedTags[0], "literature_search");
assert.ok(sg.edges.some((e) => e.relation === "feeds"));
assert.ok(!names.includes("lookup_uniprot"));
});
await t.test("resolveSubgraph: empty intent returns empty subgraph", async () => {
const sg = await tg.resolveSubgraph({ intentText: "你好啊" });
assert.deepEqual(sg.nodes, []);
assert.deepEqual(sg.edges, []);
assert.deepEqual(sg.matchedTags, []);
});
await t.test("resolveSubgraph: includeShadowNodeIds appends provisional", async () => {
// Add a provisional node not connected to anything literature-y.
const prov = await tg.upsertNode({
id: "tnode_shadow_1",
name: "shadow_candidate",
description: "Shadow candidate.",
capabilityTags: ["literature_search"],
inputKind: "json",
outputKind: "json",
status: "provisional",
ownerProcess: "node",
specJson: { name: "shadow_candidate" },
createdBy: "auto",
});
const sg = await tg.resolveSubgraph({
intentText: "find pubmed papers",
includeShadowNodeIds: [prov.id],
});
const names = sg.nodes.map((n) => n.name).sort();
assert.ok(names.includes("shadow_candidate"));
// Without the shadow id, the provisional node must not appear.
const sgPlain = await tg.resolveSubgraph({ intentText: "find pubmed papers" });
assert.ok(!sgPlain.nodes.some((n) => n.name === "shadow_candidate"));
});
// -------------------------------------------------- describe
await t.test("describeSubgraph renders names + connections", async () => {
const sg = await tg.resolveSubgraph({ intentText: "find pubmed papers" });
const text = tg.describeSubgraph(sg);
assert.ok(text.includes("search_pubmed"));
assert.ok(text.includes("summarize_bucket"));
assert.ok(text.includes("Connections:"));
assert.ok(text.includes("—[feeds]→"));
});
// -------------------------------------------------- planner gap accumulator
await t.test("recordPlannerGap accumulates per tag", async () => {
await tg.recordPlannerGap("orphan_capability", { ctx: 1 });
await tg.recordPlannerGap("orphan_capability", { ctx: 2 });
const rows = await tg.listGapSignals();
const orphan = rows.find((r) => r.capabilityTag === "orphan_capability");
assert.ok(orphan, "gap signal row must exist");
assert.equal(orphan!.invocationCount, 2);
assert.equal(orphan!.status, "open");
});
// -------------------------------------------------- auto-extend
await t.test("autoExtendIfNeeded creates provisional + edge once threshold crossed", async () => {
// Push the orphan_capability gap signal up to threshold (3 by default).
await tg.recordPlannerGap("orphan_capability", { ctx: 3 });
const before = await tg.listNodes({ status: "provisional" });
const created = await tg.autoExtendIfNeeded();
assert.equal(created, 1, "exactly one provisional node should be created");
const after = await tg.listNodes({ status: "provisional" });
assert.equal(
after.length - before.length,
1,
"provisional node count must grow by 1",
);
const gaps = await tg.listGapSignals();
const ext = gaps.find((g) => g.capabilityTag === "orphan_capability");
assert.equal(ext?.status, "extended");
assert.ok(ext?.extendedNodeId, "gap row must point at the new node");
// Running auto-extend again should not duplicate.
const created2 = await tg.autoExtendIfNeeded();
assert.equal(created2, 0);
});
// -------------------------------------------------- approve / reject
await t.test("approveNode promotes provisional to verified", async () => {
const provs = await tg.listNodes({ status: "provisional" });
const target = provs[0]!;
const result = await tg.approveNode(target.id);
assert.ok(result, "approveNode should return a node");
assert.equal(result!.status, "verified");
const verifiedNow = await tg.listNodes({ status: "verified" });
assert.ok(verifiedNow.some((n) => n.id === target.id));
});
await t.test("rejectNode deletes node and dismisses linked gap", async () => {
// Force a fresh provisional via another auto-extend cycle.
await tg.recordPlannerGap("another_gap", { ctx: 1 });
await tg.recordPlannerGap("another_gap", { ctx: 2 });
await tg.recordPlannerGap("another_gap", { ctx: 3 });
await tg.autoExtendIfNeeded();
const provs = await tg.listNodes({ status: "provisional" });
assert.ok(provs.length > 0);
const target = provs[0]!;
const ok = await tg.rejectNode(target.id);
assert.equal(ok, true);
const stillThere = await tg.getNode(target.id);
assert.equal(stillThere, null);
const gaps = await tg.listGapSignals();
const linked = gaps.find((g) => g.extendedNodeId === target.id);
assert.equal(linked?.status, "dismissed");
});
// -------------------------------------------------- contract validator (#156)
await t.test("validateContract: matching shape passes", () => {
const r = tg.validateContract({
produces: {
type: "object",
properties: {
ensembl_id: { type: "string" },
name: { type: "string" },
},
},
consumes: {
type: "object",
properties: { ensembl_id: { type: "string" } },
required: ["ensembl_id"],
},
});
assert.equal(r.ok, true);
assert.deepEqual(r.issues, []);
});
await t.test("validateContract: missing required field flagged", () => {
const r = tg.validateContract({
produces: {
type: "object",
properties: { ensembl_id: { type: "string" } },
},
consumes: {
type: "object",
properties: {
ensembl_id: { type: "string" },
diseases: { type: "array" },
},
required: ["ensembl_id", "diseases"],
},
});
assert.equal(r.ok, false);
assert.equal(r.issues.length, 1);
assert.equal(r.issues[0]!.field, "diseases");
assert.equal(r.issues[0]!.reason, "missing_in_produces");
});
await t.test("validateContract: type mismatch flagged", () => {
const r = tg.validateContract({
produces: {
type: "object",
properties: { count: { type: "string" } },
},
consumes: {
type: "object",
properties: { count: { type: "integer" } },
required: ["count"],
},
});
assert.equal(r.ok, false);
assert.equal(r.issues[0]!.reason, "type_mismatch");
assert.equal(r.issues[0]!.expectedType, "integer");
assert.equal(r.issues[0]!.actualType, "string");
});
await t.test("validateContract: optional missing field ignored", () => {
const r = tg.validateContract({
produces: {
type: "object",
properties: { a: { type: "string" } },
},
consumes: {
type: "object",
properties: {
a: { type: "string" },
b: { type: "string" },
},
required: ["a"],
},
});
assert.equal(r.ok, true);
});
// -------------------------------------------------- composition_alias exclusion (#156)
await t.test("composition_alias is excluded from resolveSubgraph", async () => {
// Insert an alias node tagged "literature_search" — it would otherwise
// be picked up by a literature intent. The resolver must exclude it
// because its status is composition_alias, not verified.
await tg.upsertNode({
id: "tnode_alias_1",
name: "summarize_literature_bucket_alias",
description: "Decomposed alias.",
capabilityTags: ["literature_search"],
inputKind: "json",
outputKind: "json",
status: "composition_alias",
ownerProcess: "node",
specJson: { name: "summarize_literature_bucket_alias" },
createdBy: "system",
});
const sg = await tg.resolveSubgraph({ intentText: "find pubmed papers" });
const names = sg.nodes.map((n) => n.name);
assert.ok(
!names.includes("summarize_literature_bucket_alias"),
`alias must not appear in subgraph; got ${JSON.stringify(names)}`,
);
// listNodes(verified) must also exclude it.
const verified = await tg.listNodes({ status: "verified" });
assert.ok(!verified.some((n) => n.name === "summarize_literature_bucket_alias"));
// listNodes(any) must still see it (admin/debug surface).
const any = await tg.listNodes({ status: "any" });
assert.ok(any.some((n) => n.name === "summarize_literature_bucket_alias"));
});
// -------------------------------------------------- edge contract round-trip (#156)
await t.test("upsertEdge persists contract jsonb and surfaces it on listEdges", async () => {
// Two scratch nodes for the contract round-trip.
const a = await tg.upsertNode({
id: "tnode_ot_find_1",
name: "opentargets_find_target_id_test",
description: "Test atomic node A.",
capabilityTags: ["target_disease"],
inputKind: "json",
outputKind: "json",
status: "verified",
ownerProcess: "node",
specJson: { name: "opentargets_find_target_id_test" },
createdBy: "system",
});
const b = await tg.upsertNode({
id: "tnode_ot_assoc_1",
name: "opentargets_get_target_associations_test",
description: "Test atomic node B.",
capabilityTags: ["target_disease"],
inputKind: "json",
outputKind: "json",
status: "verified",
ownerProcess: "node",
specJson: { name: "opentargets_get_target_associations_test" },
createdBy: "system",
});
const contract = {
produces: {
type: "object" as const,
properties: { ensembl_id: { type: "string" as const } },
required: ["ensembl_id"],
},
consumes: {
type: "object" as const,
properties: { ensembl_id: { type: "string" as const } },
required: ["ensembl_id"],
},
};
await tg.upsertEdge({
id: "tedge_contract_1",
fromNode: a.id,
toNode: b.id,
relation: "feeds",
weight: 1.0,
contract,
});
const edges = await tg.listEdges([a.id, b.id]);
const ours = edges.find(
(e) => e.fromNode === a.id && e.toNode === b.id && e.relation === "feeds",
);
assert.ok(ours, "edge must be persisted");
assert.ok(ours!.contract, "contract must round-trip");
assert.equal(
ours!.contract!.produces.properties!["ensembl_id"]!.type,
"string",
);
});
// -------------------------------------------------- validateContract pure (#156)
await t.test("validateContract flags missing fields and type mismatches", async () => {
const okRes = tg.validateContract({
produces: {
type: "object",
properties: { x: { type: "string" }, y: { type: "number" } },
},
consumes: {
type: "object",
properties: { x: { type: "string" } },
required: ["x"],
},
});
assert.equal(okRes.ok, true);
const missing = tg.validateContract({
produces: { type: "object", properties: { other: { type: "string" } } },
consumes: {
type: "object",
properties: { x: { type: "string" } },
required: ["x"],
},
});
assert.equal(missing.ok, false);
assert.ok(
missing.issues.some((i) => i.field === "x" && i.reason === "missing_in_produces"),
);
const mismatch = tg.validateContract({
produces: { type: "object", properties: { x: { type: "number" } } },
consumes: {
type: "object",
properties: { x: { type: "string" } },
required: ["x"],
},
});
assert.equal(mismatch.ok, false);
assert.ok(
mismatch.issues.some((i) => i.field === "x" && i.reason === "type_mismatch"),
);
});
// -------------------------------------------------- seed structural invariants (#156)
await t.test("seed: every persisted edge carries a contract; aliases compose ≥2 atoms", async () => {
const seedMod = await import("../tool-graph-seed");
const { ATOMIC_NODES, ATOMIC_EDGES, TOOL_EDGES, ALIAS_TOOLS, DATA_SOURCES, deriveContract, aliasMembersOf } =
seedMod.__SEED_INTERNALS__;
// Universe of valid endpoint names: atoms ∪ aliases ∪ data sources.
const atomNames = new Set(ATOMIC_NODES.map((n) => n.name));
const aliasNames: Set<string> = ALIAS_TOOLS;
const dataNames = new Set(DATA_SOURCES.map((d) => d.name));
const known = new Set<string>([...atomNames, ...aliasNames, ...dataNames]);
for (const edge of ATOMIC_EDGES) {
assert.ok(
known.has(edge.from),
`ATOMIC_EDGES from "${edge.from}" must reference a known node`,
);
assert.ok(
known.has(edge.to),
`ATOMIC_EDGES to "${edge.to}" must reference a known node`,
);
}
// Every alias must compose ≥2 atomic members (i.e. real decomposition,
// not a degenerate one-step "alias"). ALIAS_TOOLS is a Set<string> of
// alias names.
for (const aliasName of ALIAS_TOOLS) {
const members = aliasMembersOf(aliasName);
assert.ok(
members.length >= 2,
`alias "${aliasName}" must compose ≥2 atoms, got ${members.length}`,
);
}
// deriveContract on a known atomic feeds-edge must validate.
// Pick the first feeds edge between two known atoms.
const feeds = ATOMIC_EDGES.find(
(e) =>
e.relation === "feeds" &&
atomNames.has(e.from) &&
atomNames.has(e.to),
);
assert.ok(feeds, "expected at least one atom→atom feeds edge in ATOMIC_EDGES");
const c = deriveContract(feeds!);
assert.ok(c, "deriveContract must produce a contract for known feeds edge");
const v = tg.validateContract(c);
assert.equal(
v.ok,
true,
`derived contract for ${feeds!.from}${feeds!.to} must validate; issues=${JSON.stringify(v.issues)}`,
);
// Heuristic TOOL_EDGES are coarser (cross-domain adjacency) and per
// #156 must NOT carry hand-authored contracts — they declare only
// (from,to,relation) and the seed mechanically derives the contract
// from each endpoint's declared schema. Assert the entries are
// shape-only and that the persisted edges they produce all carry
// a contract (see SEED_INTERNALS-backed structural test below).
for (const e of TOOL_EDGES) {
assert.ok(
!("contract" in e),
`TOOL_EDGES entry ${e.from}${e.to} (${e.relation}) must NOT carry a hand-authored contract — derive from endpoint schemas instead`,
);
}
// Substrate-drift regression (per #156 spec line 32). For every
// backed_by edge whose upstream declares an explicit
// `consumesSubstrateSchema`, the derived contract's `consumes`
// side must match that declaration (not the substrate's
// outputSchema). And the contract must validate as long as the
// substrate produces every required substrate-row field.
const backedBy = ATOMIC_EDGES.filter((e) => e.relation === "backed_by");
let substrateChecks = 0;
for (const e of backedBy) {
const upNode = ATOMIC_NODES.find((n: { name: string }) => n.name === e.from);
const dsNode = DATA_SOURCES.find((n: { name: string }) => n.name === e.to);
if (!upNode || !dsNode || !upNode.consumesSubstrateSchema) continue;
substrateChecks += 1;
const derived = deriveContract(e);
assert.deepEqual(
derived.consumes,
upNode.consumesSubstrateSchema,
`backed_by ${e.from}${e.to} consumes side must equal upstream's consumesSubstrateSchema`,
);
assert.deepEqual(
derived.produces,
dsNode.outputSchema,
`backed_by ${e.from}${e.to} produces side must equal substrate's outputSchema`,
);
}
assert.ok(
substrateChecks > 0,
"expected at least one backed_by edge whose upstream declares consumesSubstrateSchema",
);
// Drift-injection: synthesize a fake atomic op whose
// consumesSubstrateSchema requires a field the substrate does NOT
// produce. The derived contract must FAIL structural validation —
// proving substrate-shape drift is detectable at the seam.
const fakeOp = {
name: "__drift_test_op",
capabilityTags: ["test"],
sideEffectKind: "read" as const,
ownerProcess: "node" as const,
inputSchema: { type: "object" },
outputSchema: { type: "object" },
consumesSubstrateSchema: {
type: "object",
properties: {
field_that_substrate_does_not_emit: { type: "string" },
},
required: ["field_that_substrate_does_not_emit"],
},
};
const fakeContract: tg.ContractSpec = {
produces: DATA_SOURCES[0].outputSchema as Record<string, unknown>,
consumes: fakeOp.consumesSubstrateSchema as Record<string, unknown>,
};
const driftV = tg.validateContract(fakeContract);
assert.equal(
driftV.ok,
false,
"substrate-drift contract must fail structural validation",
);
assert.ok(
driftV.issues.some(
(i: { reason: string }) => i.reason === "missing_in_produces",
),
"drift validation must report missing_in_produces",
);
});
// -------------------------------------------------- planner provisional preservation (#156)
await t.test("seed cleanup preserves provisional uncontracted edges", async () => {
// Insert two provisional nodes plus an uncontracted edge between
// them. Both endpoints are outside the curated seed (TOOLS /
// DATA_SOURCES / ATOMIC_NODES). NOTE: despite this test's title, the
// assertion at the end expects the seed's stale-cleanup to REMOVE the
// edge, because it carries no contract. This is the regression test
// for the scope of the cleanup query.
const provA = await tg.upsertNode({
id: "tnode_prov_a",
name: "provisional_atom_alpha",
description: "Planner-extended provisional node A.",
capabilityTags: ["custom_capability"],
inputKind: "json",
outputKind: "json",
status: "provisional",
ownerProcess: "node",
specJson: { name: "provisional_atom_alpha" },
createdBy: "planner",
});
const provB = await tg.upsertNode({
id: "tnode_prov_b",
name: "provisional_atom_beta",
description: "Planner-extended provisional node B.",
capabilityTags: ["custom_capability"],
inputKind: "json",
outputKind: "json",
status: "provisional",
ownerProcess: "node",
specJson: { name: "provisional_atom_beta" },
createdBy: "planner",
});
await tg.upsertEdge({
id: "tedge_prov_uncontracted",
fromNode: provA.id,
toNode: provB.id,
relation: "feeds",
weight: 1.0,
// contract intentionally omitted — provisional edges are free of
// contracts until an admin verifies them.
});
// Run seed (full driver — touches the same DB).
const seedMod = await import("../tool-graph-seed");
await seedMod.seedToolGraph();
// Per #156 spec line 82 ("Every edge ships with a contract or it
// does not exist"), the seed cleanup must REMOVE this uncontracted
// edge — even though both endpoints are provisional planner nodes.
// The invariant is absolute; planner auto-extension code must
// synthesize a contract before upserting any edge.
const edges = await tg.listEdges([provA.id, provB.id]);
const survived = edges.find(
(e) => e.fromNode === provA.id && e.toNode === provB.id && e.relation === "feeds",
);
assert.equal(
survived,
undefined,
"uncontracted edges must be removed by seed cleanup (spec invariant)",
);
});
// -------------------------------------------------- seam health (#157)
await t.test("seam-health detector: clean payload yields perfect score", async () => {
const { evaluateSeam } = await import("../seam-health-detector");
const r = evaluateSeam(
{
produces: { type: "object" },
consumes: {
type: "object",
properties: { id: { type: "string" }, score: { type: "number" } },
required: ["id"],
},
},
{ id: "abc", score: 0.5 },
);
assert.equal(r.missingFields.length, 0);
assert.equal(r.contractIssues.length, 0);
assert.equal(r.coverage, 1);
assert.equal(r.healthScore, 1);
});
await t.test("seam-health detector: missing required field is flagged and scored down", async () => {
const { evaluateSeam } = await import("../seam-health-detector");
const r = evaluateSeam(
{
produces: { type: "object" },
consumes: {
type: "object",
properties: { id: { type: "string" }, score: { type: "number" } },
required: ["id", "score"],
},
},
{ id: "abc" },
);
assert.deepEqual(r.missingFields, ["score"]);
assert.equal(r.coverage, 0.5);
assert.ok(r.healthScore < 0.5, `expected healthScore < 0.5, got ${r.healthScore}`);
});
await t.test("seam-health detector: type mismatch raises contract issue", async () => {
const { evaluateSeam } = await import("../seam-health-detector");
const r = evaluateSeam(
{
produces: { type: "object" },
consumes: {
type: "object",
properties: { items: { type: "array" }, name: { type: "string" } },
required: ["items"],
},
},
{ items: "not-an-array", name: 42 },
);
assert.ok(r.contractIssues.includes("expected_array:items"));
assert.ok(r.contractIssues.includes("type_mismatch:name"));
});
await t.test("seam-health detector: advisory contract suppresses contract issues", async () => {
const { evaluateSeam } = await import("../seam-health-detector");
const r = evaluateSeam(
{
produces: { type: "object" },
consumes: {
type: "object",
properties: { items: { type: "array" } },
required: ["items"],
},
mappingHints: { advisory: true },
},
{ items: "broken" },
);
assert.deepEqual(r.contractIssues, []);
});
await t.test("seam-health detector: malformed contract degrades to empty sample (no throw)", async () => {
const { evaluateSeam } = await import("../seam-health-detector");
const r = evaluateSeam(null, { foo: 1 });
assert.equal(r.healthScore, 1);
assert.equal(r.coverage, 1);
});
await t.test("captureSeamHealth + recomputeEdgeHealth round-trip: missing field drops score, clean recovery raises EMA", async () => {
  // Set up a tiny seam: producer → consumer whose `consumes` contract
  // requires both `id` and `score`. Then write 3 broken samples (no
  // `score`) followed by 3 clean samples through the public capture API
  // and check that the EMA actually moves upward on recovery.
  const edgeId = "tedge_seam_test";
  const prod = await tg.upsertNode({
    id: "tnode_seam_prod",
    name: "seam_test_producer",
    description: "producer",
    capabilityTags: ["test_producer"],
    inputKind: "json",
    outputKind: "json",
    status: "verified",
    ownerProcess: "node",
    specJson: { name: "seam_test_producer" },
    createdBy: "system",
  });
  const cons = await tg.upsertNode({
    id: "tnode_seam_cons",
    name: "seam_test_consumer",
    description: "consumer",
    capabilityTags: ["test_consumer"],
    inputKind: "json",
    outputKind: "json",
    status: "verified",
    ownerProcess: "node",
    specJson: { name: "seam_test_consumer" },
    createdBy: "system",
  });
  await tg.upsertEdge({
    id: edgeId,
    fromNode: prod.id,
    toNode: cons.id,
    relation: "feeds",
    weight: 1.0,
    contract: {
      produces: { type: "object" },
      consumes: {
        type: "object",
        properties: { id: { type: "string" }, score: { type: "number" } },
        required: ["id", "score"],
      },
    },
  });
  // Looks up this seam's aggregated health row (undefined when absent).
  const findSeamHealth = async () =>
    (await tg.listEdgeHealth()).find((h) => h.edgeId === edgeId);

  // 3 broken samples (missing score)
  for (let i = 0; i < 3; i++) {
    const r = await tg.captureSeamHealth({
      upstreamName: "seam_test_producer",
      downstreamName: "seam_test_consumer",
      payload: { id: `x${i}` },
    });
    assert.ok(r, "captureSeamHealth must return a result for declared edge");
    assert.deepEqual(r!.sample.missingFields, ["score"]);
  }
  const agg = await tg.recomputeEdgeHealth();
  assert.ok(agg.samplesFolded >= 3);
  const broken = await findSeamHealth();
  assert.ok(broken, "edge health row must exist after recompute");
  assert.equal(broken!.traversalCount, 3);
  assert.ok(broken!.missingFieldCount >= 3);
  assert.ok(broken!.emaHealthScore < 0.7, `expected unhealthy EMA after misses, got ${broken!.emaHealthScore}`);
  assert.deepEqual(broken!.topMissingFields, { score: 3 });
  // Remember the degraded EMA so the recovery check below compares against
  // a real baseline rather than a fixed floor.
  const brokenEma = broken!.emaHealthScore;

  // listUnhealthyEdges surfaces it
  const bad = await tg.listUnhealthyEdges({ minTraversals: 1, maxHealthScore: 0.85 });
  assert.ok(bad.some((e) => e.edgeId === edgeId), "unhealthy listing must include the broken seam");

  // 3 clean samples — EMA should recover upward
  for (let i = 0; i < 3; i++) {
    await tg.captureSeamHealth({
      upstreamName: "seam_test_producer",
      downstreamName: "seam_test_consumer",
      payload: { id: `y${i}`, score: i * 0.1 },
    });
  }
  await tg.recomputeEdgeHealth();
  const recovered = await findSeamHealth();
  assert.ok(recovered, "edge health row must still exist after recovery");
  assert.equal(recovered!.traversalCount, 6);
  assert.ok(
    recovered!.emaHealthScore > 0.4,
    `expected EMA to climb after clean samples, got ${recovered!.emaHealthScore}`,
  );
  assert.ok(
    recovered!.emaHealthScore > brokenEma,
    `expected EMA to rise above ${brokenEma} after clean samples, got ${recovered!.emaHealthScore}`,
  );

  // getEdgeHealth returns recent samples
  const detail = await tg.getEdgeHealth(edgeId, 10);
  assert.ok(detail.edge);
  assert.ok(detail.recent.length >= 1);
  assert.ok(
    typeof (detail.recent[0]!.payload as { health_score?: number }).health_score === "number",
  );
});
await t.test("captureSeamHealth is non-blocking when downstream is unknown", async () => {
  // Capturing against a downstream name that should not exist must
  // resolve to null rather than reject.
  const outcome = await tg.captureSeamHealth({
    downstreamName: "definitely_does_not_exist",
    payload: { foo: 1 },
  });
  assert.equal(outcome, null);
});
// -------------------------------------------------- deprecation (#159)
await t.test("classifier: cold node flagged after coldDays of inactivity", async () => {
  // Seed a verified node, then make both the node and its single
  // invocation row look 90 days old. With coldDays=30 that is stale
  // enough to qualify.
  const coldNode = await tg.upsertNode({
    id: "tnode_cold_1",
    name: "cold_widget_search",
    description: "An unused capability.",
    capabilityTags: ["cold_widget"],
    inputKind: "json",
    outputKind: "json",
    status: "verified",
    ownerProcess: "node",
    specJson: { name: "cold_widget_search" },
    createdBy: "system",
  });
  // Backdating created_at lets the detector's age filter accept the node.
  const backdated = new Date(Date.now() - 90 * 86_400_000);
  await pool.query(
    `UPDATE "${schema}".tool_nodes SET created_at = $1, updated_at = $1 WHERE id = $2`,
    [backdated, coldNode.id],
  );
  await pool.query(
    `INSERT INTO "${schema}".tool_node_evidence (id, node_id, kind, payload, success, failure, created_at)
VALUES ($1, $2, 'invocation', '{}'::jsonb, 1, 0, $3)`,
    ["tev_cold_old", coldNode.id, backdated],
  );

  const { proposalsConsidered } = await tg.runDeprecationDetector({ coldDays: 30 });
  assert.ok(proposalsConsidered >= 1);

  const openCandidates = await tg.listDeprecationCandidates({ status: "open" });
  const candidate = openCandidates.find((c) => c.nodeId === coldNode.id);
  assert.ok(candidate, "cold node must produce an open candidate");
  assert.equal(candidate!.classification, "cold");

  const { supportingMetrics: metrics } = candidate!.proposalContext as {
    supportingMetrics: { invocationCount: number; daysSinceLastInvocation: number };
  };
  assert.equal(metrics.invocationCount, 1);
  assert.ok(metrics.daysSinceLastInvocation >= 30);
});
await t.test("classifier: redundant requires sibling AND health gap (false-positive guard)", async () => {
  // Two nodes sharing a tag with similar invocation counts must NOT be
  // flagged as redundant — that only applies when the sibling dominates
  // by ≥10× AND has a healthy edge AND the target has an unhealthy edge.
  const seedSibling = (id: string, name: string, description: string) =>
    tg.upsertNode({
      id,
      name,
      description,
      capabilityTags: ["sibling_capability"],
      inputKind: "json",
      outputKind: "json",
      status: "verified",
      ownerProcess: "node",
      specJson: { name },
      createdBy: "system",
    });
  const sibA = await seedSibling("tnode_redA", "popular_search", "Popular implementation.");
  const sibB = await seedSibling("tnode_redB", "rival_search", "Equal-popularity rival.");

  const backdated = new Date(Date.now() - 90 * 86_400_000);
  await pool.query(
    `UPDATE "${schema}".tool_nodes SET created_at = $1 WHERE id IN ($2,$3)`,
    [backdated, sibA.id, sibB.id],
  );

  // Five invocation rows each, interleaved A then B → equal usage, so
  // neither sibling may be flagged redundant.
  const insertInvocationSql = `INSERT INTO "${schema}".tool_node_evidence (id, node_id, kind, payload, success, failure)
VALUES ($1, $2, 'invocation', '{}'::jsonb, 1, 0)`;
  for (let round = 0; round < 5; round++) {
    for (const [label, sibling] of [["a", sibA], ["b", sibB]] as const) {
      await pool.query(insertInvocationSql, [`tev_eq_${label}_${round}`, sibling.id]);
    }
  }

  await tg.runDeprecationDetector({ coldDays: 30 });
  const allCandidates = await tg.listDeprecationCandidates({ status: "any" });
  const noneRedundant = allCandidates.every(
    (c) => c.classification !== "redundant" || (c.nodeId !== sibA.id && c.nodeId !== sibB.id),
  );
  assert.ok(noneRedundant, "evenly used siblings must not be flagged redundant");
});
await t.test("classifier: superseded triggered by spec_json.replacedBy", async () => {
  // The spec names a replacement node; after backdating, the detector is
  // expected to classify this node as superseded and echo the replacement.
  const replacementId = "tnode_modern";
  const splicedNode = await tg.upsertNode({
    id: "tnode_sup_1",
    name: "ancient_widget",
    description: "A node that has been spliced.",
    capabilityTags: ["splice_test"],
    inputKind: "json",
    outputKind: "json",
    status: "verified",
    ownerProcess: "node",
    specJson: { name: "ancient_widget", replacedBy: replacementId },
    createdBy: "system",
  });
  const backdated = new Date(Date.now() - 90 * 86_400_000);
  await pool.query(
    `UPDATE "${schema}".tool_nodes SET created_at = $1 WHERE id = $2`,
    [backdated, splicedNode.id],
  );

  await tg.runDeprecationDetector({ coldDays: 30 });
  const allCandidates = await tg.listDeprecationCandidates({ status: "any" });
  const candidate = allCandidates.find((c) => c.nodeId === splicedNode.id);
  assert.ok(candidate, "spec_json.replacedBy must produce a candidate");
  assert.equal(candidate!.classification, "superseded");

  const { supportingMetrics } = candidate!.proposalContext as {
    supportingMetrics: { replacedBy?: string };
  };
  assert.equal(supportingMetrics.replacedBy, replacementId);
});
await t.test("resolveSubgraph excludes deprecated nodes", async () => {
// Promote one of our seeded literature nodes through the deprecation
// flow and confirm it disappears from resolveSubgraph.
// NOTE: this mutates the shared seeded pubmed node, so any later subtest
// that resolves a subgraph will no longer see it.
// Backdate the node 200 days so the detector's age filter accepts it.
const ageOk = new Date(Date.now() - 200 * 86_400_000);
await pool.query(
`UPDATE "${schema}".tool_nodes SET created_at = $1, updated_at = $1 WHERE id = $2`,
[ageOk, pubmedId],
);
await tg.runDeprecationDetector({ coldDays: 30 });
const cands = await tg.listDeprecationCandidates({ status: "open" });
const mine = cands.find((c) => c.nodeId === pubmedId);
assert.ok(mine, "pubmed seed must now be a candidate");
// Approve the open candidate; deprecateNode takes the candidate id, not
// the node id.
await tg.deprecateNode(mine!.id, "tester");
const sg = await tg.resolveSubgraph({ intentText: "find pubmed papers" });
assert.ok(!sg.nodes.some((n) => n.id === pubmedId), "deprecated node must not appear in subgraph");
// The direct-reference helper must still resolve `search_pubmed`
// (presumably the seeded pubmed node's name — confirm against the seed)
// and flag the result as deprecated.
const direct = await tg.resolveNodeForDirectReference("search_pubmed");
assert.ok(direct && direct.deprecated === true, "direct reference must flag deprecation");
});
await t.test("defer + re-arm windows are respected by the detector", async () => {
// Walks one candidate through open → deferred → rejected and checks the
// detector's skip counters after each transition. The steps are strictly
// ordered: every detector pass depends on the state left by the one before.
const node = await tg.upsertNode({
id: "tnode_defer_1",
name: "defer_target",
description: "Defer flow target.",
capabilityTags: ["defer_capability"],
inputKind: "json",
outputKind: "json",
status: "verified",
ownerProcess: "node",
specJson: { name: "defer_target" },
createdBy: "system",
});
// Backdate the node 200 days so the first detector pass proposes it.
const ageOk = new Date(Date.now() - 200 * 86_400_000);
await pool.query(
`UPDATE "${schema}".tool_nodes SET created_at = $1, updated_at = $1 WHERE id = $2`,
[ageOk, node.id],
);
await tg.runDeprecationDetector({ coldDays: 30 });
const opened = (await tg.listDeprecationCandidates({ status: "open" })).find(
(c) => c.nodeId === node.id,
);
assert.ok(opened, "first detector pass must open the candidate");
// Defer path: the third argument (14) is presumably the defer window in
// days — confirm against deferDeprecationCandidate's signature.
await tg.deferDeprecationCandidate(opened!.id, "tester", 14);
const r1 = await tg.runDeprecationDetector({ coldDays: 30 });
assert.ok(r1.skippedDeferred >= 1, "deferred candidate must be skipped");
const stillDeferred = (
await tg.listDeprecationCandidates({ status: "deferred" })
).find((c) => c.nodeId === node.id);
assert.ok(stillDeferred, "row stays deferred when defer_until is in the future");
// Reject path: re-arm window also blocks re-proposal.
// The same candidate row is rejected here; the third argument (30) is
// presumably the re-arm window in days — confirm against the signature.
await tg.rejectDeprecationCandidate(opened!.id, "tester", 30);
const r2 = await tg.runDeprecationDetector({ coldDays: 30 });
assert.ok(r2.skippedReArm >= 1, "rejected candidate must be skipped");
});
await t.test("archive job moves rows transactionally and leaves nothing in hot tables", async () => {
  const edgeId = "tedge_arch_1";
  const evidenceId = "tev_arch_1";
  const candidateId = "tdep_arch_1";

  // Counts rows with the given primary key in one table of the ephemeral
  // test schema. `table` is always a literal from this test, never input.
  const rowsIn = async (table: string, id: string) => {
    const res = await pool.query(
      `SELECT 1 FROM "${schema}".${table} WHERE id = $1`,
      [id],
    );
    return res.rowCount;
  };

  const node = await tg.upsertNode({
    id: "tnode_arch_1",
    name: "archive_target",
    description: "Archive flow target.",
    capabilityTags: ["arch_capability"],
    inputKind: "json",
    outputKind: "json",
    status: "verified",
    ownerProcess: "node",
    specJson: { name: "archive_target" },
    createdBy: "system",
  });
  // Add an edge + an evidence row so all three archive tables receive
  // contributions.
  const peer = await tg.upsertNode({
    id: "tnode_arch_peer",
    name: "archive_peer",
    description: "peer",
    capabilityTags: ["arch_capability"],
    inputKind: "json",
    outputKind: "json",
    status: "verified",
    ownerProcess: "node",
    specJson: { name: "archive_peer" },
    createdBy: "system",
  });
  await tg.upsertEdge({
    id: edgeId,
    fromNode: node.id,
    toNode: peer.id,
    relation: "feeds",
    weight: 1.0,
  });
  await pool.query(
    `INSERT INTO "${schema}".tool_node_evidence (id, node_id, kind, payload, success, failure)
VALUES ($1, $2, 'invocation', '{}'::jsonb, 1, 0)`,
    [evidenceId, node.id],
  );
  // Force-deprecate by creating a candidate row directly + approving.
  await pool.query(
    `INSERT INTO "${schema}".tool_deprecation_candidates (id, node_id, classification, status, proposal_context, created_at, updated_at)
VALUES ($1, $2, 'cold', 'open', '{}'::jsonb, now(), now())`,
    [candidateId, node.id],
  );
  await tg.deprecateNode(candidateId, "tester");
  // Backdate updated_at past the archive cutoff (default 180 days).
  await pool.query(
    `UPDATE "${schema}".tool_nodes SET updated_at = $1 WHERE id = $2`,
    [new Date(Date.now() - 200 * 86_400_000), node.id],
  );

  const r = await tg.runArchiveJob();
  assert.ok(r.nodesArchived >= 1, "archive job must move at least one node");
  assert.ok(r.edgesArchived >= 1);
  assert.ok(r.evidenceArchived >= 1);

  // Hot tables must be clean — including the evidence row, which the
  // original assertions never looked at despite the test's title.
  assert.equal(await rowsIn("tool_nodes", node.id), 0, "node must leave the hot table");
  assert.equal(await rowsIn("tool_edges", edgeId), 0, "edge must leave the hot table");
  assert.equal(await rowsIn("tool_node_evidence", evidenceId), 0, "evidence must leave the hot table");
  // Archive tables must hold exactly one copy of the node and the edge.
  assert.equal(await rowsIn("tool_nodes_archive", node.id), 1, "node must land in the archive");
  assert.equal(await rowsIn("tool_edges_archive", edgeId), 1, "edge must land in the archive");
});
// -------------------------------------------------- spawn templates (#161)
await t.test("schemaFingerprint strips field names but preserves structure", async () => {
  const { schemaFingerprint } = await import("../spawn-templates");

  // Baseline: one required string field plus one optional number field.
  const baseline = schemaFingerprint({
    type: "object",
    properties: {
      target_id: { type: "string" },
      score: { type: "number" },
    },
    required: ["target_id"],
  });
  // Same shape, every field renamed → fingerprint must be unchanged.
  const renamed = schemaFingerprint({
    type: "object",
    properties: {
      ensembl_id: { type: "string" },
      confidence: { type: "number" },
    },
    required: ["ensembl_id"],
  });
  // Same names as the baseline but the required field is now an integer
  // → fingerprint must differ.
  const retyped = schemaFingerprint({
    type: "object",
    properties: {
      target_id: { type: "integer" },
      score: { type: "number" },
    },
    required: ["target_id"],
  });

  assert.equal(baseline, renamed, "field names must not influence the fingerprint");
  assert.notEqual(baseline, retyped, "type changes must alter the fingerprint");
});
await t.test("computeSeamFingerprint is deterministic and salted", async () => {
  const templates = await import("../spawn-templates");
  const seam: import("../spawn-templates").SeamFingerprintInput = {
    failureMode: "missing_required_field",
    downstreamInputSchema: {
      type: "object",
      properties: { id: { type: "string" }, count: { type: "integer" } },
      required: ["id", "count"],
    },
    upstreamOutputSchema: {
      type: "object",
      properties: { id: { type: "string" }, extra: { type: "boolean" } },
    },
    missingRequiredFieldNames: ["count"],
    unusedFieldNames: ["extra"],
    capabilityTag: "demo_tag",
  };

  const first = templates.computeSeamFingerprint(seam);
  const second = templates.computeSeamFingerprint(seam);

  assert.equal(first.hash, second.hash, "same input → same hash");
  // Field names are reported as their declared types in the breakdown.
  assert.deepEqual(first.breakdown.missingFieldTypes, ["integer"]);
  assert.deepEqual(first.breakdown.unusedFieldTypes, ["boolean"]);
  // Salting is not exercised directly: changing the salt would mean
  // mutating the environment, which is not safe here. Instead verify that
  // the canonical string is stable and the hash is 64 lowercase hex chars.
  assert.equal(first.canonical, second.canonical);
  assert.match(first.hash, /^[0-9a-f]{64}$/);
});
await t.test("searchTemplates returns null when no rows exist", async () => {
  // Nothing has been persisted for this failure mode / schema yet, so the
  // lookup is expected to come back empty.
  const { searchTemplates } = await import("../spawn-templates");
  const found = await searchTemplates({
    failureMode: "capability_gap",
    downstreamInputSchema: { type: "object", properties: {} },
    capabilityTag: "no_match_tag_xyzzy",
  });
  assert.equal(found, null);
});
await t.test("persistTemplateOnPromote → exact match found by searchTemplates", async () => {
  const templates = await import("../spawn-templates");
  const nodeName = "auto_exact_match_test_aaa111";

  // The same fingerprint input is used to persist and then to search, so
  // the lookup should hit the stored template exactly.
  const seam: import("../spawn-templates").SeamFingerprintInput = {
    failureMode: "capability_gap",
    downstreamInputSchema: {
      type: "object",
      properties: { query: { type: "string" } },
      required: ["query"],
    },
    capabilityTag: "exact_match_test",
  };

  const stored = await templates.persistTemplateOnPromote({
    fingerprintInput: seam,
    promotedNodeName: nodeName,
    promotedInputSchema: {
      type: "object",
      properties: { query: { type: "string" } },
    },
    promotedOutputSchema: { type: "object" },
    handlerSkeleton:
      "export async function invoke(args) { return { ok: true }; }",
    specSkeleton: {
      name: nodeName,
      description: "templated handler",
      parameters: {
        type: "object",
        properties: { query: { type: "string" } },
        required: ["query"],
      },
    },
  });
  assert.ok(stored, "template must persist");

  const hit = await templates.searchTemplates(seam);
  assert.ok(hit, "exact match must be found");
  assert.equal(hit!.strength, "exact");
  assert.equal(hit!.score, 1);
  assert.equal(hit!.template.id, stored!.id);
});
await t.test("structurally identical seam across different tags is EXACT (tag is not in fingerprint)", async () => {
  const templates = await import("../spawn-templates");
  const nodeName = "auto_tag_alpha_xxx";
  const idOnlySchema = {
    type: "object" as const,
    properties: { id: { type: "string" as const } },
    required: ["id"],
  };

  // Persist under one capability tag…
  await templates.persistTemplateOnPromote({
    fingerprintInput: {
      failureMode: "missing_required_field",
      downstreamInputSchema: idOnlySchema,
      upstreamOutputSchema: { type: "object", properties: {} },
      missingRequiredFieldNames: ["id"],
      capabilityTag: "tag_alpha_for_exact_test",
    },
    promotedNodeName: nodeName,
    promotedInputSchema: idOnlySchema,
    promotedOutputSchema: { type: "object" },
    handlerSkeleton: "export async function invoke(a) { return { id: a.id }; }",
    specSkeleton: { name: nodeName, parameters: idOnlySchema },
  });

  // …then search with an identical seam that differs only in its tag.
  const hit = await templates.searchTemplates({
    failureMode: "missing_required_field",
    downstreamInputSchema: idOnlySchema,
    upstreamOutputSchema: { type: "object", properties: {} },
    missingRequiredFieldNames: ["id"],
    capabilityTag: "tag_beta_for_exact_test",
  });
  assert.ok(hit, "match must surface");
  assert.equal(hit!.strength, "exact", "tag-only differences must collapse to exact");
  assert.equal(hit!.score, 1);
});
await t.test("genuine NEAR match: same failure_mode + same downstream fp + Jaccard ≥ 0.8 over slot bag", async () => {
  const templates = await import("../spawn-templates");
  const seedName = "auto_near_real_seed_xxx";
  const fiveStrings = {
    type: "object" as const,
    properties: {
      a: { type: "string" as const },
      b: { type: "string" as const },
      c: { type: "string" as const },
      d: { type: "string" as const },
      e: { type: "string" as const },
    },
    required: ["a", "b", "c", "d", "e"],
  };

  // Seed: four of the five required fields are missing.
  await templates.persistTemplateOnPromote({
    fingerprintInput: {
      failureMode: "missing_required_field",
      downstreamInputSchema: fiveStrings,
      upstreamOutputSchema: { type: "object", properties: {} },
      missingRequiredFieldNames: ["a", "b", "c", "d"],
      capabilityTag: "near_real_seed",
    },
    promotedNodeName: seedName,
    promotedInputSchema: fiveStrings,
    promotedOutputSchema: { type: "object" },
    handlerSkeleton: "export async function invoke(args) { return args; }",
    specSkeleton: { name: seedName, parameters: fiveStrings },
  });

  // Query: all five fields missing, a strict superset of the seed's slot
  // bag. Jaccard is 4/5 = 0.8, and the different miss list yields a
  // different canonical string and hash, so the exact key misses and the
  // result must be a near hit scoring 0.8.
  const hit = await templates.searchTemplates({
    failureMode: "missing_required_field",
    downstreamInputSchema: fiveStrings,
    upstreamOutputSchema: { type: "object", properties: {} },
    missingRequiredFieldNames: ["a", "b", "c", "d", "e"],
    capabilityTag: "near_real_query",
  });
  assert.ok(hit, "near match must surface");
  assert.equal(hit!.strength, "near");
  const inNearBand = hit!.score >= 0.8 && hit!.score < 1;
  assert.ok(inNearBand, `score must be in [0.8, 1) (got ${hit!.score})`);
  assert.ok(
    hit!.parameterizationGaps.includes("missing_field_types"),
    "missing_field_types must be flagged as a parameterization gap",
  );
});
await t.test("NO near match when downstream fingerprint differs (hard precondition)", async () => {
  // The downstream schema (a single required boolean) is meant to match
  // no stored template, so the search must return nothing at all.
  const { searchTemplates } = await import("../spawn-templates");
  const lonelyField = "totally_unique_field";
  const hit = await searchTemplates({
    failureMode: "missing_required_field",
    downstreamInputSchema: {
      type: "object",
      properties: { [lonelyField]: { type: "boolean" } },
      required: [lonelyField],
    },
    upstreamOutputSchema: { type: "object", properties: {} },
    missingRequiredFieldNames: [lonelyField],
    capabilityTag: "downstream_fp_mismatch",
  });
  assert.equal(hit, null, "downstream fingerprint mismatch must short-circuit near search");
});
await t.test("privacy: persisted template never contains user payload strings", async () => {
  const templates = await import("../spawn-templates");
  // Two strings that look like user data are planted in the reviewer-edited
  // spec and in the handler skeleton; ingestion is expected to strip both.
  const plantedSecret = "user_secret_value_aaaaaaaaaaaaaaaaaaaa";
  const plantedExample = "patient-12345-PHI-record-DO-NOT-LEAK";
  const nodeName = "auto_privacy_test_tag_zzz999";

  const stored = await templates.persistTemplateOnPromote({
    fingerprintInput: {
      failureMode: "capability_gap",
      downstreamInputSchema: { type: "object", properties: {} },
      capabilityTag: "privacy_test_tag",
    },
    promotedNodeName: nodeName,
    promotedInputSchema: { type: "object" },
    promotedOutputSchema: { type: "object" },
    handlerSkeleton: `export async function invoke(args) {
const example = "${plantedExample}";
return { example, secret: "${plantedSecret}" };
}`,
    specSkeleton: {
      name: nodeName,
      description: "OK desc",
      parameters: { type: "object", properties: {} },
      // Extra reviewer-added fields — the allowlist must drop them.
      leaked_comment: plantedSecret,
      examples: [plantedExample],
    },
  });
  assert.ok(stored);
  templates.__assertNoUserDataLeaked(stored!, [plantedSecret, plantedExample]);
});
await t.test("auto-demotion: low success rate after threshold offers triggers silent demote", async () => {
  const st = await import("../spawn-templates");
  const persisted = await st.persistTemplateOnPromote({
    fingerprintInput: {
      failureMode: "type_mismatch",
      downstreamInputSchema: {
        type: "object",
        properties: { x: { type: "string" } },
        required: ["x"],
      },
      capabilityTag: "demote_test_tag",
    },
    promotedNodeName: "auto_demote_test_xxx",
    promotedInputSchema: { type: "object" },
    promotedOutputSchema: { type: "object" },
    handlerSkeleton: "stub",
    specSkeleton: {},
  });
  assert.ok(persisted);

  // Puts one environment variable back exactly as it was found.
  const restoreEnv = (key: string, previous: string | undefined): void => {
    if (previous === undefined) delete process.env[key];
    else process.env[key] = previous;
  };

  // Set a low threshold via env then exercise.
  const prevMin = process.env["TEMPLATE_DEMOTE_MIN_OFFERS"];
  const prevRate = process.env["TEMPLATE_DEMOTE_SUCCESS_RATE"];
  process.env["TEMPLATE_DEMOTE_MIN_OFFERS"] = "5";
  process.env["TEMPLATE_DEMOTE_SUCCESS_RATE"] = "0.3";
  try {
    // No re-import here: a second `import()` of the same specifier returns
    // the already-evaluated module instance, so it cannot "reload"
    // anything. The overrides above therefore only take effect if the
    // module reads these variables when its functions are called rather
    // than once at import time — TODO confirm against spawn-templates.
    //
    // Record 6 offers (above the minimum of 5) and a single promote
    // result, which is presumably counted as one success (rate ≈ 0.17,
    // below the 0.3 floor).
    for (let i = 0; i < 6; i += 1) {
      await st.recordTemplateOffered(persisted!.id);
    }
    await st.recordTemplatePromoteResult(persisted!.id);
    const r = await st.maybeAutoDemote(persisted!.id);
    assert.equal(r.demoted, true, "must demote when below floor");
    // Demoted templates excluded from search.
    const search = await st.searchTemplates({
      failureMode: "type_mismatch",
      downstreamInputSchema: {
        type: "object",
        properties: { x: { type: "string" } },
        required: ["x"],
      },
      capabilityTag: "demote_test_tag",
    });
    assert.equal(search, null, "demoted template must not be returned by searchTemplates");
  } finally {
    // Always restore the environment so later subtests see the original
    // thresholds, even when an assertion above throws.
    restoreEnv("TEMPLATE_DEMOTE_MIN_OFFERS", prevMin);
    restoreEnv("TEMPLATE_DEMOTE_SUCCESS_RATE", prevRate);
  }
});
await t.test(
  "end-to-end: spawn → promote → next spawn for identical seam offers the template",
  async () => {
    // Use unique capability tags to avoid touching prior tests.
    const tag1 = "e2e_template_a";
    const tag2 = "e2e_template_b";

    // Records just enough planner gaps for `tag` to reach the auto-extend
    // threshold.
    const recordGapsUntilThreshold = async (tag: string): Promise<void> => {
      for (let i = 0; i < tg.GAP_AUTO_EXTEND_THRESHOLD; i += 1) {
        await tg.recordPlannerGap(tag, { ctx: i });
      }
    };

    // Reads a template's usage counters as numbers. node-postgres returns
    // bigint columns as strings, and comparing strings with `>` is
    // lexicographic ("10" > "9" is false), so coerce before comparing.
    const readCounters = async (templateId: string) => {
      const res = await pool.query(
        `SELECT offered_count, reuse_count, success_count FROM "${schema}".tool_spawn_templates WHERE id = $1`,
        [templateId],
      );
      const row = res.rows[0];
      assert.ok(row, "template row must exist");
      return {
        reuseCount: Number(row.reuse_count),
        successCount: Number(row.success_count),
      };
    };

    // First spawn
    await recordGapsUntilThreshold(tag1);
    const created1 = await tg.autoExtendIfNeeded();
    assert.ok(created1 >= 1);
    const provs1 = await tg.listNodes({ status: "provisional" });
    const target1 = provs1.find((n) => n.name.startsWith(`auto_${tag1}_`));
    assert.ok(target1, "first provisional must exist");
    // proposalContext must always be present (template_match may be
    // null or a match — earlier tests may have seeded templates).
    const ctx1 = (target1!.spec as { proposalContext?: { template_match?: unknown } })
      .proposalContext;
    assert.ok(ctx1, "proposalContext must be present on auto-spawned node");
    // Promote it. This persists a template into the library.
    const promoted = await tg.approveNode(target1!.id, {
      templateChoice: "fresh",
      reviewer: "tester",
    });
    assert.ok(promoted, "promotion must succeed");

    // Second spawn for an identical seam shape (different tag, same
    // structure) → a template match must be offered. Either strength is
    // accepted: tag-only differences can collapse to an exact match.
    await recordGapsUntilThreshold(tag2);
    await tg.autoExtendIfNeeded();
    const provs2 = await tg.listNodes({ status: "provisional" });
    const target2 = provs2.find((n) => n.name.startsWith(`auto_${tag2}_`));
    assert.ok(target2, "second provisional must exist");
    const ctx2 = (
      target2!.spec as {
        proposalContext?: {
          template_match?: { template_id: string; strength: string } | null;
        };
      }
    ).proposalContext;
    assert.ok(
      ctx2 && ctx2.template_match,
      `second spawn must surface a template match (got ${JSON.stringify(ctx2)})`,
    );
    assert.ok(
      ctx2!.template_match!.strength === "exact" ||
        ctx2!.template_match!.strength === "near",
      "strength must be exact or near",
    );

    // Reviewer chooses Use+edit; template counters update.
    const tplId = ctx2!.template_match!.template_id;
    const before = await readCounters(tplId);
    await tg.approveNode(target2!.id, {
      templateChoice: "use_edit",
      sourceTemplateId: tplId,
      reviewer: "tester",
    });
    const after = await readCounters(tplId);
    assert.ok(
      after.reuseCount > before.reuseCount,
      "reuse_count must increment on use_edit",
    );
    assert.ok(
      after.successCount > before.successCount,
      "success_count must increment on promote-after-reuse",
    );
  },
);
// unused vars suppression
void uniprotId;
void summaryId;
void pubmedId;
});