// doatlas-2/artifacts/api-server/src/lib/__tests__/tool-graph-search.test.ts
// Uploaded by Iostream-Li: "Add files using upload-large-folder tool" (commit ff78003, verified)
/**
* tool-graph-search — Mode B (#169) end-to-end smoke test.
*
* Exercises:
* - bootstrapToolGoalsSchema (CREATE TABLE IF NOT EXISTS)
* - seedSyntheticToyData (toy dataset+evaluator+seed nodes)
* - createGoal validation (must reference verified nodes)
* - runGoalSearch end-to-end: seed candidate + at least one of the four mutation primitives
* - autoPromoteIfReady gates + audit row
* - rollbackPromotion restores prior status
* - diffSubgraphAgainstVerified returns a well-formed diff (added nodes/edges, status changes)
* - recordHighConfidenceGap (Mode A boost) spawns a provisional node
*
* Skipped when DATABASE_URL is unset.
*/
import { test } from "node:test";
import assert from "node:assert/strict";
import { randomBytes } from "node:crypto";
// Connection string as it was when this module loaded ("" when unset). The smoke
// test rewrites process.env.DATABASE_URL and puts this value back afterwards.
const ORIGINAL_DSN = process.env.DATABASE_URL ?? "";
// True when there is no connection string to test against; drives the test's `skip` option.
const SKIP = ORIGINAL_DSN.length === 0;
/**
 * Returns `dsn` with an extra `options` query parameter whose value is the
 * percent-encoded form of `-c search_path=<schema>`.
 *
 * @param dsn Connection string to extend; it is not parsed, only scanned for `?`.
 * @param schema Schema name; passed through `encodeURIComponent` before being appended.
 * @returns The DSN with the parameter appended, joined by `&` when `dsn`
 *   already contains a `?` and by `?` otherwise.
 */
function dsnWithSearchPath(dsn: string, schema: string): string {
  const separator = dsn.includes("?") ? "&" : "?";
  const searchPathOption = "options=-c%20search_path%3D" + encodeURIComponent(schema);
  return dsn + separator + searchPathOption;
}
/**
 * End-to-end smoke test for the Mode B goal-search flow.
 *
 * Creates a throwaway, randomly named schema, points DATABASE_URL at it via
 * `dsnWithSearchPath`, mirrors the subset of tool-graph tables the search
 * module needs, and then runs the subtests in order. Later subtests depend on
 * state produced by earlier ones (seed data, `goalId`, provisional nodes).
 *
 * Cleanup (schema drop + environment restore) is registered before anything is
 * mutated, so a failure during import or DDL does not leak the schema or the
 * environment overrides.
 */
test(
  "tool-graph-search Mode B end-to-end smoke",
  { skip: SKIP && "DATABASE_URL not set" },
  async (t) => {
    const schema = `tool_graph_search_test_${randomBytes(6).toString("hex")}`;

    // Snapshot the threshold variables before overriding them so they can be
    // put back exactly (including "was unset").
    const priorMinEvidence = process.env.AUTO_PROMOTE_MIN_EVIDENCE;
    const priorMinSuccessRatio = process.env.AUTO_PROMOTE_MIN_SUCCESS_RATIO;
    const restoreEnv = (key: string, prior: string | undefined): void => {
      if (prior === undefined) {
        // Assigning undefined would store the string "undefined"; delete instead.
        delete process.env[key];
      } else {
        process.env[key] = prior;
      }
    };

    // Filled in once the pool is available; stays undefined if the import fails.
    let dropSchema: (() => Promise<unknown>) | undefined;

    // Registered up front: drop the schema (if we got far enough to have a
    // pool), then always restore the environment.
    t.after(async () => {
      try {
        await dropSchema?.();
      } finally {
        process.env.DATABASE_URL = ORIGINAL_DSN;
        restoreEnv("AUTO_PROMOTE_MIN_EVIDENCE", priorMinEvidence);
        restoreEnv("AUTO_PROMOTE_MIN_SUCCESS_RATIO", priorMinSuccessRatio);
      }
    });

    process.env.DATABASE_URL = dsnWithSearchPath(ORIGINAL_DSN, schema);
    // Lower thresholds so the smoke test can drive promotion in a few rows.
    process.env.AUTO_PROMOTE_MIN_EVIDENCE = "1";
    process.env.AUTO_PROMOTE_MIN_SUCCESS_RATIO = "0.5";

    // Imported only after DATABASE_URL has been rewritten.
    const { pool } = await import("@workspace/db");
    dropSchema = () => pool.query(`DROP SCHEMA IF EXISTS "${schema}" CASCADE`);

    await pool.query(`CREATE SCHEMA "${schema}"`);
    // Mirror the existing tool-graph tables (subset needed by Mode B).
    // The SQL text is kept flush-left so the statement string is unchanged.
    await pool.query(`
CREATE TABLE "${schema}".tool_nodes (
id text PRIMARY KEY,
name text NOT NULL UNIQUE,
description text NOT NULL DEFAULT '',
capability_tags jsonb NOT NULL DEFAULT '[]'::jsonb,
input_kind text NOT NULL DEFAULT 'json',
output_kind text NOT NULL DEFAULT 'json',
status text NOT NULL DEFAULT 'verified',
owner_process text NOT NULL,
spec_json jsonb NOT NULL,
created_by text NOT NULL DEFAULT 'system',
handler_ref text,
handler_stub text,
cost_hint double precision,
latency_hint_ms integer,
version integer NOT NULL DEFAULT 1,
created_at timestamptz NOT NULL DEFAULT now(),
updated_at timestamptz NOT NULL DEFAULT now()
);
CREATE TABLE "${schema}".tool_edges (
id text PRIMARY KEY,
from_node text NOT NULL REFERENCES "${schema}".tool_nodes(id) ON DELETE CASCADE,
to_node text NOT NULL REFERENCES "${schema}".tool_nodes(id) ON DELETE CASCADE,
relation text NOT NULL,
weight double precision NOT NULL DEFAULT 1.0,
contract jsonb,
created_at timestamptz NOT NULL DEFAULT now(),
CONSTRAINT tool_edges_uniq UNIQUE (from_node, to_node, relation)
);
CREATE TABLE "${schema}".tool_node_evidence (
id text PRIMARY KEY,
node_id text NOT NULL REFERENCES "${schema}".tool_nodes(id) ON DELETE CASCADE,
kind text NOT NULL,
payload jsonb NOT NULL,
success integer NOT NULL DEFAULT 0,
failure integer NOT NULL DEFAULT 0,
shadow_user_id text,
created_at timestamptz NOT NULL DEFAULT now(),
seam_folded_at timestamptz
);
CREATE TABLE "${schema}".tool_gap_signals (
id text PRIMARY KEY,
capability_tag text NOT NULL UNIQUE,
invocation_count integer NOT NULL DEFAULT 0,
status text NOT NULL DEFAULT 'open',
last_context jsonb,
extended_node_id text,
first_seen_at timestamptz NOT NULL DEFAULT now(),
last_seen_at timestamptz NOT NULL DEFAULT now()
);
CREATE TABLE "${schema}".tool_edge_health (
id text PRIMARY KEY,
edge_id text NOT NULL REFERENCES "${schema}".tool_edges(id) ON DELETE CASCADE,
traversal_count integer NOT NULL DEFAULT 0,
contract_issue_count integer NOT NULL DEFAULT 0,
missing_field_count integer NOT NULL DEFAULT 0,
ema_coverage double precision NOT NULL DEFAULT 1.0,
ema_health_score double precision NOT NULL DEFAULT 1.0,
top_missing_fields jsonb NOT NULL DEFAULT '{}'::jsonb,
top_contract_issues jsonb NOT NULL DEFAULT '{}'::jsonb,
formula_version integer NOT NULL DEFAULT 1,
last_sample_at timestamptz,
last_folded_evidence_id text,
computed_at timestamptz NOT NULL DEFAULT now()
);
`);

    const search = await import("../tool-graph-search");
    const { db, toolNodes, toolNodeEvidence, toolGoalCandidates } = await import("@workspace/db");
    const { eq } = await import("drizzle-orm");

    await t.test("bootstrap creates the four #169 tables", async () => {
      await search.bootstrapToolGoalsSchema();
      const tables = await pool.query<{ table_name: string }>(
        `SELECT table_name FROM information_schema.tables WHERE table_schema=$1`,
        [schema],
      );
      const names = tables.rows.map((r) => r.table_name);
      // Loop variable deliberately not called `t`, which is the test context.
      for (const expected of ["tool_goals", "tool_goal_runs", "tool_goal_candidates", "tool_promotion_audit"]) {
        assert.ok(names.includes(expected), `expected table ${expected}, got ${names.join(",")}`);
      }
    });

    await t.test("synthetic seed creates verified dataset + evaluator", async () => {
      const seed = await search.seedSyntheticToyData();
      assert.ok(seed.datasetNodeId);
      assert.ok(seed.evaluatorNodeId);
      assert.ok(seed.seedNodeIds.length >= 2);
    });

    await t.test("createGoal rejects non-verified references", async () => {
      // Make a provisional node and try to use it as evaluator.
      const tg = await import("../tool-graph");
      const prov = await tg.upsertNode({
        id: "tnode_test_prov",
        name: "test_provisional_eval",
        description: "",
        capabilityTags: [],
        inputKind: "json",
        outputKind: "json",
        status: "provisional",
        ownerProcess: "node",
        specJson: {},
      });
      const ds = await tg.getNodeByName(search.SYNTHETIC_DATASET_NAME);
      assert.ok(ds);
      await assert.rejects(
        () =>
          search.createGoal({
            name: "bad",
            datasetNodeId: ds!.id,
            evaluatorNodeId: prov.id,
          }),
        /must be verified/,
      );
    });

    // Shared with the runGoalSearch subtest below; set by the next subtest.
    let goalId = "";
    await t.test("createGoal succeeds with verified nodes", async () => {
      const tg = await import("../tool-graph");
      const ds = await tg.getNodeByName(search.SYNTHETIC_DATASET_NAME);
      const ev = await tg.getNodeByName(search.SYNTHETIC_EVALUATOR_NAME);
      // Fail with a readable message rather than a TypeError on `.id`.
      assert.ok(ds, "synthetic dataset node should exist");
      assert.ok(ev, "synthetic evaluator node should exist");
      const g = await search.createGoal({
        name: "smoke goal",
        description: "Mode B smoke test",
        datasetNodeId: ds!.id,
        evaluatorNodeId: ev!.id,
        budget: { maxIterations: 4, maxCandidates: 24, wallClockMs: 15_000 },
      });
      goalId = g.id;
      assert.equal(g.status, "active");
    });

    await t.test("runGoalSearch evaluates candidates across all primitives", async () => {
      const run = await search.runGoalSearch(goalId, { actor: "smoke_test" });
      assert.equal(run.status, "completed");
      assert.ok(run.candidatesEvaluated >= 2, `evaluated ${run.candidatesEvaluated}`);
      assert.ok(run.bestCandidateId);
      const out = await search.getRun(run.id);
      assert.ok(out);
      const primitives = new Set(out!.candidates.map((c) => c.primitive));
      assert.ok(primitives.has("seed"), "seed candidate present");
      // At least one of the four mutation primitives should fire on the toy graph.
      const mutators = ["expand", "compose", "replace", "tune"].filter((p) => primitives.has(p));
      assert.ok(mutators.length >= 1, `expected ≥1 of expand/compose/replace/tune, got ${[...primitives].join(",")}`);
    });

    await t.test("autoPromoteIfReady gates on evidence + records audit", async () => {
      // Pick a provisional node created by search and feed it some evidence.
      const provs = await db.select().from(toolNodes).where(eq(toolNodes.status, "provisional"));
      assert.ok(provs.length >= 1, "search should have created at least one provisional");
      const target = provs[0]!;
      // First check: no evidence → not promoted.
      const r0 = await search.autoPromoteIfReady(target.id);
      assert.equal(r0.promoted, false);
      assert.match(r0.reason, /evidence count/);
      // Add a passing evidence row → promoted.
      await db.insert(toolNodeEvidence).values({
        id: "tev_smoke_1",
        nodeId: target.id,
        kind: "manual_check",
        payload: { ok: true },
        success: 1,
        failure: 0,
      });
      const r1 = await search.autoPromoteIfReady(target.id, { actor: "smoke" });
      assert.equal(r1.promoted, true, r1.reason);
      const after = await db.select().from(toolNodes).where(eq(toolNodes.id, target.id));
      assert.equal(after[0]!.status, "verified");
      const audit = await search.listPromotionAudit(target.id);
      assert.ok(audit.find((a) => a.action === "auto_promote"));
    });

    await t.test("rollbackPromotion restores prior status", async () => {
      // Find a node that has an auto_promote audit row.
      const audits = await search.listPromotionAudit();
      const target = audits.find((a) => a.action === "auto_promote");
      assert.ok(target, "expected an auto_promote audit row");
      const r = await search.rollbackPromotion(target!.nodeId, "smoke");
      assert.equal(r.ok, true, r.reason);
      const node = (await db.select().from(toolNodes).where(eq(toolNodes.id, target!.nodeId)))[0]!;
      assert.equal(node.status, "provisional");
      // Idempotency: a second rollback should refuse (status no longer matches).
      const r2 = await search.rollbackPromotion(target!.nodeId, "smoke");
      assert.equal(r2.ok, false);
    });

    await t.test("diffSubgraphAgainstVerified surfaces additions", async () => {
      const cands = await db.select().from(toolGoalCandidates);
      // Optional chaining so a row with a null subgraph is skipped, not a crash.
      const c = cands.find((x) => Array.isArray((x.subgraph as { nodeIds?: string[] } | null)?.nodeIds));
      assert.ok(c);
      const diff = await search.diffSubgraphAgainstVerified(c!.id);
      assert.ok(diff);
      assert.ok(Array.isArray(diff!.added.nodes));
      assert.ok(Array.isArray(diff!.added.edges));
      assert.ok(Array.isArray(diff!.status_changes));
    });

    await t.test("recordHighConfidenceGap (Mode A boost) spawns provisional immediately", async () => {
      const before = await db.select().from(toolNodes).where(eq(toolNodes.status, "provisional"));
      const r = await search.recordHighConfidenceGap(
        "synth:novel_capability_xyz",
        { source: "smoke" },
        0.95,
      );
      assert.ok(r.provisionalNodeId, "high-confidence gap should produce a provisional id");
      const after = await db.select().from(toolNodes).where(eq(toolNodes.status, "provisional"));
      assert.ok(after.length > before.length, "provisional pool should grow");
    });

    await t.test("low-confidence gap does NOT short-circuit promotion", async () => {
      const r = await search.recordHighConfidenceGap(
        "synth:weakly_signalled_capability",
        { source: "smoke" },
        0.4,
      );
      assert.equal(r.provisionalNodeId, null);
    });
  },
);