/**
 * tool-graph-search — Mode B (#169) end-to-end smoke test.
 *
 * Exercises:
 *   - bootstrapToolGoalsSchema (CREATE TABLE IF NOT EXISTS)
 *   - seedSyntheticToyData (toy dataset+evaluator+seed nodes)
 *   - createGoal validation (must reference verified nodes)
 *   - runGoalSearch end-to-end: all four primitives + seed
 *   - autoPromoteIfReady gates + audit row
 *   - rollbackPromotion restores prior status
 *   - diffSubgraphAgainstVerified surfaces a non-empty diff
 *   - recordHighConfidenceGap (Mode A boost) spawns a provisional
 *
 * Skipped when DATABASE_URL is unset.
 *
 * The test runs inside a throwaway, randomly named Postgres schema and
 * temporarily overrides a few environment variables; both the schema and the
 * environment are cleaned up in `t.after` hooks that are registered before any
 * fallible setup step.
 */
import { test } from "node:test";
import assert from "node:assert/strict";
import { randomBytes } from "node:crypto";

const ORIGINAL_DSN = process.env.DATABASE_URL || "";
const SKIP = !ORIGINAL_DSN;

/** Environment variables this test overrides; every one is restored on teardown. */
const OVERRIDDEN_ENV_KEYS = [
  "DATABASE_URL",
  "AUTO_PROMOTE_MIN_EVIDENCE",
  "AUTO_PROMOTE_MIN_SUCCESS_RATIO",
] as const;

/** Captured environment values; `undefined` records that the variable was not set. */
type EnvSnapshot = Record<string, string | undefined>;

/**
 * Captures the current values of the given environment variables.
 *
 * @param keys Names of the variables to capture.
 * @returns A map from variable name to its current value (`undefined` if unset).
 */
function snapshotEnv(keys: readonly string[]): EnvSnapshot {
  const snapshot: EnvSnapshot = {};
  for (const key of keys) {
    snapshot[key] = process.env[key];
  }
  return snapshot;
}

/**
 * Restores environment variables from a snapshot taken by `snapshotEnv`.
 *
 * Variables that were unset when the snapshot was taken are deleted rather
 * than assigned, because assigning `undefined` to `process.env` stores the
 * string "undefined".
 *
 * @param snapshot Values to restore.
 */
function restoreEnv(snapshot: EnvSnapshot): void {
  for (const [key, value] of Object.entries(snapshot)) {
    if (value === undefined) {
      delete process.env[key];
    } else {
      process.env[key] = value;
    }
  }
}

/**
 * Appends an `options=-c search_path=<schema>` query parameter to a DSN.
 *
 * @param dsn    Connection string to extend.
 * @param schema Schema name to place in the `search_path` setting.
 * @returns The DSN with the extra parameter, joined with `&` when the DSN
 *          already contains a `?` and with `?` otherwise.
 */
function dsnWithSearchPath(dsn: string, schema: string): string {
  const opt = `options=-c%20search_path%3D${encodeURIComponent(schema)}`;
  return dsn.includes("?") ? `${dsn}&${opt}` : `${dsn}?${opt}`;
}

test(
  "tool-graph-search Mode B end-to-end smoke",
  { skip: SKIP && "DATABASE_URL not set" },
  async (t) => {
    const schema = `tool_graph_search_test_${randomBytes(6).toString("hex")}`;

    // Registered before anything that can throw, so the overridden variables
    // (including the promotion thresholds) never leak into other tests.
    const savedEnv = snapshotEnv(OVERRIDDEN_ENV_KEYS);
    t.after(() => restoreEnv(savedEnv));

    process.env.DATABASE_URL = dsnWithSearchPath(ORIGINAL_DSN, schema);
    // Lower thresholds so the smoke test can drive promotion in a few rows.
    process.env.AUTO_PROMOTE_MIN_EVIDENCE = "1";
    process.env.AUTO_PROMOTE_MIN_SUCCESS_RATIO = "0.5";

    // Imported only after DATABASE_URL has been overridden above.
    const { pool } = await import("@workspace/db");

    // Registered before CREATE SCHEMA so that a failure part-way through the
    // setup below still drops whatever was created. The schema name is fully
    // quoted, so the drop does not rely on the overridden search_path.
    t.after(async () => {
      await pool.query(`DROP SCHEMA IF EXISTS "${schema}" CASCADE`);
    });

    await pool.query(`CREATE SCHEMA "${schema}"`);

    // Mirror the existing tool-graph tables (subset needed by Mode B).
    await pool.query(`
      CREATE TABLE "${schema}".tool_nodes (
        id text PRIMARY KEY,
        name text NOT NULL UNIQUE,
        description text NOT NULL DEFAULT '',
        capability_tags jsonb NOT NULL DEFAULT '[]'::jsonb,
        input_kind text NOT NULL DEFAULT 'json',
        output_kind text NOT NULL DEFAULT 'json',
        status text NOT NULL DEFAULT 'verified',
        owner_process text NOT NULL,
        spec_json jsonb NOT NULL,
        created_by text NOT NULL DEFAULT 'system',
        handler_ref text,
        handler_stub text,
        cost_hint double precision,
        latency_hint_ms integer,
        version integer NOT NULL DEFAULT 1,
        created_at timestamptz NOT NULL DEFAULT now(),
        updated_at timestamptz NOT NULL DEFAULT now()
      );
      CREATE TABLE "${schema}".tool_edges (
        id text PRIMARY KEY,
        from_node text NOT NULL REFERENCES "${schema}".tool_nodes(id) ON DELETE CASCADE,
        to_node text NOT NULL REFERENCES "${schema}".tool_nodes(id) ON DELETE CASCADE,
        relation text NOT NULL,
        weight double precision NOT NULL DEFAULT 1.0,
        contract jsonb,
        created_at timestamptz NOT NULL DEFAULT now(),
        CONSTRAINT tool_edges_uniq UNIQUE (from_node, to_node, relation)
      );
      CREATE TABLE "${schema}".tool_node_evidence (
        id text PRIMARY KEY,
        node_id text NOT NULL REFERENCES "${schema}".tool_nodes(id) ON DELETE CASCADE,
        kind text NOT NULL,
        payload jsonb NOT NULL,
        success integer NOT NULL DEFAULT 0,
        failure integer NOT NULL DEFAULT 0,
        shadow_user_id text,
        created_at timestamptz NOT NULL DEFAULT now(),
        seam_folded_at timestamptz
      );
      CREATE TABLE "${schema}".tool_gap_signals (
        id text PRIMARY KEY,
        capability_tag text NOT NULL UNIQUE,
        invocation_count integer NOT NULL DEFAULT 0,
        status text NOT NULL DEFAULT 'open',
        last_context jsonb,
        extended_node_id text,
        first_seen_at timestamptz NOT NULL DEFAULT now(),
        last_seen_at timestamptz NOT NULL DEFAULT now()
      );
      CREATE TABLE "${schema}".tool_edge_health (
        id text PRIMARY KEY,
        edge_id text NOT NULL REFERENCES "${schema}".tool_edges(id) ON DELETE CASCADE,
        traversal_count integer NOT NULL DEFAULT 0,
        contract_issue_count integer NOT NULL DEFAULT 0,
        missing_field_count integer NOT NULL DEFAULT 0,
        ema_coverage double precision NOT NULL DEFAULT 1.0,
        ema_health_score double precision NOT NULL DEFAULT 1.0,
        top_missing_fields jsonb NOT NULL DEFAULT '{}'::jsonb,
        top_contract_issues jsonb NOT NULL DEFAULT '{}'::jsonb,
        formula_version integer NOT NULL DEFAULT 1,
        last_sample_at timestamptz,
        last_folded_evidence_id text,
        computed_at timestamptz NOT NULL DEFAULT now()
      );
    `);

    const search = await import("../tool-graph-search");
    const { db, toolNodes, toolNodeEvidence, toolGoalCandidates } = await import("@workspace/db");
    const { eq } = await import("drizzle-orm");

    await t.test("bootstrap creates the four #169 tables", async () => {
      await search.bootstrapToolGoalsSchema();
      const tables = await pool.query<{ table_name: string }>(
        `SELECT table_name FROM information_schema.tables WHERE table_schema=$1`,
        [schema],
      );
      const names = tables.rows.map((r) => r.table_name);
      const expectedTables = [
        "tool_goals",
        "tool_goal_runs",
        "tool_goal_candidates",
        "tool_promotion_audit",
      ];
      // Named `expected` (not `t`) so the test context is not shadowed.
      for (const expected of expectedTables) {
        assert.ok(names.includes(expected), `expected table ${expected}, got ${names.join(",")}`);
      }
    });

    await t.test("synthetic seed creates verified dataset + evaluator", async () => {
      const seed = await search.seedSyntheticToyData();
      assert.ok(seed.datasetNodeId);
      assert.ok(seed.evaluatorNodeId);
      assert.ok(seed.seedNodeIds.length >= 2);
    });

    await t.test("createGoal rejects non-verified references", async () => {
      // Make a provisional node and try to use it as evaluator.
      const tg = await import("../tool-graph");
      const prov = await tg.upsertNode({
        id: "tnode_test_prov",
        name: "test_provisional_eval",
        description: "",
        capabilityTags: [],
        inputKind: "json",
        outputKind: "json",
        status: "provisional",
        ownerProcess: "node",
        specJson: {},
      });
      const ds = await tg.getNodeByName(search.SYNTHETIC_DATASET_NAME);
      assert.ok(ds);
      await assert.rejects(
        () =>
          search.createGoal({
            name: "bad",
            datasetNodeId: ds!.id,
            evaluatorNodeId: prov.id,
          }),
        /must be verified/,
      );
    });

    // Shared with the run subtest below; stays empty if goal creation fails.
    let goalId = "";

    await t.test("createGoal succeeds with verified nodes", async () => {
      const tg = await import("../tool-graph");
      const ds = await tg.getNodeByName(search.SYNTHETIC_DATASET_NAME);
      const ev = await tg.getNodeByName(search.SYNTHETIC_EVALUATOR_NAME);
      // Fail with a readable message instead of a TypeError on `.id`.
      assert.ok(ds, "synthetic dataset node should exist");
      assert.ok(ev, "synthetic evaluator node should exist");
      const g = await search.createGoal({
        name: "smoke goal",
        description: "Mode B smoke test",
        datasetNodeId: ds!.id,
        evaluatorNodeId: ev!.id,
        budget: { maxIterations: 4, maxCandidates: 24, wallClockMs: 15_000 },
      });
      goalId = g.id;
      assert.equal(g.status, "active");
    });

    await t.test("runGoalSearch evaluates candidates across all primitives", async () => {
      assert.ok(goalId, "goal id should have been set by the createGoal subtest");
      const run = await search.runGoalSearch(goalId, { actor: "smoke_test" });
      assert.equal(run.status, "completed");
      assert.ok(run.candidatesEvaluated >= 2, `evaluated ${run.candidatesEvaluated}`);
      assert.ok(run.bestCandidateId);
      const out = await search.getRun(run.id);
      assert.ok(out);
      const primitives = new Set(out!.candidates.map((c) => c.primitive));
      assert.ok(primitives.has("seed"), "seed candidate present");
      // At least one of the four mutation primitives should fire on the toy graph.
      const mutators = ["expand", "compose", "replace", "tune"].filter((p) => primitives.has(p));
      assert.ok(
        mutators.length >= 1,
        `expected ≥1 of expand/compose/replace/tune, got ${[...primitives].join(",")}`,
      );
    });

    await t.test("autoPromoteIfReady gates on evidence + records audit", async () => {
      // Pick a provisional node created by search and feed it some evidence.
      const provs = await db.select().from(toolNodes).where(eq(toolNodes.status, "provisional"));
      assert.ok(provs.length >= 1, "search should have created at least one provisional");
      const target = provs[0]!;

      // First check: no evidence → not promoted.
      const r0 = await search.autoPromoteIfReady(target.id);
      assert.equal(r0.promoted, false);
      assert.match(r0.reason, /evidence count/);

      // Add a passing evidence row → promoted.
      await db.insert(toolNodeEvidence).values({
        id: "tev_smoke_1",
        nodeId: target.id,
        kind: "manual_check",
        payload: { ok: true },
        success: 1,
        failure: 0,
      });
      const r1 = await search.autoPromoteIfReady(target.id, { actor: "smoke" });
      assert.equal(r1.promoted, true, r1.reason);

      const after = await db.select().from(toolNodes).where(eq(toolNodes.id, target.id));
      assert.equal(after[0]!.status, "verified");
      const audit = await search.listPromotionAudit(target.id);
      assert.ok(audit.find((a) => a.action === "auto_promote"));
    });

    await t.test("rollbackPromotion restores prior status", async () => {
      // Find a node that has an auto_promote audit row.
      const audits = await search.listPromotionAudit();
      const target = audits.find((a) => a.action === "auto_promote");
      assert.ok(target, "expected an auto_promote audit row");

      const r = await search.rollbackPromotion(target!.nodeId, "smoke");
      assert.equal(r.ok, true, r.reason);
      const node = (await db.select().from(toolNodes).where(eq(toolNodes.id, target!.nodeId)))[0]!;
      assert.equal(node.status, "provisional");

      // Idempotency: a second rollback should refuse (status no longer matches).
      const r2 = await search.rollbackPromotion(target!.nodeId, "smoke");
      assert.equal(r2.ok, false);
    });

    await t.test("diffSubgraphAgainstVerified surfaces additions", async () => {
      const cands = await db.select().from(toolGoalCandidates);
      // Optional chaining keeps a candidate with a null subgraph from crashing the search.
      const c = cands.find((x) =>
        Array.isArray((x.subgraph as { nodeIds?: string[] } | null | undefined)?.nodeIds),
      );
      assert.ok(c);
      const diff = await search.diffSubgraphAgainstVerified(c!.id);
      assert.ok(diff);
      assert.ok(Array.isArray(diff!.added.nodes));
      assert.ok(Array.isArray(diff!.added.edges));
      assert.ok(Array.isArray(diff!.status_changes));
    });

    await t.test("recordHighConfidenceGap (Mode A boost) spawns provisional immediately", async () => {
      const before = await db.select().from(toolNodes).where(eq(toolNodes.status, "provisional"));
      const r = await search.recordHighConfidenceGap(
        "synth:novel_capability_xyz",
        { source: "smoke" },
        0.95,
      );
      assert.ok(r.provisionalNodeId, "high-confidence gap should produce a provisional id");
      const after = await db.select().from(toolNodes).where(eq(toolNodes.status, "provisional"));
      assert.ok(after.length > before.length, "provisional pool should grow");
    });

    await t.test("low-confidence gap does NOT short-circuit promotion", async () => {
      const r = await search.recordHighConfidenceGap(
        "synth:weakly_signalled_capability",
        { source: "smoke" },
        0.4,
      );
      assert.equal(r.provisionalNodeId, null);
    });
  },
);