doatlas-2 / artifacts /api-server /src /lib /__tests__ /tool-graph-search.test.ts

Add files using upload-large-folder tool

ff78003 verified 27 days ago

11.7 kB

	/**
	* tool-graph-search — Mode B (#169) end-to-end smoke test.
	*
	* Exercises:
	* - bootstrapToolGoalsSchema (CREATE TABLE IF NOT EXISTS)
	* - seedSyntheticToyData (toy dataset+evaluator+seed nodes)
	* - createGoal validation (must reference verified nodes)
	* - runGoalSearch end-to-end: all four primitives + seed
	* - autoPromoteIfReady gates + audit row
	* - rollbackPromotion restores prior status
	* - diffSubgraphAgainstVerified surfaces a non-empty diff
	* - recordHighConfidenceGap (Mode A boost) spawns a provisional node
	*   for a high-confidence gap and returns no node for a low-confidence one
	*
	* Skipped when DATABASE_URL is unset.
	*/
	import { test } from "node:test";
	import assert from "node:assert/strict";
	import { randomBytes } from "node:crypto";

	// Captured once at module load: the smoke test rewrites process.env.DATABASE_URL
	// to point at a throwaway schema and needs the original value to restore it.
	const ORIGINAL_DSN = process.env.DATABASE_URL ?? "";
	// The suite needs a live database; with no (or an empty) DSN it is skipped.
	const SKIP = !ORIGINAL_DSN;

	/**
	 * Returns `dsn` with an extra `options` query parameter whose value is the
	 * URL-encoded form of `-c search_path=<schema>`.
	 *
	 * @param dsn    Connection string; may or may not already carry a query string.
	 * @param schema Schema name; it is percent-encoded before being appended.
	 * @returns The DSN with the parameter joined by `&` when `dsn` already
	 *          contains a `?`, otherwise by `?`.
	 */
	function dsnWithSearchPath(dsn: string, schema: string): string {
	  const separator = dsn.includes("?") ? "&" : "?";
	  const searchPathOption = "options=-c%20search_path%3D" + encodeURIComponent(schema);
	  return dsn + separator + searchPathOption;
	}

	// End-to-end smoke test for tool-graph-search (Mode B). Everything runs inside a
	// randomly named Postgres schema that is dropped again on teardown. The subtests
	// are order-dependent: later ones consume rows created by earlier ones.
	test(
	  "tool-graph-search Mode B end-to-end smoke",
	  { skip: SKIP && "DATABASE_URL not set" },
	  async (t) => {
	    const schema = `tool_graph_search_test_${randomBytes(6).toString("hex")}`;

	    // Snapshot the env vars this test overrides so teardown can put them back.
	    const priorMinEvidence = process.env.AUTO_PROMOTE_MIN_EVIDENCE;
	    const priorMinSuccessRatio = process.env.AUTO_PROMOTE_MIN_SUCCESS_RATIO;
	    // Assigning `undefined` to process.env stores the string "undefined",
	    // so a previously unset variable has to be deleted instead.
	    const restoreEnv = (key: string, value: string | undefined): void => {
	      if (value === undefined) {
	        delete process.env[key];
	      } else {
	        process.env[key] = value;
	      }
	    };

	    // Register teardown BEFORE any fallible setup step so that a failure while
	    // importing the db module or creating tables still drops the schema and
	    // restores the environment. `dropSchema` is filled in once the pool exists.
	    let dropSchema: (() => Promise<unknown>) | undefined;
	    t.after(async () => {
	      try {
	        await dropSchema?.();
	      } finally {
	        process.env.DATABASE_URL = ORIGINAL_DSN;
	        restoreEnv("AUTO_PROMOTE_MIN_EVIDENCE", priorMinEvidence);
	        restoreEnv("AUTO_PROMOTE_MIN_SUCCESS_RATIO", priorMinSuccessRatio);
	      }
	    });

	    // DATABASE_URL is rewritten before the db module is imported so the
	    // connection uses the throwaway schema as its search_path.
	    process.env.DATABASE_URL = dsnWithSearchPath(ORIGINAL_DSN, schema);
	    // Lower thresholds so the smoke test can drive promotion in a few rows.
	    process.env.AUTO_PROMOTE_MIN_EVIDENCE = "1";
	    process.env.AUTO_PROMOTE_MIN_SUCCESS_RATIO = "0.5";

	    const { pool } = await import("@workspace/db");
	    dropSchema = () => pool.query(`DROP SCHEMA IF EXISTS "${schema}" CASCADE`);
	    await pool.query(`CREATE SCHEMA "${schema}"`);

	    // Mirror the existing tool-graph tables (subset needed by Mode B).
	    await pool.query(`
	      CREATE TABLE "${schema}".tool_nodes (
	        id text PRIMARY KEY,
	        name text NOT NULL UNIQUE,
	        description text NOT NULL DEFAULT '',
	        capability_tags jsonb NOT NULL DEFAULT '[]'::jsonb,
	        input_kind text NOT NULL DEFAULT 'json',
	        output_kind text NOT NULL DEFAULT 'json',
	        status text NOT NULL DEFAULT 'verified',
	        owner_process text NOT NULL,
	        spec_json jsonb NOT NULL,
	        created_by text NOT NULL DEFAULT 'system',
	        handler_ref text,
	        handler_stub text,
	        cost_hint double precision,
	        latency_hint_ms integer,
	        version integer NOT NULL DEFAULT 1,
	        created_at timestamptz NOT NULL DEFAULT now(),
	        updated_at timestamptz NOT NULL DEFAULT now()
	      );
	      CREATE TABLE "${schema}".tool_edges (
	        id text PRIMARY KEY,
	        from_node text NOT NULL REFERENCES "${schema}".tool_nodes(id) ON DELETE CASCADE,
	        to_node text NOT NULL REFERENCES "${schema}".tool_nodes(id) ON DELETE CASCADE,
	        relation text NOT NULL,
	        weight double precision NOT NULL DEFAULT 1.0,
	        contract jsonb,
	        created_at timestamptz NOT NULL DEFAULT now(),
	        CONSTRAINT tool_edges_uniq UNIQUE (from_node, to_node, relation)
	      );
	      CREATE TABLE "${schema}".tool_node_evidence (
	        id text PRIMARY KEY,
	        node_id text NOT NULL REFERENCES "${schema}".tool_nodes(id) ON DELETE CASCADE,
	        kind text NOT NULL,
	        payload jsonb NOT NULL,
	        success integer NOT NULL DEFAULT 0,
	        failure integer NOT NULL DEFAULT 0,
	        shadow_user_id text,
	        created_at timestamptz NOT NULL DEFAULT now(),
	        seam_folded_at timestamptz
	      );
	      CREATE TABLE "${schema}".tool_gap_signals (
	        id text PRIMARY KEY,
	        capability_tag text NOT NULL UNIQUE,
	        invocation_count integer NOT NULL DEFAULT 0,
	        status text NOT NULL DEFAULT 'open',
	        last_context jsonb,
	        extended_node_id text,
	        first_seen_at timestamptz NOT NULL DEFAULT now(),
	        last_seen_at timestamptz NOT NULL DEFAULT now()
	      );
	      CREATE TABLE "${schema}".tool_edge_health (
	        id text PRIMARY KEY,
	        edge_id text NOT NULL REFERENCES "${schema}".tool_edges(id) ON DELETE CASCADE,
	        traversal_count integer NOT NULL DEFAULT 0,
	        contract_issue_count integer NOT NULL DEFAULT 0,
	        missing_field_count integer NOT NULL DEFAULT 0,
	        ema_coverage double precision NOT NULL DEFAULT 1.0,
	        ema_health_score double precision NOT NULL DEFAULT 1.0,
	        top_missing_fields jsonb NOT NULL DEFAULT '{}'::jsonb,
	        top_contract_issues jsonb NOT NULL DEFAULT '{}'::jsonb,
	        formula_version integer NOT NULL DEFAULT 1,
	        last_sample_at timestamptz,
	        last_folded_evidence_id text,
	        computed_at timestamptz NOT NULL DEFAULT now()
	      );
	    `);

	    // Imported only after DATABASE_URL has been rewritten above.
	    const search = await import("../tool-graph-search");
	    const { db, toolNodes, toolNodeEvidence, toolGoalCandidates } = await import("@workspace/db");
	    const { eq } = await import("drizzle-orm");

	    await t.test("bootstrap creates the four #169 tables", async () => {
	      await search.bootstrapToolGoalsSchema();
	      const tables = await pool.query<{ table_name: string }>(
	        `SELECT table_name FROM information_schema.tables WHERE table_schema=$1`,
	        [schema],
	      );
	      const names = tables.rows.map((r) => r.table_name);
	      const expectedTables = ["tool_goals", "tool_goal_runs", "tool_goal_candidates", "tool_promotion_audit"];
	      for (const tableName of expectedTables) {
	        assert.ok(names.includes(tableName), `expected table ${tableName}, got ${names.join(",")}`);
	      }
	    });

	    await t.test("synthetic seed creates verified dataset + evaluator", async () => {
	      const seed = await search.seedSyntheticToyData();
	      assert.ok(seed.datasetNodeId);
	      assert.ok(seed.evaluatorNodeId);
	      assert.ok(seed.seedNodeIds.length >= 2);
	    });

	    await t.test("createGoal rejects non-verified references", async () => {
	      // Make a provisional node and try to use it as evaluator.
	      const tg = await import("../tool-graph");
	      const prov = await tg.upsertNode({
	        id: "tnode_test_prov",
	        name: "test_provisional_eval",
	        description: "",
	        capabilityTags: [],
	        inputKind: "json",
	        outputKind: "json",
	        status: "provisional",
	        ownerProcess: "node",
	        specJson: {},
	      });
	      const ds = await tg.getNodeByName(search.SYNTHETIC_DATASET_NAME);
	      assert.ok(ds);
	      await assert.rejects(
	        () =>
	          search.createGoal({
	            name: "bad",
	            datasetNodeId: ds!.id,
	            evaluatorNodeId: prov.id,
	          }),
	        /must be verified/,
	      );
	    });

	    // Shared with the runGoalSearch subtest below.
	    let goalId = "";
	    await t.test("createGoal succeeds with verified nodes", async () => {
	      const tg = await import("../tool-graph");
	      const ds = await tg.getNodeByName(search.SYNTHETIC_DATASET_NAME);
	      const ev = await tg.getNodeByName(search.SYNTHETIC_EVALUATOR_NAME);
	      // Fail with a readable message rather than a TypeError on `.id`.
	      assert.ok(ds, "synthetic dataset node should exist");
	      assert.ok(ev, "synthetic evaluator node should exist");
	      const g = await search.createGoal({
	        name: "smoke goal",
	        description: "Mode B smoke test",
	        datasetNodeId: ds!.id,
	        evaluatorNodeId: ev!.id,
	        budget: { maxIterations: 4, maxCandidates: 24, wallClockMs: 15_000 },
	      });
	      goalId = g.id;
	      assert.equal(g.status, "active");
	    });

	    await t.test("runGoalSearch evaluates candidates across all primitives", async () => {
	      assert.ok(goalId, "createGoal subtest should have produced a goal id");
	      const run = await search.runGoalSearch(goalId, { actor: "smoke_test" });
	      assert.equal(run.status, "completed");
	      assert.ok(run.candidatesEvaluated >= 2, `evaluated ${run.candidatesEvaluated}`);
	      assert.ok(run.bestCandidateId);
	      const out = await search.getRun(run.id);
	      assert.ok(out);
	      const primitives = new Set(out!.candidates.map((c) => c.primitive));
	      assert.ok(primitives.has("seed"), "seed candidate present");
	      // At least one of the four mutation primitives should fire on the toy graph.
	      const mutators = ["expand", "compose", "replace", "tune"].filter((p) => primitives.has(p));
	      assert.ok(mutators.length >= 1, `expected ≥1 of expand/compose/replace/tune, got ${[...primitives].join(",")}`);
	    });

	    await t.test("autoPromoteIfReady gates on evidence + records audit", async () => {
	      // Pick a provisional node created by search and feed it some evidence.
	      const provs = await db.select().from(toolNodes).where(eq(toolNodes.status, "provisional"));
	      assert.ok(provs.length >= 1, "search should have created at least one provisional");
	      const target = provs[0]!;
	      // First check: no evidence → not promoted.
	      const r0 = await search.autoPromoteIfReady(target.id);
	      assert.equal(r0.promoted, false);
	      assert.match(r0.reason, /evidence count/);
	      // Add a passing evidence row → promoted.
	      await db.insert(toolNodeEvidence).values({
	        id: "tev_smoke_1",
	        nodeId: target.id,
	        kind: "manual_check",
	        payload: { ok: true },
	        success: 1,
	        failure: 0,
	      });
	      const r1 = await search.autoPromoteIfReady(target.id, { actor: "smoke" });
	      assert.equal(r1.promoted, true, r1.reason);
	      const after = await db.select().from(toolNodes).where(eq(toolNodes.id, target.id));
	      assert.equal(after[0]!.status, "verified");
	      const audit = await search.listPromotionAudit(target.id);
	      assert.ok(audit.find((a) => a.action === "auto_promote"));
	    });

	    await t.test("rollbackPromotion restores prior status", async () => {
	      // Find a node that has an auto_promote audit row.
	      const audits = await search.listPromotionAudit();
	      const target = audits.find((a) => a.action === "auto_promote");
	      assert.ok(target, "expected an auto_promote audit row");
	      const r = await search.rollbackPromotion(target!.nodeId, "smoke");
	      assert.equal(r.ok, true, r.reason);
	      const node = (await db.select().from(toolNodes).where(eq(toolNodes.id, target!.nodeId)))[0]!;
	      assert.equal(node.status, "provisional");
	      // Idempotency: a second rollback should refuse (status no longer matches).
	      const r2 = await search.rollbackPromotion(target!.nodeId, "smoke");
	      assert.equal(r2.ok, false);
	    });

	    await t.test("diffSubgraphAgainstVerified surfaces additions", async () => {
	      const cands = await db.select().from(toolGoalCandidates);
	      const c = cands.find((x) => Array.isArray((x.subgraph as { nodeIds?: string[] }).nodeIds));
	      assert.ok(c);
	      const diff = await search.diffSubgraphAgainstVerified(c!.id);
	      assert.ok(diff);
	      assert.ok(Array.isArray(diff!.added.nodes));
	      assert.ok(Array.isArray(diff!.added.edges));
	      assert.ok(Array.isArray(diff!.status_changes));
	    });

	    await t.test("recordHighConfidenceGap (Mode A boost) spawns provisional immediately", async () => {
	      const before = await db.select().from(toolNodes).where(eq(toolNodes.status, "provisional"));
	      const r = await search.recordHighConfidenceGap(
	        "synth:novel_capability_xyz",
	        { source: "smoke" },
	        0.95,
	      );
	      assert.ok(r.provisionalNodeId, "high-confidence gap should produce a provisional id");
	      const after = await db.select().from(toolNodes).where(eq(toolNodes.status, "provisional"));
	      assert.ok(after.length > before.length, "provisional pool should grow");
	    });

	    await t.test("low-confidence gap does NOT short-circuit promotion", async () => {
	      const r = await search.recordHighConfidenceGap(
	        "synth:weakly_signalled_capability",
	        { source: "smoke" },
	        0.4,
	      );
	      assert.equal(r.provisionalNodeId, null);
	    });
	  },
	);