| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| import { test } from "node:test"; |
| import assert from "node:assert/strict"; |
| import { randomBytes } from "node:crypto"; |
|
|
/** Connection string the process started with; empty string when DATABASE_URL is unset. */
const ORIGINAL_DSN = process.env.DATABASE_URL ?? "";

/** True when no database is configured, in which case the smoke test is skipped. */
const SKIP = ORIGINAL_DSN === "";
|
|
/**
 * Returns `dsn` with a startup option that sets `search_path` to `schema`.
 *
 * The setting is carried in the `options` query parameter as the
 * percent-encoded form of `-c search_path=<schema>`.
 *
 * - No query string: `?options=…` is appended.
 * - Query string without `options`: `&options=…` is appended.
 * - Query string that already has `options`: the setting is appended to that
 *   value (separated by an encoded space) instead of emitting a second
 *   `options` key, so the pre-existing options are kept.
 *
 * Empty query segments (e.g. from a trailing `?` or `&`) are dropped.
 *
 * @param dsn    Connection URI to extend; it is not otherwise validated.
 * @param schema Schema name; encoded with `encodeURIComponent`.
 * @returns The extended connection URI.
 */
function dsnWithSearchPath(dsn: string, schema: string): string {
  const setting = `-c%20search_path%3D${encodeURIComponent(schema)}`;

  const queryStart = dsn.indexOf("?");
  if (queryStart === -1) {
    return `${dsn}?options=${setting}`;
  }

  const base = dsn.slice(0, queryStart);
  let merged = false;
  const params = dsn
    .slice(queryStart + 1)
    .split("&")
    .filter((param) => param.length > 0)
    .map((param) => {
      if (merged || !param.startsWith("options=")) return param;
      merged = true;
      // An empty existing value needs no separator before our setting.
      return param === "options=" ? `options=${setting}` : `${param}%20${setting}`;
    });

  if (!merged) {
    params.push(`options=${setting}`);
  }
  return `${base}?${params.join("&")}`;
}
|
|
/**
 * End-to-end smoke test for tool-graph-search "Mode B", run against a real
 * Postgres database inside a randomly named, throwaway schema.
 *
 * Skipped when DATABASE_URL is not set. While it runs, DATABASE_URL is
 * rewritten so the connection's search_path points at the throwaway schema,
 * and the two AUTO_PROMOTE_* thresholds are lowered. A single cleanup hook,
 * registered before any of that happens, drops the schema (once the pool is
 * available) and restores all three environment variables even when setup
 * fails part-way.
 *
 * The subtests run sequentially and share database state: later subtests
 * rely on rows created by earlier ones.
 */
test(
  "tool-graph-search Mode B end-to-end smoke",
  { skip: SKIP && "DATABASE_URL not set" },
  async (t) => {
    const schema = `tool_graph_search_test_${randomBytes(6).toString("hex")}`;

    // Snapshot the variables this test overrides so they can be put back.
    const savedMinEvidence = process.env.AUTO_PROMOTE_MIN_EVIDENCE;
    const savedMinSuccessRatio = process.env.AUTO_PROMOTE_MIN_SUCCESS_RATIO;
    const restoreEnv = (key: string, value: string | undefined): void => {
      // Assigning `undefined` to process.env stores the string "undefined",
      // so a previously unset variable has to be deleted instead.
      if (value === undefined) delete process.env[key];
      else process.env[key] = value;
    };

    // Registered before any setup so a failure in schema creation, DDL, or a
    // module import cannot leak the schema or the environment overrides.
    let dropSchema: (() => Promise<void>) | undefined;
    t.after(async () => {
      try {
        await dropSchema?.();
      } finally {
        process.env.DATABASE_URL = ORIGINAL_DSN;
        restoreEnv("AUTO_PROMOTE_MIN_EVIDENCE", savedMinEvidence);
        restoreEnv("AUTO_PROMOTE_MIN_SUCCESS_RATIO", savedMinSuccessRatio);
      }
    });

    process.env.DATABASE_URL = dsnWithSearchPath(ORIGINAL_DSN, schema);
    process.env.AUTO_PROMOTE_MIN_EVIDENCE = "1";
    process.env.AUTO_PROMOTE_MIN_SUCCESS_RATIO = "0.5";

    // Imported dynamically, after the environment is prepared — presumably
    // because the db module reads DATABASE_URL when it is first loaded
    // (TODO confirm against @workspace/db).
    const { pool } = await import("@workspace/db");
    dropSchema = async () => {
      await pool.query(`DROP SCHEMA IF EXISTS "${schema}" CASCADE`);
    };
    await pool.query(`CREATE SCHEMA "${schema}"`);

    // Base tool-graph tables, created by hand in the throwaway schema. The
    // goal-search tables are expected to come from bootstrapToolGoalsSchema()
    // (asserted in the first subtest).
    await pool.query(`
      CREATE TABLE "${schema}".tool_nodes (
        id text PRIMARY KEY,
        name text NOT NULL UNIQUE,
        description text NOT NULL DEFAULT '',
        capability_tags jsonb NOT NULL DEFAULT '[]'::jsonb,
        input_kind text NOT NULL DEFAULT 'json',
        output_kind text NOT NULL DEFAULT 'json',
        status text NOT NULL DEFAULT 'verified',
        owner_process text NOT NULL,
        spec_json jsonb NOT NULL,
        created_by text NOT NULL DEFAULT 'system',
        handler_ref text,
        handler_stub text,
        cost_hint double precision,
        latency_hint_ms integer,
        version integer NOT NULL DEFAULT 1,
        created_at timestamptz NOT NULL DEFAULT now(),
        updated_at timestamptz NOT NULL DEFAULT now()
      );
      CREATE TABLE "${schema}".tool_edges (
        id text PRIMARY KEY,
        from_node text NOT NULL REFERENCES "${schema}".tool_nodes(id) ON DELETE CASCADE,
        to_node text NOT NULL REFERENCES "${schema}".tool_nodes(id) ON DELETE CASCADE,
        relation text NOT NULL,
        weight double precision NOT NULL DEFAULT 1.0,
        contract jsonb,
        created_at timestamptz NOT NULL DEFAULT now(),
        CONSTRAINT tool_edges_uniq UNIQUE (from_node, to_node, relation)
      );
      CREATE TABLE "${schema}".tool_node_evidence (
        id text PRIMARY KEY,
        node_id text NOT NULL REFERENCES "${schema}".tool_nodes(id) ON DELETE CASCADE,
        kind text NOT NULL,
        payload jsonb NOT NULL,
        success integer NOT NULL DEFAULT 0,
        failure integer NOT NULL DEFAULT 0,
        shadow_user_id text,
        created_at timestamptz NOT NULL DEFAULT now(),
        seam_folded_at timestamptz
      );
      CREATE TABLE "${schema}".tool_gap_signals (
        id text PRIMARY KEY,
        capability_tag text NOT NULL UNIQUE,
        invocation_count integer NOT NULL DEFAULT 0,
        status text NOT NULL DEFAULT 'open',
        last_context jsonb,
        extended_node_id text,
        first_seen_at timestamptz NOT NULL DEFAULT now(),
        last_seen_at timestamptz NOT NULL DEFAULT now()
      );
      CREATE TABLE "${schema}".tool_edge_health (
        id text PRIMARY KEY,
        edge_id text NOT NULL REFERENCES "${schema}".tool_edges(id) ON DELETE CASCADE,
        traversal_count integer NOT NULL DEFAULT 0,
        contract_issue_count integer NOT NULL DEFAULT 0,
        missing_field_count integer NOT NULL DEFAULT 0,
        ema_coverage double precision NOT NULL DEFAULT 1.0,
        ema_health_score double precision NOT NULL DEFAULT 1.0,
        top_missing_fields jsonb NOT NULL DEFAULT '{}'::jsonb,
        top_contract_issues jsonb NOT NULL DEFAULT '{}'::jsonb,
        formula_version integer NOT NULL DEFAULT 1,
        last_sample_at timestamptz,
        last_folded_evidence_id text,
        computed_at timestamptz NOT NULL DEFAULT now()
      );
    `);

    // Module under test plus the query helpers the assertions need.
    const search = await import("../tool-graph-search");
    const { db, toolNodes, toolNodeEvidence, toolGoalCandidates } = await import("@workspace/db");
    const { eq } = await import("drizzle-orm");

    await t.test("bootstrap creates the four #169 tables", async () => {
      await search.bootstrapToolGoalsSchema();
      const tables = await pool.query<{ table_name: string }>(
        `SELECT table_name FROM information_schema.tables WHERE table_schema=$1`,
        [schema],
      );
      const names = tables.rows.map((r) => r.table_name);
      // Loop variable is deliberately not called `t`, which is the test context.
      for (const expected of ["tool_goals", "tool_goal_runs", "tool_goal_candidates", "tool_promotion_audit"]) {
        assert.ok(names.includes(expected), `expected table ${expected}, got ${names.join(",")}`);
      }
    });

    await t.test("synthetic seed creates verified dataset + evaluator", async () => {
      const seed = await search.seedSyntheticToyData();
      assert.ok(seed.datasetNodeId);
      assert.ok(seed.evaluatorNodeId);
      assert.ok(seed.seedNodeIds.length >= 2);
    });

    await t.test("createGoal rejects non-verified references", async () => {
      // A provisional node used as the evaluator must make createGoal reject.
      const tg = await import("../tool-graph");
      const prov = await tg.upsertNode({
        id: "tnode_test_prov",
        name: "test_provisional_eval",
        description: "",
        capabilityTags: [],
        inputKind: "json",
        outputKind: "json",
        status: "provisional",
        ownerProcess: "node",
        specJson: {},
      });
      const ds = await tg.getNodeByName(search.SYNTHETIC_DATASET_NAME);
      assert.ok(ds);
      await assert.rejects(
        () =>
          search.createGoal({
            name: "bad",
            datasetNodeId: ds!.id,
            evaluatorNodeId: prov.id,
          }),
        /must be verified/,
      );
    });

    // Shared with the search subtest below; stays empty if goal creation fails.
    let goalId = "";
    await t.test("createGoal succeeds with verified nodes", async () => {
      const tg = await import("../tool-graph");
      const ds = await tg.getNodeByName(search.SYNTHETIC_DATASET_NAME);
      const ev = await tg.getNodeByName(search.SYNTHETIC_EVALUATOR_NAME);
      // Fail with a readable message instead of a TypeError on `.id`.
      assert.ok(ds, "synthetic dataset node should exist");
      assert.ok(ev, "synthetic evaluator node should exist");
      const g = await search.createGoal({
        name: "smoke goal",
        description: "Mode B smoke test",
        datasetNodeId: ds!.id,
        evaluatorNodeId: ev!.id,
        budget: { maxIterations: 4, maxCandidates: 24, wallClockMs: 15_000 },
      });
      goalId = g.id;
      assert.equal(g.status, "active");
    });

    await t.test("runGoalSearch evaluates candidates across all primitives", async () => {
      assert.ok(goalId, "goal must have been created by the previous subtest");
      const run = await search.runGoalSearch(goalId, { actor: "smoke_test" });
      assert.equal(run.status, "completed");
      assert.ok(run.candidatesEvaluated >= 2, `evaluated ${run.candidatesEvaluated}`);
      assert.ok(run.bestCandidateId);
      const out = await search.getRun(run.id);
      assert.ok(out);
      const primitives = new Set(out!.candidates.map((c) => c.primitive));
      assert.ok(primitives.has("seed"), "seed candidate present");

      // Only requires that at least one non-seed primitive was exercised.
      const mutators = ["expand", "compose", "replace", "tune"].filter((p) => primitives.has(p));
      assert.ok(mutators.length >= 1, `expected ≥1 of expand/compose/replace/tune, got ${[...primitives].join(",")}`);
    });

    await t.test("autoPromoteIfReady gates on evidence + records audit", async () => {
      // NOTE(review): the provisional set can include "tnode_test_prov" from
      // the createGoal-rejects subtest, and the query has no ORDER BY, so
      // `target` is not guaranteed to be a node produced by the search —
      // confirm that is acceptable.
      const provs = await db.select().from(toolNodes).where(eq(toolNodes.status, "provisional"));
      assert.ok(provs.length >= 1, "search should have created at least one provisional");
      const target = provs[0]!;

      // First attempt is expected to be refused with an evidence-count reason.
      const r0 = await search.autoPromoteIfReady(target.id);
      assert.equal(r0.promoted, false);
      assert.match(r0.reason, /evidence count/);

      // One successful evidence row, with AUTO_PROMOTE_MIN_EVIDENCE set to "1".
      await db.insert(toolNodeEvidence).values({
        id: "tev_smoke_1",
        nodeId: target.id,
        kind: "manual_check",
        payload: { ok: true },
        success: 1,
        failure: 0,
      });
      const r1 = await search.autoPromoteIfReady(target.id, { actor: "smoke" });
      assert.equal(r1.promoted, true, r1.reason);
      const after = await db.select().from(toolNodes).where(eq(toolNodes.id, target.id));
      assert.equal(after[0]!.status, "verified");
      const audit = await search.listPromotionAudit(target.id);
      assert.ok(audit.find((a) => a.action === "auto_promote"));
    });

    await t.test("rollbackPromotion restores prior status", async () => {
      // Roll back the node recorded by the auto_promote audit row.
      const audits = await search.listPromotionAudit();
      const target = audits.find((a) => a.action === "auto_promote");
      assert.ok(target, "expected an auto_promote audit row");
      const r = await search.rollbackPromotion(target!.nodeId, "smoke");
      assert.equal(r.ok, true, r.reason);
      const node = (await db.select().from(toolNodes).where(eq(toolNodes.id, target!.nodeId)))[0]!;
      assert.equal(node.status, "provisional");

      // A second rollback of the same node must report failure.
      const r2 = await search.rollbackPromotion(target!.nodeId, "smoke");
      assert.equal(r2.ok, false);
    });

    await t.test("diffSubgraphAgainstVerified surfaces additions", async () => {
      const cands = await db.select().from(toolGoalCandidates);
      // Pick any candidate whose stored subgraph carries a nodeIds array.
      const c = cands.find((x) => Array.isArray((x.subgraph as { nodeIds?: string[] }).nodeIds));
      assert.ok(c);
      const diff = await search.diffSubgraphAgainstVerified(c!.id);
      assert.ok(diff);
      // Shape-only checks: the diff must expose these three arrays.
      assert.ok(Array.isArray(diff!.added.nodes));
      assert.ok(Array.isArray(diff!.added.edges));
      assert.ok(Array.isArray(diff!.status_changes));
    });

    await t.test("recordHighConfidenceGap (Mode A boost) spawns provisional immediately", async () => {
      const before = await db.select().from(toolNodes).where(eq(toolNodes.status, "provisional"));
      const r = await search.recordHighConfidenceGap(
        "synth:novel_capability_xyz",
        { source: "smoke" },
        0.95,
      );
      assert.ok(r.provisionalNodeId, "high-confidence gap should produce a provisional id");
      const after = await db.select().from(toolNodes).where(eq(toolNodes.status, "provisional"));
      assert.ok(after.length > before.length, "provisional pool should grow");
    });

    await t.test("low-confidence gap does NOT short-circuit promotion", async () => {
      const r = await search.recordHighConfidenceGap(
        "synth:weakly_signalled_capability",
        { source: "smoke" },
        0.4,
      );
      assert.equal(r.provisionalNodeId, null);
    });
  },
);
|
|