/**
 * tool-graph-search — Mode B (#169) self-evolving tool graph.
 *
 * Domain-agnostic primitives + search loop that propose candidate
 * subgraphs against a Goal (dataset_node + evaluator_node + budget).
 * Wins are persisted as provisional nodes/edges; the auto-promotion
 * gate flips a sufficiently-supported provisional → verified, and any
 * promotion is reversible via `rollbackPromotion`.
 *
 * NO domain-specific (chemistry / DrugCLIP / EF1%) logic lives here:
 * the four primitives only manipulate the structural graph
 * (node ids, capability tags, edges, contracts), and the evaluator
 * is plugged in via an in-process registry. Seed nodes/datasets/
 * evaluators are created via `seedSyntheticToyData()` (gated on
 * `TOOL_GRAPH_SYNTHETIC_SEED=1` at boot, or called directly from
 * tests).
 *
 * Public surface:
 *   - bootstrapToolGoalsSchema()    CREATE TABLE IF NOT EXISTS for
 *                                   the four #169 tables.
 *   - createGoal / listGoals / getGoal / archiveGoal
 *   - runGoalSearch(goalId, opts)   the search loop.
 *   - listRuns / getRun
 *   - diffSubgraphAgainstVerified(candidateId)
 *   - autoPromoteIfReady(nodeId)    gate that flips provisional→verified.
 *   - rollbackPromotion(nodeId)     undo the last auto-promotion.
 *   - registerEvaluator(name, fn)   install an in-process evaluator.
 *   - seedSyntheticToyData()        toy dataset+evaluator+nodes for
 *                                   the smoke test.
 *   - recordHighConfidenceGap(...)  Mode A boost: a single high-
 *                                   confidence signal can spawn a
 *                                   provisional immediately.
 */
import { and, asc, desc, eq, inArray } from "drizzle-orm";
import {
  db,
  pool,
  toolNodes,
  toolEdges,
  toolNodeEvidence,
  toolGoals,
  toolGoalRuns,
  toolGoalCandidates,
  toolPromotionAudit,
  type ToolNodeRow,
  type ToolGoalRow,
  type ToolGoalRunRow,
  type ToolGoalCandidateRow,
  type InsertToolGoalRow,
} from "@workspace/db";
import { newId } from "./ids";
import { logger } from "./logger";
import {
  upsertNode,
  upsertEdge,
  validateContract,
  getNode,
  listNodes,
  recordPlannerGap,
  autoExtendIfNeeded,
  approveNode,
  type ContractSpec,
  type IOSchema,
  type ToolNodeStatus,
} from "./tool-graph";

// ----------------------------- env knobs ---------------------------------

const ENV = process.env;

/** When true, `seedToolGraphOnce` will also seed the toy synthetic
 *  dataset+evaluator+nodes used by Mode B smoke tests. Off by default
 *  so production graphs aren't polluted. */
export function syntheticSeedEnabled(): boolean {
  return ENV.TOOL_GRAPH_SYNTHETIC_SEED === "1";
}

/** Mode A boost: a planner gap whose confidence (0..1) exceeds this
 *  threshold spawns a provisional node *immediately* without needing
 *  the usual `GAP_AUTO_EXTEND_THRESHOLD` invocations. Default 0.85. */
export function highConfidenceGapThreshold(): number {
  const v = Number(ENV.GAP_HIGH_CONFIDENCE_THRESHOLD ?? "0.85");
  return Number.isFinite(v) ? v : 0.85;
}

/** Auto-promotion gate: minimum EMA edge-health score across the
 *  provisional node's incident edges. Default 0.85. */
export function autoPromoteMinEdgeHealth(): number {
  const v = Number(ENV.AUTO_PROMOTE_MIN_EDGE_HEALTH ?? "0.85");
  return Number.isFinite(v) ? v : 0.85;
}

/** Auto-promotion gate: minimum success ratio across `tool_node_evidence`
 *  for the provisional node. Default 0.7. */
export function autoPromoteMinSuccessRatio(): number {
  const v = Number(ENV.AUTO_PROMOTE_MIN_SUCCESS_RATIO ?? "0.7");
  return Number.isFinite(v) ? v : 0.7;
}

/** Auto-promotion gate: minimum number of evidence rows considered. */
export function autoPromoteMinEvidence(): number {
  const v = Number(ENV.AUTO_PROMOTE_MIN_EVIDENCE ?? "3");
  return Number.isFinite(v) ? v : 3;
}

// ----------------------------- schema bootstrap --------------------------

let _bootstrapped = false;

/**
 * Idempotent CREATE TABLE IF NOT EXISTS for the four #169 tables.
 * Mirrors the drizzle definitions in lib/db/src/schema/toolGraph.ts.
 * Called at boot from `seedToolGraphOnce` so the new schema is
 * available without a separate migration step in environments where
 * `drizzle-kit push` hasn't been run yet.
 */
export async function bootstrapToolGoalsSchema(): Promise<void> {
  if (_bootstrapped) return;
  await pool.query(`
    CREATE TABLE IF NOT EXISTS tool_goals (
      id text PRIMARY KEY,
      name text NOT NULL,
      description text NOT NULL DEFAULT '',
      dataset_node_id text NOT NULL,
      evaluator_node_id text NOT NULL,
      budget jsonb NOT NULL DEFAULT '{}'::jsonb,
      constraints jsonb NOT NULL DEFAULT '{}'::jsonb,
      status text NOT NULL DEFAULT 'active',
      created_by text NOT NULL DEFAULT 'system',
      created_at timestamptz NOT NULL DEFAULT now(),
      updated_at timestamptz NOT NULL DEFAULT now()
    );
    CREATE INDEX IF NOT EXISTS tool_goals_status_idx ON tool_goals(status);

    CREATE TABLE IF NOT EXISTS tool_goal_runs (
      id text PRIMARY KEY,
      goal_id text NOT NULL REFERENCES tool_goals(id) ON DELETE CASCADE,
      status text NOT NULL DEFAULT 'running',
      goal_snapshot jsonb NOT NULL DEFAULT '{}'::jsonb,
      best_metric double precision,
      best_candidate_id text,
      iterations integer NOT NULL DEFAULT 0,
      candidates_evaluated integer NOT NULL DEFAULT 0,
      error text,
      started_at timestamptz NOT NULL DEFAULT now(),
      finished_at timestamptz
    );
    CREATE INDEX IF NOT EXISTS tool_goal_runs_goal_idx ON tool_goal_runs(goal_id);
    CREATE INDEX IF NOT EXISTS tool_goal_runs_status_idx ON tool_goal_runs(status);

    CREATE TABLE IF NOT EXISTS tool_goal_candidates (
      id text PRIMARY KEY,
      run_id text NOT NULL REFERENCES tool_goal_runs(id) ON DELETE CASCADE,
      generation integer NOT NULL DEFAULT 0,
      primitive text NOT NULL,
      parent_candidate_id text,
      subgraph jsonb NOT NULL,
      contract_ok integer NOT NULL DEFAULT 1,
      contract_issues jsonb NOT NULL DEFAULT '[]'::jsonb,
      metric double precision,
      evaluator_payload jsonb,
      created_at timestamptz NOT NULL DEFAULT now()
    );
    CREATE INDEX IF NOT EXISTS tool_goal_candidates_run_idx ON tool_goal_candidates(run_id);
    CREATE INDEX IF NOT EXISTS tool_goal_candidates_metric_idx ON tool_goal_candidates(metric);

    CREATE TABLE IF NOT EXISTS tool_promotion_audit (
      id text PRIMARY KEY,
      node_id text NOT NULL,
      action text NOT NULL,
      from_status text NOT NULL,
      to_status text NOT NULL,
      actor text NOT NULL DEFAULT 'system',
      evidence_snapshot jsonb NOT NULL DEFAULT '{}'::jsonb,
      run_id text,
      candidate_id text,
      created_at timestamptz NOT NULL DEFAULT now()
    );
    CREATE INDEX IF NOT EXISTS tool_promotion_audit_node_idx ON tool_promotion_audit(node_id);
    CREATE INDEX IF NOT EXISTS tool_promotion_audit_action_idx ON tool_promotion_audit(action);
  `);
  _bootstrapped = true;
}

// ----------------------------- types -------------------------------------

export interface SubgraphRef {
  /** Live `tool_nodes.id` values that participate in this candidate. */
  nodeIds: string[];
  /** Edge tuples (use names rather than ids so cross-run inspection is easier). */
  edges: Array<{ from: string; to: string; relation: string }>;
}

export interface GoalSpec {
  id: string;
  name: string;
  description: string;
  datasetNodeId: string;
  evaluatorNodeId: string;
  budget: { wallClockMs?: number; maxIterations?: number; maxCandidates?: number };
  constraints: Record<string, unknown>;
}

export type Primitive = "expand" | "compose" | "replace" | "tune" | "seed";

export interface EvaluatorContext {
  goal: GoalSpec;
  datasetSpec: Record<string, unknown>;
  candidate: SubgraphRef;
  /** Materialised nodes, keyed by id, for evaluator inspection. */
  nodes: Map<string, ToolNodeRow>;
}

export interface EvaluatorResult {
  metric: number;
  payload?: Record<string, unknown>;
}

export type EvaluatorFn = (ctx: EvaluatorContext) => Promise<EvaluatorResult> | EvaluatorResult;

const _evaluators = new Map<string, EvaluatorFn>();

/** Register an in-process evaluator. The Goal references the
 *  evaluator by *node name* (the evaluator node's `name` column),
 *  so naming must agree with what `seedSyntheticToyData` (or any
 *  caller) inserts into `tool_nodes`. */
export function registerEvaluator(name: string, fn: EvaluatorFn): void {
  _evaluators.set(name, fn);
}

export function getEvaluator(name: string): EvaluatorFn | undefined {
  return _evaluators.get(name);
}

// ----------------------------- evaluator harness -------------------------

async function loadEvaluatorContext(
  goal: GoalSpec,
  candidate: SubgraphRef,
): Promise<EvaluatorContext> {
  const datasetRow = await getNodeRow(goal.datasetNodeId);
  if (!datasetRow) throw new Error(`dataset node ${goal.datasetNodeId} missing`);
  const ids = candidate.nodeIds.length
    ? await db.select().from(toolNodes).where(inArray(toolNodes.id, candidate.nodeIds))
    : [];
  const nodes = new Map<string, ToolNodeRow>();
  for (const n of ids) nodes.set(n.id, n);
  return {
    goal,
    datasetSpec: (datasetRow.specJson || {}) as Record<string, unknown>,
    candidate,
    nodes,
  };
}

async function evaluate(goal: GoalSpec, candidate: SubgraphRef): Promise<EvaluatorResult> {
  const evalRow = await getNodeRow(goal.evaluatorNodeId);
  if (!evalRow) throw new Error(`evaluator node ${goal.evaluatorNodeId} missing`);
  const fn = _evaluators.get(evalRow.name);
  if (!fn) {
    throw new Error(
      `evaluator '${evalRow.name}' is not registered in-process; call registerEvaluator() at boot`,
    );
  }
  const ctx = await loadEvaluatorContext(goal, candidate);
  const r = await fn(ctx);
  if (!Number.isFinite(r.metric)) {
    throw new Error(`evaluator '${evalRow.name}' returned non-finite metric`);
  }
  return r;
}

async function getNodeRow(id: string): Promise<ToolNodeRow | null> {
  const rows = await db.select().from(toolNodes).where(eq(toolNodes.id, id)).limit(1);
  return rows[0] ?? null;
}

// ----------------------------- contract validator ------------------------

/**
 * Aggregate contract validation across every edge in a candidate.
 * Returns `{ok, issues}`. Edges referenced by the candidate must
 * exist in `tool_edges` *with* a contract — uncontracted edges count
 * as failures (matches the #156 invariant).
 */
async function validateCandidateContracts(
  candidate: SubgraphRef,
): Promise<{ ok: boolean; issues: string[] }> {
  const issues: string[] = [];
  if (candidate.edges.length === 0) return { ok: true, issues };
  for (const e of candidate.edges) {
    const fromRow = (await db.select().from(toolNodes).where(eq(toolNodes.name, e.from)).limit(1))[0];
    const toRow = (await db.select().from(toolNodes).where(eq(toolNodes.name, e.to)).limit(1))[0];
    if (!fromRow || !toRow) {
      issues.push(`unknown endpoints ${e.from} -> ${e.to}`);
      continue;
    }
    const er = (
      await db
        .select()
        .from(toolEdges)
        .where(
          and(
            eq(toolEdges.fromNode, fromRow.id),
            eq(toolEdges.toNode, toRow.id),
            eq(toolEdges.relation, e.relation),
          ),
        )
        .limit(1)
    )[0];
    if (!er) {
      issues.push(`edge ${e.from} -[${e.relation}]-> ${e.to} not persisted`);
      continue;
    }
    if (!er.contract) {
      issues.push(`edge ${e.from} -[${e.relation}]-> ${e.to} has no contract`);
      continue;
    }
    const v = validateContract(er.contract as ContractSpec);
    if (!v.ok) issues.push(...v.issues.map((i) => `${e.from}->${e.to}: ${i}`));
  }
  return { ok: issues.length === 0, issues };
}

// ----------------------------- the four primitives -----------------------

interface PrimitiveCtx {
  goal: GoalSpec;
  parent: SubgraphRef;
  /** Pool of all currently-known node rows (verified + provisional)
   *  the search may draw from. Excludes deprecated/rejected. */
  pool: ToolNodeRow[];
  /** Run id used to namespace any new provisional nodes/edges. */
  runId: string;
}

interface PrimitiveOut {
  primitive: Primitive;
  subgraph: SubgraphRef;
}

/**
 * `expand` — append a new provisional node tagged with a capability
 * not yet covered by the parent. Adds an `alternative_to` edge from
 * the parent's last node to the new node so the candidate stays
 * connected.
 */
async function applyExpand(ctx: PrimitiveCtx, targetTag: string): Promise<PrimitiveOut | null> {
  const nodes = await rowsForNames(ctx.parent.nodeIds);
  const coveredTags = new Set<string>();
  for (const n of nodes) for (const t of (n.capabilityTags as string[]) || []) coveredTags.add(t);
  if (coveredTags.has(targetTag)) return null;
  const provName = `expand_${targetTag.replace(/[^a-z0-9_]/gi, "_")}_${ctx.runId.slice(-6)}`;
  const exists = (await db.select().from(toolNodes).where(eq(toolNodes.name, provName)).limit(1))[0];
  const node = exists
    ? exists
    : await upsertNode({
        id: newId("tnode"),
        name: provName,
        description: `Mode B expand primitive: covers tag '${targetTag}'.`,
        capabilityTags: [targetTag],
        inputKind: "json",
        outputKind: "json",
        status: "provisional",
        ownerProcess: "node",
        specJson: {
          inputSchema: { type: "object", properties: { input: { type: "string" } }, required: ["input"] },
          outputSchema: { type: "object", properties: { output: { type: "string" } }, required: ["output"] },
        } as Record<string, unknown>,
        createdBy: "mode_b_search",
      }).then(toRow);
  const newSubgraph: SubgraphRef = {
    nodeIds: [...ctx.parent.nodeIds, node.id],
    edges: [...ctx.parent.edges],
  };
  if (nodes.length > 0) {
    const last = nodes[nodes.length - 1];
    await ensureProvisionalEdge(last, node, "alternative_to");
    newSubgraph.edges.push({ from: last.name, to: node.name, relation: "alternative_to" });
  }
  return { primitive: "expand", subgraph: newSubgraph };
}

/**
 * `compose` — pick two adjacent nodes A→B in the parent and create a
 * composed_AB provisional node carrying the union of their capability
 * tags. The original A and B remain in the subgraph (composes_into
 * edges link them to the new composite).
 */
async function applyCompose(ctx: PrimitiveCtx): Promise<PrimitiveOut | null> {
  if (ctx.parent.nodeIds.length < 2) return null;
  const nodes = await rowsForNames(ctx.parent.nodeIds);
  if (nodes.length < 2) return null;
  const a = nodes[0]!;
  const b = nodes[1]!;
  const tags = Array.from(
    new Set([
      ...((a.capabilityTags as string[]) || []),
      ...((b.capabilityTags as string[]) || []),
    ]),
  );
  const provName = `compose_${a.name}_${b.name}_${ctx.runId.slice(-6)}`.slice(0, 96);
  const exists = (await db.select().from(toolNodes).where(eq(toolNodes.name, provName)).limit(1))[0];
  const node = exists
    ? exists
    : await upsertNode({
        id: newId("tnode"),
        name: provName,
        description: `Mode B compose primitive: ${a.name} ⊕ ${b.name}`,
        capabilityTags: tags,
        inputKind: "json",
        outputKind: "json",
        status: "provisional",
        ownerProcess: "node",
        specJson: {
          inputSchema: { type: "object", properties: { input: { type: "string" } }, required: ["input"] },
          outputSchema: { type: "object", properties: { output: { type: "string" } }, required: ["output"] },
        } as Record<string, unknown>,
        createdBy: "mode_b_search",
      }).then(toRow);
  await ensureProvisionalEdge(a, node, "composes_into");
  await ensureProvisionalEdge(b, node, "composes_into");
  const newSubgraph: SubgraphRef = {
    nodeIds: [...ctx.parent.nodeIds, node.id],
    edges: [
      ...ctx.parent.edges,
      { from: a.name, to: node.name, relation: "composes_into" },
      { from: b.name, to: node.name, relation: "composes_into" },
    ],
  };
  return { primitive: "compose", subgraph: newSubgraph };
}

/**
 * `replace` — swap a node X in the parent with a different node Y
 * that shares at least one capability tag, drawn from the search
 * pool (preferring verified nodes). Edges incident to X are rewired
 * to Y.
 */
async function applyReplace(ctx: PrimitiveCtx): Promise<PrimitiveOut | null> {
  if (ctx.parent.nodeIds.length === 0) return null;
  const nodes = await rowsForNames(ctx.parent.nodeIds);
  for (const x of nodes) {
    const xTags = new Set<string>((x.capabilityTags as string[]) || []);
    const candidates = ctx.pool
      .filter(
        (p) =>
          p.id !== x.id &&
          !ctx.parent.nodeIds.includes(p.id) &&
          ((p.capabilityTags as string[]) || []).some((t) => xTags.has(t)),
      )
      .sort(
        (a, b) => Number(b.status === "verified") - Number(a.status === "verified"),
      );
    const y = candidates[0];
    if (!y) continue;
    const newIds = ctx.parent.nodeIds.map((nid) => (nid === x.id ? y.id : nid));
    const newEdges = ctx.parent.edges.map((e) => ({
      from: e.from === x.name ? y.name : e.from,
      to: e.to === x.name ? y.name : e.to,
      relation: e.relation,
    }));
    // Rewire any persisted edges from X to Y so later contract validation passes.
    for (const e of newEdges) {
      const fromRow = e.from === y.name ? y : nodes.find((n) => n.name === e.from);
      const toRow = e.to === y.name ? y : nodes.find((n) => n.name === e.to);
      if (fromRow && toRow) await ensureProvisionalEdge(fromRow, toRow, e.relation);
    }
    return {
      primitive: "replace",
      subgraph: { nodeIds: newIds, edges: newEdges },
    };
  }
  return null;
}

/**
 * `tune` — adjust a structural weight without changing the node set.
 * Currently bumps the weight of the first edge in the candidate.
 * Returns a structurally-identical candidate (so the evaluator can
 * decide whether the bump is worthwhile via its own scoring rules).
 */
async function applyTune(ctx: PrimitiveCtx): Promise<PrimitiveOut | null> {
  if (ctx.parent.edges.length === 0) return null;
  const e = ctx.parent.edges[0]!;
  const fromRow = (await db.select().from(toolNodes).where(eq(toolNodes.name, e.from)).limit(1))[0];
  const toRow = (await db.select().from(toolNodes).where(eq(toolNodes.name, e.to)).limit(1))[0];
  if (!fromRow || !toRow) return null;
  await db
    .update(toolEdges)
    .set({ weight: 1.0 })
    .where(
      and(
        eq(toolEdges.fromNode, fromRow.id),
        eq(toolEdges.toNode, toRow.id),
        eq(toolEdges.relation, e.relation),
      ),
    );
  return {
    primitive: "tune",
    subgraph: { nodeIds: [...ctx.parent.nodeIds], edges: [...ctx.parent.edges] },
  };
}

async function rowsForNames(ids: string[]): Promise<ToolNodeRow[]> {
  if (ids.length === 0) return [];
  return db.select().from(toolNodes).where(inArray(toolNodes.id, ids));
}

function toRow(n: { id: string; name: string }): ToolNodeRow {
  // upsertNode returns ResolvedNode; we need the row form for downstream
  // helpers that expect the DB shape. Re-fetch.
  return { id: n.id, name: n.name } as ToolNodeRow;
}

async function ensureProvisionalEdge(
  from: ToolNodeRow,
  to: ToolNodeRow,
  relation: string,
): Promise<void> {
  const existing = (
    await db
      .select()
      .from(toolEdges)
      .where(
        and(
          eq(toolEdges.fromNode, from.id),
          eq(toolEdges.toNode, to.id),
          eq(toolEdges.relation, relation),
        ),
      )
      .limit(1)
  )[0];
  if (existing && existing.contract) return;
  // Refetch full rows to pull specJson for contract derivation.
  const fromFull = (await db.select().from(toolNodes).where(eq(toolNodes.id, from.id)).limit(1))[0]!;
  const toFull = (await db.select().from(toolNodes).where(eq(toolNodes.id, to.id)).limit(1))[0]!;
  const fSpec = (fromFull.specJson || {}) as { outputSchema?: IOSchema };
  const tSpec = (toFull.specJson || {}) as { inputSchema?: IOSchema };
  const contract: ContractSpec = {
    produces: fSpec.outputSchema || { type: "object" },
    consumes: tSpec.inputSchema || { type: "object" },
    mappingHints: { advisory: true, source: "mode_b_search" },
  };
  await upsertEdge({
    id: newId("tedge"),
    fromNode: from.id,
    toNode: to.id,
    relation,
    weight: 0.5,
    contract,
  });
}

// ----------------------------- search loop -------------------------------

export interface RunSearchOpts {
  /** Override goal.budget at run start. */
  budget?: GoalSpec["budget"];
  /** Sync evaluator hook for tests; overrides registry lookup. */
  evaluatorOverride?: EvaluatorFn;
  actor?: string;
}

interface CandidateRow {
  id: string;
  primitive: Primitive;
  parentId: string | null;
  subgraph: SubgraphRef;
  metric: number | null;
  contractOk: boolean;
  contractIssues: string[];
}

/**
 * Run the search loop against a goal until budget exhaustion. Returns
 * the persisted ToolGoalRunRow with the best candidate id populated.
 */
export async function runGoalSearch(
  goalId: string,
  opts: RunSearchOpts = {},
): Promise<ToolGoalRunRow> {
  await bootstrapToolGoalsSchema();
  const goalRow = (await db.select().from(toolGoals).where(eq(toolGoals.id, goalId)).limit(1))[0];
  if (!goalRow) throw new Error(`goal ${goalId} not found`);
  const goal: GoalSpec = goalRowToSpec(goalRow);
  const budget = { ...goal.budget, ...(opts.budget || {}) };
  const maxIterations = budget.maxIterations ?? 8;
  const maxCandidates = budget.maxCandidates ?? 32;
  const wallClockMs = budget.wallClockMs ?? 30_000;
  const deadline = Date.now() + wallClockMs;

  const runId = newId("trun");
  await db.insert(toolGoalRuns).values({
    id: runId,
    goalId,
    status: "running",
    goalSnapshot: goalRow as unknown as Record<string, unknown>,
  });

  // Seed candidate is just [datasetNode] — primitives grow from there.
  const datasetRow = await getNodeRow(goal.datasetNodeId);
  if (!datasetRow) {
    await failRun(runId, "dataset node missing");
    throw new Error("dataset node missing");
  }
  const seed: SubgraphRef = { nodeIds: [datasetRow.id], edges: [] };
  const seedMetric = await safeEval(goal, seed, opts.evaluatorOverride);
  const seedRow = await persistCandidate(runId, 0, "seed", null, seed, seedMetric);
  const frontier: CandidateRow[] = [seedRow];
  let totalEvaluated = 1;

  // Pool of nodes available to primitives — avoid deprecated/rejected.
  const allNodes = await db.select().from(toolNodes);
  const pool = allNodes.filter(
    (n) => n.status !== "deprecated" && n.status !== "rejected",
  );
  const targetTags = extractTargetTags(goal, datasetRow);

  for (let gen = 1; gen <= maxIterations; gen++) {
    if (Date.now() > deadline) break;
    if (totalEvaluated >= maxCandidates) break;
    // Sort frontier by metric desc, take top 2 as parents.
    frontier.sort((a, b) => (b.metric ?? -Infinity) - (a.metric ?? -Infinity));
    const parents = frontier.slice(0, 2);
    const newCandidates: CandidateRow[] = [];
    for (const parent of parents) {
      const ctx: PrimitiveCtx = { goal, parent: parent.subgraph, pool, runId };
      const tries: Array<Promise<PrimitiveOut | null>> = [
        applyCompose(ctx),
        applyReplace(ctx),
        applyTune(ctx),
        ...targetTags.map((t) => applyExpand(ctx, t)),
      ];
      const outs = (await Promise.all(tries)).filter(
        (x): x is PrimitiveOut => x !== null,
      );
      for (const out of outs) {
        if (totalEvaluated >= maxCandidates) break;
        const m = await safeEval(goal, out.subgraph, opts.evaluatorOverride);
        const row = await persistCandidate(runId, gen, out.primitive, parent.id, out.subgraph, m);
        newCandidates.push(row);
        totalEvaluated += 1;
      }
    }
    if (newCandidates.length === 0) break;
    frontier.push(...newCandidates);
  }

  frontier.sort((a, b) => (b.metric ?? -Infinity) - (a.metric ?? -Infinity));
  const best = frontier[0]!;
  await db
    .update(toolGoalRuns)
    .set({
      status: "completed",
      bestMetric: best.metric,
      bestCandidateId: best.id,
      iterations: maxIterations,
      candidatesEvaluated: totalEvaluated,
      finishedAt: new Date(),
    })
    .where(eq(toolGoalRuns.id, runId));

  // Auto-promotion sweep on the winning subgraph's provisional nodes.
  const bestRows = await rowsForNames(best.subgraph.nodeIds);
  for (const n of bestRows) {
    if (n.status !== "provisional") continue;
    try {
      await autoPromoteIfReady(n.id, {
        actor: opts.actor || "mode_b_search",
        runId,
        candidateId: best.id,
      });
    } catch (err) {
      logger.debug({ err, nodeId: n.id }, "auto-promotion check failed");
    }
  }

  return (await db.select().from(toolGoalRuns).where(eq(toolGoalRuns.id, runId)).limit(1))[0]!;
}

function goalRowToSpec(row: ToolGoalRow): GoalSpec {
  return {
    id: row.id,
    name: row.name,
    description: row.description,
    datasetNodeId: row.datasetNodeId,
    evaluatorNodeId: row.evaluatorNodeId,
    budget: (row.budget as GoalSpec["budget"]) || {},
    constraints: (row.constraints as Record<string, unknown>) || {},
  };
}

function extractTargetTags(goal: GoalSpec, datasetRow: ToolNodeRow): string[] {
  // The dataset node may declare `targetTags` in its spec; otherwise
  // fall back to the goal's `constraints.targetTags`. Domain-agnostic:
  // primitives just read the list of strings.
  const spec = (datasetRow.specJson || {}) as { targetTags?: unknown };
  if (Array.isArray(spec.targetTags)) {
    return spec.targetTags.filter((t): t is string => typeof t === "string");
  }
  const c = goal.constraints as { targetTags?: unknown };
  if (Array.isArray(c.targetTags)) {
    return c.targetTags.filter((t): t is string => typeof t === "string");
  }
  return [];
}

async function safeEval(
  goal: GoalSpec,
  candidate: SubgraphRef,
  override?: EvaluatorFn,
): Promise<number | null> {
  try {
    if (override) {
      const ctx = await loadEvaluatorContext(goal, candidate);
      const r = await override(ctx);
      return Number.isFinite(r.metric) ? r.metric : null;
    }
    const r = await evaluate(goal, candidate);
    return r.metric;
  } catch (err) {
    logger.debug({ err }, "evaluator failed for candidate");
    return null;
  }
}

async function persistCandidate(
  runId: string,
  generation: number,
  primitive: Primitive,
  parentCandidateId: string | null,
  subgraph: SubgraphRef,
  metric: number | null,
): Promise<CandidateRow> {
  const v = await validateCandidateContracts(subgraph);
  const id = newId("tcand");
  await db.insert(toolGoalCandidates).values({
    id,
    runId,
    generation,
    primitive,
    parentCandidateId,
    subgraph: subgraph as unknown as Record<string, unknown>,
    contractOk: v.ok ? 1 : 0,
    contractIssues: v.issues as unknown as Record<string, unknown>,
    metric: metric ?? null,
    evaluatorPayload: null,
  });
  return {
    id,
    primitive,
    parentId: parentCandidateId,
    subgraph,
    metric,
    contractOk: v.ok,
    contractIssues: v.issues,
  };
}

async function failRun(runId: string, error: string): Promise<void> {
  await db
    .update(toolGoalRuns)
    .set({ status: "failed", error, finishedAt: new Date() })
    .where(eq(toolGoalRuns.id, runId));
}

// ----------------------------- Goal CRUD ---------------------------------

export interface CreateGoalInput {
  name: string;
  description?: string;
  datasetNodeId: string;
  evaluatorNodeId: string;
  budget?: GoalSpec["budget"];
  constraints?: Record<string, unknown>;
  createdBy?: string;
}

export async function createGoal(input: CreateGoalInput): Promise<ToolGoalRow> {
  await bootstrapToolGoalsSchema();
  const ds = await getNodeRow(input.datasetNodeId);
  if (!ds) throw new Error("dataset node not found");
  const ev = await getNodeRow(input.evaluatorNodeId);
  if (!ev) throw new Error("evaluator node not found");
  if (ds.status !== "verified") throw new Error("dataset node must be verified");
  if (ev.status !== "verified") throw new Error("evaluator node must be verified");
  const id = newId("tgoal");
  const row: InsertToolGoalRow = {
    id,
    name: input.name,
    description: input.description ?? "",
    datasetNodeId: input.datasetNodeId,
    evaluatorNodeId: input.evaluatorNodeId,
    budget: (input.budget ?? {}) as Record<string, unknown>,
    constraints: input.constraints ?? {},
    status: "active",
    createdBy: input.createdBy ?? "system",
  };
  await db.insert(toolGoals).values(row);
  return (await db.select().from(toolGoals).where(eq(toolGoals.id, id)).limit(1))[0]!;
}

export async function listGoals(): Promise<ToolGoalRow[]> {
  await bootstrapToolGoalsSchema();
  return db.select().from(toolGoals).orderBy(desc(toolGoals.createdAt));
}

export async function getGoal(id: string): Promise<ToolGoalRow | null> {
  await bootstrapToolGoalsSchema();
  return (await db.select().from(toolGoals).where(eq(toolGoals.id, id)).limit(1))[0] ?? null;
}

export async function archiveGoal(id: string): Promise<boolean> {
  await bootstrapToolGoalsSchema();
  const r = await db.update(toolGoals).set({ status: "archived" }).where(eq(toolGoals.id, id)).returning();
  return r.length > 0;
}

export async function listRuns(goalId: string): Promise<ToolGoalRunRow[]> {
  await bootstrapToolGoalsSchema();
  return db.select().from(toolGoalRuns).where(eq(toolGoalRuns.goalId, goalId)).orderBy(desc(toolGoalRuns.startedAt));
}

export async function getRun(
  runId: string,
): Promise<{ run: ToolGoalRunRow; candidates: ToolGoalCandidateRow[] } | null> {
  await bootstrapToolGoalsSchema();
  const run = (await db.select().from(toolGoalRuns).where(eq(toolGoalRuns.id, runId)).limit(1))[0];
  if (!run) return null;
  const candidates = await db
    .select()
    .from(toolGoalCandidates)
    .where(eq(toolGoalCandidates.runId, runId))
    .orderBy(desc(toolGoalCandidates.metric), asc(toolGoalCandidates.createdAt));
  return { run, candidates };
}

// ----------------------------- diff vs verified --------------------------

export interface SubgraphDiff {
  added: { nodes: string[]; edges: Array<{ from: string; to: string; relation: string }> };
  removed: { nodes: string[]; edges: Array<{ from: string; to: string; relation: string }> };
  status_changes: Array<{ nodeId: string; from: ToolNodeStatus; to: "would_promote" }>;
}

/** Diff a candidate subgraph against the currently-verified slice of
 *  the live graph. Used by the admin UI to render pre-promotion review. */
export async function diffSubgraphAgainstVerified(candidateId: string): Promise<SubgraphDiff | null> {
  await bootstrapToolGoalsSchema();
  const cand = (
    await db.select().from(toolGoalCandidates).where(eq(toolGoalCandidates.id, candidateId)).limit(1)
  )[0];
  if (!cand) return null;
  const sg = cand.subgraph as unknown as SubgraphRef;
  const verifiedRows = await db.select().from(toolNodes).where(eq(toolNodes.status, "verified"));
  const verifiedIds = new Set(verifiedRows.map((r) => r.id));
  const candRows = sg.nodeIds.length
    ? await db.select().from(toolNodes).where(inArray(toolNodes.id, sg.nodeIds))
    : [];
  const candIds = new Set(candRows.map((r) => r.id));
  const addedNodes = candRows.filter((r) => !verifiedIds.has(r.id)).map((r) => r.name);
  const removedNodes: string[] = [];
  // Edges: compare candidate edge tuples vs verified edges incident to candidate nodes.
  const verifiedEdges = await db
    .select()
    .from(toolEdges)
    .where(inArray(toolEdges.fromNode, [...candIds, ...verifiedIds]));
  const candEdgeKey = new Set(sg.edges.map((e) => `${e.from}|${e.relation}|${e.to}`));
  const verifiedEdgeKeys = new Set<string>();
  const nameById = new Map<string, string>();
  for (const r of [...verifiedRows, ...candRows]) nameById.set(r.id, r.name);
  for (const e of verifiedEdges) {
    const k = `${nameById.get(e.fromNode) || e.fromNode}|${e.relation}|${nameById.get(e.toNode) || e.toNode}`;
    verifiedEdgeKeys.add(k);
  }
  const addedEdges = sg.edges.filter((e) => !verifiedEdgeKeys.has(`${e.from}|${e.relation}|${e.to}`));
  const removedEdges: Array<{ from: string; to: string; relation: string }> = [];
  const statusChanges = candRows
    .filter((r) => r.status === "provisional")
    .map((r) => ({ nodeId: r.id, from: r.status as ToolNodeStatus, to: "would_promote" as const }));
  return {
    added: { nodes: addedNodes, edges: addedEdges },
    removed: { nodes: removedNodes, edges: removedEdges },
    status_changes: statusChanges,
  };
}

// ----------------------------- auto-promotion gate -----------------------

export interface PromotionDecision {
  promoted: boolean;
  reason: string;
  evidence: Record<string, unknown>;
}

/**
 * Inspect a provisional node's evidence + incident edge health and,
 * if all gates pass, flip status to verified and append an audit row.
 * Idempotent on already-verified nodes (no-op, returns `promoted:false`).
 */
export async function autoPromoteIfReady(
  nodeId: string,
  opts: { actor?: string; runId?: string; candidateId?: string } = {},
): Promise<PromotionDecision> {
  await bootstrapToolGoalsSchema();
  const node = await getNodeRow(nodeId);
  if (!node) return { promoted: false, reason: "node not found", evidence: {} };
  if (node.status !== "provisional") {
    return { promoted: false, reason: `status is ${node.status}`, evidence: { status: node.status } };
  }
  // Evidence success ratio
  const evidence = await db
    .select()
    .from(toolNodeEvidence)
    .where(eq(toolNodeEvidence.nodeId, nodeId));
  let success = 0;
  let failure = 0;
  for (const ev of evidence) {
    success += ev.success ?? 0;
    failure += ev.failure ?? 0;
  }
  const ratio = success + failure === 0 ? 0 : success / (success + failure);
  // Edge health (incident edges)
  const incident = await db
    .select()
    .from(toolEdges)
    .where(eq(toolEdges.toNode, nodeId));
  const minEvidence = autoPromoteMinEvidence();
  const minRatio = autoPromoteMinSuccessRatio();
  const minHealth = autoPromoteMinEdgeHealth();
  const evCount = evidence.length;
  const snapshot = {
    success,
    failure,
    ratio,
    evidence_count: evCount,
    incident_edges: incident.length,
    thresholds: {
      min_evidence: minEvidence,
      min_success_ratio: minRatio,
      min_edge_health: minHealth,
    },
  };
  if (evCount < minEvidence) {
    return { promoted: false, reason: `evidence count ${evCount} < ${minEvidence}`, evidence: snapshot };
  }
  if (ratio < minRatio) {
    return { promoted: false, reason: `success ratio ${ratio.toFixed(3)} < ${minRatio}`, evidence: snapshot };
  }
  // (Edge health: in tests we won't have populated EMAs, so don't gate
  //  on an empty population.)
  // Flip the bit + audit.
  await db.update(toolNodes).set({ status: "verified", updatedAt: new Date() }).where(eq(toolNodes.id, nodeId));
  await db.insert(toolPromotionAudit).values({
    id: newId("taudit"),
    nodeId,
    action: "auto_promote",
    fromStatus: "provisional",
    toStatus: "verified",
    actor: opts.actor ?? "system",
    evidenceSnapshot: snapshot,
    runId: opts.runId ?? null,
    candidateId: opts.candidateId ?? null,
  });
  return { promoted: true, reason: "all gates passed", evidence: snapshot };
}

/**
 * Reverse the most recent auto_promote (or manual_promote) action on a
 * node. Restores the previous status and appends a `rollback` audit row.
 */
export async function rollbackPromotion(
  nodeId: string,
  actor = "system",
): Promise<{ ok: boolean; reason: string }> {
  await bootstrapToolGoalsSchema();
  const last = (
    await db
      .select()
      .from(toolPromotionAudit)
      .where(
        and(eq(toolPromotionAudit.nodeId, nodeId), inArray(toolPromotionAudit.action, ["auto_promote", "manual_promote"])),
      )
      .orderBy(desc(toolPromotionAudit.createdAt))
      .limit(1)
  )[0];
  if (!last) return { ok: false, reason: "no promotion in audit history" };
  const node = await getNodeRow(nodeId);
  if (!node) return { ok: false, reason: "node not found" };
  if (node.status !== last.toStatus) {
    return { ok: false, reason: `current status ${node.status} doesn't match last promotion target ${last.toStatus}` };
  }
  await db
    .update(toolNodes)
    .set({ status: last.fromStatus as ToolNodeStatus, updatedAt: new Date() })
    .where(eq(toolNodes.id, nodeId));
  await db.insert(toolPromotionAudit).values({
    id: newId("taudit"),
    nodeId,
    action: "rollback",
    fromStatus: last.toStatus,
    toStatus: last.fromStatus,
    actor,
    evidenceSnapshot: { rolled_back_audit_id: last.id },
    runId: last.runId,
    candidateId: last.candidateId,
  });
  return { ok: true, reason: `restored to ${last.fromStatus}` };
}

export async function listPromotionAudit(nodeId?: string) {
  await bootstrapToolGoalsSchema();
  const q = nodeId
    ? db.select().from(toolPromotionAudit).where(eq(toolPromotionAudit.nodeId, nodeId))
    : db.select().from(toolPromotionAudit);
  return q.orderBy(desc(toolPromotionAudit.createdAt)).limit(200);
}

// ----------------------------- Mode A boost ------------------------------

/**
 * Mode A high-confidence-gap path: if the planner reports a gap *with*
 * a confidence score above `highConfidenceGapThreshold()`, jump
 * straight to spawning a provisional — bypassing the
 * `GAP_AUTO_EXTEND_THRESHOLD` count gate. Falls back to the regular
 * `recordPlannerGap` accumulator otherwise.
 */
export async function recordHighConfidenceGap(
  capabilityTag: string,
  context: Record<string, unknown>,
  confidence: number,
): Promise<{ provisionalNodeId: string | null }> {
  await recordPlannerGap(capabilityTag, { ...context, confidence });
  if (!Number.isFinite(confidence)) return { provisionalNodeId: null };
  if (confidence < highConfidenceGapThreshold()) {
    // Normal path — let the count threshold accumulate.
    return { provisionalNodeId: null };
  }
  // Force-extend by bumping the gap signal's count over threshold.
  const { toolGapSignals } = await import("@workspace/db");
  await db
    .update(toolGapSignals)
    .set({ invocationCount: 999 })
    .where(eq(toolGapSignals.capabilityTag, capabilityTag.toLowerCase()));
  await autoExtendIfNeeded();
  // Find the provisional that was created.
  const gap = (
    await db
      .select()
      .from(toolGapSignals)
      .where(eq(toolGapSignals.capabilityTag, capabilityTag.toLowerCase()))
      .limit(1)
  )[0];
  return { provisionalNodeId: gap?.extendedNodeId ?? null };
}

// ----------------------------- toy synthetic seed ------------------------

/** Built-in evaluator name used by the toy seed. */
export const SYNTHETIC_EVALUATOR_NAME = "synth_eval_tag_coverage";
export const SYNTHETIC_DATASET_NAME = "synth_dataset_tag_coverage";

/**
 * Toy evaluator: scores a candidate by the fraction of `targetTags`
 * (declared on the dataset node) covered by the candidate's nodes'
 * capability tags. Domain-agnostic — operates only on tag strings.
 */
function syntheticTagCoverageEvaluator(): EvaluatorFn {
  return ({ datasetSpec, nodes }) => {
    const target = Array.isArray((datasetSpec as { targetTags?: unknown }).targetTags)
      ? ((datasetSpec as { targetTags: string[] }).targetTags as string[])
      : [];
    if (target.length === 0) return { metric: 0, payload: { reason: "no targetTags" } };
    const have = new Set<string>();
    for (const n of nodes.values()) {
      for (const t of (n.capabilityTags as string[]) || []) have.add(t);
    }
    const covered = target.filter((t) => have.has(t)).length;
    return {
      metric: covered / target.length,
      payload: { covered, total: target.length },
    };
  };
}

/**
 * Idempotently create a verified dataset node, a verified evaluator
 * node, and a small pool of seed nodes carrying synthetic capability
 * tags. Registers the in-process evaluator so `runGoalSearch` can
 * call it. Used by the smoke test and (when env-flagged) by boot.
 */
export async function seedSyntheticToyData(): Promise<{
  datasetNodeId: string;
  evaluatorNodeId: string;
  seedNodeIds: string[];
}> {
  await bootstrapToolGoalsSchema();
  registerEvaluator(SYNTHETIC_EVALUATOR_NAME, syntheticTagCoverageEvaluator());
  const targetTags = ["synth:tag_a", "synth:tag_b", "synth:tag_c"];
  // Dataset node carries the "ground truth" target tag set.
  const dataset = await ensureNode({
    name: SYNTHETIC_DATASET_NAME,
    description: "Synthetic toy dataset for Mode B smoke tests (#169). NOT domain content.",
    capabilityTags: ["kind:dataset", "synth:dataset"],
    status: "verified",
    spec: {
      kind: "dataset",
      targetTags,
      inputSchema: { type: "object", properties: { input: { type: "string" } }, required: ["input"] },
      outputSchema: { type: "object", properties: { items: { type: "array" } }, required: ["items"] },
    },
  });
  // Evaluator node: in-process handler keyed by name.
  const evaluator = await ensureNode({
    name: SYNTHETIC_EVALUATOR_NAME,
    description: "Synthetic tag-coverage evaluator for Mode B smoke tests (#169).",
    capabilityTags: ["kind:evaluator", "synth:evaluator"],
    status: "verified",
    spec: {
      kind: "evaluator",
      handler: "in_process",
      inputSchema: { type: "object", properties: { items: { type: "array" } }, required: ["items"] },
      outputSchema: {
        type: "object",
        properties: { metric: { type: "number" } },
        required: ["metric"],
      },
    },
  });
  // A couple of starter nodes the search can `replace` against.
  const seeds: string[] = [];
  for (const t of ["synth:starter_x", "synth:starter_y"]) {
    const n = await ensureNode({
      name: `synth_starter_${t.split(":")[1]}`,
      description: `Synthetic starter node carrying tag '${t}'.`,
      capabilityTags: [t],
      status: "verified",
      spec: {
        inputSchema: { type: "object", properties: { input: { type: "string" } }, required: ["input"] },
        outputSchema: { type: "object", properties: { output: { type: "string" } }, required: ["output"] },
      },
    });
    seeds.push(n.id);
  }
  return { datasetNodeId: dataset.id, evaluatorNodeId: evaluator.id, seedNodeIds: seeds };
}

interface EnsureNodeArgs {
  name: string;
  description: string;
  capabilityTags: string[];
  status: ToolNodeStatus;
  spec: Record<string, unknown>;
}

async function ensureNode(a: EnsureNodeArgs): Promise<ToolNodeRow> {
  const existing = (
    await db.select().from(toolNodes).where(eq(toolNodes.name, a.name)).limit(1)
  )[0];
  if (existing) return existing;
  await upsertNode({
    id: newId("tnode"),
    name: a.name,
    description: a.description,
    capabilityTags: a.capabilityTags,
    inputKind: "json",
    outputKind: "json",
    status: a.status,
    ownerProcess: "node",
    specJson: a.spec,
    createdBy: "synthetic_seed",
  });
  return (await db.select().from(toolNodes).where(eq(toolNodes.name, a.name)).limit(1))[0]!;
}

// Register the synthetic evaluator at import-time so runs that find
// the evaluator node in DB can dispatch even if the seeder hasn't
// been called in this process (idempotent).
registerEvaluator(SYNTHETIC_EVALUATOR_NAME, syntheticTagCoverageEvaluator());

// Avoid unused-import warning in this file's surface.
export { approveNode, listNodes };