/**
 * spawn-templates — cross-seam template library for the meta-evolution
 * loop (Task #161).
 *
 * Premise: every time #158 spawns a provisional node and a human reviewer
 * promotes it, we have learned a small lesson about how to fix one shape
 * of seam problem. This module canonicalises that lesson as a structural
 * fingerprint plus a contract-shaped handler skeleton, so that the next
 * spawn for a structurally similar seam can be offered as a high-quality
 * starting point — never auto-applied, always reviewer-gated.
 *
 * Public surface:
 *   - schemaFingerprint(schema)       — name-stripped structural fingerprint
 *                                       of a JSONSchema-shaped object.
 *   - computeSeamFingerprint(input)   — salted hash of the seam fingerprint
 *                                       (plus its breakdown + canonical form).
 *   - searchTemplates(input)          — exact + near match search in the
 *                                       `tool_spawn_templates` table.
 *   - persistTemplateOnPromote(p)     — ingest one promoted spawn.
 *   - recordTemplateOffered(id)       — bump offered_count when proposer
 *                                       surfaces a template.
 *   - recordTemplateChoice(args)      — record reviewer's choice
 *                                       (use | use_edit | fresh) on a
 *                                       candidate that was offered a match.
 *   - recordTemplatePromoteResult(id) — bump success_count when the candidate
 *                                       that reused a template gets promoted.
 *   - maybeAutoDemote(id)             — silent demotion when track record
 *                                       drops below threshold.
 *   - listTemplates(filter)           — admin read.
 *   - setTemplateStatus(id,...)       — admin override.
 *   - sanitizeSpecSkeleton(spec)      — strips concrete payload fields out
 *                                       of a spec before persistence.
 *   - sanitizeHandlerSkeleton(s)      — masks string literals and strips
 *                                       comments from a handler skeleton.
 *   - applyParameterizationMarkers    — prefixes TODO markers on near matches.
 *   - FINGERPRINT_ALGO_VERSION
 *
 * Privacy invariants:
 *   - The salted hash uses `process.env.TEMPLATE_FINGERPRINT_SALT` (with
 *     a deterministic dev-only fallback that is logged once on first use).
 *   - The fingerprint inputs (`missingFieldTypes`, `unusedFieldTypes`,
 *     `downstreamInputSchemaFingerprint`, …) are derived from JSONSchema
 *     *type strings* only — never from field names or values.
 *   - `sanitizeSpecSkeleton` and `sanitizeHandlerSkeleton` are the
 *     last line of defense against payload data leaking into the
 *     library through reviewer-edited specs.
 *   - `__assertNoUserDataLeaked(template, candidatePayloads)` is a
 *     test-only invariant checker.
 */

import { and, eq, sql } from "drizzle-orm";
import { createHash } from "node:crypto";
import {
  db,
  toolSpawnTemplates,
  type ToolSpawnTemplateRow,
  type InsertToolSpawnTemplateRow,
} from "@workspace/db";
import { newId } from "./ids";
import { logger } from "./logger";
import type { IOSchema } from "./tool-graph";

export const FINGERPRINT_ALGO_VERSION = 1;

/** Failure modes the fingerprint discriminates on. */
export type SeamFailureMode =
  | "missing_required_field"
  | "type_mismatch"
  | "capability_gap"
  | "unknown";

export interface SeamFingerprintInput {
  failureMode: SeamFailureMode;
  /** Canonical JSONSchema-shaped object describing what the consumer wants. */
  downstreamInputSchema: IOSchema | null | undefined;
  /** Canonical JSONSchema-shaped object describing what the upstream emits. */
  upstreamOutputSchema?: IOSchema | null | undefined;
  /**
   * Names of consumes-side required fields that the upstream did NOT
   * produce. Field names are used ONLY to look up their declared types
   * in `downstreamInputSchema.properties` — never persisted.
   */
  missingRequiredFieldNames?: string[];
  /**
   * Names of upstream produced fields that the consumer did not declare.
   * Used identically to extract types from `upstreamOutputSchema`; never
   * persisted.
   */
  unusedFieldNames?: string[];
  /** Capability tag the seam was associated with — curated label, safe. */
  capabilityTag?: string;
}

export interface SeamFingerprintBreakdown {
  failureMode: SeamFailureMode;
  missingFieldTypes: string[];
  unusedFieldTypes: string[];
  downstreamInputSchemaFingerprint: string;
  capabilityTag: string;
  algoVersion: number;
}

export interface ComputedFingerprint {
  hash: string;
  breakdown: SeamFingerprintBreakdown;
  /** Pre-hash canonical string, exposed so tests can assert determinism. */
  canonical: string;
}

// ---------------------------------------------------------------- helpers

const DEV_SALT_FALLBACK = "doatlas-dev-template-salt-v1";
let warnedSaltOnce = false;

/**
 * Return the fingerprint salt from `TEMPLATE_FINGERPRINT_SALT`, or the
 * dev-only fallback (warning once per process) when the env var is
 * unset or blank.
 */
function getSalt(): string {
  const env = (process.env["TEMPLATE_FINGERPRINT_SALT"] || "").trim();
  if (env) return env;
  if (!warnedSaltOnce) {
    warnedSaltOnce = true;
    logger.warn(
      "TEMPLATE_FINGERPRINT_SALT is unset — using a deterministic dev-only fallback. Set this env in production so template fingerprints cannot be correlated across deployments.",
    );
  }
  return DEV_SALT_FALLBACK;
}

/** Normalise a schema type to a stable string token, never undefined. */
function typeToken(t: IOSchema["type"] | undefined): string {
  return (t || "any") as string;
}

/**
 * Walk a JSONSchema and produce a deterministic structural fingerprint
 * string. Field NAMES are stripped — children are sorted by their own
 * recursively-computed fingerprint so two schemas that differ only in
 * field naming yield the same fingerprint.
 *
 * Required-ness IS preserved (without the field name) because it is a
 * structural property, not a label.
 *
 * @param schema JSONSchema-shaped object; null/undefined/non-object
 *   input yields the sentinel `"{}"`.
 * @returns The fingerprint string, e.g. `{t=object,props=[1:{t=string}]}`.
 */
export function schemaFingerprint(
  schema: IOSchema | null | undefined,
): string {
  if (!schema || typeof schema !== "object") return "{}";
  const t = typeToken(schema.type);
  const items = schema.items ? `items=${schemaFingerprint(schema.items)}` : "";
  const props = schema.properties || {};
  const requiredSet = new Set(schema.required || []);
  const childFps: string[] = [];
  for (const [name, sub] of Object.entries(props)) {
    // The name is consulted only to resolve required-ness; it never
    // reaches the output string.
    const req = requiredSet.has(name) ? "1" : "0";
    childFps.push(`${req}:${schemaFingerprint(sub)}`);
  }
  childFps.sort();
  const propsPart = childFps.length ? `props=[${childFps.join(",")}]` : "";
  return `{t=${t}${propsPart ? "," + propsPart : ""}${items ? "," + items : ""}}`;
}

/**
 * Resolve a list of field names against a parent schema and return the
 * sorted list of their declared type tokens. Field NAMES are not
 * propagated into the result — only the types. Names that are not
 * declared on the parent resolve to the `"any"` token.
 */
function resolveFieldTypes(
  parent: IOSchema | null | undefined,
  fieldNames: string[] | undefined,
): string[] {
  if (!parent || !parent.properties || !fieldNames || fieldNames.length === 0) {
    return [];
  }
  const props = parent.properties;
  const out: string[] = [];
  for (const name of fieldNames) {
    const sub = props[name];
    out.push(typeToken(sub?.type));
  }
  out.sort();
  return out;
}

/**
 * Compute the salted SHA-256 fingerprint of a seam, together with the
 * structural breakdown it was derived from and the pre-hash canonical
 * string.
 */
export function computeSeamFingerprint(
  input: SeamFingerprintInput,
): ComputedFingerprint {
  const breakdown: SeamFingerprintBreakdown = {
    failureMode: input.failureMode,
    missingFieldTypes: resolveFieldTypes(
      input.downstreamInputSchema,
      input.missingRequiredFieldNames,
    ),
    unusedFieldTypes: resolveFieldTypes(
      input.upstreamOutputSchema,
      input.unusedFieldNames,
    ),
    downstreamInputSchemaFingerprint: schemaFingerprint(
      input.downstreamInputSchema,
    ),
    capabilityTag: (input.capabilityTag || "").toLowerCase(),
    algoVersion: FINGERPRINT_ALGO_VERSION,
  };
  // Canonical string is STRUCTURAL ONLY — deliberately excludes
  // both field NAMES and the capability tag (which is lexical, not
  // structural). The tag is preserved on the row as auxiliary
  // metadata for analytics/ranking but does not participate in the
  // exact-match key. Two seams with the same failure mode and the
  // same shape across different tags are the same template.
  const canonical = [
    `v=${breakdown.algoVersion}`,
    `mode=${breakdown.failureMode}`,
    `miss=[${breakdown.missingFieldTypes.join(",")}]`,
    `unused=[${breakdown.unusedFieldTypes.join(",")}]`,
    `dsfp=${breakdown.downstreamInputSchemaFingerprint}`,
  ].join("|");
  // NUL separator keeps (salt, canonical) unambiguous under concatenation.
  const hash = createHash("sha256")
    .update(getSalt())
    .update("\x00")
    .update(canonical)
    .digest("hex");
  return { hash, breakdown, canonical };
}

// ---------------------------------------------------------------- sanitisation

/**
 * Top-level spec keys that are considered at all. Everything else a
 * reviewer might have stuffed into the spec (a comment carrying user
 * text, a debug field) is dropped. Note that `description` is
 * recognised here only so it can be dropped explicitly — it is never
 * persisted.
 */
const SPEC_ALLOWED_TOP = new Set([
  "name",
  "description",
  "parameters",
  "inputSchema",
  "outputSchema",
]);

/**
 * JSONSchema keywords that are dropped outright — these are the most
 * common accidental payload-leak vectors:
 *   - `description` may contain reviewer-pasted user text
 *   - `enum` / `pattern` may pin specific user identifiers
 *   - `default` / `examples` are explicit user-data slots
 */
const SCHEMA_VALUE_BEARING_KEYS = new Set([
  "description",
  "enum",
  "pattern",
  "default",
  "examples",
]);

/** JSONSchema keywords that survive sanitisation (subject to shape checks). */
const SCHEMA_ALLOWED_KEYS = new Set([
  "type",
  "properties",
  "required",
  "items",
  "format",
  "minimum",
  "maximum",
  "minLength",
  "maxLength",
]);

/** True when `v` is an array whose elements are all strings. */
function isStringArray(v: unknown): v is string[] {
  return Array.isArray(v) && v.every((x) => typeof x === "string");
}

/**
 * Check that the value under a scalar schema keyword has the primitive
 * shape that keyword is expected to carry. Anything else (nested
 * objects, free-form blobs) is treated as a potential payload carrier
 * and rejected by the caller.
 */
function isWellShapedKeywordValue(key: string, value: unknown): boolean {
  switch (key) {
    case "type":
      return typeof value === "string" || isStringArray(value);
    case "required":
      // Booleans are tolerated: they carry no payload.
      return typeof value === "boolean" || isStringArray(value);
    case "format":
      return typeof value === "string";
    case "minimum":
    case "maximum":
    case "minLength":
    case "maxLength":
      return typeof value === "number" && Number.isFinite(value);
    default:
      return false;
  }
}

/**
 * Recursively reduce a JSONSchema-shaped node to its allowlisted,
 * well-shaped keywords.
 *
 * @returns The sanitised node, or `undefined` when the node is not a
 *   legal schema position value (e.g. a bare string or number) and must
 *   be dropped by the caller. `null` and booleans pass through because
 *   they cannot carry payload text.
 */
function stripSchema(node: unknown): unknown {
  if (node === null || typeof node === "boolean") return node;
  // A string/number where a schema is expected is exactly how free
  // text sneaks into a skeleton — never copy it through.
  if (typeof node !== "object") return undefined;
  if (Array.isArray(node)) {
    return node.map(stripSchema).filter((n) => n !== undefined);
  }
  const out: Record<string, unknown> = {};
  for (const [key, value] of Object.entries(node as Record<string, unknown>)) {
    if (SCHEMA_VALUE_BEARING_KEYS.has(key)) continue;
    if (!SCHEMA_ALLOWED_KEYS.has(key)) continue;
    if (key === "properties") {
      if (!value || typeof value !== "object" || Array.isArray(value)) continue;
      const cleaned: Record<string, unknown> = {};
      for (const [propName, propSchema] of Object.entries(
        value as Record<string, unknown>,
      )) {
        const stripped = stripSchema(propSchema);
        // Keep the property slot (it is contract shape) but never its
        // illegal raw value.
        cleaned[propName] = stripped === undefined ? {} : stripped;
      }
      out[key] = cleaned;
    } else if (key === "items") {
      const stripped = stripSchema(value);
      if (stripped !== undefined) out[key] = stripped;
    } else if (isWellShapedKeywordValue(key, value)) {
      out[key] = value;
    }
  }
  return out;
}

/**
 * Reduce a candidate spec to a persistable skeleton: only the
 * allowlisted top-level keys survive, `description` is dropped, `name`
 * is length-capped, and the schema-valued keys are passed through
 * `stripSchema`.
 *
 * @param spec Reviewer-editable spec object; null/undefined/non-object
 *   yields `{}`.
 * @returns A new object; the input is not mutated.
 */
export function sanitizeSpecSkeleton(
  spec: Record<string, unknown> | null | undefined,
): Record<string, unknown> {
  if (!spec || typeof spec !== "object") return {};
  const out: Record<string, unknown> = {};
  for (const [k, v] of Object.entries(spec)) {
    if (!SPEC_ALLOWED_TOP.has(k)) continue;
    if (k === "name" && typeof v === "string") {
      // Keep the synthesised name shape but no payload-derived suffix.
      out[k] = v.slice(0, 120);
    } else if (k === "description") {
      // Top-level description is FREE TEXT — exactly the place a
      // reviewer is most likely to paste user-derived strings (PHI,
      // example IDs, copy-pasted error logs). Drop it entirely; the
      // template name is enough for reviewer disambiguation.
      continue;
    } else if (k === "parameters" || k === "inputSchema" || k === "outputSchema") {
      const stripped = stripSchema(v);
      if (stripped !== undefined) out[k] = stripped;
    }
  }
  return out;
}

/**
 * Mask every string literal inside a handler skeleton and strip its
 * comments. We are deliberately conservative — the goal is to keep
 * contract-shaped scaffolding and drop anything that could be a
 * captured user value. Literals of ANY length are masked (not only
 * long ones). Skeletons longer than 8000 characters are truncated.
 *
 * @param skel Handler source text; null/undefined/empty yields `""`.
 */
export function sanitizeHandlerSkeleton(skel: string | null | undefined): string {
  if (!skel) return "";
  let out = skel;
  // Mask ALL string literals (any length, double + single + template)
  // — payload data is just as likely to be short ids, emails, or
  // patient-record fragments as it is to be long blobs. Operators and
  // type names are never quoted, so this preserves useful structural
  // scaffolding while removing any captured user value.
  out = out.replace(/"((?:\\.|[^"\\\n])*)"/g, '""');
  out = out.replace(/'((?:\\.|[^'\\\n])*)'/g, "''");
  out = out.replace(/`((?:\\.|[^`\\])*)`/g, "``");
  // Strip line + block comments — these often carry pasted examples.
  // Runs after literal masking so `//` inside a string (e.g. a URL)
  // is already gone and cannot be mistaken for a comment start.
  out = out.replace(/\/\*[\s\S]*?\*\//g, "");
  out = out.replace(/(^|[^:])\/\/[^\n]*/g, "$1");
  // Trim absurdly large skeletons.
  if (out.length > 8000) out = out.slice(0, 8000) + "\n// truncated\n";
  return out;
}

// ---------------------------------------------------------------- search

export interface TemplateMatch {
  templateId: string;
  strength: "exact" | "near";
  score: number; // jaccard for near, 1 for exact
  template: ToolSpawnTemplateRow;
  /**
   * For near-match: the list of structural slots where the fingerprint
   * differs from the request. Used to render TODO markers in the
   * skeleton.
   */
  parameterizationGaps: string[];
}

const NEAR_MATCH_JACCARD_THRESHOLD = 0.8;

/** Jaccard similarity of two sets; two empty sets are defined as 1. */
function jaccard(a: Set<string>, b: Set<string>): number {
  if (a.size === 0 && b.size === 0) return 1;
  let inter = 0;
  for (const x of a) if (b.has(x)) inter += 1;
  const union = a.size + b.size - inter;
  return union === 0 ? 1 : inter / union;
}

/**
 * Encode the (missing, unused) type lists as a multiset-in-a-set.
 *
 * Each element is keyed by its type plus a per-type occurrence index
 * (`m:string#0`, `m:string#1`, …), so ["string","string"] vs ["string"]
 * does NOT collapse to perfect overlap, while an extra unrelated type
 * does not shift the keys of the types both sides share (which a
 * position-in-sorted-list key would).
 */
function slotBag(
  missingTypes: readonly string[],
  unusedTypes: readonly string[],
): Set<string> {
  const bag = new Set<string>();
  const addAll = (prefix: string, types: readonly string[]): void => {
    const seen = new Map<string, number>();
    for (const t of types) {
      const n = seen.get(t) ?? 0;
      seen.set(t, n + 1);
      bag.add(`${prefix}:${t}#${n}`);
    }
  };
  addAll("m", missingTypes);
  addAll("u", unusedTypes);
  return bag;
}

/**
 * Look up the best reusable template for a seam.
 *
 * Tries an exact fingerprint-hash match first; otherwise scores every
 * active template with the same failure mode and downstream schema
 * fingerprint and returns the highest-scoring one at or above
 * `NEAR_MATCH_JACCARD_THRESHOLD`.
 *
 * @returns The match, or `null` when nothing qualifies or the
 *   fingerprint could not be computed. Database errors are not caught
 *   here and propagate to the caller.
 */
export async function searchTemplates(
  input: SeamFingerprintInput,
): Promise<TemplateMatch | null> {
  let computed: ComputedFingerprint;
  try {
    computed = computeSeamFingerprint(input);
  } catch (err) {
    logger.debug({ err }, "spawn-templates: fingerprint failed");
    return null;
  }
  const breakdown = computed.breakdown;

  // 1. Exact match: same fingerprint, active, current algo version.
  const exact = await db
    .select()
    .from(toolSpawnTemplates)
    .where(
      and(
        eq(toolSpawnTemplates.fingerprintHash, computed.hash),
        eq(toolSpawnTemplates.fingerprintAlgoVersion, FINGERPRINT_ALGO_VERSION),
        eq(toolSpawnTemplates.status, "active"),
      ),
    )
    .limit(1);
  if (exact[0]) {
    return {
      templateId: exact[0].id,
      strength: "exact",
      score: 1,
      template: exact[0],
      parameterizationGaps: [],
    };
  }

  // 2. Near match per Task #161 spec: SAME failure_mode AND SAME
  //    downstream input schema fingerprint are HARD preconditions;
  //    score is the Jaccard overlap of the structural-slot bag
  //    (missing-required-field types ∪ unused-upstream-field types).
  //    Threshold: NEAR_MATCH_JACCARD_THRESHOLD (0.8).
  const candidates = await db
    .select()
    .from(toolSpawnTemplates)
    .where(
      and(
        eq(toolSpawnTemplates.failureMode, breakdown.failureMode),
        eq(
          toolSpawnTemplates.downstreamInputSchemaFingerprint,
          breakdown.downstreamInputSchemaFingerprint,
        ),
        eq(toolSpawnTemplates.fingerprintAlgoVersion, FINGERPRINT_ALGO_VERSION),
        eq(toolSpawnTemplates.status, "active"),
      ),
    );
  if (candidates.length === 0) return null;

  const reqBag = slotBag(breakdown.missingFieldTypes, breakdown.unusedFieldTypes);
  let best: TemplateMatch | null = null;
  for (const t of candidates) {
    const tBag = slotBag(
      (t.missingFieldTypes as string[]) || [],
      (t.unusedFieldTypes as string[]) || [],
    );
    const score = jaccard(reqBag, tBag);
    if (score < NEAR_MATCH_JACCARD_THRESHOLD) continue;
    // First candidate wins ties.
    if (best && best.score >= score) continue;
    const gaps: string[] = [];
    if (
      JSON.stringify(t.missingFieldTypes) !==
      JSON.stringify(breakdown.missingFieldTypes)
    ) {
      gaps.push("missing_field_types");
    }
    if (
      JSON.stringify(t.unusedFieldTypes) !==
      JSON.stringify(breakdown.unusedFieldTypes)
    ) {
      gaps.push("unused_field_types");
    }
    best = {
      templateId: t.id,
      strength: "near",
      score,
      template: t,
      parameterizationGaps: gaps,
    };
  }
  return best;
}

// ---------------------------------------------------------------- ingestion

export interface PromotePersistInput {
  fingerprintInput: SeamFingerprintInput;
  promotedNodeName: string;
  promotedInputSchema: IOSchema | null | undefined;
  promotedOutputSchema: IOSchema | null | undefined;
  handlerSkeleton: string;
  specSkeleton: Record<string, unknown>;
}

/**
 * Persist a template row from a freshly-promoted spawn. Idempotent on
 * `(fingerprintHash, fingerprintAlgoVersion)` — re-promoting an
 * identical spawn updates the existing row's skeleton + bumps version.
 *
 * Both skeletons are sanitised before they are written.
 *
 * @returns The inserted/updated row, or `null` when persistence failed
 *   (the failure is logged at warn level, never thrown).
 */
export async function persistTemplateOnPromote(
  input: PromotePersistInput,
): Promise<ToolSpawnTemplateRow | null> {
  try {
    const computed = computeSeamFingerprint(input.fingerprintInput);
    const cleanedSpec = sanitizeSpecSkeleton(input.specSkeleton);
    const cleanedHandler = sanitizeHandlerSkeleton(input.handlerSkeleton);
    const row: InsertToolSpawnTemplateRow = {
      id: newId("tspt"),
      fingerprintHash: computed.hash,
      fingerprintAlgoVersion: FINGERPRINT_ALGO_VERSION,
      failureMode: computed.breakdown.failureMode,
      missingFieldTypes: computed.breakdown.missingFieldTypes,
      unusedFieldTypes: computed.breakdown.unusedFieldTypes,
      downstreamInputSchemaFingerprint:
        computed.breakdown.downstreamInputSchemaFingerprint,
      promotedInputSchemaFingerprint: schemaFingerprint(input.promotedInputSchema),
      promotedOutputSchemaFingerprint: schemaFingerprint(
        input.promotedOutputSchema,
      ),
      handlerSkeleton: cleanedHandler,
      specSkeleton: cleanedSpec,
      capabilityTag: computed.breakdown.capabilityTag,
      sourceNodeName: input.promotedNodeName.slice(0, 200),
      offeredCount: 0,
      reuseCount: 0,
      successCount: 0,
      rejectCount: 0,
      status: "active",
    };
    const inserted = await db
      .insert(toolSpawnTemplates)
      .values(row)
      .onConflictDoUpdate({
        target: [
          toolSpawnTemplates.fingerprintHash,
          toolSpawnTemplates.fingerprintAlgoVersion,
        ],
        // Counters and status are intentionally left untouched on
        // re-promotion; only the skeleton-related columns refresh.
        set: {
          handlerSkeleton: row.handlerSkeleton,
          specSkeleton: row.specSkeleton,
          promotedInputSchemaFingerprint: row.promotedInputSchemaFingerprint,
          promotedOutputSchemaFingerprint: row.promotedOutputSchemaFingerprint,
          sourceNodeName: row.sourceNodeName,
          version: sql`${toolSpawnTemplates.version} + 1`,
          updatedAt: new Date(),
        },
      })
      .returning();
    return inserted[0] ?? null;
  } catch (err) {
    logger.warn({ err }, "spawn-templates: persistTemplateOnPromote failed");
    return null;
  }
}

// ---------------------------------------------------------------- accounting

/** Best-effort: increment `offeredCount` for a template. Never throws. */
export async function recordTemplateOffered(templateId: string): Promise<void> {
  try {
    await db
      .update(toolSpawnTemplates)
      .set({
        offeredCount: sql`${toolSpawnTemplates.offeredCount} + 1`,
        updatedAt: new Date(),
      })
      .where(eq(toolSpawnTemplates.id, templateId));
  } catch (err) {
    logger.debug({ err }, "spawn-templates: recordTemplateOffered failed");
  }
}

export type TemplateChoice = "use" | "use_edit" | "fresh";

/**
 * Best-effort: record the reviewer's choice for an offered template.
 * `"fresh"` bumps `rejectCount`; `"use"` / `"use_edit"` bump
 * `reuseCount`. Afterwards the auto-demotion check is run.
 */
export async function recordTemplateChoice(args: {
  templateId: string;
  choice: TemplateChoice;
}): Promise<void> {
  try {
    if (args.choice === "fresh") {
      await db
        .update(toolSpawnTemplates)
        .set({
          rejectCount: sql`${toolSpawnTemplates.rejectCount} + 1`,
          updatedAt: new Date(),
        })
        .where(eq(toolSpawnTemplates.id, args.templateId));
    } else {
      await db
        .update(toolSpawnTemplates)
        .set({
          reuseCount: sql`${toolSpawnTemplates.reuseCount} + 1`,
          updatedAt: new Date(),
        })
        .where(eq(toolSpawnTemplates.id, args.templateId));
    }
  } catch (err) {
    logger.debug({ err }, "spawn-templates: recordTemplateChoice failed");
  }
  // Even non-success choices may push a template past the demote
  // threshold once it has been offered enough times.
  await maybeAutoDemote(args.templateId);
}

/** Best-effort: increment `successCount` for a template. Never throws. */
export async function recordTemplatePromoteResult(
  templateId: string,
): Promise<void> {
  try {
    await db
      .update(toolSpawnTemplates)
      .set({
        successCount: sql`${toolSpawnTemplates.successCount} + 1`,
        updatedAt: new Date(),
      })
      .where(eq(toolSpawnTemplates.id, templateId));
  } catch (err) {
    logger.debug({ err }, "spawn-templates: recordTemplatePromoteResult failed");
  }
}

// ---------------------------------------------------------------- demotion

/**
 * Read a numeric tuning knob from the environment.
 *
 * Unset/blank values and values that do not parse to a number fall
 * back to `fallback`; a malformed value must not silently turn into
 * `NaN`, because every comparison against `NaN` is false and would
 * disable the threshold it configures.
 */
function envNumber(name: string, fallback: number): number {
  const raw = (process.env[name] ?? "").trim();
  if (!raw) return fallback;
  const parsed = Number(raw);
  if (Number.isNaN(parsed)) {
    logger.warn(
      { name, fallback },
      "spawn-templates: ignoring non-numeric env override",
    );
    return fallback;
  }
  return parsed;
}

/**
 * Auto-demote thresholds. A template is demoted (silently hidden from
 * the proposer, kept in admin view) once it has been offered at least
 * `MIN_OFFERS_FOR_DEMOTE` times AND its success rate is below
 * `SUCCESS_RATE_FLOOR`. Configurable through env for ops tuning.
 */
const MIN_OFFERS_FOR_DEMOTE = envNumber("TEMPLATE_DEMOTE_MIN_OFFERS", 5);
const SUCCESS_RATE_FLOOR = envNumber("TEMPLATE_DEMOTE_SUCCESS_RATE", 0.3);

/**
 * Demote an active template whose success rate
 * (`successCount / offeredCount`) has fallen below the floor after
 * enough offers.
 *
 * @returns `{ demoted: true, reason }` when the row was demoted by this
 *   call; `{ demoted: false }` otherwise, including on any error
 *   (errors are logged at debug level, never thrown).
 */
export async function maybeAutoDemote(
  templateId: string,
): Promise<{ demoted: boolean; reason?: string }> {
  try {
    const rows = await db
      .select()
      .from(toolSpawnTemplates)
      .where(eq(toolSpawnTemplates.id, templateId))
      .limit(1);
    const t = rows[0];
    if (!t || t.status !== "active") return { demoted: false };
    if (t.offeredCount < MIN_OFFERS_FOR_DEMOTE) return { demoted: false };
    // Guard the division: a never-offered template counts as healthy.
    const rate = t.offeredCount === 0 ? 1 : t.successCount / t.offeredCount;
    if (rate < SUCCESS_RATE_FLOOR) {
      const reason = `success_rate ${rate.toFixed(2)} < floor ${SUCCESS_RATE_FLOOR} after ${t.offeredCount} offers`;
      await db
        .update(toolSpawnTemplates)
        .set({
          status: "demoted",
          demotedAt: new Date(),
          demotedReason: reason,
          updatedAt: new Date(),
        })
        .where(eq(toolSpawnTemplates.id, templateId));
      logger.info(
        { templateId, reason, sourceNode: t.sourceNodeName },
        "spawn-templates: auto-demoted template",
      );
      return { demoted: true, reason };
    }
    return { demoted: false };
  } catch (err) {
    logger.debug({ err }, "spawn-templates: maybeAutoDemote failed");
    return { demoted: false };
  }
}

// ---------------------------------------------------------------- admin

/**
 * Admin read: list templates, optionally filtered by status, most
 * recently updated first. Rows without `updatedAt` sort last.
 */
export async function listTemplates(filter?: {
  status?: "active" | "demoted" | "any";
}): Promise<ToolSpawnTemplateRow[]> {
  const status = filter?.status ?? "any";
  const rows =
    status === "any"
      ? await db.select().from(toolSpawnTemplates)
      : await db
          .select()
          .from(toolSpawnTemplates)
          .where(eq(toolSpawnTemplates.status, status));
  rows.sort((a, b) => (b.updatedAt?.getTime() ?? 0) - (a.updatedAt?.getTime() ?? 0));
  return rows;
}

/**
 * Admin override of a template's status. Demoting records the
 * (500-char-capped) reason and timestamp; re-activating clears both.
 *
 * @returns The updated row, or `null` when no row matched or the update
 *   failed (logged at debug level, never thrown).
 */
export async function setTemplateStatus(
  templateId: string,
  status: "active" | "demoted",
  reason: string,
): Promise<ToolSpawnTemplateRow | null> {
  try {
    const rows = await db
      .update(toolSpawnTemplates)
      .set({
        status,
        demotedAt: status === "demoted" ? new Date() : null,
        demotedReason: status === "demoted" ? reason.slice(0, 500) : null,
        updatedAt: new Date(),
      })
      .where(eq(toolSpawnTemplates.id, templateId))
      .returning();
    return rows[0] ?? null;
  } catch (err) {
    logger.debug({ err }, "spawn-templates: setTemplateStatus failed");
    return null;
  }
}

// ---------------------------------------------------------------- skeletons

/**
 * Render a near-match handler skeleton with TODO markers prepended,
 * one per structural slot where the matched template disagrees with
 * the requested fingerprint. Returns the original skeleton unchanged
 * for exact matches and for near matches without gaps.
 */
export function applyParameterizationMarkers(
  skeleton: string,
  match: TemplateMatch,
): string {
  if (match.strength === "exact" || match.parameterizationGaps.length === 0) {
    return skeleton;
  }
  const banner = match.parameterizationGaps
    .map((g) => `// TODO(template-reuse): adjust for differing ${g}`)
    .join("\n");
  return `${banner}\n${skeleton}`;
}

// ---------------------------------------------------------------- privacy invariant

/**
 * Test-only: assert that no string from `payloadStrings` (representative
 * of user data observed at the seam) appears anywhere in the persisted
 * template row. Throws on violation so property tests fail loudly.
 *
 * Payload strings shorter than 4 characters are ignored. Because the
 * row is searched in its JSON-encoded form, each payload is checked
 * both raw and JSON-escaped — otherwise a leaked value containing a
 * quote, backslash, or newline would never match.
 *
 * @throws Error when a payload string is found in the row.
 */
export function __assertNoUserDataLeaked(
  row: ToolSpawnTemplateRow,
  payloadStrings: string[],
): void {
  const haystack = JSON.stringify({
    handler: row.handlerSkeleton,
    spec: row.specSkeleton,
    miss: row.missingFieldTypes,
    unused: row.unusedFieldTypes,
    ds: row.downstreamInputSchemaFingerprint,
    promIn: row.promotedInputSchemaFingerprint,
    promOut: row.promotedOutputSchemaFingerprint,
    cap: row.capabilityTag,
    src: row.sourceNodeName,
    fp: row.fingerprintHash,
  });
  for (const s of payloadStrings) {
    if (!s || s.length < 4) continue;
    // Same escaping the haystack's string values went through.
    const escaped = JSON.stringify(s).slice(1, -1);
    if (haystack.includes(s) || haystack.includes(escaped)) {
      throw new Error(
        `privacy invariant violated: payload string "${s.slice(0, 40)}…" leaked into template row ${row.id}`,
      );
    }
  }
}