doatlas-2 / artifacts /api-server /src /lib /spawn-templates.ts
Iostream-Li's picture
Add files using upload-large-folder tool
5871090 verified
/**
* spawn-templates — cross-seam template library for the meta-evolution
* loop (Task #161).
*
* Premise: every time #158 spawns a provisional node and a human reviewer
* promotes it, we have learned a small lesson about how to fix one shape
* of seam problem. This module canonicalises that lesson as a structural
* fingerprint plus a contract-shaped handler skeleton, so that the next
* spawn for a structurally similar seam can be offered as a high-quality
* starting point — never auto-applied, always reviewer-gated.
*
* Public surface:
* - schemaFingerprint(schema) — name-stripped structural fingerprint of
* a JSONSchema-shaped object.
* - computeSeamFingerprint(input) — salted-hash of the seam fingerprint.
* - searchTemplates(input) — exact + near match search in the
* `tool_spawn_templates` table.
* - persistTemplateOnPromote(p) — ingest one promoted spawn.
* - recordTemplateOffered(id) — bump offered_count when proposer
* surfaces a template.
* - recordTemplateChoice(args) — record reviewer's choice
* (use | use_edit | fresh) on a
* candidate that was offered a match.
* - recordTemplatePromoteResult — bump success_count when the candidate
* that reused a template gets promoted.
* - maybeAutoDemote(id) — silent demotion when track record
* drops below threshold.
* - listTemplates(filter) — admin read.
* - setTemplateStatus(id,...) — admin override.
* - sanitizeSpecSkeleton(spec) — strips concrete payload fields out
* of a spec before persistence.
* - sanitizeHandlerSkeleton(s) — replaces literal payload-looking
* strings with placeholders.
* - FINGERPRINT_ALGO_VERSION
*
* Privacy invariants:
* - The salted hash uses `process.env.TEMPLATE_FINGERPRINT_SALT` (with
* a deterministic dev-only fallback that is logged once on first use).
* - The fingerprint inputs (`missingFieldTypes`, `unusedFieldTypes`,
* `downstreamInputSchemaFingerprint`, …) are derived from JSONSchema
* *type strings* only — never from field names or values.
* - `sanitizeSpecSkeleton` and `sanitizeHandlerSkeleton` are the
* last line of defense against payload data leaking into the
* library through reviewer-edited specs.
* - `__assertNoUserDataLeaked(template, candidatePayloads)` is a
* test-only invariant checker.
*/
import { and, eq, sql } from "drizzle-orm";
import { createHash } from "node:crypto";
import {
db,
toolSpawnTemplates,
type ToolSpawnTemplateRow,
type InsertToolSpawnTemplateRow,
} from "@workspace/db";
import { newId } from "./ids";
import { logger } from "./logger";
import type { IOSchema } from "./tool-graph";
/**
 * Version of the fingerprint algorithm. It is embedded in the canonical
 * pre-hash string (`v=`), stored on every persisted template row, and
 * used as an equality filter by `searchTemplates`, so rows written under
 * a different version are never offered as matches.
 */
export const FINGERPRINT_ALGO_VERSION = 1;
/**
 * Failure modes the fingerprint discriminates on. The value is written
 * into the canonical pre-hash string (`mode=`) and is a hard equality
 * precondition for near matches in `searchTemplates`.
 */
export type SeamFailureMode =
| "missing_required_field"
| "type_mismatch"
| "capability_gap"
| "unknown";
/**
 * Raw description of a seam, as handed to `computeSeamFingerprint`.
 * Field names supplied here are only used to look up declared types;
 * the computed breakdown contains type tokens, not names.
 */
export interface SeamFingerprintInput {
failureMode: SeamFailureMode;
/** Canonical JSONSchema-shaped object describing what the consumer wants. */
downstreamInputSchema: IOSchema | null | undefined;
/** Canonical JSONSchema-shaped object describing what the upstream emits. */
upstreamOutputSchema?: IOSchema | null | undefined;
/**
* Names of consumes-side required fields that the upstream did NOT
* produce. Field names are used ONLY to look up their declared types
* in `downstreamInputSchema.properties` — never persisted.
*/
missingRequiredFieldNames?: string[];
/**
* Names of upstream produced fields that the consumer did not declare.
* Used identically to extract types from `upstreamOutputSchema`; never
* persisted.
*/
unusedFieldNames?: string[];
/** Capability tag the seam was associated with — curated label, safe. */
capabilityTag?: string;
}
/**
 * Structural components derived from a `SeamFingerprintInput`. These are
 * the values `persistTemplateOnPromote` stores on the template row and
 * `searchTemplates` compares against.
 */
export interface SeamFingerprintBreakdown {
failureMode: SeamFailureMode;
/** Sorted type tokens of the missing required fields ("any" when a type is not declared). */
missingFieldTypes: string[];
/** Sorted type tokens of the unused upstream fields ("any" when a type is not declared). */
unusedFieldTypes: string[];
/** Name-stripped structural fingerprint of the downstream input schema. */
downstreamInputSchemaFingerprint: string;
/** Lower-cased capability tag; kept as metadata, not part of the hashed canonical string. */
capabilityTag: string;
/** Value of `FINGERPRINT_ALGO_VERSION` at computation time. */
algoVersion: number;
}
/** Result of `computeSeamFingerprint`. */
export interface ComputedFingerprint {
/** Hex SHA-256 over the salt, a NUL separator, and `canonical`. */
hash: string;
/** The structural components the canonical string was built from. */
breakdown: SeamFingerprintBreakdown;
/** Pre-hash canonical string, exposed so tests can assert determinism. */
canonical: string;
}
// ---------------------------------------------------------------- helpers
const DEV_SALT_FALLBACK = "doatlas-dev-template-salt-v1";
let warnedSaltOnce = false;

/**
 * Return the salt mixed into every template fingerprint hash.
 *
 * The trimmed value of `TEMPLATE_FINGERPRINT_SALT` wins when it is
 * non-empty. Otherwise the fixed dev fallback is returned and a warning
 * is logged the first time this happens in the process.
 */
function getSalt(): string {
  const configured = (process.env["TEMPLATE_FINGERPRINT_SALT"] || "").trim();
  if (configured.length > 0) {
    return configured;
  }
  if (warnedSaltOnce === false) {
    // Flip the flag before logging so the warning is emitted only once.
    warnedSaltOnce = true;
    logger.warn(
      "TEMPLATE_FINGERPRINT_SALT is unset — using a deterministic dev-only fallback. Set this env in production so template fingerprints cannot be correlated across deployments.",
    );
  }
  return DEV_SALT_FALLBACK;
}
/**
 * Turn a schema `type` into a string token that is always defined:
 * any falsy type (undefined, empty string) becomes "any".
 */
function typeToken(t: IOSchema["type"] | undefined): string {
  if (!t) {
    return "any";
  }
  return t as string;
}
/**
 * Produce a deterministic, name-free structural fingerprint of a
 * JSONSchema-shaped object.
 *
 * Each property contributes `<required flag>:<child fingerprint>`; the
 * property NAME itself is discarded and the contributions are sorted,
 * so schemas that differ only in field naming or ordering produce the
 * same string. Whether a property is required is kept because it is a
 * structural fact rather than a label.
 *
 * @param schema Schema to fingerprint; null, undefined, or a non-object yields "{}".
 * @returns A string of the form `{t=<type>[,props=[...]][,items=<fp>]}`.
 */
export function schemaFingerprint(
  schema: IOSchema | null | undefined,
): string {
  if (!schema || typeof schema !== "object") {
    return "{}";
  }

  const requiredNames = new Set(schema.required || []);
  const children = Object.entries(schema.properties || {})
    .map(
      ([fieldName, child]) =>
        `${requiredNames.has(fieldName) ? "1" : "0"}:${schemaFingerprint(child)}`,
    )
    .sort();

  // Assemble the optional segments in the fixed order: type, props, items.
  const segments: string[] = [`t=${typeToken(schema.type)}`];
  if (children.length > 0) {
    segments.push(`props=[${children.join(",")}]`);
  }
  if (schema.items) {
    segments.push(`items=${schemaFingerprint(schema.items)}`);
  }
  return `{${segments.join(",")}}`;
}
/**
 * Look up each named field in `parent.properties` and return the sorted
 * list of their type tokens. A name with no declared type (or not
 * present at all) contributes "any". Only types are returned; the
 * names never appear in the result.
 *
 * @returns An empty list when the parent, its properties, or the name list is missing or empty.
 */
function resolveFieldTypes(
  parent: IOSchema | null | undefined,
  fieldNames: string[] | undefined,
): string[] {
  const declared = parent?.properties;
  if (!declared || !fieldNames || fieldNames.length === 0) {
    return [];
  }
  return fieldNames
    .map((fieldName) => typeToken(declared[fieldName]?.type))
    .sort();
}
/**
 * Derive the structural breakdown, canonical string, and salted hash
 * for a seam.
 *
 * The canonical string is STRUCTURAL ONLY: it carries the algorithm
 * version, failure mode, missing/unused type lists, and the downstream
 * schema fingerprint. Field names and the capability tag are left out
 * on purpose — the tag is lexical metadata kept on the breakdown for
 * analytics/ranking, so seams with the same shape under different tags
 * hash to the same template.
 *
 * @returns `hash` (hex SHA-256 of salt + NUL + canonical), the `breakdown`, and the `canonical` string.
 */
export function computeSeamFingerprint(
  input: SeamFingerprintInput,
): ComputedFingerprint {
  const missingFieldTypes = resolveFieldTypes(
    input.downstreamInputSchema,
    input.missingRequiredFieldNames,
  );
  const unusedFieldTypes = resolveFieldTypes(
    input.upstreamOutputSchema,
    input.unusedFieldNames,
  );
  const downstreamInputSchemaFingerprint = schemaFingerprint(
    input.downstreamInputSchema,
  );

  const breakdown: SeamFingerprintBreakdown = {
    failureMode: input.failureMode,
    missingFieldTypes,
    unusedFieldTypes,
    downstreamInputSchemaFingerprint,
    capabilityTag: (input.capabilityTag || "").toLowerCase(),
    algoVersion: FINGERPRINT_ALGO_VERSION,
  };

  const canonical =
    `v=${breakdown.algoVersion}` +
    `|mode=${breakdown.failureMode}` +
    `|miss=[${missingFieldTypes.join(",")}]` +
    `|unused=[${unusedFieldTypes.join(",")}]` +
    `|dsfp=${downstreamInputSchemaFingerprint}`;

  // The NUL byte separates salt from payload so the two cannot blur together.
  const hasher = createHash("sha256");
  hasher.update(getSalt());
  hasher.update("\x00");
  hasher.update(canonical);

  return { hash: hasher.digest("hex"), breakdown, canonical };
}
// ---------------------------------------------------------------- sanitisation
/**
 * Top-level spec keys that may survive into a persisted skeleton.
 * Anything else a reviewer might have stuffed into the spec (a comment
 * carrying user text, a debug field) is dropped.
 */
const SPEC_ALLOWED_TOP = new Set([
  "name",
  "description",
  "parameters",
  "inputSchema",
  "outputSchema",
]);

/**
 * JSONSchema keys recognised inside a schema node. Being listed here is
 * necessary but not sufficient: `description`, `enum` and `pattern` are
 * recognised only so they can be dropped explicitly, and every kept key
 * has its value shape validated by `stripSchema`.
 */
const SCHEMA_ALLOWED_KEYS = new Set([
  "type",
  "properties",
  "required",
  "items",
  "description",
  "enum",
  "format",
  "minimum",
  "maximum",
  "minLength",
  "maxLength",
  "pattern",
]);

/**
 * Recursively reduce a schema node to its contract-shaped parts.
 *
 * Returns `undefined` for anything that is not a schema (strings,
 * numbers, …) so callers can drop it instead of persisting a raw value.
 * `null` and boolean schemas are passed through unchanged.
 */
function stripSchema(node: unknown): unknown {
  if (node === null || typeof node === "boolean") return node;
  if (Array.isArray(node)) {
    // Tuple-style `items`: keep only entries that are themselves schemas.
    return node.map(stripSchema).filter((entry) => entry !== undefined);
  }
  if (typeof node !== "object") return undefined;

  const out: Record<string, unknown> = {};
  for (const [k, v] of Object.entries(node as Record<string, unknown>)) {
    if (!SCHEMA_ALLOWED_KEYS.has(k)) continue;
    switch (k) {
      case "description":
      case "enum":
      case "pattern":
        // Value-bearing fields are the most common accidental leak
        // vectors (pasted user text, pinned identifiers) — always drop.
        break;
      case "type":
        if (typeof v === "string") {
          out[k] = v;
        } else if (Array.isArray(v)) {
          out[k] = v.filter((x): x is string => typeof x === "string");
        }
        break;
      case "required":
        if (Array.isArray(v)) {
          out[k] = v.filter((x): x is string => typeof x === "string");
        }
        break;
      case "format":
        if (typeof v === "string") out[k] = v;
        break;
      case "minimum":
      case "maximum":
      case "minLength":
      case "maxLength":
        // Numeric bounds must really be numbers; a string here is payload.
        if (typeof v === "number" && Number.isFinite(v)) out[k] = v;
        break;
      case "properties": {
        if (v === null || typeof v !== "object" || Array.isArray(v)) break;
        const cleaned: Record<string, unknown> = {};
        for (const [pk, pv] of Object.entries(v as Record<string, unknown>)) {
          const child = stripSchema(pv);
          if (child !== undefined) cleaned[pk] = child;
        }
        out[k] = cleaned;
        break;
      }
      case "items": {
        const child = stripSchema(v);
        if (child !== undefined) out[k] = child;
        break;
      }
      default:
        break;
    }
  }
  return out;
}

/**
 * Reduce a candidate spec to a skeleton that is safe to persist.
 *
 * - Only keys in `SPEC_ALLOWED_TOP` are considered.
 * - `name` is kept (strings only) and capped at 120 characters.
 * - `description` is FREE TEXT — exactly where a reviewer is most likely
 *   to paste user-derived strings — so it is always dropped.
 * - `parameters` / `inputSchema` / `outputSchema` are passed through
 *   `stripSchema`; a value that is not schema-shaped (for example a bare
 *   string) is dropped rather than stored verbatim.
 *
 * @param spec Reviewer-supplied spec; null/undefined/non-object yields `{}`.
 * @returns A new object; the input is not mutated.
 */
export function sanitizeSpecSkeleton(
  spec: Record<string, unknown> | null | undefined,
): Record<string, unknown> {
  if (!spec || typeof spec !== "object") return {};
  const out: Record<string, unknown> = {};
  for (const [k, v] of Object.entries(spec)) {
    if (!SPEC_ALLOWED_TOP.has(k)) continue;
    if (k === "name") {
      if (typeof v === "string") out[k] = v.slice(0, 120);
    } else if (k === "parameters" || k === "inputSchema" || k === "outputSchema") {
      const cleaned = stripSchema(v);
      if (cleaned !== undefined) out[k] = cleaned;
    }
    // "description" falls through on purpose: never persisted.
  }
  return out;
}
/**
 * Strip anything that could be a captured user value out of a handler
 * skeleton while keeping its contract-shaped scaffolding.
 *
 * - EVERY string literal (double-quoted, single-quoted, template; any
 *   length) is replaced by a `<value>` placeholder in the same quote
 *   style. Payload data is as likely to be a short id or e-mail as a
 *   long blob, so there is no length threshold.
 * - Block and line comments are removed — they often carry pasted
 *   examples. A `//` directly preceded by `:` is left alone so bare
 *   `scheme://` text is not treated as a comment.
 * - The result is capped at 8000 characters plus a truncation marker.
 *
 * Comments and strings are recognised in ONE left-to-right pass. Doing
 * them as separate passes lets a stray quote or backtick inside a
 * comment pair up with a real literal and leave its content unmasked.
 *
 * @param skel Raw handler source; null/undefined/empty yields "".
 * @returns The sanitised skeleton.
 */
export function sanitizeHandlerSkeleton(skel: string | null | undefined): string {
  if (!skel) return "";
  // Alternatives, in order: block comment, line comment, "…", '…', `…`.
  // Quoted strings may contain escaped characters (including an escaped
  // newline); unescaped newlines end a "…" or '…' candidate.
  const tokenPattern =
    /\/\*[\s\S]*?\*\/|(?<!:)\/\/[^\n]*|"(?:\\[\s\S]|[^"\\\n])*"|'(?:\\[\s\S]|[^'\\\n])*'|`(?:\\[\s\S]|[^`\\])*`/g;
  let out = skel.replace(tokenPattern, (token) => {
    const opener = token.charAt(0);
    if (opener === "/") return ""; // comment
    return `${opener}<value>${opener}`; // string literal, same quote style
  });
  // Trim absurdly large skeletons.
  if (out.length > 8000) out = out.slice(0, 8000) + "\n// truncated\n";
  return out;
}
// ---------------------------------------------------------------- search
/**
 * A template selected by `searchTemplates` for a requested seam.
 * Exact matches carry `score` 1 and no gaps; near matches carry their
 * Jaccard score and the names of the slots that differ.
 */
export interface TemplateMatch {
templateId: string;
strength: "exact" | "near";
score: number; // jaccard for near, 1 for exact
template: ToolSpawnTemplateRow;
/**
* For near-match: the list of structural slots where the fingerprint
* differs from the request. Used to render TODO markers in the
* skeleton.
*/
parameterizationGaps: string[];
}
const NEAR_MATCH_JACCARD_THRESHOLD = 0.8;

/**
 * Jaccard similarity |a ∩ b| / |a ∪ b| of two string sets.
 * Two empty sets are treated as identical and score 1.
 */
function jaccard(a: Set<string>, b: Set<string>): number {
  const shared = [...a].filter((member) => b.has(member)).length;
  const unionSize = a.size + b.size - shared;
  // unionSize is 0 only when both sets are empty.
  return unionSize === 0 ? 1 : shared / unionSize;
}
/** Coerce a stored type-list column to a string array (null / non-array → []). */
function storedTypeList(value: unknown): string[] {
  return Array.isArray(value)
    ? value.filter((v): v is string => typeof v === "string")
    : [];
}

/** True when two type lists have the same tokens in the same order. */
function sameTypeList(a: readonly string[], b: readonly string[]): boolean {
  return a.length === b.length && a.every((token, i) => token === b[i]);
}

/**
 * Encode the missing/unused type multisets as a set of slot keys.
 *
 * Each key is `<m|u>:<type>#<k>` where k counts earlier occurrences of
 * the same type in the same list. ["string","string"] therefore has one
 * more key than ["string"] (duplicates do not collapse), and adding an
 * unrelated type leaves the keys of every other type unchanged.
 */
function templateSlotBag(
  missing: readonly string[],
  unused: readonly string[],
): Set<string> {
  const bag = new Set<string>();
  const addAll = (prefix: string, types: readonly string[]): void => {
    const seen = new Map<string, number>();
    for (const token of types) {
      const occurrence = seen.get(token) ?? 0;
      seen.set(token, occurrence + 1);
      bag.add(`${prefix}:${token}#${occurrence}`);
    }
  };
  addAll("m", missing);
  addAll("u", unused);
  return bag;
}

/**
 * Find the best active template for a seam.
 *
 * 1. Exact match: same fingerprint hash under the current algorithm
 *    version. Returned with `strength: "exact"`, `score: 1`, no gaps.
 * 2. Near match (Task #161): SAME failure mode AND SAME downstream input
 *    schema fingerprint are hard preconditions; the score is the Jaccard
 *    overlap of the structural-slot bags (missing-required-field types ∪
 *    unused-upstream-field types) and must reach
 *    `NEAR_MATCH_JACCARD_THRESHOLD`. The highest-scoring candidate wins;
 *    on a tie the first one returned by the query is kept.
 *
 * @returns The chosen match, or null when nothing qualifies or the
 *   fingerprint could not be computed. Database errors are not caught
 *   here and propagate to the caller.
 */
export async function searchTemplates(
  input: SeamFingerprintInput,
): Promise<TemplateMatch | null> {
  let computed: ComputedFingerprint;
  try {
    computed = computeSeamFingerprint(input);
  } catch (err) {
    logger.debug({ err }, "spawn-templates: fingerprint failed");
    return null;
  }
  const breakdown = computed.breakdown;

  // 1. Exact match: same fingerprint, active, current algo version.
  const exact = await db
    .select()
    .from(toolSpawnTemplates)
    .where(
      and(
        eq(toolSpawnTemplates.fingerprintHash, computed.hash),
        eq(toolSpawnTemplates.fingerprintAlgoVersion, FINGERPRINT_ALGO_VERSION),
        eq(toolSpawnTemplates.status, "active"),
      ),
    )
    .limit(1);
  const exactRow = exact[0];
  if (exactRow) {
    return {
      templateId: exactRow.id,
      strength: "exact",
      score: 1,
      template: exactRow,
      parameterizationGaps: [],
    };
  }

  // 2. Near match: hard preconditions are applied in the query itself.
  const candidates = await db
    .select()
    .from(toolSpawnTemplates)
    .where(
      and(
        eq(toolSpawnTemplates.failureMode, breakdown.failureMode),
        eq(
          toolSpawnTemplates.downstreamInputSchemaFingerprint,
          breakdown.downstreamInputSchemaFingerprint,
        ),
        eq(toolSpawnTemplates.fingerprintAlgoVersion, FINGERPRINT_ALGO_VERSION),
        eq(toolSpawnTemplates.status, "active"),
      ),
    );
  if (candidates.length === 0) return null;

  const reqBag = templateSlotBag(
    breakdown.missingFieldTypes,
    breakdown.unusedFieldTypes,
  );
  let best: TemplateMatch | null = null;
  for (const t of candidates) {
    const storedMissing = storedTypeList(t.missingFieldTypes);
    const storedUnused = storedTypeList(t.unusedFieldTypes);
    const score = jaccard(reqBag, templateSlotBag(storedMissing, storedUnused));
    if (score < NEAR_MATCH_JACCARD_THRESHOLD) continue;
    if (best && best.score >= score) continue;

    // Name the slots a reviewer will have to adjust.
    const gaps: string[] = [];
    if (!sameTypeList(storedMissing, breakdown.missingFieldTypes)) {
      gaps.push("missing_field_types");
    }
    if (!sameTypeList(storedUnused, breakdown.unusedFieldTypes)) {
      gaps.push("unused_field_types");
    }
    best = {
      templateId: t.id,
      strength: "near",
      score,
      template: t,
      parameterizationGaps: gaps,
    };
  }
  return best;
}
// ---------------------------------------------------------------- ingestion
/** Everything `persistTemplateOnPromote` needs to ingest one promoted spawn. */
export interface PromotePersistInput {
/** Seam description; run through `computeSeamFingerprint` to obtain the hash and breakdown. */
fingerprintInput: SeamFingerprintInput;
/** Name of the promoted node; stored truncated to 200 characters as `sourceNodeName`. */
promotedNodeName: string;
/** Promoted node's input schema; only its structural fingerprint is stored. */
promotedInputSchema: IOSchema | null | undefined;
/** Promoted node's output schema; only its structural fingerprint is stored. */
promotedOutputSchema: IOSchema | null | undefined;
/** Raw handler source; passed through `sanitizeHandlerSkeleton` before storage. */
handlerSkeleton: string;
/** Raw spec object; passed through `sanitizeSpecSkeleton` before storage. */
specSkeleton: Record<string, unknown>;
}
/**
 * Store (or refresh) the template derived from a freshly-promoted spawn.
 *
 * The write is an upsert keyed on `(fingerprintHash,
 * fingerprintAlgoVersion)`: promoting a structurally identical spawn
 * again replaces the skeletons, promoted-schema fingerprints and source
 * node name on the existing row and increments its `version`. Counters
 * and status of an existing row are left as they are.
 *
 * Both skeletons are sanitised before they are written.
 *
 * @returns The inserted/updated row, or null if anything failed (the
 *   failure is logged at warn level and not rethrown).
 */
export async function persistTemplateOnPromote(
  input: PromotePersistInput,
): Promise<ToolSpawnTemplateRow | null> {
  try {
    const { hash, breakdown } = computeSeamFingerprint(input.fingerprintInput);
    const specSkeleton = sanitizeSpecSkeleton(input.specSkeleton);
    const handlerSkeleton = sanitizeHandlerSkeleton(input.handlerSkeleton);
    const promotedInputSchemaFingerprint = schemaFingerprint(
      input.promotedInputSchema,
    );
    const promotedOutputSchemaFingerprint = schemaFingerprint(
      input.promotedOutputSchema,
    );
    const sourceNodeName = input.promotedNodeName.slice(0, 200);

    const freshRow: InsertToolSpawnTemplateRow = {
      id: newId("tspt"),
      fingerprintHash: hash,
      fingerprintAlgoVersion: FINGERPRINT_ALGO_VERSION,
      failureMode: breakdown.failureMode,
      missingFieldTypes: breakdown.missingFieldTypes,
      unusedFieldTypes: breakdown.unusedFieldTypes,
      downstreamInputSchemaFingerprint:
        breakdown.downstreamInputSchemaFingerprint,
      promotedInputSchemaFingerprint,
      promotedOutputSchemaFingerprint,
      handlerSkeleton,
      specSkeleton,
      capabilityTag: breakdown.capabilityTag,
      sourceNodeName,
      offeredCount: 0,
      reuseCount: 0,
      successCount: 0,
      rejectCount: 0,
      status: "active",
    };

    const [persisted] = await db
      .insert(toolSpawnTemplates)
      .values(freshRow)
      .onConflictDoUpdate({
        target: [
          toolSpawnTemplates.fingerprintHash,
          toolSpawnTemplates.fingerprintAlgoVersion,
        ],
        // On conflict only the learned content is refreshed.
        set: {
          handlerSkeleton,
          specSkeleton,
          promotedInputSchemaFingerprint,
          promotedOutputSchemaFingerprint,
          sourceNodeName,
          version: sql`${toolSpawnTemplates.version} + 1`,
          updatedAt: new Date(),
        },
      })
      .returning();
    return persisted ?? null;
  } catch (err) {
    logger.warn({ err }, "spawn-templates: persistTemplateOnPromote failed");
    return null;
  }
}
// ---------------------------------------------------------------- accounting
/**
 * Increment `offered_count` for a template the proposer has surfaced.
 * Best-effort: a failed update is logged at debug level and swallowed.
 */
export async function recordTemplateOffered(templateId: string): Promise<void> {
  try {
    const bumpOffered = {
      offeredCount: sql`${toolSpawnTemplates.offeredCount} + 1`,
      updatedAt: new Date(),
    };
    await db
      .update(toolSpawnTemplates)
      .set(bumpOffered)
      .where(eq(toolSpawnTemplates.id, templateId));
  } catch (err) {
    logger.debug({ err }, "spawn-templates: recordTemplateOffered failed");
  }
}
export type TemplateChoice = "use" | "use_edit" | "fresh";

/**
 * Record what the reviewer did with an offered template.
 *
 * "fresh" increments `reject_count`; "use" and "use_edit" increment
 * `reuse_count`. A failed update is logged at debug level and swallowed.
 * Whatever the outcome, the auto-demote check runs afterwards, because
 * even a non-success choice may push a template past the demote
 * threshold once it has been offered enough times.
 */
export async function recordTemplateChoice(args: {
  templateId: string;
  choice: TemplateChoice;
}): Promise<void> {
  const { templateId, choice } = args;
  try {
    if (choice !== "fresh") {
      await db
        .update(toolSpawnTemplates)
        .set({
          reuseCount: sql`${toolSpawnTemplates.reuseCount} + 1`,
          updatedAt: new Date(),
        })
        .where(eq(toolSpawnTemplates.id, templateId));
    } else {
      await db
        .update(toolSpawnTemplates)
        .set({
          rejectCount: sql`${toolSpawnTemplates.rejectCount} + 1`,
          updatedAt: new Date(),
        })
        .where(eq(toolSpawnTemplates.id, templateId));
    }
  } catch (err) {
    logger.debug({ err }, "spawn-templates: recordTemplateChoice failed");
  }
  await maybeAutoDemote(templateId);
}
/**
 * Increment `success_count` for a template whose reusing candidate was
 * promoted. Best-effort: a failed update is logged at debug level and
 * swallowed.
 */
export async function recordTemplatePromoteResult(
  templateId: string,
): Promise<void> {
  try {
    const bumpSuccess = {
      successCount: sql`${toolSpawnTemplates.successCount} + 1`,
      updatedAt: new Date(),
    };
    await db
      .update(toolSpawnTemplates)
      .set(bumpSuccess)
      .where(eq(toolSpawnTemplates.id, templateId));
  } catch (err) {
    logger.debug({ err }, "spawn-templates: recordTemplatePromoteResult failed");
  }
}
// ---------------------------------------------------------------- demotion
/**
 * Read a numeric tuning knob from the environment.
 *
 * An unset, empty, or whitespace-only variable yields `fallback`, and so
 * does a value that does not parse as a number. Without that guard a
 * typo would produce NaN, every threshold comparison would be false,
 * and auto-demotion would be silently switched off.
 */
function readDemoteThreshold(name: string, fallback: number): number {
  const raw = (process.env[name] ?? "").trim();
  if (raw === "") return fallback;
  const parsed = Number(raw);
  return Number.isNaN(parsed) ? fallback : parsed;
}

/**
 * Auto-demote thresholds. A template is demoted (silently hidden from
 * the proposer, kept in admin view) once it has been offered at least
 * `MIN_OFFERS_FOR_DEMOTE` times AND its success rate is below
 * `SUCCESS_RATE_FLOOR`. Configurable through env for ops tuning
 * (`TEMPLATE_DEMOTE_MIN_OFFERS`, default 5; `TEMPLATE_DEMOTE_SUCCESS_RATE`,
 * default 0.3). Values are read once, when the module is loaded.
 */
const MIN_OFFERS_FOR_DEMOTE = readDemoteThreshold(
  "TEMPLATE_DEMOTE_MIN_OFFERS",
  5,
);
const SUCCESS_RATE_FLOOR = readDemoteThreshold(
  "TEMPLATE_DEMOTE_SUCCESS_RATE",
  0.3,
);
/**
 * Demote a template whose track record has fallen below the floor.
 *
 * Nothing happens unless the template exists, is currently "active",
 * and has been offered at least `MIN_OFFERS_FOR_DEMOTE` times. The
 * success rate is `successCount / offeredCount`; when it is below
 * `SUCCESS_RATE_FLOOR` the row is marked "demoted" with a timestamp and
 * a human-readable reason, and the event is logged at info level.
 *
 * @returns `{ demoted: true, reason }` when the row was demoted,
 *   otherwise `{ demoted: false }` — including when a database error
 *   occurred (logged at debug level, not rethrown).
 */
export async function maybeAutoDemote(
  templateId: string,
): Promise<{ demoted: boolean; reason?: string }> {
  try {
    const [template] = await db
      .select()
      .from(toolSpawnTemplates)
      .where(eq(toolSpawnTemplates.id, templateId))
      .limit(1);
    if (!template || template.status !== "active") {
      return { demoted: false };
    }

    const { offeredCount, successCount } = template;
    if (offeredCount < MIN_OFFERS_FOR_DEMOTE) {
      return { demoted: false };
    }

    const rate = offeredCount === 0 ? 1 : successCount / offeredCount;
    // Written as a negated "<" so the outcome matches a plain
    // `rate < floor` test for every numeric input.
    if (!(rate < SUCCESS_RATE_FLOOR)) {
      return { demoted: false };
    }

    const reason = `success_rate ${rate.toFixed(2)} < floor ${SUCCESS_RATE_FLOOR} after ${offeredCount} offers`;
    await db
      .update(toolSpawnTemplates)
      .set({
        status: "demoted",
        demotedAt: new Date(),
        demotedReason: reason,
        updatedAt: new Date(),
      })
      .where(eq(toolSpawnTemplates.id, templateId));
    logger.info(
      { templateId, reason, sourceNode: template.sourceNodeName },
      "spawn-templates: auto-demoted template",
    );
    return { demoted: true, reason };
  } catch (err) {
    logger.debug({ err }, "spawn-templates: maybeAutoDemote failed");
    return { demoted: false };
  }
}
// ---------------------------------------------------------------- admin
/**
 * Admin read: list templates, most recently updated first.
 *
 * @param filter Optional status filter; omitted or "any" returns every row.
 * @returns Rows sorted by `updatedAt` descending (rows without a timestamp sort as time 0).
 */
export async function listTemplates(filter?: {
  status?: "active" | "demoted" | "any";
}): Promise<ToolSpawnTemplateRow[]> {
  const wanted = filter?.status ?? "any";

  let rows: ToolSpawnTemplateRow[];
  if (wanted === "any") {
    rows = await db.select().from(toolSpawnTemplates);
  } else {
    rows = await db
      .select()
      .from(toolSpawnTemplates)
      .where(eq(toolSpawnTemplates.status, wanted));
  }

  const stamp = (row: ToolSpawnTemplateRow): number =>
    row.updatedAt?.getTime() ?? 0;
  rows.sort((left, right) => stamp(right) - stamp(left));
  return rows;
}
/**
 * Admin override of a template's status.
 *
 * Setting "demoted" stamps `demotedAt` and stores the reason (capped at
 * 500 characters); setting "active" clears both fields.
 *
 * @returns The updated row, or null when no row matched or the update
 *   failed (failure is logged at debug level, not rethrown).
 */
export async function setTemplateStatus(
  templateId: string,
  status: "active" | "demoted",
  reason: string,
): Promise<ToolSpawnTemplateRow | null> {
  const isDemotion = status === "demoted";
  try {
    const [updated] = await db
      .update(toolSpawnTemplates)
      .set({
        status,
        demotedAt: isDemotion ? new Date() : null,
        demotedReason: isDemotion ? reason.slice(0, 500) : null,
        updatedAt: new Date(),
      })
      .where(eq(toolSpawnTemplates.id, templateId))
      .returning();
    return updated ?? null;
  } catch (err) {
    logger.debug({ err }, "spawn-templates: setTemplateStatus failed");
    return null;
  }
}
// ---------------------------------------------------------------- skeletons
/**
 * Prefix a near-match handler skeleton with one TODO line per
 * structural slot that differs from the requested fingerprint.
 *
 * Exact matches, and near matches with no recorded gaps, get the
 * skeleton back untouched.
 *
 * @param skeleton Handler skeleton text of the matched template.
 * @param match The match whose `parameterizationGaps` drive the markers.
 * @returns The skeleton, possibly preceded by TODO comment lines.
 */
export function applyParameterizationMarkers(
  skeleton: string,
  match: TemplateMatch,
): string {
  const gaps = match.parameterizationGaps;
  if (match.strength !== "exact" && gaps.length > 0) {
    const todoLines = gaps.map(
      (g) => `// TODO(template-reuse): adjust for differing ${g}`,
    );
    return [...todoLines, skeleton].join("\n");
  }
  return skeleton;
}
// ---------------------------------------------------------------- privacy invariant
/**
 * Gather every piece of text reachable from `value` into `sink`:
 * strings as-is, numbers/booleans/bigints stringified, and — for
 * objects — the property names as well as the values.
 */
function collectPersistedText(value: unknown, sink: string[]): void {
  if (value === null || value === undefined) return;
  if (typeof value === "string") {
    sink.push(value);
    return;
  }
  if (
    typeof value === "number" ||
    typeof value === "boolean" ||
    typeof value === "bigint"
  ) {
    sink.push(String(value));
    return;
  }
  if (Array.isArray(value)) {
    for (const entry of value) collectPersistedText(entry, sink);
    return;
  }
  if (typeof value === "object") {
    for (const [key, entry] of Object.entries(value as Record<string, unknown>)) {
      sink.push(key);
      collectPersistedText(entry, sink);
    }
  }
}

/**
 * Test-only: assert that no string from `payloadStrings` (representative
 * of user data observed at the seam) appears anywhere in the persisted
 * template row. Throws on violation so property tests fail loudly.
 *
 * The row's stored values are compared as RAW text rather than through a
 * JSON-serialised haystack: serialisation escapes quotes, backslashes
 * and control characters, so a leaked payload containing any of them
 * would not be found, while the serialised key names could match
 * payloads that never leaked.
 *
 * Payload strings shorter than 4 characters are ignored to avoid
 * trivial coincidental matches.
 *
 * @throws Error naming the (truncated) payload string and the row id.
 */
export function __assertNoUserDataLeaked(
  row: ToolSpawnTemplateRow,
  payloadStrings: string[],
): void {
  const persistedText: string[] = [];
  const inspected: unknown[] = [
    row.handlerSkeleton,
    row.specSkeleton,
    row.missingFieldTypes,
    row.unusedFieldTypes,
    row.downstreamInputSchemaFingerprint,
    row.promotedInputSchemaFingerprint,
    row.promotedOutputSchemaFingerprint,
    row.capabilityTag,
    row.sourceNodeName,
    row.fingerprintHash,
  ];
  for (const value of inspected) collectPersistedText(value, persistedText);

  for (const s of payloadStrings) {
    if (!s || s.length < 4) continue;
    if (persistedText.some((text) => text.includes(s))) {
      throw new Error(
        `privacy invariant violated: payload string "${s.slice(0, 40)}…" leaked into template row ${row.id}`,
      );
    }
  }
}