doatlas-2 / lib /db /src /schema /toolNetwork.ts

Add files using upload-large-folder tool

9c12e58 verified 20 days ago

24.7 kB

	/**
	 * tool-network — Wave A architecture.
	 *
	 * Promotes "tool networks" (curated, executable subgraphs solving a
	 * problem class end-to-end) to first-class citizens, replacing the
	 * intent-regex + 1-hop expansion path that exposed every atomic tool to
	 * the LLM. The planner now sees ≤5 service cards (one per network) and
	 * the network's own executor walks the internal subgraph.
	 *
	 * Tables:
	 *   - problem_classes         — taxonomy of problem domains (dotted paths
	 *                               such as "ligand.screening.binding").
	 *   - tool_networks           — one row per network, with input/output
	 *                               contracts and an internal subgraph
	 *                               snapshot.
	 *   - network_versions        — versioned snapshots; an "active" + zero
	 *                               or more "shadow" variants per network.
	 *   - network_promotions      — audit log for variant changes (manual,
	 *                               auto-promote, auto-rollback).
	 *   - network_version_metrics — Reviewer scores aggregated per turn,
	 *                               feeding Wave B's online-evolution gate.
	 *   - execution_plans         — Blueprint→Execution two-phase: planner
	 *                               writes the plan, user/Reviewer confirms
	 *                               before runner consumes it.
	 *   - task_ledger             — durable per-task state for long-running
	 *                               multi-task jobs (DrugCLIP-style 117
	 *                               screens). Survives restarts; resumes
	 *                               from the last `pending|running` row.
	 */
	import {
	pgTable,
	text,
	timestamp,
	jsonb,
	integer,
	doublePrecision,
	boolean,
	index,
	uniqueIndex,
	} from "drizzle-orm/pg-core";

	// -------------------------------------------------------- problem_classes

	/**
	 * `problem_classes` — taxonomy of problem domains, addressed by a dotted
	 * path. Indexed by parent path and by lifecycle status.
	 */
	export const problemClasses = pgTable(
	  "problem_classes",
	  {
	    id: text("id").primaryKey(),
	    /** Dotted path such as "ligand.screening.binding"; unique per table. */
	    path: text("path").notNull().unique(),
	    parentPath: text("parent_path"),
	    label: text("label").notNull(),
	    description: text("description").notNull().default(""),
	    /**
	     * Capability tags from intent-rules.json that map onto this class.
	     * Legacy fallback used when the LLM classifier is unavailable
	     * (kill-switch / cold start); an empty array turns the fallback off.
	     */
	    capabilityTags: jsonb("capability_tags").notNull().default([]),
	    /** Override for the Reviewer channel weights; null means defaults. */
	    reviewerWeights: jsonb("reviewer_weights"),
	    /**
	     * Lifecycle status. Allowed values: `'active' | 'draft'`.
	     * Classes newly registered through `/admin/networks` begin as
	     * `'draft'` (admin-only visibility, skipped by the planner) and move
	     * to `'active'` once an admin promotes them. Wave A seed classes
	     * (e.g. `ligand.screening.binding`) stay on the `'active'` default.
	     */
	    status: text("status").notNull().default("active"),
	    createdAt: timestamp("created_at", { withTimezone: true })
	      .notNull()
	      .defaultNow(),
	    updatedAt: timestamp("updated_at", { withTimezone: true })
	      .notNull()
	      .defaultNow(),
	  },
	  (cols) => {
	    return {
	      byParent: index("problem_classes_parent_idx").on(cols.parentPath),
	      byStatus: index("problem_classes_status_idx").on(cols.status),
	    };
	  },
	);

	/** Shape of a row selected from `problem_classes`. */
	export type ProblemClassRow = typeof problemClasses.$inferSelect;
	/** Shape of the values accepted when inserting into `problem_classes`. */
	export type InsertProblemClassRow = typeof problemClasses.$inferInsert;

	// -------------------------------------------------------- tool_networks

	/**
	 * `tool_networks` — one row per network: its input/output contracts, a
	 * snapshot of the internal subgraph, and release/evolution settings.
	 * Indexed by problem class path and by status.
	 */
	export const toolNetworks = pgTable(
	  "tool_networks",
	  {
	    id: text("id").primaryKey(),
	    /** Stable handle such as "drugclip_v1"; unique per table. */
	    name: text("name").notNull().unique(),
	    /** Soft (text-only) reference to problem_classes.path. */
	    problemClassPath: text("problem_class_path").notNull(),
	    description: text("description").notNull().default(""),
	    /**
	     * JSONSchema for the wire shape the network accepts. The planner
	     * surfaces it to the LLM as the parameters of the synthetic
	     * `run_<network_name>` tool.
	     */
	    inputContract: jsonb("input_contract").notNull(),
	    /** JSONSchema for the shape of the network's output. */
	    outputContract: jsonb("output_contract").notNull(),
	    /**
	     * Snapshot of the internal subgraph walked by the executor.
	     * Shape: `{nodes: string[], edges: Array<{from, to, relation}>}`.
	     * Names point at atomic tool_nodes registered in the catalog.
	     */
	    internalGraph: jsonb("internal_graph").notNull(),
	    /**
	     * Pointer to the active variant. Null on first insert; the seeder
	     * then creates a v1 network_versions row and back-fills this column.
	     */
	    activeVariantId: text("active_variant_id"),
	    /**
	     * "weak" | "strong" — gates Wave B's autoPromote: weak-tier builders
	     * cannot push variants into the active slot without sign-off from a
	     * stronger model.
	     */
	    builderModelTier: text("builder_model_tier").notNull().default("strong"),
	    /**
	     * Wave B — minimum builder tier a candidate needs before it may be
	     * promoted into the shared `active_variant_id` slot. Weak-tier
	     * candidates that pass the quality gates are still installed, but
	     * only into a `private_namespace` on the variant row.
	     */
	    releaseTierFloor: text("release_tier_floor").notNull().default("strong"),
	    /**
	     * Per-network knobs (Wave B trigger thresholds, regression floor).
	     * `lib/evolution/triggers.ts` reads them under the `evolution` key.
	     */
	    config: jsonb("config").notNull().default({}),
	    /** "active" | "deprecated" | "draft" */
	    status: text("status").notNull().default("active"),
	    costHint: doublePrecision("cost_hint"),
	    latencyHintMs: integer("latency_hint_ms"),
	    /**
	     * Tags drawn from intent-rules.json. Legacy fallback used to pick a
	     * network from the regex hit set when the LLM classifier does not
	     * fire; an empty array means classifier-only.
	     */
	    capabilityTags: jsonb("capability_tags").notNull().default([]),
	    /**
	     * Migration source pointer: when a composition_alias node is rebadged
	     * into a row of this table, the alias's tool_nodes id is stored here
	     * so both can be kept during gray rollout.
	     */
	    legacyAliasNodeId: text("legacy_alias_node_id"),
	    createdAt: timestamp("created_at", { withTimezone: true })
	      .notNull()
	      .defaultNow(),
	    updatedAt: timestamp("updated_at", { withTimezone: true })
	      .notNull()
	      .defaultNow(),
	  },
	  (cols) => {
	    return {
	      byClass: index("tool_networks_class_idx").on(cols.problemClassPath),
	      byStatus: index("tool_networks_status_idx").on(cols.status),
	    };
	  },
	);

	/** Shape of a row selected from `tool_networks`. */
	export type ToolNetworkRow = typeof toolNetworks.$inferSelect;
	/** Shape of the values accepted when inserting into `tool_networks`. */
	export type InsertToolNetworkRow = typeof toolNetworks.$inferInsert;

	// -------------------------------------------------------- network_versions

	/**
	 * `network_versions` — versioned snapshots of a network's internal graph.
	 * `(network_id, version_label)` is unique; further indexes cover network,
	 * status and private namespace.
	 */
	export const networkVersions = pgTable(
	  "network_versions",
	  {
	    id: text("id").primaryKey(),
	    networkId: text("network_id").notNull(),
	    versionLabel: text("version_label").notNull(),
	    internalGraph: jsonb("internal_graph").notNull(),
	    /** Per-version parameters (handler tweaks, hyperparams). */
	    config: jsonb("config").notNull().default({}),
	    /** "active" | "shadow" | "promoted" | "demoted" | "draft" */
	    status: text("status").notNull().default("draft"),
	    /** "system" | "user:<id>" | "auto:builder" */
	    builtBy: text("built_by").notNull().default("system"),
	    builderModelTier: text("builder_model_tier").notNull().default("strong"),
	    /**
	     * Wave B — when non-null, pins this variant to a single user/session.
	     * `attemptAutoPromote` uses it for a candidate that passes the quality
	     * gates but whose builder tier is below the network's release floor:
	     * the variant keeps serving the user who built it, yet never occupies
	     * the shared `tool_networks.active_variant_id` slot.
	     */
	    privateNamespace: text("private_namespace"),
	    createdAt: timestamp("created_at", { withTimezone: true })
	      .notNull()
	      .defaultNow(),
	  },
	  (cols) => {
	    return {
	      byNetwork: index("network_versions_net_idx").on(cols.networkId),
	      byStatus: index("network_versions_status_idx").on(cols.status),
	      uniqLabel: uniqueIndex("network_versions_uniq_label").on(
	        cols.networkId,
	        cols.versionLabel,
	      ),
	      byPrivateNs: index("network_versions_private_ns_idx").on(
	        cols.privateNamespace,
	      ),
	      // Task #242 (B1) — a partial unique index guarantees a single
	      // user-private active variant per (network, user). It is created
	      // with raw SQL outside drizzle-kit, not declared here:
	      //   CREATE UNIQUE INDEX network_versions_uniq_active_private
	      //     ON network_versions (network_id, private_namespace)
	      //     WHERE status = 'active' AND private_namespace IS NOT NULL;
	      // `getOrCreateUserPrivateVariant` references that index name and
	      // catches its conflict to stay idempotent under concurrency.
	      // NOTE(review): the original note gives "partial WHERE not yet in
	      // drizzle DSL" as the reason — re-check against the drizzle version
	      // in use.
	    };
	  },
	);

	/** Shape of a row selected from `network_versions`. */
	export type NetworkVersionRow = typeof networkVersions.$inferSelect;
	/** Shape of the values accepted when inserting into `network_versions`. */
	export type InsertNetworkVersionRow = typeof networkVersions.$inferInsert;

	// -------------------------------------------------------- network_promotions

	/**
	 * `network_promotions` — audit log of variant changes for a network.
	 * Indexed by network id.
	 */
	export const networkPromotions = pgTable(
	  "network_promotions",
	  {
	    id: text("id").primaryKey(),
	    networkId: text("network_id").notNull(),
	    /** Variant being replaced; nullable. */
	    fromVariantId: text("from_variant_id"),
	    /** Variant being installed; required. */
	    toVariantId: text("to_variant_id").notNull(),
	    /** "manual" | "auto_promote" | "auto_rollback" | "seed" */
	    reason: text("reason").notNull(),
	    metricsSnapshot: jsonb("metrics_snapshot").notNull().default({}),
	    decidedBy: text("decided_by").notNull().default("system"),
	    createdAt: timestamp("created_at", { withTimezone: true })
	      .notNull()
	      .defaultNow(),
	  },
	  (cols) => {
	    return {
	      byNetwork: index("network_promotions_net_idx").on(cols.networkId),
	    };
	  },
	);

	/** Shape of a row selected from `network_promotions`. */
	export type NetworkPromotionRow = typeof networkPromotions.$inferSelect;
	/** Shape of the values accepted when inserting into `network_promotions`. */
	export type InsertNetworkPromotionRow = typeof networkPromotions.$inferInsert;

	// -------------------------------------------------------- metrics

	/**
	 * `network_version_metrics` — one reviewer-score record per turn for a
	 * given network version. Indexed by network, version and problem class.
	 */
	export const networkVersionMetrics = pgTable(
	  "network_version_metrics",
	  {
	    id: text("id").primaryKey(),
	    networkId: text("network_id").notNull(),
	    versionId: text("version_id").notNull(),
	    problemClassPath: text("problem_class_path").notNull(),
	    /** Aggregated reviewer score in the range 0..1. */
	    reviewerScore: doublePrecision("reviewer_score").notNull(),
	    /**
	     * Per-channel breakdown
	     * {contract, health, trace, coverage, factual, feedback}.
	     */
	    channelBreakdown: jsonb("channel_breakdown").notNull().default({}),
	    /**
	     * Task #254 (B0, CONT-007) — per-channel boolean flag, set whenever
	     * the grader returned a NEUTRAL fallback (no real signal). Shape:
	     * `{ intent_coverage: true, factual_consistency: true, traceability: false, ... }`.
	     * Used for B11 acceptance: `fallback_rate = mean(any flag true)` must
	     * be < 0.5. Old rows do not have this field and are treated as
	     * all-false (per the original note, old metrics from before B4 made
	     * the graders real were fallback by default).
	     */
	    channelFallbackFlags: jsonb("channel_fallback_flags")
	      .notNull()
	      .default({}),
	    /** Wall-clock ms for the whole turn (planner + tool calls + reviewer). */
	    costMs: integer("cost_ms"),
	    /** Count of L1+L2 retries inside the turn. */
	    retries: integer("retries").notNull().default(0),
	    /** True when the K=3× baseline budget was exceeded; result is partial. */
	    budgetExceeded: boolean("budget_exceeded").notNull().default(false),
	    /** Optional links back to the source turn, for audit. */
	    conversationId: text("conversation_id"),
	    messageId: text("message_id"),
	    createdAt: timestamp("created_at", { withTimezone: true })
	      .notNull()
	      .defaultNow(),
	  },
	  (cols) => {
	    return {
	      byNetwork: index("network_version_metrics_net_idx").on(cols.networkId),
	      byVersion: index("network_version_metrics_ver_idx").on(cols.versionId),
	      byClass: index("network_version_metrics_class_idx").on(
	        cols.problemClassPath,
	      ),
	    };
	  },
	);

	/** Shape of a row selected from `network_version_metrics`. */
	export type NetworkVersionMetricRow =
	  typeof networkVersionMetrics.$inferSelect;
	/** Shape of the values accepted when inserting a metrics row. */
	export type InsertNetworkVersionMetricRow =
	  typeof networkVersionMetrics.$inferInsert;

	// -------------------------------------------------------- execution_plans

	/**
	 * `execution_plans` — the blueprint half of the Blueprint→Execution flow:
	 * a stored plan plus its approval state. Indexed by status and by owner.
	 */
	export const executionPlans = pgTable(
	  "execution_plans",
	  {
	    id: text("id").primaryKey(),
	    /** Optional link to the conversation that spawned the plan. */
	    conversationId: text("conversation_id"),
	    /** Optional link to the owning user (for ACL). */
	    ownerUserId: text("owner_user_id"),
	    problemClassPath: text("problem_class_path").notNull(),
	    networkId: text("network_id").notNull(),
	    versionId: text("version_id").notNull(),
	    /**
	     * Blueprint payload:
	     *   {
	     *     summary: string,
	     *     tasks: Array<{
	     *       taskKey, networkName, params, expectedCostMs, parallelism
	     *     }>,
	     *     globalConfig: Record<string, unknown>,
	     *   }
	     */
	    blueprint: jsonb("blueprint").notNull(),
	    /**
	     * Plan state:
	     *   "draft"                 — planner is still drafting.
	     *   "awaiting_confirmation" — UI shows this; Execute button enabled.
	     *   "approved"              — runner may pick up.
	     *   "executing"             — task_ledger rows being processed.
	     *   "completed"             — all tasks finished (success or
	     *                             terminal failure).
	     *   "rejected"              — user explicitly rejected the plan.
	     *   "failed"                — terminal infrastructure failure.
	     */
	    status: text("status").notNull().default("draft"),
	    approvedBy: text("approved_by"),
	    approvedAt: timestamp("approved_at", { withTimezone: true }),
	    /** Path to the packaged artifact (e.g. result.zip) once completed. */
	    outputArtifactPath: text("output_artifact_path"),
	    /**
	     * Free-form notes appended by the runner as it makes decisions;
	     * becomes the body of `result.log` for benchmark submissions.
	     */
	    notes: text("notes").notNull().default(""),
	    createdAt: timestamp("created_at", { withTimezone: true })
	      .notNull()
	      .defaultNow(),
	    updatedAt: timestamp("updated_at", { withTimezone: true })
	      .notNull()
	      .defaultNow(),
	  },
	  (cols) => {
	    return {
	      byStatus: index("execution_plans_status_idx").on(cols.status),
	      byOwner: index("execution_plans_owner_idx").on(cols.ownerUserId),
	    };
	  },
	);

	/** Shape of a row selected from `execution_plans`. */
	export type ExecutionPlanRow = typeof executionPlans.$inferSelect;
	/** Shape of the values accepted when inserting into `execution_plans`. */
	export type InsertExecutionPlanRow = typeof executionPlans.$inferInsert;

	// -------------------------------------------------------- task_ledger

	/**
	 * `task_ledger` — per-task state rows belonging to an execution plan.
	 * `(plan_id, task_key)` is unique; further indexes cover plan and status.
	 */
	export const taskLedger = pgTable(
	  "task_ledger",
	  {
	    id: text("id").primaryKey(),
	    planId: text("plan_id").notNull(),
	    /** Stable key within a plan (e.g. "dude_aa2ar"); unique per plan. */
	    taskKey: text("task_key").notNull(),
	    params: jsonb("params").notNull().default({}),
	    /** "pending" | "running" | "done" | "failed" | "skipped" */
	    status: text("status").notNull().default("pending"),
	    attempts: integer("attempts").notNull().default(0),
	    maxAttempts: integer("max_attempts").notNull().default(3),
	    result: jsonb("result"),
	    errorText: text("error_text"),
	    /** Per-task quality metrics (EF1%, ranks). */
	    metrics: jsonb("metrics"),
	    /** Wall-clock duration of the most recent run. */
	    durationMs: integer("duration_ms"),
	    startedAt: timestamp("started_at", { withTimezone: true }),
	    finishedAt: timestamp("finished_at", { withTimezone: true }),
	    createdAt: timestamp("created_at", { withTimezone: true })
	      .notNull()
	      .defaultNow(),
	    updatedAt: timestamp("updated_at", { withTimezone: true })
	      .notNull()
	      .defaultNow(),
	  },
	  (cols) => {
	    return {
	      byPlan: index("task_ledger_plan_idx").on(cols.planId),
	      byStatus: index("task_ledger_status_idx").on(cols.status),
	      uniq: uniqueIndex("task_ledger_uniq").on(cols.planId, cols.taskKey),
	    };
	  },
	);

	/** Shape of a row selected from `task_ledger`. */
	export type TaskLedgerRow = typeof taskLedger.$inferSelect;
	/** Shape of the values accepted when inserting into `task_ledger`. */
	export type InsertTaskLedgerRow = typeof taskLedger.$inferInsert;

	// ====================================================================
	// Wave B — online-evolution flywheel
	// ====================================================================
	//
	// The three Wave B tables below ride on top of the Wave A surface:
	//
	// - network_shadow_samples — paired (active, shadow) per-turn rows
	// used to compute the 95% CI gate.
	// - network_evolution_events — append-only event stream that drives
	// the admin "Evolution Live" page and
	// the auto-rollback watchdog.
	// - network_regression_samples
	// — archived representative inputs the
	// pre-promotion regression suite replays
	// against any candidate before promote.
	//
	// Two extra columns also appear on Wave A tables (added via
	// `_create.mjs` ALTER TABLE IF NOT EXISTS, so existing data is
	// preserved):
	//
	// - tool_networks.release_tier_floor — required builder tier for a
	// candidate to enter the
	// shared release channel.
	// - network_versions.private_namespace — non-null ⇒ this version
	// only ever serves the user
	// encoded in the namespace,
	// never the global active.

	// -------------------------------------------------------- shadow samples

	/**
	 * `network_shadow_samples` — paired active/shadow reviewer scores and
	 * costs for a network. Indexed by network, shadow variant and creation
	 * time.
	 */
	export const networkShadowSamples = pgTable(
	  "network_shadow_samples",
	  {
	    id: text("id").primaryKey(),
	    networkId: text("network_id").notNull(),
	    activeVariantId: text("active_variant_id").notNull(),
	    shadowVariantId: text("shadow_variant_id").notNull(),
	    problemClassPath: text("problem_class_path").notNull(),
	    /** Aggregated 0..1 reviewer score of the active execution. */
	    activeScore: doublePrecision("active_score").notNull(),
	    /** Aggregated 0..1 reviewer score of the shadow execution. */
	    shadowScore: doublePrecision("shadow_score").notNull(),
	    /** Reviewer hard-fail signals (factuality, contract, safety). */
	    criticalSignal: boolean("critical_signal").notNull().default(false),
	    activeCostMs: integer("active_cost_ms"),
	    shadowCostMs: integer("shadow_cost_ms"),
	    /** True ⇒ shadow exceeded the 1.5× active budget cap and was skipped. */
	    budgetSkipped: boolean("budget_skipped").notNull().default(false),
	    conversationId: text("conversation_id"),
	    messageId: text("message_id"),
	    createdAt: timestamp("created_at", { withTimezone: true })
	      .notNull()
	      .defaultNow(),
	  },
	  (cols) => {
	    return {
	      byNetwork: index("network_shadow_samples_net_idx").on(cols.networkId),
	      byShadow: index("network_shadow_samples_shadow_idx").on(
	        cols.shadowVariantId,
	      ),
	      byCreated: index("network_shadow_samples_created_idx").on(
	        cols.createdAt,
	      ),
	    };
	  },
	);

	/** Shape of a row selected from `network_shadow_samples`. */
	export type NetworkShadowSampleRow = typeof networkShadowSamples.$inferSelect;
	/** Shape of the values accepted when inserting a shadow sample. */
	export type InsertNetworkShadowSampleRow =
	  typeof networkShadowSamples.$inferInsert;

	// -------------------------------------------------------- evolution events

	/**
	 * `network_evolution_events` — event rows describing evolution activity
	 * for a network. Indexed by network, event kind and creation time.
	 */
	export const networkEvolutionEvents = pgTable(
	  "network_evolution_events",
	  {
	    id: text("id").primaryKey(),
	    networkId: text("network_id").notNull(),
	    /**
	     * Event kind, one of:
	     *   - "cadence_trigger" | "regression_trigger" | "coverage_trigger"
	     *   - "shadow_started" | "shadow_budget_skipped"
	     *   - "shadow_budget_threshold"
	     *   - "shadow_runner_error" | "shadow_reviewer_fallback"
	     *   - "auto_promote_skipped" | "promote" | "rollback"
	     *   - "regression_suite_failed"
	     */
	    kind: text("kind").notNull(),
	    /** Variant the event concerns (the candidate for trigger / promote). */
	    variantId: text("variant_id"),
	    /** Free-form event payload (metric snapshots, gate decisions). */
	    payload: jsonb("payload").notNull().default({}),
	    /** Optional chaining pointer — e.g. a rollback references a promote. */
	    relatedEventId: text("related_event_id"),
	    /** Optional pointer to a network_promotions row. */
	    promotionId: text("promotion_id"),
	    createdAt: timestamp("created_at", { withTimezone: true })
	      .notNull()
	      .defaultNow(),
	  },
	  (cols) => {
	    return {
	      byNetwork: index("network_evolution_events_net_idx").on(
	        cols.networkId,
	      ),
	      byKind: index("network_evolution_events_kind_idx").on(cols.kind),
	      byCreated: index("network_evolution_events_created_idx").on(
	        cols.createdAt,
	      ),
	    };
	  },
	);

	/** Shape of a row selected from `network_evolution_events`. */
	export type NetworkEvolutionEventRow =
	  typeof networkEvolutionEvents.$inferSelect;
	/** Shape of the values accepted when inserting an evolution event. */
	export type InsertNetworkEvolutionEventRow =
	  typeof networkEvolutionEvents.$inferInsert;

	// -------------------------------------------------------- regression samples

	/**
	 * `network_regression_samples` — stored input payloads with the minimum
	 * score and shape expected of a candidate. Indexed by network and status.
	 */
	export const networkRegressionSamples = pgTable(
	  "network_regression_samples",
	  {
	    id: text("id").primaryKey(),
	    networkId: text("network_id").notNull(),
	    problemClassPath: text("problem_class_path").notNull(),
	    label: text("label").notNull().default(""),
	    /** Network input payload replayed against any candidate. */
	    inputPayload: jsonb("input_payload").notNull(),
	    /**
	     * Minimum reviewer score a candidate must reach to pass this sample;
	     * the pre-promotion suite fails when the candidate falls below it.
	     */
	    expectedFloor: doublePrecision("expected_floor").notNull().default(0.6),
	    /** Optional structural assertions, e.g. {must_include_keys:[...]} */
	    expectedShape: jsonb("expected_shape").notNull().default({}),
	    /** "active" rows feed the suite; "archived" rows are kept for audit. */
	    status: text("status").notNull().default("active"),
	    createdBy: text("created_by").notNull().default("system"),
	    createdAt: timestamp("created_at", { withTimezone: true })
	      .notNull()
	      .defaultNow(),
	  },
	  (cols) => {
	    return {
	      byNetwork: index("network_regression_samples_net_idx").on(
	        cols.networkId,
	      ),
	      byStatus: index("network_regression_samples_status_idx").on(
	        cols.status,
	      ),
	    };
	  },
	);

	/** Shape of a row selected from `network_regression_samples`. */
	export type NetworkRegressionSampleRow =
	  typeof networkRegressionSamples.$inferSelect;
	/** Shape of the values accepted when inserting a regression sample. */
	export type InsertNetworkRegressionSampleRow =
	  typeof networkRegressionSamples.$inferInsert;

	// -------------------------------------------------------- submission_feedback_ledger
	//
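	// Task #226 — reverse-mapping ledger written after the competition driver
	// submits the real 117-task set.
	// One row = the metric row (i.e. network_version_metrics.id) produced by
	// running one (submission_id, task_id) pair on a given network_version.
	// In phase 3, once the organizers' recomputed EF1 ground truth arrives,
	// metric_row_id is looked up by (submission_id, task_id) and the value is
	// fed back into the corresponding reviewer channel (external_truth).
	//
	// Why this is a separate table:
	// - a jsonb blob cannot carry enough semantics (the driver produces 117
	//   rows per run and they need to be indexable);
	// - it avoids polluting network_version_metrics itself (each of its rows
	//   is one reviewer-score snapshot, clean and free of redundant fields).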

	/**
	 * `submission_feedback_ledger` — maps a (submission, task) pair to the
	 * metrics row it produced and tracks the external ground truth for it.
	 * `(submission_id, task_id)` is unique; further indexes cover submission,
	 * task, metric row and external-truth status.
	 */
	export const submissionFeedbackLedger = pgTable(
	  "submission_feedback_ledger",
	  {
	    id: text("id").primaryKey(),
	    /** Driver run_id, shaped like an ISO timestamp "2026-04-26T10-00-00Z". */
	    submissionId: text("submission_id").notNull(),
	    networkId: text("network_id").notNull(),
	    networkVersionId: text("network_version_id").notNull(),
	    /** task_id as found in task.json, e.g. "dude_ada". */
	    taskId: text("task_id").notNull(),
	    /**
	     * Points at network_version_metrics.id — the reviewer score row that
	     * this driver run produced.
	     */
	    metricRowId: text("metric_row_id").notNull(),
	    submittedAt: timestamp("submitted_at", { withTimezone: true })
	      .notNull()
	      .defaultNow(),
	    /** "pending" | "received" | "failed" */
	    externalTruthStatus: text("external_truth_status")
	      .notNull()
	      .default("pending"),
	    /**
	     * Ground-truth figure recomputed by the organizers (EF1% and the
	     * like); written in phase 3.
	     */
	    externalTruthValue: doublePrecision("external_truth_value"),
	    externalTruthReceivedAt: timestamp("external_truth_received_at", {
	      withTimezone: true,
	    }),
	  },
	  (cols) => {
	    return {
	      bySubmission: index("submission_feedback_ledger_sub_idx").on(
	        cols.submissionId,
	      ),
	      byTask: index("submission_feedback_ledger_task_idx").on(cols.taskId),
	      byMetric: index("submission_feedback_ledger_metric_idx").on(
	        cols.metricRowId,
	      ),
	      byStatus: index("submission_feedback_ledger_status_idx").on(
	        cols.externalTruthStatus,
	      ),
	      uniqByPair: uniqueIndex("submission_feedback_ledger_uniq").on(
	        cols.submissionId,
	        cols.taskId,
	      ),
	    };
	  },
	);

	/** Shape of a row selected from `submission_feedback_ledger`. */
	export type SubmissionFeedbackLedgerRow =
	  typeof submissionFeedbackLedger.$inferSelect;
	/** Shape of the values accepted when inserting a ledger row. */
	export type InsertSubmissionFeedbackLedgerRow =
	  typeof submissionFeedbackLedger.$inferInsert;