/**
 * tool-network — Wave A architecture.
 *
 * Promotes "tool networks" (curated, executable subgraphs solving a
 * problem class end-to-end) to first-class citizens, replacing the
 * intent-regex + 1-hop expansion path that exposed every atomic tool to
 * the LLM. The planner now sees ≤5 service cards (one per network) and
 * the network's own executor walks the internal subgraph.
 *
 * Tables:
 *   - problem_classes         — taxonomy of problem domains (dotted paths,
 *                               e.g. "ligand.screening.binding").
 *   - tool_networks           — one row per network, with input/output
 *                               contracts and an internal subgraph
 *                               snapshot.
 *   - network_versions        — versioned snapshots; an "active" + zero
 *                               or more "shadow" variants per network.
 *   - network_promotions      — audit log for variant changes (manual,
 *                               auto-promote, auto-rollback).
 *   - network_version_metrics — Reviewer scores aggregated per turn,
 *                               feeding Wave B's online-evolution gate.
 *   - execution_plans         — Blueprint→Execution two-phase: planner
 *                               writes the plan, user/Reviewer confirms
 *                               before runner consumes it.
 *   - task_ledger             — durable per-task state for long-running
 *                               multi-task jobs (DrugCLIP-style 117
 *                               screens). Survives restarts; resumes
 *                               from last `pending|running` row.
 *
 * Further down, this file also declares the Wave B tables
 * (network_shadow_samples, network_evolution_events,
 * network_regression_samples) and submission_feedback_ledger; each has
 * its own section comment.
 *
 * Cross-table references (network_id, version_id, plan_id,
 * problem_class_path, ...) are plain text columns: this file declares no
 * foreign-key constraints.
 */
import {
  pgTable,
  text,
  timestamp,
  jsonb,
  integer,
  doublePrecision,
  boolean,
  index,
  uniqueIndex,
} from "drizzle-orm/pg-core";

// -------------------------------------------------------- problem_classes

export const problemClasses = pgTable(
  "problem_classes",
  {
    id: text("id").primaryKey(),
    /** Dotted path, e.g. "ligand.screening.binding". UNIQUE. */
    path: text("path").notNull().unique(),
    parentPath: text("parent_path"),
    label: text("label").notNull(),
    description: text("description").notNull().default(""),
    /**
     * Legacy fallback: capability tags from intent-rules.json that map
     * onto this class. Used when the LLM classifier is unavailable
     * (kill-switch / cold start). Empty array disables fallback.
     */
    capabilityTags: jsonb("capability_tags").notNull().default([]),
    /** Reviewer channel weights override; null → use defaults. */
    reviewerWeights: jsonb("reviewer_weights"),
    /**
     * Lifecycle status of a problem class. Newly registered classes from
     * `/admin/networks` start as `'draft'` (admin-only visibility, planner
     * skips them); they flip to `'active'` once an admin promotes them.
     * Wave A seed classes (e.g. `ligand.screening.binding`) keep the
     * default `'active'`. Allowed values: `'active' | 'draft'`.
     */
    status: text("status").notNull().default("active"),
    createdAt: timestamp("created_at", { withTimezone: true })
      .notNull()
      .defaultNow(),
    updatedAt: timestamp("updated_at", { withTimezone: true })
      .notNull()
      .defaultNow(),
  },
  (t) => ({
    byParent: index("problem_classes_parent_idx").on(t.parentPath),
    byStatus: index("problem_classes_status_idx").on(t.status),
  }),
);

export type ProblemClassRow = typeof problemClasses.$inferSelect;
export type InsertProblemClassRow = typeof problemClasses.$inferInsert;

// -------------------------------------------------------- tool_networks

export const toolNetworks = pgTable(
  "tool_networks",
  {
    id: text("id").primaryKey(),
    /** Stable handle, e.g. "drugclip_v1". UNIQUE. */
    name: text("name").notNull().unique(),
    /** Reference into problem_classes.path. Soft FK (text). */
    problemClassPath: text("problem_class_path").notNull(),
    description: text("description").notNull().default(""),
    /**
     * JSONSchema describing the wire shape the network accepts. The
     * planner exposes this as the parameters of the synthetic
     * `run_` tool surfaced to the LLM.
     * NOTE(review): the tool name after `run_` looks truncated in this
     * comment (presumably a per-network suffix) — confirm and restore.
     */
    inputContract: jsonb("input_contract").notNull(),
    /** JSONSchema describing the network's output shape. */
    outputContract: jsonb("output_contract").notNull(),
    /**
     * Snapshot of the internal subgraph the executor walks.
     * Shape: `{nodes: string[], edges: Array<{from, to, relation}>}`
     * Names refer to atomic tool_nodes registered in the catalog.
     */
    internalGraph: jsonb("internal_graph").notNull(),
    /**
     * Pointer to the active variant. Null on first insert; the seeder
     * creates a v1 network_versions row and back-fills this.
     */
    activeVariantId: text("active_variant_id"),
    /**
     * "weak" | "strong" — gates Wave B's autoPromote (weak-tier
     * builders cannot push variants into the active slot without a
     * stronger model's sign-off).
     */
    builderModelTier: text("builder_model_tier").notNull().default("strong"),
    /**
     * Wave B — minimum builder tier required for a candidate to be
     * promoted into the shared `active_variant_id` slot. Weak-tier
     * candidates that pass quality gates still install themselves but
     * only into a `private_namespace` on the variant row.
     */
    releaseTierFloor: text("release_tier_floor").notNull().default("strong"),
    /**
     * Per-network knobs (Wave B trigger thresholds, regression floor).
     * Read by `lib/evolution/triggers.ts` under the `evolution` key.
     */
    config: jsonb("config").notNull().default({}),
    /** "active" | "deprecated" | "draft" */
    status: text("status").notNull().default("active"),
    costHint: doublePrecision("cost_hint"),
    latencyHintMs: integer("latency_hint_ms"),
    /**
     * Legacy fallback: tags drawn from intent-rules.json. Used when
     * the LLM classifier doesn't fire and we need to pick a network
     * from the regex hit set. Empty array means classifier-only.
     */
    capabilityTags: jsonb("capability_tags").notNull().default([]),
    /**
     * Migration source pointer: when a composition_alias node is
     * rebadged into this networks row the alias's tool_nodes id is
     * recorded here so we can keep both during gray rollout.
     */
    legacyAliasNodeId: text("legacy_alias_node_id"),
    createdAt: timestamp("created_at", { withTimezone: true })
      .notNull()
      .defaultNow(),
    updatedAt: timestamp("updated_at", { withTimezone: true })
      .notNull()
      .defaultNow(),
  },
  (t) => ({
    byClass: index("tool_networks_class_idx").on(t.problemClassPath),
    byStatus: index("tool_networks_status_idx").on(t.status),
  }),
);

export type ToolNetworkRow = typeof toolNetworks.$inferSelect;
export type InsertToolNetworkRow = typeof toolNetworks.$inferInsert;

// -------------------------------------------------------- network_versions

export const networkVersions = pgTable(
  "network_versions",
  {
    id: text("id").primaryKey(),
    networkId: text("network_id").notNull(),
    versionLabel: text("version_label").notNull(),
    internalGraph: jsonb("internal_graph").notNull(),
    /** Per-version params (handler tweaks, hyperparams). */
    config: jsonb("config").notNull().default({}),
    /** "active" | "shadow" | "promoted" | "demoted" | "draft" */
    status: text("status").notNull().default("draft"),
    /**
     * "system" | "user:" | "auto:builder"
     * NOTE(review): the `"user:"` form looks truncated in this comment
     * (presumably followed by a user identifier) — confirm and restore.
     */
    builtBy: text("built_by").notNull().default("system"),
    builderModelTier: text("builder_model_tier").notNull().default("strong"),
    /**
     * Wave B — non-null pins this variant to a single user/session.
     * Used by `attemptAutoPromote` when a candidate passes quality
     * gates but its builder tier is below the network's release floor:
     * the variant still serves the user who built it, but never
     * occupies the shared `tool_networks.active_variant_id` slot.
     */
    privateNamespace: text("private_namespace"),
    createdAt: timestamp("created_at", { withTimezone: true })
      .notNull()
      .defaultNow(),
  },
  (t) => ({
    byNetwork: index("network_versions_net_idx").on(t.networkId),
    byStatus: index("network_versions_status_idx").on(t.status),
    // A version label may appear at most once per network.
    uniqLabel: uniqueIndex("network_versions_uniq_label").on(
      t.networkId,
      t.versionLabel,
    ),
    byPrivateNs: index("network_versions_private_ns_idx").on(t.privateNamespace),
    // Task #242 (B1) — partial unique index that guarantees a single
    // user-private active variant per (network, user). Created via raw
    // SQL outside drizzle-kit (partial WHERE not yet in drizzle DSL):
    //   CREATE UNIQUE INDEX network_versions_uniq_active_private
    //     ON network_versions (network_id, private_namespace)
    //     WHERE status = 'active' AND private_namespace IS NOT NULL;
    // The matching index name is referenced from `getOrCreateUserPrivateVariant`
    // which catches its conflict to stay idempotent under concurrency.
  }),
);

export type NetworkVersionRow = typeof networkVersions.$inferSelect;
export type InsertNetworkVersionRow = typeof networkVersions.$inferInsert;

// -------------------------------------------------------- network_promotions

export const networkPromotions = pgTable(
  "network_promotions",
  {
    id: text("id").primaryKey(),
    networkId: text("network_id").notNull(),
    // Nullable, unlike to_variant_id below.
    fromVariantId: text("from_variant_id"),
    toVariantId: text("to_variant_id").notNull(),
    /** "manual" | "auto_promote" | "auto_rollback" | "seed" */
    reason: text("reason").notNull(),
    metricsSnapshot: jsonb("metrics_snapshot").notNull().default({}),
    decidedBy: text("decided_by").notNull().default("system"),
    createdAt: timestamp("created_at", { withTimezone: true })
      .notNull()
      .defaultNow(),
  },
  (t) => ({
    byNetwork: index("network_promotions_net_idx").on(t.networkId),
  }),
);

export type NetworkPromotionRow = typeof networkPromotions.$inferSelect;
export type InsertNetworkPromotionRow = typeof networkPromotions.$inferInsert;

// -------------------------------------------------------- metrics

export const networkVersionMetrics = pgTable(
  "network_version_metrics",
  {
    id: text("id").primaryKey(),
    networkId: text("network_id").notNull(),
    versionId: text("version_id").notNull(),
    problemClassPath: text("problem_class_path").notNull(),
    /** Aggregated 0..1 reviewer score. */
    reviewerScore: doublePrecision("reviewer_score").notNull(),
    /** Per-channel breakdown {contract, health, trace, coverage, factual, feedback}. */
    channelBreakdown: jsonb("channel_breakdown").notNull().default({}),
    /**
     * Task #254 (B0, CONT-007) — per-channel boolean flag set whenever the
     * grader returned a NEUTRAL fallback (no real signal). Shape:
     *   `{ intent_coverage: true, factual_consistency: true, traceability: false, ... }`.
     * Used for B11 acceptance: `fallback_rate = mean(any flag true)` must be < 0.5.
     * Legacy rows lack this field → treated as all false (legacy metrics
     * recorded before B4 was made real are fallback by default).
     * NOTE(review): "treated as all false" and "fallback by default" read
     * as being in tension — confirm the intended handling of legacy rows.
     */
    channelFallbackFlags: jsonb("channel_fallback_flags").notNull().default({}),
    /** Wall-clock ms for the full turn (planner + tool calls + reviewer). */
    costMs: integer("cost_ms"),
    /** Number of L1+L2 retries within the turn. */
    retries: integer("retries").notNull().default(0),
    /** True if K=3× baseline budget was exceeded; partial result returned. */
    budgetExceeded: boolean("budget_exceeded").notNull().default(false),
    /** Optional links to source turn for audit. */
    conversationId: text("conversation_id"),
    messageId: text("message_id"),
    createdAt: timestamp("created_at", { withTimezone: true })
      .notNull()
      .defaultNow(),
  },
  (t) => ({
    byNetwork: index("network_version_metrics_net_idx").on(t.networkId),
    byVersion: index("network_version_metrics_ver_idx").on(t.versionId),
    byClass: index("network_version_metrics_class_idx").on(t.problemClassPath),
  }),
);

export type NetworkVersionMetricRow =
  typeof networkVersionMetrics.$inferSelect;
export type InsertNetworkVersionMetricRow =
  typeof networkVersionMetrics.$inferInsert;

// -------------------------------------------------------- execution_plans

export const executionPlans = pgTable(
  "execution_plans",
  {
    id: text("id").primaryKey(),
    /** Optional link back to the conversation that spawned it. */
    conversationId: text("conversation_id"),
    /** Optional link to the user who owns this plan (for ACL). */
    ownerUserId: text("owner_user_id"),
    problemClassPath: text("problem_class_path").notNull(),
    networkId: text("network_id").notNull(),
    versionId: text("version_id").notNull(),
    /**
     * Blueprint payload:
     *   {
     *     summary: string,
     *     tasks: Array<{
     *       taskKey, networkName, params, expectedCostMs, parallelism
     *     }>,
     *     globalConfig: Record,
     *   }
     * NOTE(review): the type arguments of `Record` look truncated in this
     * comment — confirm the intended key/value types and restore them.
     */
    blueprint: jsonb("blueprint").notNull(),
    /**
     * "draft"                 — planner is still drafting.
     * "awaiting_confirmation" — UI shows this; Execute button enabled.
     * "approved"              — runner may pick up.
     * "executing"             — task_ledger rows being processed.
     * "completed"             — all tasks finished (success or terminal failure).
     * "rejected"              — user explicitly rejected the plan.
     * "failed"                — terminal infrastructure failure.
     */
    status: text("status").notNull().default("draft"),
    approvedBy: text("approved_by"),
    approvedAt: timestamp("approved_at", { withTimezone: true }),
    /** Path to packaged artifact (e.g. result.zip) when completed. */
    outputArtifactPath: text("output_artifact_path"),
    /**
     * Free-form notes the runner appends as it makes decisions.
     * Becomes the body of `result.log` for benchmark submissions.
     */
    notes: text("notes").notNull().default(""),
    createdAt: timestamp("created_at", { withTimezone: true })
      .notNull()
      .defaultNow(),
    updatedAt: timestamp("updated_at", { withTimezone: true })
      .notNull()
      .defaultNow(),
  },
  (t) => ({
    byStatus: index("execution_plans_status_idx").on(t.status),
    byOwner: index("execution_plans_owner_idx").on(t.ownerUserId),
  }),
);

export type ExecutionPlanRow = typeof executionPlans.$inferSelect;
export type InsertExecutionPlanRow = typeof executionPlans.$inferInsert;

// -------------------------------------------------------- task_ledger

export const taskLedger = pgTable(
  "task_ledger",
  {
    id: text("id").primaryKey(),
    planId: text("plan_id").notNull(),
    /** Stable per-plan key (e.g. "dude_aa2ar"). UNIQUE per plan. */
    taskKey: text("task_key").notNull(),
    params: jsonb("params").notNull().default({}),
    /** "pending" | "running" | "done" | "failed" | "skipped" */
    status: text("status").notNull().default("pending"),
    attempts: integer("attempts").notNull().default(0),
    maxAttempts: integer("max_attempts").notNull().default(3),
    result: jsonb("result"),
    errorText: text("error_text"),
    /** EF1%, ranks, per-task quality metrics. */
    metrics: jsonb("metrics"),
    /** Wall-clock for the most recent run. */
    durationMs: integer("duration_ms"),
    startedAt: timestamp("started_at", { withTimezone: true }),
    finishedAt: timestamp("finished_at", { withTimezone: true }),
    createdAt: timestamp("created_at", { withTimezone: true })
      .notNull()
      .defaultNow(),
    updatedAt: timestamp("updated_at", { withTimezone: true })
      .notNull()
      .defaultNow(),
  },
  (t) => ({
    byPlan: index("task_ledger_plan_idx").on(t.planId),
    byStatus: index("task_ledger_status_idx").on(t.status),
    // Enforces the "UNIQUE per plan" rule documented on task_key.
    uniq: uniqueIndex("task_ledger_uniq").on(t.planId, t.taskKey),
  }),
);

export type TaskLedgerRow = typeof taskLedger.$inferSelect;
export type InsertTaskLedgerRow = typeof taskLedger.$inferInsert;

// ====================================================================
// Wave B — online-evolution flywheel
// ====================================================================
//
// The three Wave B tables below ride on top of the Wave A surface:
//
//   - network_shadow_samples   — paired (active, shadow) per-turn rows
//                                used to compute the 95% CI gate.
//   - network_evolution_events — append-only event stream that drives
//                                the admin "Evolution Live" page and
//                                the auto-rollback watchdog.
//   - network_regression_samples
//                              — archived representative inputs the
//                                pre-promotion regression suite replays
//                                against any candidate before promote.
//
// Two extra columns also appear on Wave A tables (added via
// `_create.mjs` ALTER TABLE IF NOT EXISTS, so existing data is
// preserved):
//
//   - tool_networks.release_tier_floor   — required builder tier for a
//                                          candidate to enter the
//                                          shared release channel.
//   - network_versions.private_namespace — non-null ⇒ this version
//                                          only ever serves the user
//                                          encoded in the namespace,
//                                          never the global active.

// -------------------------------------------------------- shadow samples

export const networkShadowSamples = pgTable(
  "network_shadow_samples",
  {
    id: text("id").primaryKey(),
    networkId: text("network_id").notNull(),
    activeVariantId: text("active_variant_id").notNull(),
    shadowVariantId: text("shadow_variant_id").notNull(),
    problemClassPath: text("problem_class_path").notNull(),
    /** Aggregated 0..1 reviewer score for the active execution. */
    activeScore: doublePrecision("active_score").notNull(),
    /** Aggregated 0..1 reviewer score for the shadow execution. */
    shadowScore: doublePrecision("shadow_score").notNull(),
    /** Reviewer hard-fail signals (factuality, contract, safety). */
    criticalSignal: boolean("critical_signal").notNull().default(false),
    activeCostMs: integer("active_cost_ms"),
    shadowCostMs: integer("shadow_cost_ms"),
    /** True ⇒ shadow exceeded 1.5× active budget cap and was skipped. */
    budgetSkipped: boolean("budget_skipped").notNull().default(false),
    conversationId: text("conversation_id"),
    messageId: text("message_id"),
    createdAt: timestamp("created_at", { withTimezone: true })
      .notNull()
      .defaultNow(),
  },
  (t) => ({
    byNetwork: index("network_shadow_samples_net_idx").on(t.networkId),
    byShadow: index("network_shadow_samples_shadow_idx").on(t.shadowVariantId),
    byCreated: index("network_shadow_samples_created_idx").on(t.createdAt),
  }),
);

export type NetworkShadowSampleRow = typeof networkShadowSamples.$inferSelect;
export type InsertNetworkShadowSampleRow =
  typeof networkShadowSamples.$inferInsert;

// -------------------------------------------------------- evolution events

export const networkEvolutionEvents = pgTable(
  "network_evolution_events",
  {
    id: text("id").primaryKey(),
    networkId: text("network_id").notNull(),
    /**
     * One of:
     *   "cadence_trigger" | "regression_trigger" | "coverage_trigger"
     *   | "shadow_started" | "shadow_budget_skipped" | "shadow_budget_threshold"
     *   | "shadow_runner_error" | "shadow_reviewer_fallback"
     *   | "auto_promote_skipped" | "promote" | "rollback"
     *   | "regression_suite_failed"
     */
    kind: text("kind").notNull(),
    /** Variant the event is about (candidate for trigger / promote). */
    variantId: text("variant_id"),
    /** Free-form event payload (metric snapshots, gate decisions). */
    payload: jsonb("payload").notNull().default({}),
    /** Optional pointer for chaining — e.g. rollback references promote. */
    relatedEventId: text("related_event_id"),
    /** Optional pointer to network_promotions row. */
    promotionId: text("promotion_id"),
    createdAt: timestamp("created_at", { withTimezone: true })
      .notNull()
      .defaultNow(),
  },
  (t) => ({
    byNetwork: index("network_evolution_events_net_idx").on(t.networkId),
    byKind: index("network_evolution_events_kind_idx").on(t.kind),
    byCreated: index("network_evolution_events_created_idx").on(t.createdAt),
  }),
);

export type NetworkEvolutionEventRow =
  typeof networkEvolutionEvents.$inferSelect;
export type InsertNetworkEvolutionEventRow =
  typeof networkEvolutionEvents.$inferInsert;

// -------------------------------------------------------- regression samples

export const networkRegressionSamples = pgTable(
  "network_regression_samples",
  {
    id: text("id").primaryKey(),
    networkId: text("network_id").notNull(),
    problemClassPath: text("problem_class_path").notNull(),
    label: text("label").notNull().default(""),
    /** Network input payload to replay against any candidate. */
    inputPayload: jsonb("input_payload").notNull(),
    /**
     * Minimum reviewer score the candidate must achieve to pass this
     * sample. Pre-promotion suite fails if the candidate falls below.
     */
    expectedFloor: doublePrecision("expected_floor").notNull().default(0.6),
    /** Optional structural assertions, e.g. {must_include_keys:[...]} */
    expectedShape: jsonb("expected_shape").notNull().default({}),
    /** "active" rows feed the suite; "archived" rows are kept for audit. */
    status: text("status").notNull().default("active"),
    createdBy: text("created_by").notNull().default("system"),
    createdAt: timestamp("created_at", { withTimezone: true })
      .notNull()
      .defaultNow(),
  },
  (t) => ({
    byNetwork: index("network_regression_samples_net_idx").on(t.networkId),
    byStatus: index("network_regression_samples_status_idx").on(t.status),
  }),
);

export type NetworkRegressionSampleRow =
  typeof networkRegressionSamples.$inferSelect;
export type InsertNetworkRegressionSampleRow =
  typeof networkRegressionSamples.$inferInsert;

// -------------------------------------------------------- submission_feedback_ledger
//
// Task #226 — reverse-mapping ledger written after the competition's
// 117-task real-set driver submits.
// One row = the metric row (a network_version_metrics.id) produced by
// running one (submission_id, task_id) on a given network_version. In
// stage 3, when the organizers' recomputed EF1 ground truth arrives, the
// metric_row_id is looked up by (submission_id, task_id) and the value is
// fed back into the corresponding reviewer channel (external_truth).
//
// Why this is a separate table:
//   - jsonb cannot carry enough semantics (a single driver run produces
//     117 rows, and they need to be indexable);
//   - it keeps network_version_metrics itself unpolluted (each of its rows
//     is one reviewer-score snapshot, clean and free of redundant fields).

export const submissionFeedbackLedger = pgTable(
  "submission_feedback_ledger",
  {
    id: text("id").primaryKey(),
    /** Driver run_id, shaped like an ISO timestamp, e.g. "2026-04-26T10-00-00Z". */
    submissionId: text("submission_id").notNull(),
    networkId: text("network_id").notNull(),
    networkVersionId: text("network_version_id").notNull(),
    /** The task_id from task.json, e.g. "dude_ada". */
    taskId: text("task_id").notNull(),
    /** Points at network_version_metrics.id: the reviewer-score row produced by this driver run. */
    metricRowId: text("metric_row_id").notNull(),
    submittedAt: timestamp("submitted_at", { withTimezone: true })
      .notNull()
      .defaultNow(),
    /** "pending" | "received" | "failed" */
    externalTruthStatus: text("external_truth_status")
      .notNull()
      .default("pending"),
    /** Ground-truth number recomputed by the organizers (EF1% etc.); written in stage 3. */
    externalTruthValue: doublePrecision("external_truth_value"),
    externalTruthReceivedAt: timestamp("external_truth_received_at", {
      withTimezone: true,
    }),
  },
  (t) => ({
    bySubmission: index("submission_feedback_ledger_sub_idx").on(t.submissionId),
    byTask: index("submission_feedback_ledger_task_idx").on(t.taskId),
    byMetric: index("submission_feedback_ledger_metric_idx").on(t.metricRowId),
    byStatus: index("submission_feedback_ledger_status_idx").on(
      t.externalTruthStatus,
    ),
    // At most one ledger row per (submission_id, task_id) pair.
    uniqByPair: uniqueIndex("submission_feedback_ledger_uniq").on(
      t.submissionId,
      t.taskId,
    ),
  }),
);

export type SubmissionFeedbackLedgerRow =
  typeof submissionFeedbackLedger.$inferSelect;
export type InsertSubmissionFeedbackLedgerRow =
  typeof submissionFeedbackLedger.$inferInsert;