// doatlas-2/lib/db/src/schema/toolNetwork.ts
// Hosting-page header residue (not part of the module):
//   "Iostream-Li's picture" · "Add files using upload-large-folder tool"
//   9c12e58 verified
/**
* tool-network — Wave A architecture.
*
* Promotes "tool networks" (curated, executable subgraphs solving a
* problem class end-to-end) to first-class citizens, replacing the
* intent-regex + 1-hop expansion path that exposed every atomic tool to
* the LLM. The planner now sees ≤5 service cards (one per network) and
* the network's own executor walks the internal subgraph.
*
* Tables:
* - problem_classes — taxonomy of problem domains (dotted paths such as
* "ligand.screening.binding").
* - tool_networks — one row per network, with input/output
* contracts and an internal subgraph
* snapshot.
* - network_versions — versioned snapshots; an "active" + zero
* or more "shadow" variants per network.
* - network_promotions — audit log for variant changes (manual,
* auto-promote, auto-rollback).
* - network_version_metrics — Reviewer scores aggregated per turn,
* feeding Wave B's online-evolution gate.
* - execution_plans — Blueprint→Execution two-phase: planner
* writes the plan, user/Reviewer confirms
* before runner consumes it.
* - task_ledger — durable per-task state for long-running
* multi-task jobs (DrugCLIP-style 117
* screens). Survives restarts; resumes
* from last `pending|running` row.
*/
import {
pgTable,
text,
timestamp,
jsonb,
integer,
doublePrecision,
boolean,
index,
uniqueIndex,
} from "drizzle-orm/pg-core";
// -------------------------------------------------------- problem_classes
/**
 * `problem_classes` — taxonomy of problem domains, one row per dotted path.
 */
export const problemClasses = pgTable(
  "problem_classes",
  {
    id: text("id").primaryKey(),
    /** Unique dotted path, for example "ligand.screening.binding". */
    path: text("path").notNull().unique(),
    /** Nullable; indexed by `problem_classes_parent_idx`. */
    parentPath: text("parent_path"),
    label: text("label").notNull(),
    description: text("description").notNull().default(""),
    /**
     * Capability tags from intent-rules.json that map onto this class.
     * This is the legacy fallback path, consulted when the LLM classifier
     * is unavailable (kill-switch / cold start). An empty array switches
     * the fallback off.
     */
    capabilityTags: jsonb("capability_tags").notNull().default([]),
    /** Override for the Reviewer channel weights; null means "use defaults". */
    reviewerWeights: jsonb("reviewer_weights"),
    /**
     * Lifecycle status; allowed values are `'active' | 'draft'`.
     * Classes newly registered from `/admin/networks` begin as `'draft'`
     * (visible to admins only, skipped by the planner) and become
     * `'active'` once an admin promotes them. Wave A seed classes
     * (e.g. `ligand.screening.binding`) rely on the `'active'` default.
     */
    status: text("status").notNull().default("active"),
    createdAt: timestamp("created_at", { withTimezone: true }).notNull().defaultNow(),
    // NOTE(review): defaults to now() on insert; nothing in this schema
    // refreshes it on UPDATE — presumably writers set it explicitly.
    updatedAt: timestamp("updated_at", { withTimezone: true }).notNull().defaultNow(),
  },
  (cols) => ({
    parentIdx: index("problem_classes_parent_idx").on(cols.parentPath),
    statusIdx: index("problem_classes_status_idx").on(cols.status),
  }),
);
/** Row shape produced by selecting from `problem_classes`. */
export type ProblemClassRow = typeof problemClasses.$inferSelect;
/** Payload shape accepted when inserting into `problem_classes`. */
export type InsertProblemClassRow = typeof problemClasses.$inferInsert;
// -------------------------------------------------------- tool_networks
/**
 * `tool_networks` — one row per network, carrying its input/output
 * contracts and a snapshot of the internal subgraph.
 */
export const toolNetworks = pgTable(
  "tool_networks",
  {
    id: text("id").primaryKey(),
    /** Unique, stable handle such as "drugclip_v1". */
    name: text("name").notNull().unique(),
    /** Soft (text-only) foreign key pointing at problem_classes.path. */
    problemClassPath: text("problem_class_path").notNull(),
    description: text("description").notNull().default(""),
    /**
     * JSONSchema for the wire shape this network accepts. The planner
     * surfaces it to the LLM as the parameters of the synthetic
     * `run_<network_name>` tool.
     */
    inputContract: jsonb("input_contract").notNull(),
    /** JSONSchema for the shape of the network's output. */
    outputContract: jsonb("output_contract").notNull(),
    /**
     * Snapshot of the internal subgraph walked by the executor.
     * Shape: `{nodes: string[], edges: Array<{from, to, relation}>}`.
     * Node names refer to atomic tool_nodes registered in the catalog.
     */
    internalGraph: jsonb("internal_graph").notNull(),
    /**
     * Points at the active variant. It is null on first insert; the
     * seeder then creates a v1 network_versions row and back-fills it.
     */
    activeVariantId: text("active_variant_id"),
    /**
     * "weak" | "strong". Gates Wave B's autoPromote: a weak-tier builder
     * cannot push a variant into the active slot without sign-off from a
     * stronger model.
     */
    builderModelTier: text("builder_model_tier").notNull().default("strong"),
    /**
     * Wave B — the minimum builder tier a candidate needs before it may
     * be promoted into the shared `active_variant_id` slot. A weak-tier
     * candidate that passes the quality gates is still installed, but
     * only under a `private_namespace` on its variant row.
     */
    releaseTierFloor: text("release_tier_floor").notNull().default("strong"),
    /**
     * Per-network knobs (Wave B trigger thresholds, regression floor).
     * `lib/evolution/triggers.ts` reads them from the `evolution` key.
     */
    config: jsonb("config").notNull().default({}),
    /** "active" | "deprecated" | "draft" */
    status: text("status").notNull().default("active"),
    costHint: doublePrecision("cost_hint"),
    latencyHintMs: integer("latency_hint_ms"),
    /**
     * Tags drawn from intent-rules.json. Legacy fallback used to pick a
     * network from the regex hit set when the LLM classifier does not
     * fire. An empty array means the network is classifier-only.
     */
    capabilityTags: jsonb("capability_tags").notNull().default([]),
    /**
     * Migration source pointer. When a composition_alias node is rebadged
     * into a row of this table, the alias's tool_nodes id is stored here
     * so both can be kept during the gray rollout.
     */
    legacyAliasNodeId: text("legacy_alias_node_id"),
    createdAt: timestamp("created_at", { withTimezone: true }).notNull().defaultNow(),
    // NOTE(review): defaults to now() on insert; nothing in this schema
    // refreshes it on UPDATE — presumably writers set it explicitly.
    updatedAt: timestamp("updated_at", { withTimezone: true }).notNull().defaultNow(),
  },
  (cols) => ({
    classIdx: index("tool_networks_class_idx").on(cols.problemClassPath),
    statusIdx: index("tool_networks_status_idx").on(cols.status),
  }),
);
/** Row shape produced by selecting from `tool_networks`. */
export type ToolNetworkRow = typeof toolNetworks.$inferSelect;
/** Payload shape accepted when inserting into `tool_networks`. */
export type InsertToolNetworkRow = typeof toolNetworks.$inferInsert;
// -------------------------------------------------------- network_versions
/**
 * `network_versions` — versioned snapshots of a network: one "active"
 * plus zero or more "shadow" variants per network.
 */
export const networkVersions = pgTable(
  "network_versions",
  {
    id: text("id").primaryKey(),
    networkId: text("network_id").notNull(),
    /** Unique per network (see `network_versions_uniq_label`). */
    versionLabel: text("version_label").notNull(),
    internalGraph: jsonb("internal_graph").notNull(),
    /** Version-specific parameters (handler tweaks, hyperparams). */
    config: jsonb("config").notNull().default({}),
    /** "active" | "shadow" | "promoted" | "demoted" | "draft" */
    status: text("status").notNull().default("draft"),
    /** "system" | "user:<id>" | "auto:builder" */
    builtBy: text("built_by").notNull().default("system"),
    builderModelTier: text("builder_model_tier").notNull().default("strong"),
    /**
     * Wave B — when non-null, pins this variant to a single user/session.
     * `attemptAutoPromote` uses it for a candidate that passes the
     * quality gates while its builder tier sits below the network's
     * release floor: the variant keeps serving the user who built it but
     * never takes the shared `tool_networks.active_variant_id` slot.
     */
    privateNamespace: text("private_namespace"),
    createdAt: timestamp("created_at", { withTimezone: true }).notNull().defaultNow(),
  },
  (cols) => ({
    // NOTE(review): `network_id` is also the leading column of
    // `network_versions_uniq_label` below — confirm this standalone
    // index is still wanted.
    networkIdx: index("network_versions_net_idx").on(cols.networkId),
    statusIdx: index("network_versions_status_idx").on(cols.status),
    labelPerNetwork: uniqueIndex("network_versions_uniq_label").on(
      cols.networkId,
      cols.versionLabel,
    ),
    privateNsIdx: index("network_versions_private_ns_idx").on(
      cols.privateNamespace,
    ),
    // Task #242 (B1) — a partial unique index guarantees at most one
    // user-private active variant per (network, user). It is created
    // with raw SQL outside drizzle-kit (the original author notes that a
    // partial WHERE was not yet available in the drizzle DSL):
    //   CREATE UNIQUE INDEX network_versions_uniq_active_private
    //     ON network_versions (network_id, private_namespace)
    //     WHERE status = 'active' AND private_namespace IS NOT NULL;
    // `getOrCreateUserPrivateVariant` references that index name and
    // catches its conflict so it stays idempotent under concurrency.
    // NOTE(review): recent drizzle-orm releases appear to expose
    // `.where(...)` on pg index builders — confirm before moving this
    // index into the schema.
  }),
);
/** Row shape produced by selecting from `network_versions`. */
export type NetworkVersionRow = typeof networkVersions.$inferSelect;
/** Payload shape accepted when inserting into `network_versions`. */
export type InsertNetworkVersionRow = typeof networkVersions.$inferInsert;
// -------------------------------------------------------- network_promotions
/**
 * `network_promotions` — audit log of variant changes (manual,
 * auto-promote, auto-rollback).
 */
export const networkPromotions = pgTable(
  "network_promotions",
  {
    id: text("id").primaryKey(),
    networkId: text("network_id").notNull(),
    /** Nullable, unlike `to_variant_id`. */
    fromVariantId: text("from_variant_id"),
    toVariantId: text("to_variant_id").notNull(),
    /** "manual" | "auto_promote" | "auto_rollback" | "seed" */
    reason: text("reason").notNull(),
    metricsSnapshot: jsonb("metrics_snapshot").notNull().default({}),
    decidedBy: text("decided_by").notNull().default("system"),
    createdAt: timestamp("created_at", { withTimezone: true }).notNull().defaultNow(),
  },
  (cols) => ({
    networkIdx: index("network_promotions_net_idx").on(cols.networkId),
  }),
);
/** Row shape produced by selecting from `network_promotions`. */
export type NetworkPromotionRow = typeof networkPromotions.$inferSelect;
/** Payload shape accepted when inserting into `network_promotions`. */
export type InsertNetworkPromotionRow = typeof networkPromotions.$inferInsert;
// -------------------------------------------------------- metrics
/**
 * `network_version_metrics` — Reviewer scores aggregated per turn; the
 * input to Wave B's online-evolution gate.
 */
export const networkVersionMetrics = pgTable(
  "network_version_metrics",
  {
    id: text("id").primaryKey(),
    networkId: text("network_id").notNull(),
    versionId: text("version_id").notNull(),
    problemClassPath: text("problem_class_path").notNull(),
    /** Aggregated reviewer score in the range 0..1. */
    reviewerScore: doublePrecision("reviewer_score").notNull(),
    /** Breakdown per channel: {contract, health, trace, coverage, factual, feedback}. */
    channelBreakdown: jsonb("channel_breakdown").notNull().default({}),
    /**
     * Task #254 (B0, CONT-007) — one boolean per channel, set whenever
     * the grader returned a NEUTRAL fallback (no real signal). Shape:
     * `{ intent_coverage: true, factual_consistency: true, traceability: false, ... }`.
     * Used for B11 acceptance: `fallback_rate = mean(any flag true)`
     * must stay < 0.5.
     * Legacy rows lack this field and are treated as all-false (old
     * metrics from before B4 became real defaulted to fallback).
     */
    channelFallbackFlags: jsonb("channel_fallback_flags").notNull().default({}),
    /** Wall-clock ms for the whole turn (planner + tool calls + reviewer). */
    costMs: integer("cost_ms"),
    /** Count of L1+L2 retries inside the turn. */
    retries: integer("retries").notNull().default(0),
    /** True when the K=3× baseline budget was exceeded and a partial result was returned. */
    budgetExceeded: boolean("budget_exceeded").notNull().default(false),
    /** Optional links back to the source turn, kept for audit. */
    conversationId: text("conversation_id"),
    messageId: text("message_id"),
    createdAt: timestamp("created_at", { withTimezone: true }).notNull().defaultNow(),
  },
  (cols) => ({
    networkIdx: index("network_version_metrics_net_idx").on(cols.networkId),
    versionIdx: index("network_version_metrics_ver_idx").on(cols.versionId),
    classIdx: index("network_version_metrics_class_idx").on(
      cols.problemClassPath,
    ),
  }),
);
/** Row shape produced by selecting from `network_version_metrics`. */
export type NetworkVersionMetricRow =
  typeof networkVersionMetrics.$inferSelect;
/** Payload shape accepted when inserting into `network_version_metrics`. */
export type InsertNetworkVersionMetricRow =
  typeof networkVersionMetrics.$inferInsert;
// -------------------------------------------------------- execution_plans
/**
 * `execution_plans` — Blueprint→Execution two-phase flow: the planner
 * writes the plan and the user/Reviewer confirms it before the runner
 * consumes it.
 */
export const executionPlans = pgTable(
  "execution_plans",
  {
    id: text("id").primaryKey(),
    /** Optional pointer to the conversation that spawned the plan. */
    conversationId: text("conversation_id"),
    /** Optional pointer to the owning user (used for ACL). */
    ownerUserId: text("owner_user_id"),
    problemClassPath: text("problem_class_path").notNull(),
    networkId: text("network_id").notNull(),
    versionId: text("version_id").notNull(),
    /**
     * Blueprint payload:
     *   {
     *     summary: string,
     *     tasks: Array<{
     *       taskKey, networkName, params, expectedCostMs, parallelism
     *     }>,
     *     globalConfig: Record<string, unknown>,
     *   }
     */
    blueprint: jsonb("blueprint").notNull(),
    /**
     * Plan state:
     *   "draft"                 — the planner is still drafting.
     *   "awaiting_confirmation" — shown in the UI; Execute button enabled.
     *   "approved"              — the runner may pick it up.
     *   "executing"             — task_ledger rows are being processed.
     *   "completed"             — every task finished (success or terminal failure).
     *   "rejected"              — the user explicitly rejected the plan.
     *   "failed"                — terminal infrastructure failure.
     */
    status: text("status").notNull().default("draft"),
    approvedBy: text("approved_by"),
    approvedAt: timestamp("approved_at", { withTimezone: true }),
    /** Path of the packaged artifact (e.g. result.zip) once completed. */
    outputArtifactPath: text("output_artifact_path"),
    /**
     * Free-form notes appended by the runner as it makes decisions.
     * They become the body of `result.log` for benchmark submissions.
     */
    notes: text("notes").notNull().default(""),
    createdAt: timestamp("created_at", { withTimezone: true }).notNull().defaultNow(),
    // NOTE(review): defaults to now() on insert; nothing in this schema
    // refreshes it on UPDATE — presumably writers set it explicitly.
    updatedAt: timestamp("updated_at", { withTimezone: true }).notNull().defaultNow(),
  },
  (cols) => ({
    statusIdx: index("execution_plans_status_idx").on(cols.status),
    ownerIdx: index("execution_plans_owner_idx").on(cols.ownerUserId),
  }),
);
/** Row shape produced by selecting from `execution_plans`. */
export type ExecutionPlanRow = typeof executionPlans.$inferSelect;
/** Payload shape accepted when inserting into `execution_plans`. */
export type InsertExecutionPlanRow = typeof executionPlans.$inferInsert;
// -------------------------------------------------------- task_ledger
/**
 * `task_ledger` — durable per-task state for long-running multi-task
 * jobs; one row per (plan, task key).
 */
export const taskLedger = pgTable(
  "task_ledger",
  {
    id: text("id").primaryKey(),
    planId: text("plan_id").notNull(),
    /** Stable key within a plan (e.g. "dude_aa2ar"); unique per plan. */
    taskKey: text("task_key").notNull(),
    params: jsonb("params").notNull().default({}),
    /** "pending" | "running" | "done" | "failed" | "skipped" */
    status: text("status").notNull().default("pending"),
    attempts: integer("attempts").notNull().default(0),
    maxAttempts: integer("max_attempts").notNull().default(3),
    result: jsonb("result"),
    errorText: text("error_text"),
    /** Per-task quality metrics such as EF1% and ranks. */
    metrics: jsonb("metrics"),
    /** Wall-clock duration of the most recent run. */
    durationMs: integer("duration_ms"),
    startedAt: timestamp("started_at", { withTimezone: true }),
    finishedAt: timestamp("finished_at", { withTimezone: true }),
    createdAt: timestamp("created_at", { withTimezone: true }).notNull().defaultNow(),
    // NOTE(review): defaults to now() on insert; nothing in this schema
    // refreshes it on UPDATE — presumably writers set it explicitly.
    updatedAt: timestamp("updated_at", { withTimezone: true }).notNull().defaultNow(),
  },
  (cols) => ({
    // NOTE(review): `plan_id` is also the leading column of
    // `task_ledger_uniq` below — confirm this standalone index is still
    // wanted.
    planIdx: index("task_ledger_plan_idx").on(cols.planId),
    statusIdx: index("task_ledger_status_idx").on(cols.status),
    keyPerPlan: uniqueIndex("task_ledger_uniq").on(cols.planId, cols.taskKey),
  }),
);
/** Row shape produced by selecting from `task_ledger`. */
export type TaskLedgerRow = typeof taskLedger.$inferSelect;
/** Payload shape accepted when inserting into `task_ledger`. */
export type InsertTaskLedgerRow = typeof taskLedger.$inferInsert;
// ====================================================================
// Wave B — online-evolution flywheel
// ====================================================================
//
// The three Wave B tables below ride on top of the Wave A surface:
//
// - network_shadow_samples — paired (active, shadow) per-turn rows
// used to compute the 95% CI gate.
// - network_evolution_events — append-only event stream that drives
// the admin "Evolution Live" page and
// the auto-rollback watchdog.
// - network_regression_samples
// — archived representative inputs the
// pre-promotion regression suite replays
// against any candidate before promote.
//
// Two extra columns also appear on Wave A tables (added by
// `_create.mjs` via ALTER TABLE … ADD COLUMN IF NOT EXISTS, so
// existing data is preserved):
//
// - tool_networks.release_tier_floor — required builder tier for a
// candidate to enter the
// shared release channel.
// - network_versions.private_namespace — non-null ⇒ this version
// only ever serves the user
// encoded in the namespace,
// never the global active.
// -------------------------------------------------------- shadow samples
/**
 * `network_shadow_samples` — paired (active, shadow) rows, one per turn,
 * from which the 95% CI gate is computed.
 */
export const networkShadowSamples = pgTable(
  "network_shadow_samples",
  {
    id: text("id").primaryKey(),
    networkId: text("network_id").notNull(),
    activeVariantId: text("active_variant_id").notNull(),
    shadowVariantId: text("shadow_variant_id").notNull(),
    problemClassPath: text("problem_class_path").notNull(),
    /** Aggregated reviewer score (0..1) of the active execution. */
    activeScore: doublePrecision("active_score").notNull(),
    /** Aggregated reviewer score (0..1) of the shadow execution. */
    shadowScore: doublePrecision("shadow_score").notNull(),
    /** Reviewer hard-fail signals (factuality, contract, safety). */
    criticalSignal: boolean("critical_signal").notNull().default(false),
    activeCostMs: integer("active_cost_ms"),
    shadowCostMs: integer("shadow_cost_ms"),
    /** True ⇒ the shadow run exceeded the 1.5× active budget cap and was skipped. */
    budgetSkipped: boolean("budget_skipped").notNull().default(false),
    conversationId: text("conversation_id"),
    messageId: text("message_id"),
    createdAt: timestamp("created_at", { withTimezone: true }).notNull().defaultNow(),
  },
  (cols) => ({
    networkIdx: index("network_shadow_samples_net_idx").on(cols.networkId),
    shadowIdx: index("network_shadow_samples_shadow_idx").on(
      cols.shadowVariantId,
    ),
    createdIdx: index("network_shadow_samples_created_idx").on(cols.createdAt),
  }),
);
/** Row shape produced by selecting from `network_shadow_samples`. */
export type NetworkShadowSampleRow = typeof networkShadowSamples.$inferSelect;
/** Payload shape accepted when inserting into `network_shadow_samples`. */
export type InsertNetworkShadowSampleRow =
  typeof networkShadowSamples.$inferInsert;
// -------------------------------------------------------- evolution events
/**
 * `network_evolution_events` — append-only event stream behind the admin
 * "Evolution Live" page and the auto-rollback watchdog.
 */
export const networkEvolutionEvents = pgTable(
  "network_evolution_events",
  {
    id: text("id").primaryKey(),
    networkId: text("network_id").notNull(),
    /**
     * Event kind, one of:
     *   "cadence_trigger" | "regression_trigger" | "coverage_trigger"
     *   | "shadow_started" | "shadow_budget_skipped" | "shadow_budget_threshold"
     *   | "shadow_runner_error" | "shadow_reviewer_fallback"
     *   | "auto_promote_skipped" | "promote" | "rollback"
     *   | "regression_suite_failed"
     */
    kind: text("kind").notNull(),
    /** Variant the event concerns (the candidate for trigger / promote). */
    variantId: text("variant_id"),
    /** Free-form payload (metric snapshots, gate decisions). */
    payload: jsonb("payload").notNull().default({}),
    /** Optional chaining pointer — e.g. a rollback referencing its promote. */
    relatedEventId: text("related_event_id"),
    /** Optional pointer to a network_promotions row. */
    promotionId: text("promotion_id"),
    createdAt: timestamp("created_at", { withTimezone: true }).notNull().defaultNow(),
  },
  (cols) => ({
    networkIdx: index("network_evolution_events_net_idx").on(cols.networkId),
    kindIdx: index("network_evolution_events_kind_idx").on(cols.kind),
    createdIdx: index("network_evolution_events_created_idx").on(
      cols.createdAt,
    ),
  }),
);
/** Row shape produced by selecting from `network_evolution_events`. */
export type NetworkEvolutionEventRow =
  typeof networkEvolutionEvents.$inferSelect;
/** Payload shape accepted when inserting into `network_evolution_events`. */
export type InsertNetworkEvolutionEventRow =
  typeof networkEvolutionEvents.$inferInsert;
// -------------------------------------------------------- regression samples
/**
 * `network_regression_samples` — archived representative inputs that the
 * pre-promotion regression suite replays against a candidate.
 */
export const networkRegressionSamples = pgTable(
  "network_regression_samples",
  {
    id: text("id").primaryKey(),
    networkId: text("network_id").notNull(),
    problemClassPath: text("problem_class_path").notNull(),
    label: text("label").notNull().default(""),
    /** Network input payload replayed against any candidate. */
    inputPayload: jsonb("input_payload").notNull(),
    /**
     * Lowest reviewer score a candidate may reach and still pass this
     * sample; the pre-promotion suite fails when the candidate scores
     * below it.
     */
    expectedFloor: doublePrecision("expected_floor").notNull().default(0.6),
    /** Optional structural assertions, e.g. {must_include_keys:[...]} */
    expectedShape: jsonb("expected_shape").notNull().default({}),
    /** "active" rows feed the suite; "archived" rows are retained for audit. */
    status: text("status").notNull().default("active"),
    createdBy: text("created_by").notNull().default("system"),
    createdAt: timestamp("created_at", { withTimezone: true }).notNull().defaultNow(),
  },
  (cols) => ({
    networkIdx: index("network_regression_samples_net_idx").on(cols.networkId),
    statusIdx: index("network_regression_samples_status_idx").on(cols.status),
  }),
);
/** Row shape produced by selecting from `network_regression_samples`. */
export type NetworkRegressionSampleRow =
  typeof networkRegressionSamples.$inferSelect;
/** Payload shape accepted when inserting into `network_regression_samples`. */
export type InsertNetworkRegressionSampleRow =
  typeof networkRegressionSamples.$inferInsert;
// -------------------------------------------------------- submission_feedback_ledger
//
// Task #226 — reverse-mapping ledger written after the competition's
// 117-task real-set driver submits. One row = the metric row (i.e.
// network_version_metrics.id) produced by running one
// (submission_id, task_id) pair on a given network_version. In phase 3,
// when the organizers' recomputed EF1 ground truth arrives, the
// metric_row_id is looked up by (submission_id, task_id) and the value
// is fed back into the matching reviewer channel (external_truth).
//
// Why this is a standalone table:
// - jsonb cannot carry enough semantics (the driver produces 117 rows
//   per run, and they need to be indexable);
// - it keeps network_version_metrics itself unpolluted (each row there
//   is a single reviewer scoring snapshot, with no redundant fields).
export const submissionFeedbackLedger = pgTable(
  "submission_feedback_ledger",
  {
    id: text("id").primaryKey(),
    /** Driver run_id, shaped like the ISO timestamp "2026-04-26T10-00-00Z". */
    submissionId: text("submission_id").notNull(),
    networkId: text("network_id").notNull(),
    networkVersionId: text("network_version_id").notNull(),
    /** The task_id from task.json, e.g. "dude_ada". */
    taskId: text("task_id").notNull(),
    /**
     * Points at network_version_metrics.id — the reviewer score row
     * produced by this driver run.
     */
    metricRowId: text("metric_row_id").notNull(),
    submittedAt: timestamp("submitted_at", { withTimezone: true }).notNull().defaultNow(),
    /** "pending" | "received" | "failed" */
    externalTruthStatus: text("external_truth_status").notNull().default("pending"),
    /** Ground-truth figure recomputed by the organizers (EF1% etc.); written in phase 3. */
    externalTruthValue: doublePrecision("external_truth_value"),
    externalTruthReceivedAt: timestamp("external_truth_received_at", { withTimezone: true }),
  },
  (cols) => ({
    // NOTE(review): `submission_id` is also the leading column of
    // `submission_feedback_ledger_uniq` below — confirm this standalone
    // index is still wanted.
    submissionIdx: index("submission_feedback_ledger_sub_idx").on(
      cols.submissionId,
    ),
    taskIdx: index("submission_feedback_ledger_task_idx").on(cols.taskId),
    metricIdx: index("submission_feedback_ledger_metric_idx").on(
      cols.metricRowId,
    ),
    statusIdx: index("submission_feedback_ledger_status_idx").on(
      cols.externalTruthStatus,
    ),
    // One ledger row per (submission, task) pair.
    submissionTaskPair: uniqueIndex("submission_feedback_ledger_uniq").on(
      cols.submissionId,
      cols.taskId,
    ),
  }),
);
/** Row shape produced by selecting from `submission_feedback_ledger`. */
export type SubmissionFeedbackLedgerRow =
  typeof submissionFeedbackLedger.$inferSelect;
/** Payload shape accepted when inserting into `submission_feedback_ledger`. */
export type InsertSubmissionFeedbackLedgerRow =
  typeof submissionFeedbackLedger.$inferInsert;