Iostream-Li's picture
Add files using upload-large-folder tool
ff78003 verified
/**
* Wave B — auto-promote pipeline with model-strength gate.
*
* Five gates run in series; the first to fail short-circuits and writes
* an `auto_promote_skipped` event with the failing gate's payload. The
* caller (orchestrator / admin endpoint) just gets back the decision.
*
* Gates (in order):
* 1. shadow sample count ≥ MIN_SHADOW_SAMPLES (default 30)
* 2. CI of (shadow-active) strictly above 0 (95% CI lower > 0)
* 3. critical reviewer signals on shadow side == 0
* 4. pre-promotion regression suite passes for every active sample
* 5. candidate.builder_model_tier ≥ network.release_tier_floor
*
* Tier ordering: weak < medium < strong. A weak-builder candidate that
* passes gates 1-4 is *still* not promoted to the global active slot;
* instead we mark `private_namespace` so the runtime can serve that
* variant only to the user who built it (the runtime hook lives in the
* tool-network resolver and is incremental — Wave B records the
* private flag; #178 wires it into the planner).
*
* The actual flip uses a SERIALIZABLE transaction with FOR UPDATE on
* `tool_networks` so two concurrent promote attempts can't both win.
*/
import { and, eq, sql } from "drizzle-orm";
import {
db,
networkPromotions,
networkVersions,
toolNetworks,
} from "@workspace/db";
import { newId } from "../ids";
import { logger } from "../logger";
import { recordEvent } from "./events";
import { rollingFitness } from "./fitness";
import { runSuiteAgainstVariant } from "./regression-suite";
import { summariseShadow, type ShadowSummary } from "./shadow";
/** Default minimum number of shadow samples required before a candidate can pass gate 1. */
export const MIN_SHADOW_SAMPLES = 30;

/**
 * Numeric rank for each builder-model tier (weak < medium < strong).
 *
 * Backed by a null-prototype object so that looking up an arbitrary string
 * (the tier value comes from a database column) can only ever return one of
 * the ranks below or `undefined`. With a plain object literal, keys such as
 * "constructor" or "toString" resolve to inherited functions, which defeats
 * the `?? 0` fallback used by the tier gate.
 */
const TIER_ORDER: Record<string, number> = Object.assign(
  Object.create(null) as Record<string, number>,
  {
    weak: 1,
    medium: 2,
    strong: 3,
  },
);
/**
 * Outcome label carried on every `AutoPromoteDecision`.
 *
 * As produced by `attemptAutoPromote` in this file:
 * - "promoted": every gate passed and the network's active variant was flipped.
 * - "promoted_private": gates 1-4 passed but the candidate's builder tier is
 *   below the network's release floor; the shared active slot is not touched.
 * - "skipped_no_active": the network has no active variant.
 * - "skipped_same_variant": the candidate already is the active variant; also
 *   returned when the promotion transaction aborts.
 * - "skipped_low_samples" / "skipped_ci" / "skipped_critical": the shadow
 *   summary failed the sample-count, CI-lower-bound or critical-signal check.
 * - "skipped_regression": the regression suite ran samples and did not all pass.
 * - "skipped_tier": declared here but not returned by any code path visible
 *   in this file.
 */
export type AutoPromoteOutcome =
| "promoted"
| "promoted_private"
| "skipped_no_active"
| "skipped_same_variant"
| "skipped_low_samples"
| "skipped_ci"
| "skipped_critical"
| "skipped_regression"
| "skipped_tier";
/**
 * Result handed back to the caller of `attemptAutoPromote`.
 */
export interface AutoPromoteDecision {
/** Network the attempt was made against. */
networkId: string;
/** Variant that was evaluated for promotion. */
candidateVariantId: string;
/** Which gate stopped the attempt, or which kind of promotion happened. */
outcome: AutoPromoteOutcome;
/** Id of the `network_promotions` row; only set when outcome is "promoted". */
promotionId?: string;
/**
 * Shadow summary used for the gates. Set at the top level for "promoted" and
 * "promoted_private"; skip decisions carry it (when available) inside
 * `payload` instead.
 */
shadow?: ShadowSummary;
/** Outcome-specific evidence (thresholds, suite result, tier info, ...). */
payload: Record<string, unknown>;
}
/**
 * Arguments accepted by `attemptAutoPromote`.
 */
export interface AttemptAutoPromoteInput {
/** Id of the tool network whose active variant may be replaced. */
networkId: string;
/** Id of the variant proposed for promotion; must belong to `networkId`. */
candidateVariantId: string;
/** Recorded as `decidedBy` on the promotion row; defaults to "auto_promote". */
actor?: string;
/** Override the default 30-sample minimum (smoke tests only). */
minSamplesOverride?: number;
}
/**
 * Run the full gate chain for one candidate variant and promote it if every
 * gate passes.
 *
 * Order of checks:
 *  - pre-checks: the network has an active variant and the candidate is not
 *    already that variant;
 *  - gates 1-3 from the shadow summary (sample count, CI lower bound > 0,
 *    zero critical signals);
 *  - gate 4: the regression suite (a suite with zero samples is let through);
 *  - gate 5: builder-model tier against the network's release floor. A
 *    candidate below the floor is marked with a private namespace and
 *    reported as "promoted_private" without touching the active slot.
 *
 * On success the function flips `tool_networks.active_variant_id`, writes a
 * `network_promotions` row with a snapshot of the gate evidence, and records
 * a `promote` event.
 *
 * @param input - network / candidate ids plus optional actor and sample-count override.
 * @returns the decision describing which gate stopped the attempt or which
 *   kind of promotion happened.
 * @throws Error when the network or the variant does not exist, or when the
 *   variant belongs to a different network.
 */
export async function attemptAutoPromote(
  input: AttemptAutoPromoteInput,
): Promise<AutoPromoteDecision> {
  const { networkId, candidateVariantId } = input;
  const minSamples = input.minSamplesOverride ?? MIN_SHADOW_SAMPLES;
  const actor = input.actor ?? "auto_promote";

  const network = (
    await db.select().from(toolNetworks).where(eq(toolNetworks.id, networkId)).limit(1)
  )[0];
  if (!network) throw new Error(`network ${networkId} not found`);

  const candidate = (
    await db
      .select()
      .from(networkVersions)
      .where(eq(networkVersions.id, candidateVariantId))
      .limit(1)
  )[0];
  if (!candidate) throw new Error(`variant ${candidateVariantId} not found`);
  if (candidate.networkId !== networkId) {
    throw new Error(
      `variant ${candidateVariantId} does not belong to network ${networkId}`,
    );
  }

  if (!network.activeVariantId) {
    // QUARANTINE-CONT-006 — skipping outright on "no active variant" is a
    // cold-start fallback that bypasses every real gate (shadow CI /
    // regression suite / tier). B5 will replace the cold-start path with
    // "promote-as-active first, then backfill shadow".
    // @deprecated CONT-006. Cold-start skip bypasses all gates. Real fix in B5.
    //
    // Fire-and-forget: the hit is bookkeeping only, so a failure to load the
    // module or record the hit is logged instead of surfacing as an
    // unhandled promise rejection.
    void import("../quarantine/index.ts")
      .then((q) =>
        q.recordQuarantineHit("CONT-006", {
          gate: "promote_no_active_skip",
          site: "evolution/promote.ts:attemptAutoPromote",
          networkId,
          candidateVariantId,
        }),
      )
      .catch((err: unknown) => {
        logger.warn(
          { err, networkId, candidateVariantId },
          "failed to record CONT-006 quarantine hit",
        );
      });
    return await skip(networkId, candidateVariantId, "skipped_no_active", {});
  }

  if (network.activeVariantId === candidateVariantId) {
    return await skip(
      networkId,
      candidateVariantId,
      "skipped_same_variant",
      { activeVariantId: network.activeVariantId },
    );
  }

  // -- Gate 1 + 2 + 3: shadow sample size, CI gate, critical signals --
  const shadow = await summariseShadow(networkId, candidateVariantId);
  if (shadow.sampleCount < minSamples) {
    return await skip(networkId, candidateVariantId, "skipped_low_samples", {
      shadow,
      threshold: minSamples,
    });
  }
  if (shadow.deltaCiLower <= 0) {
    return await skip(networkId, candidateVariantId, "skipped_ci", { shadow });
  }
  if (shadow.criticalCount > 0) {
    return await skip(networkId, candidateVariantId, "skipped_critical", {
      shadow,
    });
  }

  // -- Gate 4: full regression suite must pass ---------------------
  // A suite with zero samples does not block the promotion.
  const suite = await runSuiteAgainstVariant(networkId, candidateVariantId);
  if (suite.totalSamples > 0 && !suite.allPassed) {
    await recordEvent({
      networkId,
      kind: "regression_suite_failed",
      variantId: candidateVariantId,
      payload: { suite },
    });
    return await skip(networkId, candidateVariantId, "skipped_regression", {
      suite,
      shadow,
    });
  }

  // -- Gate 5: model-strength tier gate ----------------------------
  // Unknown candidate tiers rank 0 (below everything); an unknown or missing
  // floor falls back to the strictest floor ("strong").
  const candidateTier = TIER_ORDER[candidate.builderModelTier] ?? 0;
  const floorTier =
    TIER_ORDER[network.releaseTierFloor as string] ?? TIER_ORDER.strong!;
  if (candidateTier < floorTier) {
    // Weak-tier candidates that pass quality gates still go through —
    // but only into a private namespace; never the shared active slot.
    let effectiveNamespace = candidate.privateNamespace;
    if (!effectiveNamespace) {
      // Fall back to creator user as the namespace if the variant
      // didn't pre-declare one. `built_by` is "user:<id>" for human
      // builders and "auto:..." for evolution loops.
      effectiveNamespace = candidate.builtBy?.startsWith("user:")
        ? candidate.builtBy
        : `system:${candidate.id}`;
      await db
        .update(networkVersions)
        .set({ privateNamespace: effectiveNamespace })
        .where(eq(networkVersions.id, candidateVariantId));
    }
    // The event kind is "auto_promote_skipped" (the shared slot was not
    // changed) even though the returned outcome is "promoted_private".
    await recordEvent({
      networkId,
      kind: "auto_promote_skipped",
      variantId: candidateVariantId,
      payload: {
        reason: "tier_gate",
        candidateTier: candidate.builderModelTier,
        floor: network.releaseTierFloor,
      },
    });
    return {
      networkId,
      candidateVariantId,
      outcome: "promoted_private",
      shadow,
      payload: {
        privateNamespace: effectiveNamespace,
        candidateTier: candidate.builderModelTier,
        floor: network.releaseTierFloor,
      },
    };
  }

  // -- All gates passed: atomic flip + audit row --------------------
  const fromVariantId = network.activeVariantId;
  const baselineFitness = await rollingFitness(
    networkId,
    fromVariantId,
    network.problemClassPath,
  );
  const promotionId = newId("nprm");

  // Set just before the deliberate abort below so the catch handler can tell
  // "another promotion won" apart from an unexpected database failure.
  let lostRace = false;
  try {
    // NOTE(review): no isolation level is passed to db.transaction here, so
    // the transaction runs at the connection's default level; the row lock
    // below is what serialises concurrent promotions. Confirm against the
    // file header, which describes this as SERIALIZABLE.
    await db.transaction(async (tx) => {
      const locked = (
        await tx.execute(sql`
          select active_variant_id
          from tool_networks
          where id = ${networkId}
          for update
        `)
      ).rows[0] as { active_variant_id: string | null } | undefined;
      if (!locked || locked.active_variant_id !== fromVariantId) {
        // Another concurrent promotion won; bail out cleanly. Throwing
        // makes the transaction callback reject so nothing is written.
        lostRace = true;
        throw new Error("active variant changed under us");
      }

      await tx.insert(networkPromotions).values({
        id: promotionId,
        networkId,
        fromVariantId,
        toVariantId: candidateVariantId,
        reason: "auto_promote",
        decidedBy: actor,
        metricsSnapshot: {
          shadow,
          regression: { passed: suite.passed, total: suite.totalSamples },
          baseline: {
            mean: baselineFitness.mean,
            weightedMean: baselineFitness.weightedMean,
            sampleCount: baselineFitness.sampleCount,
            ciLower: baselineFitness.ciLower,
            ciUpper: baselineFitness.ciUpper,
          },
          candidateTier: candidate.builderModelTier,
          floor: network.releaseTierFloor,
        },
      });

      await tx
        .update(networkVersions)
        .set({ status: "active" })
        .where(eq(networkVersions.id, candidateVariantId));

      await tx
        .update(networkVersions)
        .set({ status: "demoted" })
        .where(
          and(
            eq(networkVersions.id, fromVariantId),
            eq(networkVersions.networkId, networkId),
          ),
        );

      await tx
        .update(toolNetworks)
        .set({ activeVariantId: candidateVariantId, updatedAt: new Date() })
        .where(eq(toolNetworks.id, networkId));
    });
  } catch (err: unknown) {
    if (lostRace) {
      logger.warn(
        { err, networkId, candidateVariantId },
        "auto-promote transaction aborted",
      );
      return await skip(networkId, candidateVariantId, "skipped_same_variant", {
        reason: "concurrent_promotion",
      });
    }
    // Any other failure (constraint violation, connection loss, ...) is not
    // a lost race; report it as such instead of mislabelling it. The outcome
    // value is kept because the outcome union has no dedicated member for
    // transaction errors and callers already handle this one.
    logger.warn(
      { err, networkId, candidateVariantId },
      "auto-promote transaction failed",
    );
    return await skip(networkId, candidateVariantId, "skipped_same_variant", {
      reason: "transaction_error",
      error: err instanceof Error ? err.message : String(err),
    });
  }

  await recordEvent({
    networkId,
    kind: "promote",
    variantId: candidateVariantId,
    promotionId,
    payload: {
      fromVariantId,
      shadow,
      regression: { passed: suite.passed, total: suite.totalSamples },
      baseline: {
        mean: baselineFitness.mean,
        weightedMean: baselineFitness.weightedMean,
        sampleCount: baselineFitness.sampleCount,
      },
    },
  });

  return {
    networkId,
    candidateVariantId,
    outcome: "promoted",
    promotionId,
    shadow,
    payload: { fromVariantId },
  };
}
/**
 * Record an `auto_promote_skipped` event for a gate that stopped the attempt
 * and build the decision returned to the caller.
 *
 * The event payload is the caller's payload with `outcome` added in front;
 * the returned decision carries the caller's payload unchanged.
 *
 * @param networkId - network the attempt ran against.
 * @param variantId - candidate variant that was evaluated.
 * @param outcome - outcome label to report.
 * @param payload - gate-specific evidence.
 * @returns the decision object for this skip.
 */
async function skip(
  networkId: string,
  variantId: string,
  outcome: AutoPromoteOutcome,
  payload: Record<string, unknown>,
): Promise<AutoPromoteDecision> {
  const eventPayload = { outcome, ...payload };

  await recordEvent({
    networkId,
    kind: "auto_promote_skipped",
    variantId,
    payload: eventPayload,
  });

  const decision: AutoPromoteDecision = {
    networkId,
    candidateVariantId: variantId,
    outcome,
    payload,
  };
  return decision;
}