Iostream-Li's picture
Add files using upload-large-folder tool
ff78003 verified
/**
 * Wave B — the triggers that wake the evolution worker.
 *
 * The detection layer: it inspects the recent metrics window for
 * a (network, version) and returns four independent booleans. The
 * caller decides whether to schedule a Mode B candidate search, an
 * extra shadow run, or to no-op. Detection is idempotent — calling
 * `evaluateTriggers` repeatedly with the same DB state yields the same
 * answer (an event is written for every signal that fired, on every
 * call, so the admin can see the cadence, but the gate evaluation
 * itself is deterministic).
 *
 * Trigger semantics:
 *   - cadence        : the active variant has accumulated ≥ N samples
 *                      since the network's last cadence event (all of
 *                      its samples when no cadence event exists yet).
 *   - regression     : current rolling fitness < floor, judged only once
 *                      the window holds at least 10 samples; the floor
 *                      is read from `tool_networks.config` (or sane
 *                      default) and is compared against `weightedMean`
 *                      so problem-class weights are honoured.
 *   - coverage       : ≥ K metric rows in the last 7 days flagged
 *                      `budget_exceeded` or `retries >= maxRetries`. Acts
 *                      as a coarse "uncovered semantic cluster" proxy
 *                      until the optional clustering layer lands.
 *   - external truth : the newest `external_truth_backfilled` event is
 *                      more recent than the newest
 *                      `external_truth_trigger` event (or no such
 *                      trigger event exists yet).
 */
import { and, count, desc, eq, gte, sql } from "drizzle-orm";
import {
db,
networkEvolutionEvents,
networkVersionMetrics,
toolNetworks,
} from "@workspace/db";
import { rollingFitness, type FitnessSummary } from "./fitness";
import { recordEvent, type EvolutionEventKind } from "./events";
// Fallback values used when the network config does not supply a usable
// threshold (see `readThresholds`).

/** Default `cadenceN`: samples needed since the last cadence event. */
const DEFAULT_CADENCE_N = 30;
/** Default `regressionFloor`: weighted-mean fitness below this fires regression. */
const DEFAULT_REGRESSION_FLOOR = 0.6;
/** Default `coverageK`: coverage-miss rows needed to fire the coverage trigger. */
const DEFAULT_COVERAGE_K = 5;
/**
 * Retry count at or above which a metrics row counts as a coverage miss.
 * Unlike the three values above, this one is not read from the network config.
 */
const DEFAULT_MAX_RETRIES = 2;
/** Per-network gate values that the trigger evaluation compares against. */
export interface TriggerThresholds {
/** Cadence fires when the sample count since the last cadence event is ≥ this. */
cadenceN: number;
/** Regression fires when the rolling `weightedMean` is strictly below this. */
regressionFloor: number;
/** Coverage fires when the number of coverage-miss rows is ≥ this. */
coverageK: number;
}
/** Outcome of a single trigger check. */
export interface TriggerSignal {
/** Event kind that is recorded when this signal fires. */
kind: EvolutionEventKind;
/** Whether the trigger condition was met in this evaluation. */
fired: boolean;
/** Diagnostic values behind the decision; stored as the event payload when fired. */
payload: Record<string, unknown>;
}
/** Full result of `evaluateTriggers` for one network. */
export interface TriggerEvaluation {
/** Id of the network that was evaluated. */
networkId: string;
/** Active variant the triggers were evaluated against; `null` when the network has none. */
activeVersionId: string | null;
/** Problem-class path copied from the network row. */
problemClassPath: string;
/** Rolling fitness of the active variant; `null` when there is no active variant. */
fitness: FitnessSummary | null;
/** Thresholds that were in effect for this evaluation. */
thresholds: TriggerThresholds;
/** One entry per trigger, fired or not; empty when there is no active variant. */
signals: TriggerSignal[];
/** True when at least one signal fired. */
anyFired: boolean;
}
/**
 * Read per-network trigger thresholds from `tool_networks.config`.
 *
 * Values are taken from the config's `evolution` block (`cadenceN`,
 * `regressionFloor`, `coverageK`). Any value that is missing,
 * non-numeric, non-finite or not strictly positive falls back to the
 * module default — this keeps cold-start safe.
 *
 * Side effect: when the config carries no `evolution` block, a CONT-006
 * quarantine hit is recorded on a best-effort basis. A failure to record
 * it is logged and never affects the returned thresholds.
 *
 * @param config Raw network config, or `null` when the row has none.
 * @returns The effective thresholds, with `regressionFloor` clamped to [0, 1].
 */
function readThresholds(
  config: Record<string, unknown> | null,
): TriggerThresholds {
  const c: Record<string, unknown> = config ?? {};
  // Only an object can carry threshold fields; anything else behaves like
  // an empty block and yields the defaults below.
  const rawEvolution = c.evolution;
  const evo: Record<string, unknown> =
    typeof rawEvolution === "object" && rawEvolution !== null
      ? (rawEvolution as Record<string, unknown>)
      : {};
  // QUARANTINE-CONT-006 — trigger thresholds fall back to hard-coded defaults
  // when network config doesn't carry an `evolution` block. B5 will surface a
  // per-problem-class config UI + sensible learned defaults.
  // @deprecated CONT-006. Defaults-as-thresholds. Real fix in B5.
  const usingDefaults = !c.evolution;
  if (usingDefaults) {
    // Fire-and-forget telemetry. The rejection handler is required: without
    // it a failed module load (or a throwing recorder) surfaces as an
    // unhandled promise rejection from what is meant to be a best-effort path.
    void import("../quarantine/index.ts")
      .then((q) =>
        q.recordQuarantineHit("CONT-006", {
          gate: "trigger_thresholds_default",
          site: "evolution/triggers.ts:readThresholds",
          defaults: {
            cadenceN: DEFAULT_CADENCE_N,
            regressionFloor: DEFAULT_REGRESSION_FLOOR,
            coverageK: DEFAULT_COVERAGE_K,
          },
        }),
      )
      .catch((err: unknown) => {
        console.warn(
          "evolution/triggers.ts:readThresholds failed to record CONT-006 quarantine hit",
          err,
        );
      });
  }
  // Coerce to a finite, strictly positive number or use the default `d`.
  const num = (v: unknown, d: number): number => {
    const n = typeof v === "number" ? v : Number(v);
    return Number.isFinite(n) && n > 0 ? n : d;
  };
  return {
    cadenceN: num(evo.cadenceN, DEFAULT_CADENCE_N),
    regressionFloor: clamp01(num(evo.regressionFloor, DEFAULT_REGRESSION_FLOOR)),
    coverageK: num(evo.coverageK, DEFAULT_COVERAGE_K),
  };
}
/**
 * Clamp `n` into the closed interval [0, 1].
 * Values already inside the interval are returned unchanged.
 */
function clamp01(n: number): number {
  return n < 0 ? 0 : n > 1 ? 1 : n;
}
/**
 * Fetch the most recent evolution event of `kind` for `networkId`,
 * ordered by `createdAt` descending. Yields `undefined` when the
 * network has no such event.
 */
async function latestEventOfKind(
  networkId: string,
  kind: "cadence_trigger" | "external_truth_backfilled" | "external_truth_trigger",
) {
  const rows = await db
    .select()
    .from(networkEvolutionEvents)
    .where(
      and(
        eq(networkEvolutionEvents.networkId, networkId),
        eq(networkEvolutionEvents.kind, kind),
      ),
    )
    .orderBy(desc(networkEvolutionEvents.createdAt))
    .limit(1);
  return rows[0];
}

/**
 * Evaluate the triggers against the active variant of `networkId`.
 *
 * Writes one `*_trigger` event per signal that fired. Returns the full
 * evaluation so the caller (orchestrator / admin endpoint) can render
 * a decision panel even when nothing fired.
 *
 * When the network has no active variant, nothing is evaluated or
 * written: the result carries `fitness: null` and an empty signal list.
 *
 * @param networkId Id of the `tool_networks` row to evaluate.
 * @returns The thresholds in effect, the rolling fitness, and one signal
 *          per trigger (cadence, regression, coverage, external truth).
 * @throws Error when no network with `networkId` exists.
 */
export async function evaluateTriggers(
  networkId: string,
): Promise<TriggerEvaluation> {
  // Regression is only judged once the rolling window holds this many samples.
  const minRegressionSamples = 10;
  // Look-back window, in days, for counting coverage misses.
  const coverageWindowDays = 7;

  const netRow = (
    await db.select().from(toolNetworks).where(eq(toolNetworks.id, networkId)).limit(1)
  )[0];
  if (!netRow) throw new Error(`network ${networkId} not found`);

  const thresholds = readThresholds(netRow.config as Record<string, unknown>);
  const activeVersionId = netRow.activeVariantId;
  if (!activeVersionId) {
    // No active variant: there is nothing to measure, so report an empty evaluation.
    return {
      networkId,
      activeVersionId: null,
      problemClassPath: netRow.problemClassPath,
      fitness: null,
      thresholds,
      signals: [],
      anyFired: false,
    };
  }

  const fitness = await rollingFitness(
    networkId,
    activeVersionId,
    netRow.problemClassPath,
  );

  // ----- Cadence: count samples since last cadence_trigger event ----
  const lastCadenceEvent = await latestEventOfKind(networkId, "cadence_trigger");
  const cadenceWhere = and(
    eq(networkVersionMetrics.networkId, networkId),
    eq(networkVersionMetrics.versionId, activeVersionId),
    // Without a previous cadence event every sample of the variant counts.
    lastCadenceEvent
      ? gte(networkVersionMetrics.createdAt, lastCadenceEvent.createdAt)
      : sql`true`,
  );
  const sinceCount = Number(
    (
      await db
        .select({ c: count() })
        .from(networkVersionMetrics)
        .where(cadenceWhere)
    )[0]?.c ?? 0,
  );
  const cadenceFired = sinceCount >= thresholds.cadenceN;

  // ----- Regression: weighted mean below floor with enough samples -----
  const regressionFired =
    fitness.sampleCount >= minRegressionSamples &&
    fitness.weightedMean < thresholds.regressionFloor;

  // ----- Coverage: budget-exceeded or saturated-retry rows ≥ K ------
  const coverageWindowStart = new Date(
    Date.now() - coverageWindowDays * 24 * 60 * 60 * 1000,
  );
  const coverageRows = await db
    .select({
      retries: networkVersionMetrics.retries,
      budgetExceeded: networkVersionMetrics.budgetExceeded,
    })
    .from(networkVersionMetrics)
    .where(
      and(
        eq(networkVersionMetrics.networkId, networkId),
        eq(networkVersionMetrics.versionId, activeVersionId),
        gte(networkVersionMetrics.createdAt, coverageWindowStart),
      ),
    );
  const coverageMisses = coverageRows.filter(
    (r) => r.budgetExceeded || (r.retries ?? 0) >= DEFAULT_MAX_RETRIES,
  ).length;
  const coverageFired = coverageMisses >= thresholds.coverageK;

  // ----- External truth: backfill happened since last consumption -----
  // Task #227 — once the organizer's ground truth has been written to the
  // ledger through the admin import, an `external_truth_backfilled` event is
  // recorded. evaluateTriggers treats "the latest backfill is newer than the
  // latest external_truth_trigger" as a wake-up signal, so that the scheduler
  // recomputes fitness as soon as new truth arrives (the LEFT JOIN on the
  // ledger already reflects the new weights) and proposes a new candidate
  // when needed. The signal carries `latestBackfillId` in its payload so the
  // admin UI can link back to the originating event.
  const lastBackfill = await latestEventOfKind(networkId, "external_truth_backfilled");
  const lastTruthConsume = await latestEventOfKind(networkId, "external_truth_trigger");
  const externalTruthFired =
    !!lastBackfill &&
    (!lastTruthConsume || lastBackfill.createdAt > lastTruthConsume.createdAt);

  const signals: TriggerSignal[] = [
    {
      kind: "cadence_trigger",
      fired: cadenceFired,
      payload: {
        sampleCountSinceLast: sinceCount,
        threshold: thresholds.cadenceN,
        sinceEventId: lastCadenceEvent?.id ?? null,
      },
    },
    {
      kind: "regression_trigger",
      fired: regressionFired,
      payload: {
        weightedMean: fitness.weightedMean,
        floor: thresholds.regressionFloor,
        sampleCount: fitness.sampleCount,
        ciLower: fitness.ciLower,
      },
    },
    {
      kind: "coverage_trigger",
      fired: coverageFired,
      payload: {
        misses: coverageMisses,
        threshold: thresholds.coverageK,
        windowDays: coverageWindowDays,
      },
    },
    {
      kind: "external_truth_trigger",
      fired: externalTruthFired,
      payload: {
        latestBackfillId: lastBackfill?.id ?? null,
        latestBackfillAt: lastBackfill?.createdAt
          ? new Date(lastBackfill.createdAt).toISOString()
          : null,
        sinceTriggerId: lastTruthConsume?.id ?? null,
      },
    },
  ];

  // Persist fired signals one at a time, in declaration order.
  for (const s of signals) {
    if (!s.fired) continue;
    await recordEvent({
      networkId,
      kind: s.kind,
      variantId: activeVersionId,
      payload: s.payload,
    });
  }

  return {
    networkId,
    activeVersionId,
    problemClassPath: netRow.problemClassPath,
    fitness,
    thresholds,
    signals,
    anyFired: signals.some((s) => s.fired),
  };
}