| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| import { and, desc, eq, gte, sql } from "drizzle-orm"; |
| import { |
| db, |
| networkVersionMetrics, |
| problemClasses, |
| submissionFeedbackLedger, |
| } from "@workspace/db"; |
| import { |
| REVIEWER_CHANNEL_KEYS, |
| NEUTRAL_CHANNEL_SCORE, |
| } from "../reviewer/channels"; |
|
|
/** Look-back window, in days, used when a caller does not supply `windowDays`. */
const DEFAULT_WINDOW_DAYS = 7;
/** Cap on the number of metric rows fetched when a caller does not supply `maxRows`. */
const DEFAULT_MAX_ROWS = 100;
|
|
/**
 * One scored sample as consumed by `summarise` and `weightedScore`.
 *
 * `rollingFitness` builds these from a metrics row left-joined with its
 * feedback-ledger row, so the two `externalTruth*` fields are absent/null
 * whenever no ledger row matched.
 */
export interface FitnessSampleRow {
  /** Overall reviewer score; clamped to [0, 1] before it is aggregated. */
  reviewerScore: number;
  /** Per-channel scores keyed by channel name; each value is clamped to [0, 1] when weighted. */
  channelBreakdown: Record<string, number>;
  /** NOTE(review): presumably the run cost in milliseconds — not read by the aggregation in this file. */
  costMs: number | null;
  /** Carried through from the metrics row; not read by the aggregation in this file. */
  retries: number;
  /** Carried through from the metrics row; not read by the aggregation in this file. */
  budgetExceeded: boolean;
  /** Creation time of the metrics row; `summarise` reports it as `latestAt` / `oldestAt`. */
  createdAt: Date;
  /**
   * External-truth state from the feedback ledger. Only `"received"` makes
   * `summarise` switch to the with-truth weight map; every other value
   * (including `null` / `undefined`) uses the label-free map.
   */
  externalTruthStatus?: "pending" | "received" | "failed" | null;
  /**
   * External-truth score. When the status is `"received"` and this is a finite
   * number, `summarise` divides it by 100, clamps it to [0, 1] and exposes it
   * as the `external_truth` channel.
   * NOTE(review): the division implies a 0–100 scale — confirm against the ledger writer.
   */
  externalTruthValue?: number | null;
}
|
|
/**
 * Aggregate statistics produced by `summarise` for one
 * network / version / problem-class combination.
 */
export interface FitnessSummary {
  networkId: string;
  versionId: string;
  problemClassPath: string;
  /** Number of sample rows that were aggregated. */
  sampleCount: number;
  /** Arithmetic mean of the reviewer scores after clamping each to [0, 1]. */
  mean: number;
  /** Sample standard deviation (n − 1 denominator) of the clamped scores; 0 with fewer than two samples. */
  stddev: number;
  /** Lower bound of `mean ± 1.96 · stddev / √n`, floored at 0. */
  ciLower: number;
  /** Upper bound of `mean ± 1.96 · stddev / √n`, capped at 1. */
  ciUpper: number;
  /** Mean of the per-row channel-weighted scores (see `weightedScore`). */
  weightedMean: number;
  /** `createdAt` of the first row handed to `summarise` (rows are expected newest-first); null when empty. */
  latestAt: Date | null;
  /** `createdAt` of the last row handed to `summarise` (rows are expected newest-first); null when empty. */
  oldestAt: Date | null;
  /** True when `sampleCount` reached the row cap, i.e. the window may hold more rows than were read. */
  sampleBounded: boolean;
}
|
|
/** Optional tuning knobs for `rollingFitness`. */
export interface FitnessOptions {
  /** Look-back window in days; `rollingFitness` falls back to 7 when omitted. */
  windowDays?: number;
  /** Maximum number of rows to read; `rollingFitness` falls back to 100 when omitted. */
  maxRows?: number;
}
|
|
/**
 * Channel weights resolved for a problem class by `resolveChannelWeights`.
 *
 * Two maps are carried so that rows with received external truth can be
 * weighted differently from rows without it.
 */
export interface ChannelWeights {
  /**
   * Same map as `labelFree`; `resolveChannelWeights` always populates it with
   * the label-free weights.
   * NOTE(review): presumably kept for callers that predate the dual maps — confirm.
   */
  weights: Record<string, number>;
  /** Weights `summarise` applies to rows whose external-truth status is not `"received"`. */
  labelFree: Record<string, number>;
  /**
   * Weights `summarise` applies to rows whose external-truth status is
   * `"received"`; those rows may additionally carry an `external_truth` channel.
   */
  withTruth: Record<string, number>;
  /** True when at least one usable weight was read from the problem-class configuration. */
  fromConfig: boolean;
}
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Load and normalise the reviewer channel weights configured for a problem class.
 *
 * Two shapes of `problemClasses.reviewerWeights` are understood:
 *  - dual:  `{ weightsLabelFree: {...}, weightsWithTruth: {...} }`
 *  - flat:  `{ channel: weight, ... }` — used for both maps.
 *
 * In the dual shape a missing, malformed, or effectively empty
 * `weightsWithTruth` falls back to a copy of the label-free weights, which is
 * the same rule `flattenReviewerWeights` applies.
 *
 * @param problemClassPath Value matched against `problemClasses.path`.
 * @returns The resolved maps; every map is empty and `fromConfig` is false
 *          when no row exists or the row has no `reviewerWeights`.
 */
export async function resolveChannelWeights(
  problemClassPath: string,
): Promise<ChannelWeights> {
  const [row] = await db
    .select()
    .from(problemClasses)
    .where(eq(problemClasses.path, problemClassPath))
    .limit(1);
  if (!row || !row.reviewerWeights) {
    return { weights: {}, labelFree: {}, withTruth: {}, fromConfig: false };
  }
  const raw = row.reviewerWeights as Record<string, unknown>;

  // Dual shape: detected as soon as either nested map is an object.
  const dualLabelFree = raw.weightsLabelFree;
  const dualWithTruth = raw.weightsWithTruth;
  const isObject = (value: unknown): boolean =>
    typeof value === "object" && value !== null;
  if (isObject(dualLabelFree) || isObject(dualWithTruth)) {
    const labelFree = sanitiseWeightMap(dualLabelFree);
    const explicitWithTruth = sanitiseWeightMap(dualWithTruth);
    // An empty with-truth map would make every truth-received row fall back to
    // the raw reviewer score; inherit the label-free weights instead so this
    // agrees with flattenReviewerWeights(raw, "withTruth").
    const withTruth =
      Object.keys(explicitWithTruth).length > 0
        ? explicitWithTruth
        : { ...labelFree };
    return {
      weights: labelFree,
      labelFree,
      withTruth,
      fromConfig:
        Object.keys(labelFree).length > 0 || Object.keys(withTruth).length > 0,
    };
  }

  // Flat shape: one map serves both modes.
  const flat = sanitiseWeightMap(raw);
  return {
    weights: flat,
    labelFree: flat,
    withTruth: flat,
    fromConfig: Object.keys(flat).length > 0,
  };
}
|
|
/**
 * Reduce an untrusted value to a map of usable channel weights.
 *
 * An entry is kept only when its value is a number, or a non-blank numeric
 * string, that is finite and non-negative. Everything else is dropped:
 * `null`, `""`, booleans, arrays and nested objects are never coerced, so a
 * malformed entry cannot turn into a spurious weight of 0 or 1.
 *
 * @param raw Candidate weight map (typically parsed JSON configuration).
 * @returns A fresh object holding the accepted weights; empty when `raw` is
 *          not a plain (non-array) object.
 */
function sanitiseWeightMap(raw: unknown): Record<string, number> {
  const out: Record<string, number> = {};
  // Arrays are objects too, but their indices are not channel names.
  if (!raw || typeof raw !== "object" || Array.isArray(raw)) return out;
  for (const [key, value] of Object.entries(raw as Record<string, unknown>)) {
    let weight: number;
    if (typeof value === "number") {
      weight = value;
    } else if (typeof value === "string" && value.trim() !== "") {
      // Blank strings are excluded above because Number("") is 0, not NaN.
      weight = Number(value);
    } else {
      continue;
    }
    if (Number.isFinite(weight) && weight >= 0) out[key] = weight;
  }
  return out;
}
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Synchronously turn a raw `reviewerWeights` value into a single weight map.
 *
 * - Flat shape (neither `weightsLabelFree` nor `weightsWithTruth` is an
 *   object): the value itself is sanitised and returned.
 * - Dual shape, mode `"labelFree"`: the sanitised `weightsLabelFree`.
 * - Dual shape, mode `"withTruth"`: the sanitised `weightsWithTruth`, or the
 *   sanitised `weightsLabelFree` when the former yields no usable weight.
 *
 * @param raw  Raw configuration value; anything that is not an object gives `{}`.
 * @param mode Which map to pick from the dual shape (default `"labelFree"`).
 * @returns The selected, sanitised weight map.
 */
export function flattenReviewerWeights(
  raw: unknown,
  mode: "labelFree" | "withTruth" = "labelFree",
): Record<string, number> {
  if (raw === null || typeof raw !== "object") return {};

  const { weightsLabelFree, weightsWithTruth } = raw as Record<string, unknown>;
  const isNestedMap = (candidate: unknown): boolean =>
    typeof candidate === "object" && candidate !== null;

  // Flat shape: no nested map present, so the object itself is the map.
  if (!isNestedMap(weightsLabelFree) && !isNestedMap(weightsWithTruth)) {
    return sanitiseWeightMap(raw);
  }

  if (mode === "withTruth") {
    const truthWeights = sanitiseWeightMap(weightsWithTruth);
    if (Object.keys(truthWeights).length > 0) return truthWeights;
    // Nothing usable under weightsWithTruth: fall through to label-free.
  }
  return sanitiseWeightMap(weightsLabelFree);
}
|
|
| |
| |
| |
| |
| |
/**
 * Summarise the recent fitness of one network version on one problem class.
 *
 * Reads at most `maxRows` of the newest `networkVersionMetrics` rows created
 * within the last `windowDays` days for the given network / version / problem
 * class, left-joins `submissionFeedbackLedger` so received external truth can
 * feed the weighted score, and passes the rows to `summarise` together with
 * the channel weights configured for the problem class.
 *
 * @param networkId        Network whose metrics are read.
 * @param versionId        Version of that network.
 * @param problemClassPath Problem class to filter on and to resolve weights for.
 * @param opts             Window length / row cap; omitted fields use
 *                         `DEFAULT_WINDOW_DAYS` and `DEFAULT_MAX_ROWS`.
 * @returns The aggregated summary (all-zero with `sampleCount: 0` when no row matches).
 */
export async function rollingFitness(
  networkId: string,
  versionId: string,
  problemClassPath: string,
  opts: FitnessOptions = {},
): Promise<FitnessSummary> {
  const windowDays = opts.windowDays ?? DEFAULT_WINDOW_DAYS;
  const maxRows = opts.maxRows ?? DEFAULT_MAX_ROWS;
  const since = new Date(Date.now() - windowDays * 24 * 60 * 60 * 1000);

  // NOTE(review): if a metric row can have more than one ledger row, the left
  // join yields one result per pair and `maxRows` caps joined rows rather than
  // metric rows — confirm the relationship is at most one-to-one.
  const metricsQuery = db
    .select({
      reviewerScore: networkVersionMetrics.reviewerScore,
      channelBreakdown: networkVersionMetrics.channelBreakdown,
      costMs: networkVersionMetrics.costMs,
      retries: networkVersionMetrics.retries,
      budgetExceeded: networkVersionMetrics.budgetExceeded,
      createdAt: networkVersionMetrics.createdAt,
      externalTruthStatus: submissionFeedbackLedger.externalTruthStatus,
      externalTruthValue: submissionFeedbackLedger.externalTruthValue,
    })
    .from(networkVersionMetrics)
    .leftJoin(
      submissionFeedbackLedger,
      eq(submissionFeedbackLedger.metricRowId, networkVersionMetrics.id),
    )
    .where(
      and(
        eq(networkVersionMetrics.networkId, networkId),
        eq(networkVersionMetrics.versionId, versionId),
        eq(networkVersionMetrics.problemClassPath, problemClassPath),
        gte(networkVersionMetrics.createdAt, since),
      ),
    )
    // Newest first: summarise reads latestAt / oldestAt from the row order.
    .orderBy(desc(networkVersionMetrics.createdAt))
    .limit(maxRows);

  // The sample query and the weight lookup do not depend on each other, so
  // issue both before awaiting instead of paying two sequential round trips.
  const [raw, weights] = await Promise.all([
    metricsQuery,
    resolveChannelWeights(problemClassPath),
  ]);

  const rows: FitnessSampleRow[] = raw.map((r) => ({
    reviewerScore: r.reviewerScore,
    channelBreakdown: (r.channelBreakdown ?? {}) as Record<string, number>,
    costMs: r.costMs,
    retries: r.retries,
    budgetExceeded: r.budgetExceeded,
    createdAt: r.createdAt,
    // Both ledger columns are null when the left join found no ledger row.
    externalTruthStatus:
      (r.externalTruthStatus as FitnessSampleRow["externalTruthStatus"]) ?? null,
    externalTruthValue: r.externalTruthValue ?? null,
  }));

  return summarise(rows, networkId, versionId, problemClassPath, {
    windowDays,
    maxRows,
    weights,
  });
}
|
|
/** Context `summarise` needs in addition to the sample rows. */
interface SummariseOpts {
  /** Window length the rows were selected with; carried for context, not read by `summarise`. */
  windowDays: number;
  /** Row cap the rows were selected with; reaching it sets `sampleBounded`. */
  maxRows: number;
  /** Resolved channel weights; the per-row map is chosen by external-truth status. */
  weights: ChannelWeights;
}
|
|
| |
| |
| |
| |
/**
 * Pure aggregation of sample rows into a `FitnessSummary`.
 *
 * - `mean` / `stddev` / confidence bounds are computed over the reviewer
 *   scores after clamping each to [0, 1]; the bounds use the normal
 *   approximation `mean ± 1.96 · stddev / √n` and are clipped to [0, 1].
 * - `weightedMean` averages `weightedScore` per row. Rows whose external-truth
 *   status is `"received"` use the with-truth weights and, when the truth
 *   value is a finite number, gain an `external_truth` channel equal to
 *   `value / 100` clamped to [0, 1]; all other rows use the label-free weights.
 * - `latestAt` / `oldestAt` are taken from the first and last row, so `rows`
 *   is expected to be ordered newest-first (as `rollingFitness` supplies it).
 *
 * @param rows             Samples to aggregate, newest first.
 * @param networkId        Echoed into the summary.
 * @param versionId        Echoed into the summary.
 * @param problemClassPath Echoed into the summary.
 * @param opts             Weights and the row cap used for `sampleBounded`.
 * @returns The summary; every statistic is 0 and both timestamps are null for empty input.
 */
export function summarise(
  rows: FitnessSampleRow[],
  networkId: string,
  versionId: string,
  problemClassPath: string,
  opts: SummariseOpts,
): FitnessSummary {
  const identity = { networkId, versionId, problemClassPath };
  const count = rows.length;
  if (count === 0) {
    return {
      ...identity,
      sampleCount: 0,
      mean: 0,
      stddev: 0,
      ciLower: 0,
      ciUpper: 0,
      weightedMean: 0,
      latestAt: null,
      oldestAt: null,
      sampleBounded: false,
    };
  }

  // Unweighted statistics over the clamped reviewer scores.
  const scores = rows.map((row) => clamp01(row.reviewerScore));
  const mean = scores.reduce((acc, score) => acc + score, 0) / count;
  const squaredDeviationSum = scores.reduce((acc, score) => {
    const deviation = score - mean;
    return acc + deviation * deviation;
  }, 0);
  // Sample (n − 1) standard deviation; undefined for a single sample, so report 0.
  const stddev = count > 1 ? Math.sqrt(squaredDeviationSum / (count - 1)) : 0;

  // Channel-weighted score, with the weight map chosen per row.
  const weightedTotal = rows.reduce((acc, row) => {
    const truthReceived = row.externalTruthStatus === "received";
    const channelWeights = truthReceived
      ? opts.weights.withTruth
      : opts.weights.labelFree;
    const truthValue = row.externalTruthValue;
    const scoredRow =
      truthReceived &&
      typeof truthValue === "number" &&
      Number.isFinite(truthValue)
        ? {
            ...row,
            channelBreakdown: {
              ...row.channelBreakdown,
              external_truth: clamp01(truthValue / 100),
            },
          }
        : row;
    return acc + weightedScore(scoredRow, channelWeights);
  }, 0);
  const weightedMean = weightedTotal / count;

  // 95% interval under the normal approximation, clipped to the score range.
  const halfWidth = 1.96 * (stddev / Math.sqrt(count));

  return {
    ...identity,
    sampleCount: count,
    mean,
    stddev,
    ciLower: Math.max(0, mean - halfWidth),
    ciUpper: Math.min(1, mean + halfWidth),
    weightedMean,
    latestAt: rows[0]?.createdAt ?? null,
    oldestAt: rows[rows.length - 1]?.createdAt ?? null,
    sampleBounded: count >= opts.maxRows,
  };
}
|
|
| |
| |
| |
| |
| |
/**
 * Channel-weighted score of a single sample, in [0, 1].
 *
 * The result is the weighted average of the row's channel values (each clamped
 * to [0, 1], non-numeric values counting as 0) over the channels that have a
 * finite, strictly positive weight.
 *
 * Falls back to the clamped `reviewerScore` when:
 *  - the row has no channel breakdown or the weight map is empty — this case
 *    also records a `CONT-006` quarantine hit, fire-and-forget; or
 *  - no channel in the breakdown has a usable weight.
 *
 * @param row     Sample to score.
 * @param weights Weight per channel name; missing, non-finite and
 *                non-positive weights are ignored.
 * @returns The weighted score, or the clamped reviewer score on fallback.
 */
export function weightedScore(
  row: FitnessSampleRow,
  weights: Record<string, number>,
): number {
  const breakdown = row.channelBreakdown ?? {};
  const channelKeys = Object.keys(breakdown);
  const hasWeights = Object.keys(weights).length > 0;
  if (channelKeys.length === 0 || !hasWeights) {
    // Telemetry only: loaded lazily and never awaited. The rejection handler
    // keeps a failed import or a throwing recorder from surfacing as an
    // unhandled promise rejection out of a synchronous scoring call.
    void import("../quarantine/index.ts")
      .then((q) =>
        q.recordQuarantineHit("CONT-006", {
          gate: "weighted_score_fallback",
          site: "evolution/fitness.ts:weightedScore",
          hasBreakdown: channelKeys.length > 0,
          hasWeights,
        }),
      )
      .catch(() => {
        // Best-effort: scoring must not depend on quarantine bookkeeping.
      });
    return clamp01(row.reviewerScore);
  }
  let weightSum = 0;
  let valSum = 0;
  for (const k of channelKeys) {
    const w = weights[k];
    // NaN / Infinity would slip past a plain `w <= 0` test and turn the
    // result into NaN, so require a finite positive weight.
    if (typeof w !== "number" || !Number.isFinite(w) || w <= 0) continue;
    const v = clamp01(Number(breakdown[k]) || 0);
    weightSum += w;
    valSum += w * v;
  }
  if (weightSum === 0) return clamp01(row.reviewerScore);
  return valSum / weightSum;
}
|
|
/**
 * Clamp a number into [0, 1].
 *
 * Non-finite input (NaN and both infinities) maps to 0, as does anything
 * below 0; values above 1 map to 1; everything else is returned unchanged.
 */
function clamp01(n: number): number {
  if (!Number.isFinite(n) || n < 0) return 0;
  return n > 1 ? 1 : n;
}
|
|
| |
| |
| |
| |
/**
 * Count the metric rows recorded for a network version inside the look-back window.
 *
 * Unlike `rollingFitness`, the count is not filtered by problem class and is
 * not capped by a row limit.
 *
 * @param networkId  Network whose rows are counted.
 * @param versionId  Version of that network.
 * @param windowDays Window length in days (defaults to `DEFAULT_WINDOW_DAYS`).
 * @returns Number of `networkVersionMetrics` rows created at or after the cutoff.
 */
export async function countWindowSamples(
  networkId: string,
  versionId: string,
  windowDays: number = DEFAULT_WINDOW_DAYS,
): Promise<number> {
  const cutoff = new Date(Date.now() - windowDays * 24 * 60 * 60 * 1000);
  const inWindow = and(
    eq(networkVersionMetrics.networkId, networkId),
    eq(networkVersionMetrics.versionId, versionId),
    gte(networkVersionMetrics.createdAt, cutoff),
  );
  const [first] = await db
    .select({ c: sql<number>`count(*)::int` })
    .from(networkVersionMetrics)
    .where(inWindow);
  // Coerce defensively in case the driver hands the count back as a string.
  return Number(first?.c ?? 0);
}
|
|