| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| import { and, desc, eq } from "drizzle-orm"; |
| import { |
| db, |
| networkRegressionSamples, |
| networkVersions, |
| toolNetworks, |
| type NetworkRegressionSampleRow, |
| } from "@workspace/db"; |
| import { newId } from "../ids"; |
| import { runNetwork } from "../tool-network"; |
| import { gradeNetworkResult } from "../reviewer"; |
|
|
/**
 * Arguments accepted by `archiveSample` when recording a new regression
 * sample for a network.
 */
export interface ArchiveSampleInput {
  /** Id of the network the sample is stored under. */
  networkId: string;
  /** Problem-class path, stored on the row unchanged. */
  problemClassPath: string;
  /** Optional label; `archiveSample` stores `""` when it is omitted. */
  label?: string;
  /** Payload that is later replayed as the network input by the suite. */
  inputPayload: Record<string, unknown>;
  /**
   * Minimum score the sample must reach to pass. `archiveSample` defaults it
   * to 0.6 and clamps it into the range [0, 1].
   */
  expectedFloor?: number;
  /**
   * Optional structural expectations for the output. The shape check in this
   * file reads the `must_include_keys` and `must_include_paths` arrays;
   * `archiveSample` stores `{}` when it is omitted.
   */
  expectedShape?: Record<string, unknown>;
  /** Who created the sample; `archiveSample` stores `"system"` when omitted. */
  createdBy?: string;
}
|
|
/**
 * Persists a new regression sample and returns the row as stored.
 *
 * A fresh id is generated with the `"nrgs"` prefix. Omitted optional fields
 * fall back to: `label` -> `""`, `expectedFloor` -> `0.6` (then clamped into
 * [0, 1]), `expectedShape` -> `{}`, `createdBy` -> `"system"`. The sample is
 * always inserted with status `"active"`.
 *
 * @param input - Sample definition to archive.
 * @returns The inserted row, re-read from the database.
 * @throws Error if the row cannot be read back after the insert.
 */
export async function archiveSample(
  input: ArchiveSampleInput,
): Promise<NetworkRegressionSampleRow> {
  const id = newId("nrgs");
  await db.insert(networkRegressionSamples).values({
    id,
    networkId: input.networkId,
    problemClassPath: input.problemClassPath,
    label: input.label ?? "",
    inputPayload: input.inputPayload as Record<string, unknown>,
    expectedFloor: clamp01(input.expectedFloor ?? 0.6),
    expectedShape: (input.expectedShape ?? {}) as Record<string, unknown>,
    status: "active",
    createdBy: input.createdBy ?? "system",
  });

  // Re-read the row so the caller sees whatever the database stored.
  const rows = await db
    .select()
    .from(networkRegressionSamples)
    .where(eq(networkRegressionSamples.id, id))
    .limit(1);
  const row = rows[0];
  // Guard instead of a non-null assertion: a missing row must surface as an
  // error rather than as `undefined` typed as a row.
  if (!row) {
    throw new Error(`regression sample ${id} not found after insert`);
  }
  return row;
}
|
|
/**
 * Lists the regression samples of one network that have the given status,
 * ordered by `createdAt` descending.
 *
 * @param networkId - Network whose samples are listed.
 * @param status - Status to filter on; defaults to `"active"`.
 * @returns The matching sample rows.
 */
export async function listSamples(
  networkId: string,
  status: "active" | "archived" = "active",
): Promise<NetworkRegressionSampleRow[]> {
  const belongsToNetwork = eq(networkRegressionSamples.networkId, networkId);
  const hasStatus = eq(networkRegressionSamples.status, status);

  const rows = await db
    .select()
    .from(networkRegressionSamples)
    .where(and(belongsToNetwork, hasStatus))
    .orderBy(desc(networkRegressionSamples.createdAt));
  return rows;
}
|
|
/** Outcome of replaying a single regression sample against a variant. */
export interface SampleResult {
  /** Id of the sample row that was replayed. */
  sampleId: string;
  /** Label copied from the sample row. */
  label: string;
  /** Minimum score the sample had to reach, copied from the sample row. */
  expectedFloor: number;
  /** Score assigned by the grader; stays 0 when running or grading threw. */
  achievedScore: number;
  /**
   * True when the sample ran without error, scored at or above
   * `expectedFloor`, and passed the shape check.
   */
  passed: boolean;
  /** Message of the error thrown while running or grading, if any. */
  errorText?: string;
  /** Whether the output satisfied the sample's expected shape. */
  shapeOk: boolean;
  /** Human-readable shape violations; empty when `shapeOk` is true. */
  shapeIssues: string[];
}
|
|
/** Aggregate outcome of running a network's regression suite on a variant. */
export interface SuiteResult {
  /** Network whose active samples were replayed. */
  networkId: string;
  /** Variant the samples were run against. */
  variantId: string;
  /** Number of samples that were run (length of `results`). */
  totalSamples: number;
  /** Number of samples whose `passed` flag is true. */
  passed: number;
  /** Number of samples that did not pass (`totalSamples - passed`). */
  failed: number;
  /** Per-sample outcomes, in the order the samples were run. */
  results: SampleResult[];
  /**
   * True only when at least one sample ran and every sample passed; an empty
   * suite therefore reports false.
   */
  allPassed: boolean;
}
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Replays every active regression sample of a network against one variant
 * and reports, per sample and in aggregate, whether the variant holds up.
 *
 * Samples are run one after another. A sample passes when running and
 * grading it threw nothing, its score is at least the sample's
 * `expectedFloor`, and its output satisfies the sample's `expectedShape`.
 * An error in one sample is recorded on that sample and does not stop the
 * suite.
 *
 * @param networkId - Network whose active samples are replayed.
 * @param variantId - Variant passed to the runner as `variantOverride`.
 * @returns Per-sample results plus pass/fail counts. `allPassed` is false
 *   when the network has no active samples.
 * @throws Error if the network or the variant row does not exist.
 */
export async function runSuiteAgainstVariant(
  networkId: string,
  variantId: string,
): Promise<SuiteResult> {
  const network = (
    await db.select().from(toolNetworks).where(eq(toolNetworks.id, networkId)).limit(1)
  )[0];
  if (!network) throw new Error(`network ${networkId} not found`);

  // The variant row is loaded only to verify that it exists.
  // NOTE(review): nothing here checks that the variant belongs to `networkId`
  // — confirm whether the runner enforces that.
  const variant = (
    await db.select().from(networkVersions).where(eq(networkVersions.id, variantId)).limit(1)
  )[0];
  if (!variant) throw new Error(`variant ${variantId} not found`);

  const samples = await listSamples(networkId, "active");
  const results: SampleResult[] = [];
  for (const s of samples) {
    let achieved = 0;
    let errorText: string | undefined;
    let output: Record<string, unknown> | null = null;
    try {
      const r = await runNetwork({
        networkName: network.name,
        input: (s.inputPayload as Record<string, unknown>) ?? {},
        variantOverride: variantId,
        actor: "regression_suite",
      });
      output = (r.output as Record<string, unknown>) ?? null;
      achieved = gradeNetworkResult({
        network: { id: networkId, name: network.name },
        result: r,
      }).score;
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      // Errors can carry an empty message (e.g. `new Error()`); keep the
      // recorded text non-empty so the failure is never mistaken for success.
      errorText = message || "unknown error";
    }
    const shape = checkShape(
      output,
      (s.expectedShape as Record<string, unknown>) ?? {},
    );
    // Compare against `undefined` rather than truthiness so that any thrown
    // error fails the sample regardless of its message.
    const passed =
      errorText === undefined && achieved >= s.expectedFloor && shape.ok;
    results.push({
      sampleId: s.id,
      label: s.label,
      expectedFloor: s.expectedFloor,
      achievedScore: achieved,
      passed,
      errorText,
      shapeOk: shape.ok,
      shapeIssues: shape.issues,
    });
  }

  const passedN = results.filter((r) => r.passed).length;
  return {
    networkId,
    variantId,
    totalSamples: results.length,
    passed: passedN,
    failed: results.length - passedN,
    results,
    // An empty suite proves nothing, so it is not reported as all-passed.
    allPassed: results.length > 0 && passedN === results.length,
  };
}
|
|
/**
 * Checks a network output against a sample's expected shape.
 *
 * Two optional constraints are read from `expected`:
 * - `must_include_keys`: strings that must be own top-level keys of `output`
 *   (the value may be anything, including `null`);
 * - `must_include_paths`: dot-separated paths that must resolve to a value
 *   that is neither `null` nor `undefined`.
 * Non-string entries in either array are ignored. An empty or missing
 * `expected` always passes, even when `output` is null.
 *
 * @param output - Output produced by the network, or null if there is none.
 * @param expected - Shape constraints stored on the sample.
 * @returns `ok` plus one human-readable issue per violated constraint.
 */
function checkShape(
  output: Record<string, unknown> | null,
  expected: Record<string, unknown>,
): { ok: boolean; issues: string[] } {
  if (!expected || Object.keys(expected).length === 0) {
    return { ok: true, issues: [] };
  }
  if (!output) {
    return { ok: false, issues: ["output is null"] };
  }
  // The caller only casts the runner's output, so a primitive can reach this
  // point at runtime. Report it as a shape failure instead of letting a
  // property lookup throw and abort the whole suite.
  if (typeof output !== "object") {
    return { ok: false, issues: ["output is not an object"] };
  }

  // Keep only the string entries of a constraint list; anything else is ignored.
  const stringList = (value: unknown): string[] =>
    Array.isArray(value)
      ? value.filter((x): x is string => typeof x === "string")
      : [];

  const issues: string[] = [];
  for (const k of stringList(expected.must_include_keys)) {
    // Own-property check: inherited names such as "constructor" or
    // "toString" must not count as keys present in the output.
    if (!Object.prototype.hasOwnProperty.call(output, k)) {
      issues.push(`missing top-level key: ${k}`);
    }
  }
  for (const p of stringList(expected.must_include_paths)) {
    if (!resolveDotPath(output, p)) issues.push(`missing path: ${p}`);
  }
  return { ok: issues.length === 0, issues };
}
|
|
/**
 * Reports whether a dot-separated path resolves to a usable value.
 *
 * Each segment must be an own property of the current object or array
 * (array indices work as segments, e.g. `"items.0.id"`). Inherited
 * properties such as `constructor` or `toString` do not count.
 *
 * @param obj - Root value to walk; non-objects never resolve.
 * @param path - Dot-separated property path.
 * @returns True when every segment exists and the final value is neither
 *   `null` nor `undefined`.
 */
function resolveDotPath(obj: unknown, path: string): boolean {
  let cur: unknown = obj;
  for (const segment of path.split(".")) {
    if (
      cur === null ||
      typeof cur !== "object" ||
      !Object.prototype.hasOwnProperty.call(cur, segment)
    ) {
      return false;
    }
    cur = (cur as Record<string, unknown>)[segment];
  }
  return cur !== undefined && cur !== null;
}
|
|
/**
 * Restricts a number to the closed interval [0, 1].
 *
 * Non-finite input (NaN, Infinity, -Infinity) maps to 0, as does anything
 * below 0; anything above 1 maps to 1; values already in range are returned
 * unchanged.
 */
function clamp01(n: number): number {
  const unusable = !Number.isFinite(n) || n < 0;
  if (unusable) {
    return 0;
  }
  return n > 1 ? 1 : n;
}
|
|