// general-eval-card / lib/view-data.ts
// evijit's picture
// evijit (HF Staff)
// Commit: Precompute eval matrices for multi-metric + per-slice leaderboards
// 553b175
import "server-only"
import fs from "node:fs"
import path from "node:path"
import { getConnection } from "@/lib/duckdb"
import { fetchHeadline } from "@/lib/sidecars"
import {
EVALUATION_CATEGORIES,
type BenchmarkCard,
type BenchmarkEvaluation,
type CategoryType,
type EvaluationCardData,
type EvaluationResult,
type GenerationConfig,
type MetricConfig,
type ModelInfo,
type ModelEvaluationSummary,
type ModelVariantSummary,
type ScoreDetails,
type SourceData,
type SourceMetadata,
} from "@/lib/benchmark-schema"
import type { DeveloperListEntry } from "@/lib/backend-artifacts"
import type {
BenchmarkEvalListItem,
BenchmarkEvalSummary,
ModelResultForBenchmark,
} from "@/lib/eval-processing"
// Loosely-typed query row keyed by column name. This is the default element
// type returned by `readRows`; the `*FromRow` helpers below narrow individual
// fields at the point of use.
type Row = Record<string, any>
// Column projection interpolated into the `SELECT ... FROM models_view`
// queries that return model cards (`getModelCards`, `getModelCardsLite`,
// `getDeveloperSummaryById`).
const MODEL_CARD_COLUMNS = `
id, model_key, route_id, model_name, model_id, canonical_model_name, developer,
evaluations_count, benchmarks_count, variant_count,
categories, category_stats, latest_timestamp,
evaluator_count, evaluator_names, source_type_count, source_types,
evidence_count, missing_generation_config_count,
third_party_eval_count, independent_verification_ratio,
reproducibility_status, eval_libraries, latest_source_name,
params_billions, benchmark_names, score_summary,
reproducibility_summary, provenance_summary, comparability_summary,
top_scores, source_urls, detail_urls,
model_url, release_date,
architecture, params, inference_engine, inference_platform
`
// Column projection interpolated into `SELECT ... FROM evals_view` queries.
//
// The composite/family/slice taxonomy refactor (eval_card_backend
// notes/09-) replaced the legacy `composite_benchmark_key` /
// `composite_benchmark_name` columns with `composite_slug` /
// `composite_display_name`. The `family_id` / `family_display_name` /
// `is_slice` columns are the canonical identity surface; the legacy
// names are still exposed as SQL aliases for backward compatibility with
// consumers that haven't migrated yet. Alias mapping:
//   composite_benchmark_key  → composite_slug
//   composite_benchmark_name → composite_display_name
//     (the leaderboard, e.g. "wasp"/"WASP" — what the eval-detail
//     "Composite" label shows)
//   benchmark_family_name    → family_display_name
const EVAL_LIST_COLUMNS = `
evaluation_id, evaluation_name, canonical_display_name,
benchmark_id,
composite_slug, composite_display_name,
family_id, family_display_name, is_slice,
parent_benchmark_id,
composite_slug AS composite_benchmark_key,
composite_display_name AS composite_benchmark_name,
family_display_name AS benchmark_family_name,
category,
metric_config, models_count, evaluator_names, source_types,
latest_source_name, third_party_ratio,
missing_generation_config_count, best_model, worst_model,
avg_score, avg_score_norm, has_card, benchmark_card,
is_aggregated, aggregate_sources, tags,
metrics_count, metric_names, instance_data, top_score,
subtasks_count, is_summary_score, summary_eval_ids,
root_metrics, subtasks, leaderboard_metrics,
reproducibility_summary, provenance_summary, comparability_summary,
source_data
`
// Column projection for queries that join `eval_results_view r` to
// `evals_view e`: every result column (`r.*`) plus selected eval columns
// re-exposed under an `eval_` prefix. The row helpers below read these
// prefixed names (e.g. `row.eval_source_data`, `row.eval_metric_config`)
// alongside the un-prefixed result columns. The legacy composite/family
// names are aliased here as well (`eval_composite_benchmark_key`, etc.).
const CELL_JOIN_COLUMNS = `
r.*,
e.evaluation_name AS eval_evaluation_name,
e.canonical_display_name AS eval_canonical_display_name,
e.benchmark_id AS eval_benchmark_id,
e.composite_slug AS eval_composite_slug,
e.composite_display_name AS eval_composite_display_name,
e.family_id AS eval_family_id,
e.family_display_name AS eval_family_display_name,
e.is_slice AS eval_is_slice,
e.parent_benchmark_id AS eval_parent_benchmark_id,
e.composite_slug AS eval_composite_benchmark_key,
e.composite_display_name AS eval_composite_benchmark_name,
e.family_display_name AS eval_benchmark_family_name,
e.category AS eval_category,
e.metric_config AS eval_metric_config,
e.source_data AS eval_source_data,
e.benchmark_card AS eval_benchmark_card,
e.tags AS eval_tags,
e.is_summary_score AS eval_is_summary_score,
e.summary_eval_ids AS eval_summary_eval_ids
`
// Recursively converts a value returned by the DuckDB client into plain
// JSON-friendly data:
//   - bigint            → number
//   - Date              → ISO-8601 string
//   - Map               → plain object (keys stringified, values normalized)
//   - array             → array of normalized elements
//   - DuckDB wrappers   → recognised by constructor name (struct, list/array,
//                         map, decimal); any other `DuckDB*` object is
//                         rendered through its `toString()`
//   - any other object  → shallow-copied with every value normalized
// Primitives other than bigint (and null/undefined) are returned unchanged.
function normalizeDuckDBValue(value: unknown): unknown {
  switch (typeof value) {
    case "bigint":
      return Number(value)
    case "object":
      break
    default:
      return value
  }
  if (value === null) return value

  if (value instanceof Date) return value.toISOString()

  if (value instanceof Map) {
    const plain: Array<[string, unknown]> = []
    for (const [mapKey, mapValue] of value.entries()) {
      plain.push([String(mapKey), normalizeDuckDBValue(mapValue)])
    }
    return Object.fromEntries(plain)
  }

  if (Array.isArray(value)) {
    return value.map((element) => normalizeDuckDBValue(element))
  }

  const wrapper = value as {
    constructor?: { name?: string }
    entries?: unknown
    items?: unknown
    scale?: unknown
    value?: unknown
    toString?: () => string
  }
  const typeName = wrapper.constructor?.name ?? ""

  // A wrapper whose payload does not have the expected shape deliberately
  // falls out of the switch into the generic `DuckDB*` / plain-object handling.
  switch (typeName) {
    case "DuckDBStructValue":
      if (wrapper.entries && typeof wrapper.entries === "object") {
        return normalizeDuckDBValue(wrapper.entries)
      }
      break
    case "DuckDBListValue":
    case "DuckDBArrayValue":
      if (Array.isArray(wrapper.items)) {
        return wrapper.items.map((item) => normalizeDuckDBValue(item))
      }
      break
    case "DuckDBMapValue":
      if (Array.isArray(wrapper.entries)) {
        const pairs = wrapper.entries.map((entry) => {
          const { key, value: entryValue } = entry as { key: unknown; value: unknown }
          return [String(key), normalizeDuckDBValue(entryValue)]
        })
        return Object.fromEntries(pairs)
      }
      break
    case "DuckDBDecimalValue":
      if (typeof wrapper.toString === "function") {
        return Number(wrapper.toString())
      }
      break
  }

  if (typeName.startsWith("DuckDB") && typeof wrapper.toString === "function") {
    return wrapper.toString()
  }

  const copy: Array<[string, unknown]> = []
  for (const [key, objectValue] of Object.entries(value)) {
    copy.push([key, normalizeDuckDBValue(objectValue)])
  }
  return Object.fromEntries(copy)
}
// Runs `sql` on the connection from `getConnection()` and returns every row
// as an object passed through `normalizeDuckDBValue`. `params` are forwarded
// as bind values only when at least one is supplied.
async function readRows<T = Row>(sql: string, params: unknown[] = []): Promise<T[]> {
  const connection = await getConnection()
  const hasBindings = params.length > 0
  const reader = await (hasBindings
    ? connection.runAndReadAll(sql, params as any[])
    : connection.runAndReadAll(sql))
  const normalized: T[] = []
  for (const rawRow of reader.getRowObjects()) {
    normalized.push(normalizeDuckDBValue(rawRow) as T)
  }
  return normalized
}
// Coerces `value` to a finite number.
//   - finite numbers are returned as-is
//   - bigints are converted with `Number()`
//   - non-blank strings are parsed with `Number()` and accepted when finite
// Everything else (including NaN/Infinity) yields `fallback`.
function asNumber(value: unknown, fallback = 0) {
  switch (typeof value) {
    case "number":
      return Number.isFinite(value) ? value : fallback
    case "bigint":
      return Number(value)
    case "string": {
      if (value.trim() === "") return fallback
      const numeric = Number(value)
      return Number.isFinite(numeric) ? numeric : fallback
    }
    default:
      return fallback
  }
}
// Like `asNumber`, but returns `undefined` instead of a fallback when the
// value is null/undefined or cannot be read as a finite number.
function optionalNumber(value: unknown) {
  if (value === null || value === undefined) return undefined
  // NaN as the fallback lets the finiteness check below detect "no usable number".
  const numeric = asNumber(value, Number.NaN)
  if (Number.isFinite(numeric)) return numeric
  return undefined
}
// Returns `value` when it is a string (including the empty string),
// otherwise `fallback`.
function asString(value: unknown, fallback = "") {
  if (typeof value === "string") return value
  return fallback
}
// Returns `value` only when it is a non-empty string; otherwise `undefined`.
function optionalString(value: unknown) {
  if (typeof value !== "string" || value.length === 0) return undefined
  return value
}
// Returns `value` itself (not a copy) when it is an array, otherwise a new
// empty array. Element types are asserted, not checked.
function asArray<T>(value: unknown): T[] {
  if (!Array.isArray(value)) return []
  return value as T[]
}
// Maps an arbitrary value onto a known evaluation category: values listed in
// `EVALUATION_CATEGORIES` pass through, anything else becomes "General".
function normalizeCategory(value: unknown): CategoryType {
  const candidate = value as CategoryType
  if (EVALUATION_CATEGORIES.includes(candidate)) return candidate
  return "General"
}
// Builds a fresh bucket map holding one empty evaluation list for every
// entry in `EVALUATION_CATEGORIES`.
function emptyEvaluationsByCategory(): Record<CategoryType, BenchmarkEvaluation[]> {
  const buckets = {} as Record<CategoryType, BenchmarkEvaluation[]>
  for (const category of EVALUATION_CATEGORIES) {
    buckets[category] = []
  }
  return buckets
}
// Returns the row's `source_metadata` object when present. Otherwise
// synthesises a minimal record whose organisation name comes from
// `latest_source_name` (or "Unknown").
function sourceMetadataFromRow(row: Row): SourceMetadata {
  const embedded = row.source_metadata
  if (!embedded || typeof embedded !== "object") {
    return {
      source_type: "documentation",
      source_organization_name: asString(row.latest_source_name, "Unknown"),
      evaluator_relationship: "other",
    }
  }
  return embedded as SourceMetadata
}
// Returns the row's `source_data`, falling back to the joined
// `eval_source_data`. When neither is set, a stub naming the dataset after
// the evaluation name / benchmark id is returned instead.
function sourceDataFromRow(row: Row): BenchmarkEvaluation["source_data"] {
  const existing = row.source_data ?? row.eval_source_data
  if (!existing) {
    const datasetName = asString(
      row.eval_evaluation_name ?? row.evaluation_name ?? row.benchmark_id,
      "Unknown dataset"
    )
    return { dataset_name: datasetName } satisfies SourceData
  }
  return existing as BenchmarkEvaluation["source_data"]
}
// Builds the score details for a result row: the row's `score_details`
// object (when it is one) with `score` always overwritten by a numeric value
// taken from `score_details.score`, then `row.score`, then 0.
function scoreDetailsFromRow(row: Row): ScoreDetails {
  let details: Partial<ScoreDetails> = {}
  if (row.score_details && typeof row.score_details === "object") {
    details = row.score_details as Partial<ScoreDetails>
  }
  const score = asNumber(details.score ?? row.score)
  return { ...details, score } as ScoreDetails
}
// Derives a MetricConfig from a result row, combining the row's own metric
// columns with the `metric_config` / `eval_metric_config` object.
// Precedence differs per field and is preserved as-is: description and
// min/max prefer the config object, while `lower_is_better` and `unit`
// prefer the row's columns. Any score type other than "binary"/"discrete"
// is reported as "continuous".
function metricConfigFromRow(row: Row): MetricConfig {
  const config: Partial<MetricConfig> = row.metric_config ?? row.eval_metric_config ?? {}

  let scoreType: MetricConfig["score_type"] = "continuous"
  if (config.score_type === "binary" || config.score_type === "discrete") {
    scoreType = config.score_type
  }

  const description =
    config.evaluation_description ??
    row.metric_description ??
    row.metric_display_name ??
    row.eval_evaluation_name ??
    row.evaluation_name
  const lowerIsBetter = row.lower_is_better ?? config.lower_is_better ?? false

  return {
    evaluation_description: asString(description, ""),
    lower_is_better: Boolean(lowerIsBetter),
    score_type: scoreType,
    min_score: optionalNumber(config.min_score ?? row.min_score),
    max_score: optionalNumber(config.max_score ?? row.max_score),
    unit: optionalString(row.metric_unit ?? config.unit),
  }
}
// Assembles a ModelInfo from flat model columns. Name and id each fall back
// through several candidate columns before settling on a placeholder;
// optional text fields are omitted when empty or not strings.
function modelInfoFromModelRow(row: Row): ModelInfo {
  const displayName = row.model_name ?? row.model_family_name ?? row.model_id ?? row.model_key
  const identifier = row.model_key ?? row.model_id ?? row.id ?? row.route_id

  const info: ModelInfo = {
    name: asString(displayName, "Unknown model"),
    id: asString(identifier, "unknown-model"),
    developer: optionalString(row.developer),
    inference_platform: optionalString(row.inference_platform),
    inference_engine: optionalString(row.inference_engine),
    architecture: optionalString(row.architecture),
    parameter_count: optionalString(row.params),
    release_date: optionalString(row.release_date),
    model_url: optionalString(row.model_url),
    additional_details: {
      params_billions: row.params_billions,
    },
    modalities: {
      input: asArray<string>(row.input_modalities),
      output: asArray<string>(row.output_modalities),
    },
  }
  return info
}
// Converts one joined result row (one metric score for one model) into an
// EvaluationResult. The metric display name doubles as both `display_name`
// and `canonical_display_name`; annotations are wrapped in `evalcards` only
// when the row carries them.
function resultFromCell(row: Row): EvaluationResult {
  const metricLabel = optionalString(row.metric_display_name)
  const annotations = row.evalcards_annotations

  const result: EvaluationResult = {
    evaluation_name: asString(row.metric_display_name ?? row.eval_evaluation_name ?? row.metric_id, "Score"),
    display_name: metricLabel,
    canonical_display_name: metricLabel,
    metric_summary_id: optionalString(row.metric_summary_id),
    metric_key: optionalString(row.metric_id),
    evaluation_timestamp: asString(row.evaluation_timestamp, ""),
    source_data: sourceDataFromRow(row),
    metric_config: metricConfigFromRow(row),
    score_details: scoreDetailsFromRow(row),
    generation_config: row.generation_config as GenerationConfig | undefined,
    detailed_evaluation_results_url: optionalString(row.instance_file_path),
    evalcards: annotations ? { annotations } : undefined,
  }
  return result
}
// Converts one joined result row into the per-model entry shown on a
// benchmark page. Uses the row's `model_info` when present, otherwise
// rebuilds it from the flat model columns.
function reshapeCellToModelResult(row: Row): ModelResultForBenchmark {
  type AggregateComponent = NonNullable<ModelResultForBenchmark["aggregate_components"]>[number]

  const scoreDetails = scoreDetailsFromRow(row)
  const modelInfo = (row.model_info ?? modelInfoFromModelRow(row)) as ModelInfo

  const modelResult: ModelResultForBenchmark = {
    model_info: modelInfo,
    model_route_id: optionalString(row.model_route_id),
    score: scoreDetails.score,
    score_details: scoreDetails,
    evaluation_timestamp: asString(row.evaluation_timestamp, ""),
    source_metadata: sourceMetadataFromRow(row),
    source_data: sourceDataFromRow(row),
    source_record_url: optionalString(row.source_record_url),
    aggregate_components: asArray<AggregateComponent>(row.aggregate_components),
    result: resultFromCell(row),
  }
  return modelResult
}
// Converts one joined result row into a BenchmarkEvaluation that carries
// exactly one EvaluationResult. Benchmark identity fields come from the
// `eval_`-prefixed (joined) columns.
function reshapeCellToBenchmarkEvaluation(row: Row): BenchmarkEvaluation {
  const evaluationName = optionalString(row.eval_evaluation_name)
  const timestamp = asString(row.evaluation_timestamp, "")
  const singleResult = resultFromCell(row)
  const modelInfo = (row.model_info ?? modelInfoFromModelRow(row)) as ModelInfo

  const evaluation: BenchmarkEvaluation = {
    schema_version: "1.0",
    eval_summary_id: optionalString(row.evaluation_id),
    evaluation_id: asString(row.evaluation_id ?? row.benchmark_id, "unknown-evaluation"),
    retrieved_timestamp: timestamp,
    benchmark: optionalString(row.eval_evaluation_name ?? row.benchmark_id),
    display_name: evaluationName,
    canonical_display_name: optionalString(row.eval_canonical_display_name),
    category: normalizeCategory(row.eval_category ?? row.category),
    family_id: optionalString(row.eval_family_id),
    benchmark_family_name: optionalString(row.eval_family_display_name),
    parent_benchmark_id: optionalString(row.eval_parent_benchmark_id),
    benchmark_parent_name: optionalString(row.eval_composite_benchmark_name),
    benchmark_leaf_name: evaluationName,
    is_slice: Boolean(row.eval_is_slice),
    is_summary_score: Boolean(row.eval_is_summary_score ?? row.is_summary_score),
    source_data: sourceDataFromRow(row),
    source_metadata: sourceMetadataFromRow(row),
    eval_library: row.eval_library,
    model_info: modelInfo,
    generation_config: row.generation_config,
    evaluation_results: [singleResult],
  }
  return evaluation
}
// Combines a `models_view` row with that model's result rows into the
// summary used by the model page.
//   - every cell row becomes a BenchmarkEvaluation bucketed by category
//   - covered categories come from the model row when it lists known ones,
//     otherwise from whichever buckets ended up non-empty
//   - each entry of `modelRow.variants` becomes a variant summary that starts
//     from the shared core, is overlaid with the variant's own fields, and
//     then has its identity/count fields normalised with model-level fallbacks
function modelSummaryFromRows(modelRow: Row, cellRows: Row[]): ModelEvaluationSummary {
  const evaluationsByCategory = emptyEvaluationsByCategory()
  cellRows.forEach((cellRow) => {
    const evaluation = reshapeCellToBenchmarkEvaluation(cellRow)
    evaluationsByCategory[normalizeCategory(evaluation.category)].push(evaluation)
  })

  const modelInfo = (modelRow.model_info ?? modelInfoFromModelRow(modelRow)) as ModelInfo
  const totalEvaluations = asNumber(modelRow.total_evaluations ?? modelRow.evaluations_count)
  const lastUpdated = asString(modelRow.last_updated ?? modelRow.latest_timestamp, "")

  const declaredCategories = asArray<CategoryType>(modelRow.categories).filter((category) =>
    EVALUATION_CATEGORIES.includes(category)
  )
  const categoriesCovered = declaredCategories.length > 0
    ? declaredCategories
    : EVALUATION_CATEGORIES.filter((category) => evaluationsByCategory[category].length > 0)

  // Fields shared by the model-level summary and by every variant.
  const core = {
    model_info: modelInfo,
    evaluations_by_category: evaluationsByCategory,
    total_evaluations: totalEvaluations,
    last_updated: lastUpdated,
    categories_covered: categoriesCovered,
    reproducibility_summary: modelRow.reproducibility_summary,
    provenance_summary: modelRow.provenance_summary,
    comparability_summary: modelRow.comparability_summary,
  }

  const toVariantSummary = (variant: Row, index: number) => {
    const placeholderKey = `variant-${index}`
    const variantCategories = asArray<CategoryType>(variant.categories_covered)
    return {
      ...core,
      ...variant,
      variant_id: asString(variant.variant_id ?? variant.variant_key, placeholderKey),
      variant_key: asString(variant.variant_key, placeholderKey),
      variant_label: asString(variant.variant_label ?? variant.variant_display_name, "Default"),
      variant_display_name: asString(
        variant.variant_display_name ?? variant.variant_label ?? modelRow.model_name,
        modelRow.model_name
      ),
      raw_model_ids: asArray<string>(variant.raw_model_ids),
      family_id: asString(variant.family_id ?? modelRow.model_family_id, modelRow.model_family_id),
      family_name: asString(variant.family_name ?? modelRow.model_family_name, modelRow.model_family_name),
      total_evaluations: asNumber(variant.total_evaluations ?? totalEvaluations),
      last_updated: asString(variant.last_updated ?? lastUpdated, lastUpdated),
      categories_covered: variantCategories.length > 0 ? variantCategories : core.categories_covered,
      model_info: {
        ...modelInfo,
        name: asString(variant.variant_display_name ?? variant.variant_label ?? modelInfo.name, modelInfo.name),
      },
    }
  }
  const variants = asArray<Row>(modelRow.variants).map(toVariantSummary) as ModelVariantSummary[]

  const primaryId = modelRow.model_key ?? modelRow.model_id
  const rawModelIds = asArray<string>(modelRow.raw_model_ids)

  return {
    ...core,
    model_family_id: asString(modelRow.model_family_id ?? modelRow.model_key ?? modelRow.model_id, primaryId),
    model_route_id: asString(modelRow.model_route_id ?? modelRow.route_id, modelRow.route_id),
    model_family_name: asString(modelRow.model_family_name ?? modelRow.model_name, modelRow.model_name),
    // With no explicit raw ids, fall back to the single addressable id (if any).
    raw_model_ids: rawModelIds.length > 0 ? rawModelIds : [asString(primaryId, "")].filter(Boolean),
    variants,
  }
}
// Fetches every scored result row for one model, joined with its eval
// metadata and ordered by category, then percentile (highest first).
async function getModelEvaluationRows(modelKey: string): Promise<Row[]> {
  // model_key is the producer's addressable identifier — non-null for both
  // resolved and unresolved models (the latter fall back to the raw source
  // name). Querying by model_id alone would silently miss unresolved models.
  const sql = `SELECT ${CELL_JOIN_COLUMNS}
FROM eval_results_view r
LEFT JOIN evals_view e ON r.evaluation_id = e.evaluation_id
WHERE r.model_key = ?
AND r.score IS NOT NULL
ORDER BY r.category, r.percentile DESC NULLS LAST`
  const rows = await readRows<Row>(sql, [modelKey])
  return rows
}
// Returns every model card from `models_view`, most recently updated first
// (rows without a timestamp last).
export async function getModelCards(): Promise<EvaluationCardData[]> {
  const sql = `SELECT ${MODEL_CARD_COLUMNS}
FROM models_view
ORDER BY latest_timestamp DESC NULLS LAST`
  const cards = await readRows<EvaluationCardData>(sql)
  return cards
}
// Returns every model card from `models_view`, ordered by benchmark count,
// then evaluation count (both descending, NULLs last), then model name.
// Selects the same columns as `getModelCards`; only the ordering differs.
export async function getModelCardsLite(): Promise<EvaluationCardData[]> {
  const sql = `SELECT ${MODEL_CARD_COLUMNS}
FROM models_view
ORDER BY benchmarks_count DESC NULLS LAST, evaluations_count DESC NULLS LAST, model_name ASC`
  const cards = await readRows<EvaluationCardData>(sql)
  return cards
}
// Loads the list of evaluations (sorted by name) together with the total
// number of rows in `models_view`. Both queries are started before either
// is awaited.
export async function getEvalListData(): Promise<{
  evals: BenchmarkEvalListItem[]
  totalModels: number
}> {
  const evalsQuery = readRows<BenchmarkEvalListItem>(
    `SELECT ${EVAL_LIST_COLUMNS}
FROM evals_view
ORDER BY evaluation_name ASC`
  )
  const countQuery = readRows<{ n: number }>("SELECT COUNT(*) AS n FROM models_view")

  const [evals, countRows] = await Promise.all([evalsQuery, countQuery])
  const totalModels = asNumber(countRows[0]?.n)
  return { evals, totalModels }
}
// Currently an alias of `getEvalListData`: returns the same evals and model
// count without any trimming.
export async function getEvalListLiteData(): Promise<{
  evals: BenchmarkEvalListItem[]
  totalModels: number
}> {
  const data = await getEvalListData()
  return data
}
// Returns only the evaluation list from `getEvalListData`, dropping the
// model count.
export async function getEvalList() {
  const listData = await getEvalListData()
  return listData.evals
}
// Loads the model cards and the evaluation list concurrently for the
// dashboard.
export async function getDashboardData() {
  const modelsQuery = getModelCards()
  const evalsQuery = getEvalList()
  const [models, evals] = await Promise.all([modelsQuery, evalsQuery])
  return { models, evals }
}
// Resolves a model route id to its full evaluation summary, or `null` when
// no row in `models_view` matches.
export async function getModelSummaryById(routeId: string): Promise<ModelEvaluationSummary | null> {
  // Lookups use the addressable identifier (`model_key`/`route_id`/
  // `model_route_id`/`model_family_id`) so unresolved models — whose
  // `model_id` is NULL — are still findable. `model_id` is kept in the
  // OR chain as a back-compat fallback for old links.
  //
  // Three slug shapes flow into this route handler:
  // - URL-encoded form (canonical, e.g. `google%2Fgemini-3-pro`) —
  //   Next.js already decodes path params before they reach here, so
  //   `routeId` lands as `google/gemini-3-pro`.
  // - Plain canonical id with `/` (same shape after Next.js decode).
  // - Legacy `__`-separated form (e.g. `google__gemini-3-pro`) — old
  //   `getModelFamilyRouteId` emitted this; bookmarks may still use
  //   it. Convert `__` → `/` for lookup.
  // (A route id without `__` comes back from the replace unchanged.)
  const slashForm = routeId.replace(/__/g, "/")

  const lookupSql = `SELECT *
FROM models_view
WHERE model_key = ? OR route_id = ? OR model_route_id = ? OR model_family_id = ? OR model_id = ?
OR model_key = ? OR model_id = ?
LIMIT 1`
  const bindings = [routeId, routeId, routeId, routeId, routeId, slashForm, slashForm]

  const [modelRow] = await readRows<Row>(lookupSql, bindings)
  if (!modelRow) return null

  const modelKey = asString(modelRow.model_key ?? modelRow.model_id, routeId)
  const cellRows = await getModelEvaluationRows(modelKey)
  return modelSummaryFromRows(modelRow, cellRows)
}
// Build-time precomputed multi-metric / per-slice matrix produced by
// `scripts/build-eval-matrices.mjs`. Read once on first request and
// cached in module scope — the file is image-baked so this is a single
// disk read per server start. When the file is missing (local dev where
// nobody ran `pnpm build-eval-matrices` yet), we fall through and the
// summary degrades to single-metric exactly like before.
type MatrixEntry = {
  leaderboard_rows: Array<{ model_route_id: string; values: Record<string, number | null> }>
  subtask_metrics: Array<Record<string, unknown>>
}

// `undefined` = not loaded yet; `null` = a load was attempted and no usable
// matrix is available (so it is not retried on every request).
let evalMatrixCache: Record<string, MatrixEntry> | null | undefined

// Returns the per-evaluation matrix map from `data/eval-matrices.json`
// (resolved against the process working directory), or `null` when it is
// unavailable. The outcome of the first call is cached for the lifetime of
// the module.
//
// A missing file is the expected "not built yet" case and stays silent. Any
// other failure (unreadable file, malformed JSON, a non-object payload) still
// degrades to `null`, but is logged so that a broken build artifact is not
// mistaken for an absent one.
function loadEvalMatrices(): Record<string, MatrixEntry> | null {
  if (evalMatrixCache !== undefined) return evalMatrixCache

  const matrixPath = path.join(process.cwd(), "data", "eval-matrices.json")
  try {
    const text = fs.readFileSync(matrixPath, "utf8")
    const parsed = JSON.parse(text) as { evals?: Record<string, MatrixEntry> }
    evalMatrixCache = parsed.evals ?? {}
  } catch (error) {
    const isMissingFile =
      typeof error === "object" &&
      error !== null &&
      (error as { code?: unknown }).code === "ENOENT"
    if (!isMissingFile) {
      console.warn(`[view-data] Ignoring unusable eval matrix file ${matrixPath}:`, error)
    }
    evalMatrixCache = null
  }
  return evalMatrixCache
}
// Splices one precomputed matrix entry into `summary` (mutating it):
//   - `leaderboard_rows`: one row per matrix entry whose `model_route_id`
//     also appears in `summary.model_results`; that result supplies the
//     model/source fields and the matrix supplies the per-metric `values`.
//     Matrix rows with no matching model result are dropped, because there
//     is no base row to take model info from. The field is left untouched
//     when nothing matches.
//   - `leaderboard_metrics`: the existing metrics followed by each subtask
//     metric whose string `column_key` has not been seen yet.
function applyEvalMatrix(summary: BenchmarkEvalSummary, matrix: MatrixEntry): void {
  // The matrix file is only type-cast when it is loaded, never validated, so
  // tolerate missing fields rather than throwing while rendering a page.
  const matrixRows = Array.isArray(matrix.leaderboard_rows) ? matrix.leaderboard_rows : []
  const subtaskMetrics = Array.isArray(matrix.subtask_metrics) ? matrix.subtask_metrics : []

  const baseRowByRoute = new Map<string, ModelResultForBenchmark>()
  for (const result of summary.model_results) {
    if (result.model_route_id) {
      baseRowByRoute.set(result.model_route_id, result)
    }
  }

  const leaderboardRows = matrixRows
    .map((row) => {
      const base = baseRowByRoute.get(row.model_route_id)
      if (!base) return null
      const values: Record<string, number | null> = row.values ?? {}
      return {
        model_info: base.model_info,
        model_route_id: row.model_route_id,
        evaluation_timestamp: base.evaluation_timestamp,
        source_metadata: base.source_metadata,
        source_data: base.source_data,
        values,
        // Number of metrics for which this model has a usable numeric score.
        metrics_present: Object.values(values).filter(
          (v): v is number => typeof v === "number" && Number.isFinite(v),
        ).length,
      }
    })
    .filter((row): row is NonNullable<typeof row> => row !== null)
  if (leaderboardRows.length > 0) {
    summary.leaderboard_rows = leaderboardRows
  }

  if (subtaskMetrics.length > 0) {
    const merged = [...((summary.leaderboard_metrics ?? []) as Array<{ column_key: string }>)]
    const seen = new Set(merged.map((metric) => metric.column_key))
    for (const metric of subtaskMetrics) {
      const columnKey = metric.column_key
      if (typeof columnKey !== "string" || seen.has(columnKey)) continue
      // Record the key as we go so duplicates *within* the matrix are
      // collapsed too, not only clashes with the pre-existing metrics.
      seen.add(columnKey)
      merged.push(metric as Record<string, unknown> & { column_key: string })
    }
    summary.leaderboard_metrics =
      merged as unknown as BenchmarkEvalSummary["leaderboard_metrics"]
  }
}

// Builds the full summary for one evaluation: the eval's metadata row, one
// model result per scored cell, and — when a precomputed matrix exists for
// this eval — multi-metric leaderboard rows and subtask metrics.
// Returns `null` when `evals_view` has no row for `evalId`.
export async function getEvalSummaryById(evalId: string): Promise<BenchmarkEvalSummary | null> {
  // Use the same aliased projection as EVAL_LIST_COLUMNS so the legacy
  // `composite_benchmark_*` / `benchmark_family_*` consumer fields are
  // populated. A bare `SELECT *` returns the raw v2 column names which
  // leaves the legacy fields NULL on the deserialised summary.
  const evalRows = await readRows<Row>(
    `SELECT ${EVAL_LIST_COLUMNS}
FROM evals_view
WHERE evaluation_id = ?
LIMIT 1`,
    [evalId]
  )
  const evalRow = evalRows[0]
  if (!evalRow) return null

  // Prefer cells for the eval's primary metric ...
  let cellRows = await readRows<Row>(
    `SELECT ${CELL_JOIN_COLUMNS}
FROM eval_results_view r
LEFT JOIN evals_view e ON r.evaluation_id = e.evaluation_id
WHERE r.evaluation_id = ?
AND r.metric_id = (SELECT primary_metric_id FROM evals_view WHERE evaluation_id = ?)
AND r.score IS NOT NULL
ORDER BY r.position ASC NULLS LAST`,
    [evalId, evalId]
  )
  // ... and fall back to every scored cell when that selects nothing.
  if (cellRows.length === 0) {
    cellRows = await readRows<Row>(
      `SELECT ${CELL_JOIN_COLUMNS}
FROM eval_results_view r
LEFT JOIN evals_view e ON r.evaluation_id = e.evaluation_id
WHERE r.evaluation_id = ?
AND r.score IS NOT NULL
ORDER BY r.position ASC NULLS LAST`,
      [evalId]
    )
  }

  const summary = {
    ...evalRow,
    model_results: cellRows.map((cellRow) => reshapeCellToModelResult(cellRow)),
  } as BenchmarkEvalSummary

  // Splice in precomputed multi-metric leaderboard_rows and subtask
  // leaderboard_metrics from data/eval-matrices.json, when available.
  const matrix = loadEvalMatrices()?.[evalId]
  if (matrix) {
    applyEvalMatrix(summary, matrix)
  }
  return summary
}
// Returns the developers listed in the headline sidecar, sorted by developer
// name with `localeCompare`. The headline's own array is copied, not mutated.
export async function getDeveloperList(): Promise<DeveloperListEntry[]> {
  const headline = await fetchHeadline()
  const sorted = [...(headline.developers ?? [])]
  sorted.sort((left, right) => left.developer.localeCompare(right.developer))
  return sorted
}
// Looks up a developer by route id and returns its list entry together with
// that developer's model cards, or `null` when the route id is unknown.
// Models are matched on the developer name, not on the route id.
export async function getDeveloperSummaryById(routeId: string) {
  const developers = await getDeveloperList()
  const developer = developers.find((entry) => entry.route_id === routeId)
  if (!developer) return null

  const sql = `SELECT ${MODEL_CARD_COLUMNS}
FROM models_view
WHERE developer = ?
ORDER BY benchmarks_count DESC NULLS LAST, evaluations_count DESC NULLS LAST, model_name ASC`
  const models = await readRows<EvaluationCardData>(sql, [developer.developer])

  return { ...developer, models }
}
// Builds a lookup from several identifiers to a benchmark card. Each eval
// row that has a card registers it under its evaluation id, evaluation name,
// `composite_benchmark_key`, benchmark id and the card's own
// `benchmark_details.name` (skipping any that are empty or not strings).
// When two rows share a key, the row processed later wins.
//
// NOTE(review): here `composite_benchmark_key` is an alias of `family_id`,
// whereas EVAL_LIST_COLUMNS aliases the same name to `composite_slug` —
// confirm which one callers of this map expect.
export async function getBenchmarkMetadataMap(): Promise<Record<string, BenchmarkCard>> {
  const sql = `SELECT evaluation_id, evaluation_name,
family_id AS composite_benchmark_key,
benchmark_id,
benchmark_card
FROM evals_view
WHERE benchmark_card IS NOT NULL`
  const rows = await readRows<Row>(sql)

  const cardsByKey: Record<string, BenchmarkCard> = {}
  rows.forEach((row) => {
    const card = row.benchmark_card as BenchmarkCard | null | undefined
    if (!card) return

    const candidates: unknown[] = [
      row.evaluation_id,
      row.evaluation_name,
      row.composite_benchmark_key,
      row.benchmark_id,
      card.benchmark_details?.name,
    ]
    for (const candidate of candidates) {
      if (typeof candidate === "string" && candidate.length > 0) {
        cardsByKey[candidate] = card
      }
    }
  })
  return cardsByKey
}