Spaces:
Running on CPU Spr
view-data: wrap JSON-typed parquet columns in to_json() to dodge binding crash
Browse files
The deployed Space surfaced "Invalid Error: don't know what type:"
on every eval-summary / model-summary query because the upstream
parquet schema landed JSON-typed fields nested inside structs —
specifically:
- eval_results_view.evalcards_annotations
(.variant_divergence.differing_fields[].values JSON,
.cross_party_divergence.differing_fields[].values JSON)
- evals_view.benchmark_card (.flagged_fields JSON)
The DuckDB Node binding can't materialise nested JSON, so SELECT-ing
those columns row-by-row crashes the whole query. Wrap the two
offending columns with to_json() in every SELECT (CELL_JOIN_COLUMNS,
EVAL_LIST_COLUMNS, getBenchmarkMetadataMap) so the binding sees a
single VARCHAR per row, and add a parseMaybeJson helper that
JSON.parses the string back to its original shape downstream. Local
prod build with DATA_BACKEND=v2 now serves /api/model-summary,
/api/eval-summary, and /api/eval-list at 200.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
- lib/view-data.ts +50 -8
|
@@ -68,7 +68,7 @@ const EVAL_LIST_COLUMNS = `
|
|
| 68 |
metric_config, models_count, evaluator_names, source_types,
|
| 69 |
latest_source_name, third_party_ratio,
|
| 70 |
missing_generation_config_count, best_model, worst_model,
|
| 71 |
-
avg_score, avg_score_norm, has_card, benchmark_card,
|
| 72 |
is_aggregated, aggregate_sources, tags,
|
| 73 |
metrics_count, metric_names, instance_data, top_score,
|
| 74 |
subtasks_count, is_summary_score, summary_eval_ids,
|
|
@@ -77,8 +77,18 @@ const EVAL_LIST_COLUMNS = `
|
|
| 77 |
source_data
|
| 78 |
`
|
| 79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
const CELL_JOIN_COLUMNS = `
|
| 81 |
-
r.*,
|
| 82 |
e.evaluation_name AS eval_evaluation_name,
|
| 83 |
e.canonical_display_name AS eval_canonical_display_name,
|
| 84 |
e.benchmark_id AS eval_benchmark_id,
|
|
@@ -94,7 +104,7 @@ const CELL_JOIN_COLUMNS = `
|
|
| 94 |
e.category AS eval_category,
|
| 95 |
e.metric_config AS eval_metric_config,
|
| 96 |
e.source_data AS eval_source_data,
|
| 97 |
-
e.benchmark_card AS eval_benchmark_card,
|
| 98 |
e.tags AS eval_tags,
|
| 99 |
e.is_summary_score AS eval_is_summary_score,
|
| 100 |
e.summary_eval_ids AS eval_summary_eval_ids
|
|
@@ -210,6 +220,23 @@ function optionalString(value: unknown) {
|
|
| 210 |
return typeof value === "string" && value.length > 0 ? value : undefined
|
| 211 |
}
|
| 212 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
/**
 * Coerces an arbitrary value to an array.
 *
 * @param value - Value of unknown shape.
 * @returns `value` itself (same reference) when it is an array, otherwise a
 *   new empty array. The element type `T` is asserted by the caller and is
 *   not validated at runtime.
 */
function asArray<T>(value: unknown): T[] {
  return Array.isArray(value) ? (value as T[]) : []
}
|
|
@@ -309,7 +336,12 @@ function modelInfoFromModelRow(row: Row): ModelInfo {
|
|
| 309 |
function resultFromCell(row: Row): EvaluationResult {
|
| 310 |
const scoreDetails = scoreDetailsFromRow(row)
|
| 311 |
const generationConfig = row.generation_config as GenerationConfig | undefined
|
| 312 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 313 |
|
| 314 |
return {
|
| 315 |
evaluation_name: asString(row.metric_display_name ?? row.eval_evaluation_name ?? row.metric_id, "Score"),
|
|
@@ -471,7 +503,7 @@ export async function getEvalListData(): Promise<{
|
|
| 471 |
totalModels: number
|
| 472 |
}> {
|
| 473 |
const [evals, countRows] = await Promise.all([
|
| 474 |
-
readRows<BenchmarkEvalListItem>(
|
| 475 |
`SELECT ${EVAL_LIST_COLUMNS}
|
| 476 |
FROM evals_view
|
| 477 |
ORDER BY evaluation_name ASC`
|
|
@@ -479,8 +511,15 @@ export async function getEvalListData(): Promise<{
|
|
| 479 |
readRows<{ n: number }>("SELECT COUNT(*) AS n FROM models_view"),
|
| 480 |
])
|
| 481 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 482 |
return {
|
| 483 |
-
evals,
|
| 484 |
totalModels: asNumber(countRows[0]?.n),
|
| 485 |
}
|
| 486 |
}
|
|
@@ -600,6 +639,9 @@ export async function getEvalSummaryById(evalId: string): Promise<BenchmarkEvalS
|
|
| 600 |
|
| 601 |
const summary = {
|
| 602 |
...evalRow,
|
|
|
|
|
|
|
|
|
|
| 603 |
model_results: cellRows.map(reshapeCellToModelResult),
|
| 604 |
} as BenchmarkEvalSummary
|
| 605 |
|
|
@@ -723,14 +765,14 @@ export async function getBenchmarkMetadataMap(): Promise<Record<string, Benchmar
|
|
| 723 |
`SELECT evaluation_id, evaluation_name,
|
| 724 |
family_id AS composite_benchmark_key,
|
| 725 |
benchmark_id,
|
| 726 |
-
benchmark_card
|
| 727 |
FROM evals_view
|
| 728 |
WHERE benchmark_card IS NOT NULL`
|
| 729 |
)
|
| 730 |
const result: Record<string, BenchmarkCard> = {}
|
| 731 |
|
| 732 |
for (const row of rows) {
|
| 733 |
-
const card = row.benchmark_card as BenchmarkCard | null | undefined
|
| 734 |
if (!card) continue
|
| 735 |
|
| 736 |
const keys = [
|
|
|
|
| 68 |
metric_config, models_count, evaluator_names, source_types,
|
| 69 |
latest_source_name, third_party_ratio,
|
| 70 |
missing_generation_config_count, best_model, worst_model,
|
| 71 |
+
avg_score, avg_score_norm, has_card, to_json(benchmark_card) AS benchmark_card,
|
| 72 |
is_aggregated, aggregate_sources, tags,
|
| 73 |
metrics_count, metric_names, instance_data, top_score,
|
| 74 |
subtasks_count, is_summary_score, summary_eval_ids,
|
|
|
|
| 77 |
source_data
|
| 78 |
`
|
| 79 |
|
| 80 |
+
// The deployed Space started returning 500s ("Invalid Error: don't
|
| 81 |
+
// know what type:") on every eval-results / model-summary query after
|
| 82 |
+
// a parquet snapshot bump that landed nested `JSON` fields inside
|
| 83 |
+
// structs (e.g. `evalcards_annotations.variant_divergence.differing_
|
| 84 |
+
// fields[].values JSON` and `benchmark_card.flagged_fields JSON`).
|
| 85 |
+
// The DuckDB Node binding crashes when it tries to materialise those
|
| 86 |
+
// nested-JSON structs row-by-row. Wrap the offending columns with
|
| 87 |
+
// `to_json(...)` so the binding sees a single VARCHAR per row; the
|
| 88 |
+
// reshape helpers JSON.parse the string back into the same shape
|
| 89 |
+
// downstream code expects.
|
| 90 |
const CELL_JOIN_COLUMNS = `
|
| 91 |
+
r.* REPLACE (to_json(r.evalcards_annotations) AS evalcards_annotations),
|
| 92 |
e.evaluation_name AS eval_evaluation_name,
|
| 93 |
e.canonical_display_name AS eval_canonical_display_name,
|
| 94 |
e.benchmark_id AS eval_benchmark_id,
|
|
|
|
| 104 |
e.category AS eval_category,
|
| 105 |
e.metric_config AS eval_metric_config,
|
| 106 |
e.source_data AS eval_source_data,
|
| 107 |
+
to_json(e.benchmark_card) AS eval_benchmark_card,
|
| 108 |
e.tags AS eval_tags,
|
| 109 |
e.is_summary_score AS eval_is_summary_score,
|
| 110 |
e.summary_eval_ids AS eval_summary_eval_ids
|
|
|
|
| 220 |
return typeof value === "string" && value.length > 0 ? value : undefined
|
| 221 |
}
|
| 222 |
|
| 223 |
+
/**
 * Undoes the `to_json(...)` wrap applied in the SQL SELECTs.
 *
 * Some parquet columns ship JSON-typed fields nested inside structs that the
 * DuckDB Node binding can't materialise (it crashes the entire query with
 * "don't know what type:"). For those columns the SELECT wraps the value in
 * `to_json(...)` so the binding sees a single VARCHAR; this helper parses
 * that string back into its original shape.
 *
 * @param value - Column value as returned by the binding.
 * @returns
 *   - `value` unchanged when it is not a string (e.g. it is already an
 *     object: legacy snapshots without the `to_json` wrap, or local dev where
 *     the binding handled the type);
 *   - `null` for the empty string or the literal string `"null"`;
 *   - the `JSON.parse` result for any other string that parses;
 *   - the original string when parsing fails (best effort, never throws).
 *   The result is `unknown`; callers must narrow or validate it themselves.
 */
function parseMaybeJson(value: unknown): unknown {
  if (typeof value !== "string") return value
  // An empty string is not valid JSON; treat it like an encoded null.
  if (value === "" || value === "null") return null
  try {
    return JSON.parse(value)
  } catch {
    // Not JSON after all: hand back the raw string rather than failing.
    return value
  }
}
|
| 239 |
+
|
| 240 |
/**
 * Coerces an arbitrary value to an array.
 *
 * @param value - Value of unknown shape.
 * @returns `value` itself (same reference) when it is an array, otherwise a
 *   new empty array. The element type `T` is asserted by the caller and is
 *   not validated at runtime.
 */
function asArray<T>(value: unknown): T[] {
  return Array.isArray(value) ? (value as T[]) : []
}
|
|
|
|
| 336 |
function resultFromCell(row: Row): EvaluationResult {
|
| 337 |
const scoreDetails = scoreDetailsFromRow(row)
|
| 338 |
const generationConfig = row.generation_config as GenerationConfig | undefined
|
| 339 |
+
// evalcards_annotations arrives JSON-encoded — the parquet schema
|
| 340 |
+
// nests a JSON-typed field which the DuckDB Node binding can't
|
| 341 |
+
// materialise directly, so view-data's SELECT wraps the whole
|
| 342 |
+
// column in to_json() and we parse it back on the JS side. See
|
| 343 |
+
// CELL_JOIN_COLUMNS for the wrapping site.
|
| 344 |
+
const annotations = parseMaybeJson(row.evalcards_annotations)
|
| 345 |
|
| 346 |
return {
|
| 347 |
evaluation_name: asString(row.metric_display_name ?? row.eval_evaluation_name ?? row.metric_id, "Score"),
|
|
|
|
| 503 |
totalModels: number
|
| 504 |
}> {
|
| 505 |
const [evals, countRows] = await Promise.all([
|
| 506 |
+
readRows<BenchmarkEvalListItem & { benchmark_card?: unknown }>(
|
| 507 |
`SELECT ${EVAL_LIST_COLUMNS}
|
| 508 |
FROM evals_view
|
| 509 |
ORDER BY evaluation_name ASC`
|
|
|
|
| 511 |
readRows<{ n: number }>("SELECT COUNT(*) AS n FROM models_view"),
|
| 512 |
])
|
| 513 |
|
| 514 |
+
// benchmark_card is JSON-encoded at the SQL layer; parse before
|
| 515 |
+
// handing it to consumers that expect the object shape.
|
| 516 |
+
const decoded = evals.map((row) => ({
|
| 517 |
+
...row,
|
| 518 |
+
benchmark_card: parseMaybeJson(row.benchmark_card),
|
| 519 |
+
})) as BenchmarkEvalListItem[]
|
| 520 |
+
|
| 521 |
return {
|
| 522 |
+
evals: decoded,
|
| 523 |
totalModels: asNumber(countRows[0]?.n),
|
| 524 |
}
|
| 525 |
}
|
|
|
|
| 639 |
|
| 640 |
const summary = {
|
| 641 |
...evalRow,
|
| 642 |
+
// benchmark_card arrives JSON-encoded (the parquet schema nests a
|
| 643 |
+
// JSON-typed field — see CELL_JOIN_COLUMNS / EVAL_LIST_COLUMNS).
|
| 644 |
+
benchmark_card: parseMaybeJson(evalRow.benchmark_card),
|
| 645 |
model_results: cellRows.map(reshapeCellToModelResult),
|
| 646 |
} as BenchmarkEvalSummary
|
| 647 |
|
|
|
|
| 765 |
`SELECT evaluation_id, evaluation_name,
|
| 766 |
family_id AS composite_benchmark_key,
|
| 767 |
benchmark_id,
|
| 768 |
+
to_json(benchmark_card) AS benchmark_card
|
| 769 |
FROM evals_view
|
| 770 |
WHERE benchmark_card IS NOT NULL`
|
| 771 |
)
|
| 772 |
const result: Record<string, BenchmarkCard> = {}
|
| 773 |
|
| 774 |
for (const row of rows) {
|
| 775 |
+
const card = parseMaybeJson(row.benchmark_card) as BenchmarkCard | null | undefined
|
| 776 |
if (!card) continue
|
| 777 |
|
| 778 |
const keys = [
|